From beddfc0f7ff1ff05f9f64cdbe534bd59e76dd80f Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sat, 18 Mar 2023 22:58:53 +0800 Subject: [PATCH] Add GPTNeoX convert script --- converter/huggingface_gptneox_convert.py | 334 ++++++++++++++++++ .../fastertransformer/1/1-gpu/config.ini | 12 + .../1/1-gpu/model.final_layernorm.bias.bin | 3 + .../1/1-gpu/model.final_layernorm.weight.bin | 3 + .../model.layers.0.attention.dense.bias.bin | 3 + ...odel.layers.0.attention.dense.weight.0.bin | 3 + ...ers.0.attention.query_key_value.bias.0.bin | 3 + ...s.0.attention.query_key_value.weight.0.bin | 3 + .../model.layers.0.input_layernorm.bias.bin | 3 + .../model.layers.0.input_layernorm.weight.bin | 3 + .../model.layers.0.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.0.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.0.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.0.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.0.post_attention_layernorm.bias.bin | 3 + ...yers.0.post_attention_layernorm.weight.bin | 3 + .../model.layers.1.attention.dense.bias.bin | 3 + ...odel.layers.1.attention.dense.weight.0.bin | 3 + ...ers.1.attention.query_key_value.bias.0.bin | 3 + ...s.1.attention.query_key_value.weight.0.bin | 3 + .../model.layers.1.input_layernorm.bias.bin | 3 + .../model.layers.1.input_layernorm.weight.bin | 3 + .../model.layers.1.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.1.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.1.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.1.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.1.post_attention_layernorm.bias.bin | 3 + ...yers.1.post_attention_layernorm.weight.bin | 3 + .../model.layers.2.attention.dense.bias.bin | 3 + ...odel.layers.2.attention.dense.weight.0.bin | 3 + ...ers.2.attention.query_key_value.bias.0.bin | 3 + ...s.2.attention.query_key_value.weight.0.bin | 3 + .../model.layers.2.input_layernorm.bias.bin | 3 + .../model.layers.2.input_layernorm.weight.bin | 3 + .../model.layers.2.mlp.dense_4h_to_h.bias.bin | 3 + 
...el.layers.2.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.2.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.2.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.2.post_attention_layernorm.bias.bin | 3 + ...yers.2.post_attention_layernorm.weight.bin | 3 + .../model.layers.3.attention.dense.bias.bin | 3 + ...odel.layers.3.attention.dense.weight.0.bin | 3 + ...ers.3.attention.query_key_value.bias.0.bin | 3 + ...s.3.attention.query_key_value.weight.0.bin | 3 + .../model.layers.3.input_layernorm.bias.bin | 3 + .../model.layers.3.input_layernorm.weight.bin | 3 + .../model.layers.3.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.3.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.3.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.3.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.3.post_attention_layernorm.bias.bin | 3 + ...yers.3.post_attention_layernorm.weight.bin | 3 + .../model.layers.4.attention.dense.bias.bin | 3 + ...odel.layers.4.attention.dense.weight.0.bin | 3 + ...ers.4.attention.query_key_value.bias.0.bin | 3 + ...s.4.attention.query_key_value.weight.0.bin | 3 + .../model.layers.4.input_layernorm.bias.bin | 3 + .../model.layers.4.input_layernorm.weight.bin | 3 + .../model.layers.4.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.4.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.4.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.4.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.4.post_attention_layernorm.bias.bin | 3 + ...yers.4.post_attention_layernorm.weight.bin | 3 + .../1/1-gpu/model.lm_head.weight.bin | 3 + .../fastertransformer/1/1-gpu/model.wte.bin | 3 + 66 files changed, 538 insertions(+) create mode 100644 converter/huggingface_gptneox_convert.py create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin create mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin create mode 
100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin create mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin create 
mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin create mode 100644 
"""Convert a Hugging Face GPT-NeoX checkpoint into FasterTransformer weights.

The script loads a ``GPTNeoXForCausalLM`` checkpoint, writes a ``config.ini``
describing the model and dumps every parameter as a raw binary file under
``<saved_dir>/<infer_gpu_num>-gpu/``. Tensor-parallel weights are split into
one file per inference rank.
"""
import argparse
import configparser
import multiprocessing
import os
import sys
from pathlib import Path

import numpy as np
import torch

# NOTE: ``transformers`` is imported inside ``split_and_convert``. With the
# "spawn" start method every worker re-imports this module, and the workers
# only need numpy, so the heavy import is kept off the module top level.

# Command-line dtype names mapped to the numpy dtype used when writing weights.
_WEIGHT_DTYPES = {"fp32": np.float32, "fp16": np.float16}

# Parameters that are identical on every rank; only rank 0 writes them.
_SHARED_KEYS = (
    "input_layernorm.weight",
    "input_layernorm.bias",
    "attention.dense.bias",
    "post_attention_layernorm.weight",
    "post_attention_layernorm.bias",
    "mlp.dense_4h_to_h.bias",
    "final_layernorm.weight",
    "final_layernorm.bias",
)
# Weights split along the first axis of the (already transposed) array.
_ROW_SPLIT_KEYS = ("attention.dense.weight", "mlp.dense_4h_to_h.weight")
# Weights/biases split along the last axis.
_COLUMN_SPLIT_KEYS = ("mlp.dense_h_to_4h.weight", "mlp.dense_h_to_4h.bias")

# Per-layer parameter name fragments handled by ``split_and_convert_process``.
_FT_LAYER_PATTERNS = (
    "input_layernorm.bias",
    "input_layernorm.weight",
    "attention.query_key_value.bias",
    "attention.query_key_value.weight",
    "attention.dense.bias",
    "attention.dense.weight",
    "post_attention_layernorm.bias",
    "post_attention_layernorm.weight",
    "mlp.dense_h_to_4h.bias",
    "mlp.dense_h_to_4h.weight",
    "mlp.dense_4h_to_h.bias",
    "mlp.dense_4h_to_h.weight",
)

# Non-layer parameters and the file each one is written to unchanged.
_TOP_LEVEL_FILES = {
    "gpt_neox.embed_in.weight": "model.wte.bin",
    "gpt_neox.final_layer_norm.bias": "model.final_layernorm.bias.bin",
    "gpt_neox.final_layer_norm.weight": "model.final_layernorm.weight.bin",
    "embed_out.weight": "model.lm_head.weight.bin",
}


def get_weight_data_type(data_type):
    """Return the numpy dtype for the command-line name ``fp32`` or ``fp16``.

    Raises:
        AssertionError: if ``data_type`` is not a supported name. The error is
            raised explicitly so it still fires when Python runs with ``-O``.
    """
    try:
        return _WEIGHT_DTYPES[data_type]
    except KeyError:
        raise AssertionError(f"Invalid weight data type {data_type}") from None


def prefix_prompt_convert(args, config, weight_data_type):
    """Convert prefix-prompt weight files and return ``[(task_name, length)]``.

    Args:
        args: namespace providing ``saved_dir``, ``infer_gpu_num`` and the
            comma-separated ``prompt_in_file_list``.
        config: model config mapping; ``num_hidden_layers``,
            ``num_attention_heads`` and ``hidden_size`` are read.
        weight_data_type: numpy dtype the shards are written as.

    Each input file is loaded with ``torch.load`` and split along the head
    axis into one ``model.prefix_prompt.<task>.weight.<rank>.bin`` per rank.
    The task name is the third-from-last dot-separated part of the file name.
    """
    saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num

    n_layers = config["num_hidden_layers"]
    # The rest of this script reads the head count from "num_attention_heads".
    n_head = config["num_attention_heads"]
    size_per_head = config["hidden_size"] // n_head
    local_head_num = n_head // args.infer_gpu_num

    task_list = []
    for prompt_in_file in args.prompt_in_file_list.split(","):
        # NOTE(review): torch.load unpickles its input; only pass trusted files.
        weights = torch.load(prompt_in_file)
        task_name = Path(prompt_in_file).name.split(".")[-3]

        prefix_prompt_len = weights.nelement() // (
            2 * n_layers * n_head * size_per_head
        )
        task_list.append((task_name, prefix_prompt_len))

        # prefix_seq_len, num_layers, 2, num_heads, size_per_head
        weights = weights.view(prefix_prompt_len, n_layers, 2, n_head, size_per_head)
        # num_layers, 2, num_heads, prefix_seq_len, size_per_head
        weights = weights.permute(1, 2, 3, 0, 4)

        shards = torch.split(weights, local_head_num, dim=2)
        for rank in range(args.infer_gpu_num):
            output_file_path = os.path.join(
                saved_dir, f"model.prefix_prompt.{task_name}.weight.{rank}.bin"
            )
            shards[rank].detach().cpu().numpy().astype(weight_data_type).tofile(
                output_file_path
            )

    return task_list


def _write_shards(saved_dir, key, shards, first_rank):
    """Write each shard to ``model.<key>.<rank>.bin`` starting at ``first_rank``."""
    for offset, shard in enumerate(shards):
        shard.tofile(f"{saved_dir}/model.{key}.{first_rank + offset}.bin")


def split_and_convert_process(i, saved_dir, factor, key, args, config, val):
    """Write one parameter to disk, split into ``factor`` shards if needed.

    Args:
        i: index of the source rank; output ranks start at ``i * factor``.
        saved_dir: directory the ``model.<key>...bin`` files are written to.
        factor: number of shards to cut ``val`` into.
        key: parameter name with the ``gpt_neox.`` prefix already removed.
        args: parsed command-line arguments (not used here).
        config: model config mapping; ``num_attention_heads`` is read for the
            fused query/key/value tensors.
        val: numpy array holding the parameter; the caller passes weight
            matrices transposed.

    Keys that match no known pattern only produce an error message.
    """
    first_rank = i * factor

    if any(pattern in key for pattern in _SHARED_KEYS):
        # Shared weights, only need to convert the weights of rank 0.
        if i == 0:
            val.tofile(f"{saved_dir}/model.{key}.bin")

    elif any(pattern in key for pattern in _ROW_SPLIT_KEYS):
        _write_shards(saved_dir, key, np.split(val, factor, axis=0), first_rank)

    elif any(pattern in key for pattern in _COLUMN_SPLIT_KEYS):
        _write_shards(saved_dir, key, np.split(val, factor, axis=-1), first_rank)

    elif "attention.query_key_value.bias" in key:
        local_dim = val.shape[-1] // 3
        n_head = config["num_attention_heads"]

        # Regroup from [num_heads, 3, head_hidden] to [3, num_heads * head_hidden].
        val = val.reshape(n_head, 3, local_dim // n_head)
        val = np.transpose(val, [1, 0, 2]).reshape(3, local_dim)
        _write_shards(saved_dir, key, np.split(val, factor, axis=-1), first_rank)

    elif "attention.query_key_value.weight" in key:
        hidden_dim = val.shape[0]
        local_dim = val.shape[-1] // 3
        n_head = config["num_attention_heads"]

        # Note that the HF qkv weight are stored as
        # [hidden_size, num_heads, 3, head_hidden];
        # FT needs the shape of [hidden_size, 3, num_heads, head_hidden].
        val = val.reshape(hidden_dim, n_head, 3, local_dim // n_head)
        val = np.transpose(val, [0, 2, 1, 3]).reshape(hidden_dim, 3, local_dim)
        _write_shards(saved_dir, key, np.split(val, factor, axis=-1), first_rank)

    else:
        print("[ERROR] cannot find key '{}'".format(key))


def _write_config(saved_dir, args, hf_config, task_list):
    """Write ``config.ini`` for the converted model into ``saved_dir``."""
    n_embd = hf_config["hidden_size"]
    n_head = hf_config["num_attention_heads"]
    size_per_head = n_embd // n_head

    config = configparser.ConfigParser()
    config["gptneox"] = {
        "model_name": args.model_name,
        "head_num": str(n_head),
        "size_per_head": str(size_per_head),
        # Prefer the checkpoint's own MLP width; 4 * hidden is only a fallback
        # because the exported dense_h_to_4h tensors follow the real width.
        "inter_size": str(hf_config.get("intermediate_size", n_embd * 4)),
        "num_layer": str(hf_config["num_hidden_layers"]),
        "rotary_embedding": str(hf_config.get("rotary_dim", size_per_head)),
        "vocab_size": str(hf_config["vocab_size"]),
        "start_id": str(hf_config["bos_token_id"]),
        "end_id": str(hf_config["eos_token_id"]),
        "use_gptj_residual": str(int(hf_config["gpt_j_residual"])),
        "weight_data_type": args.weight_data_type,
    }

    if task_list:
        config["gptneox"]["num_tasks"] = str(len(task_list))
        config["gptneox"]["prompt_learning_type"] = str(2)
        for idx, (task_name, prompt_length) in enumerate(task_list):
            config[f"task_{idx}"] = {
                "task_name": task_name,
                "prompt_length": str(prompt_length),
            }

    with open(Path(saved_dir) / "config.ini", "w") as configfile:
        config.write(configfile)


def split_and_convert(args):
    """Convert the checkpoint at ``args.in_file`` into FasterTransformer files.

    Output goes to ``<args.saved_dir>/<args.infer_gpu_num>-gpu/``. The
    function writes ``config.ini`` (best effort: a failure is printed and
    conversion continues), converts optional prefix-prompt files, and dumps
    every model parameter, splitting tensor-parallel weights into
    ``infer_gpu_num / trained_gpu_num`` shards.

    Raises:
        AssertionError: if ``infer_gpu_num`` is not a multiple of
            ``trained_gpu_num`` or the weight data type is invalid.
        Exception: if the model has a weight/bias parameter that matches no
            known pattern.
    """
    from transformers import GPTNeoXForCausalLM  # 4.21.1

    saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num
    os.makedirs(saved_dir, exist_ok=True)

    t_gpu_num = args.trained_gpu_num
    i_gpu_num = args.infer_gpu_num
    assert i_gpu_num % t_gpu_num == 0, (
        "infer_gpu_num must be a multiple of trained_gpu_num"
    )
    factor = i_gpu_num // t_gpu_num

    model = GPTNeoXForCausalLM.from_pretrained(args.in_file)
    # Copy so the default added below does not leak into the model's config.
    hf_config = dict(vars(model.config))
    # NOTE(review): HF GPT-NeoX configs expose `use_parallel_residual`; confirm
    # whether that flag should drive `use_gptj_residual` instead of this key.
    hf_config.setdefault("gpt_j_residual", 0)

    np_weight_data_type = get_weight_data_type(args.weight_data_type)

    task_list = []
    if args.prompt_in_file_list is not None:
        task_list = prefix_prompt_convert(args, hf_config, np_weight_data_type)

    try:
        _write_config(saved_dir, args, hf_config, task_list)
    except Exception as e:
        # Best effort by design: the weights are still worth converting.
        print("Fail to save the config in config.ini.", e)

    torch.multiprocessing.set_start_method("spawn")
    pool = multiprocessing.Pool(args.processes)
    try:
        for name, param in model.named_parameters():
            if "weight" not in name and "bias" not in name:
                continue

            array = param.detach().cpu().numpy().astype(np_weight_data_type)
            if name in _TOP_LEVEL_FILES:
                array.tofile(saved_dir + _TOP_LEVEL_FILES[name])
            elif any(pattern in name for pattern in _FT_LAYER_PATTERNS):
                pool.starmap(
                    split_and_convert_process,
                    [
                        (
                            0,
                            saved_dir,
                            factor,
                            name.replace("gpt_neox.", ""),
                            args,
                            hf_config,
                            array.T,
                        )
                    ],
                )
            else:
                raise Exception("Unused layer", name)
    finally:
        # Always release the workers, including when a layer is rejected.
        pool.close()
        pool.join()

    # Post-process biases if use_gptj_residual is True.
    if hf_config["gpt_j_residual"]:
        for layer_idx in range(hf_config["num_hidden_layers"]):
            # Read with the dtype the files were written in above.
            attn_bias = np.fromfile(
                saved_dir + f"/model.layers.{layer_idx}.attention.dense.bias.bin",
                dtype=np_weight_data_type,
            )
            mlp_bias = np.fromfile(
                saved_dir + f"/model.layers.{layer_idx}.mlp.dense_4h_to_h.bias.bin",
                dtype=np_weight_data_type,
            )

            (attn_bias + mlp_bias).tofile(
                saved_dir + f"/model.layers.{layer_idx}.mlp.attention.bias.sum.bin"
            )


def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        "-saved_dir", "-o", type=str, help="output directory", required=True
    )
    parser.add_argument(
        "-in_file",
        "-i",
        type=str,
        help="file name of input checkpoint file",
        required=True,
    )
    parser.add_argument(
        "-prompt_in_file_list",
        "-p_i_list",
        type=str,
        help="list of the prompt weight file path,"
        "separate by (,). e.g. -prompt_in_file_list prefix_prompt.task0.weight,prefix_prompt.task1.weight",
    )
    parser.add_argument(
        "-trained_gpu_num",
        "-t_g",
        type=int,
        help="How many gpus for training",
        default=1,
    )
    parser.add_argument(
        "-infer_gpu_num",
        "-i_g",
        type=int,
        help="How many gpus for inference",
        required=True,
    )
    parser.add_argument(
        "-processes",
        "-p",
        type=int,
        help="How many processes to spawn for conversion (default: 4)",
        default=4,
    )
    parser.add_argument(
        "-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"]
    )
    parser.add_argument(
        "-model_name", "-m_n", type=str, help="model name", required=True
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = _parse_args()
    print("\n=============== Argument ===============")
    for key in vars(args):
        print("{}: {}".format(key, vars(args)[key]))
    print("========================================")

    split_and_convert(args)
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin new file mode 100644 index 0000000..8b8e259 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ec999d1d55bc4af21e7ee8101f7540ff53f73725fc332f175bac14fda1b83a +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..c2b21e0 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0af572105f74f7711069438049a1b539af19b43e4d341fd314b5c67792ce28 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..ea3d668 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aca4617a559ee69fe7c96a62087f3b18700da03d5ad974ab8c58c01d32a5a65e +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..2710ebb --- /dev/null 
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c7eae417acdf83e12125252829446e277269e3aeff2543148576a6b267934d3 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin new file mode 100644 index 0000000..49949fd --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e123291a9e860163e8e6acd45f4e46ab7f65a3da84767d9c45541ff2e61a27 +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..5bb173f --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa877cf3a9c6f51414a3773bf2036af613f999dff1d08966c84f5c0164be0bb +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin new 
file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..d7f4715 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0568057def9087e03b35fdcc8ed89ad88bcf672b9a8d3562d816e95b4de8b10f +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..8f56f64 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d439b2f0ce9e61bdc56aaea018bac52ba20f5eb6a9af39f11b920afa98a74d +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin new file mode 100644 index 0000000..8b08e52 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439187a6e4a716b263062a0393e015944688d5ade8becc855b18b53799a1b9f4 +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..9eb12ab --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff118539b509bc4d35473c1c4d2ecee86276a1b56f8b1c128fb343fbe7126b29 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..6f73bec --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373d0d807fe8142dc107c53ba616cc7735e391ccc99947143e9490abf56ab807 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..4a08f14 --- /dev/null 
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0954135db648aa7945baaa5f861b1b8012188dd199b9b8bc7c8343757ded04fc +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin new file mode 100644 index 0000000..e7a56fe --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8318d93f17e38918736e07b5b70f5148d4b28f8096190902477118700b0a762e +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..8cd8427 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5b8deb13e270dea2c77ea5a9f3d65d375bb008d8820128bcfec3c6efb4a454 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin new 
file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..82cc84e --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d1a725384598dc3cf2889e659a0b136abd98def073ec1c632f36db3803a987a2 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..d7cf885 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dccd82f3a3e32ea7619e48b972c77ff9def1b3fee58a2df924092a707e30152 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin new file mode 100644 index 0000000..73078db --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e013e9d9ddfc3f604562e7136e02ef97bcc40ecd42f94a236945b6e05e014a +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..3798642 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202256fec152abace918bd2da29d3eb9a9213920622756bee56f11162903f043 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..95febcf --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1715bf310c65c00750acacf5247186e3422f4c85fde6f056ba21b380a8097b80 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..7cc16e7 --- /dev/null 
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482a3307809bfb1eb7a34fc3780ed76e2dc4ba51536eef4d9d616d846db729e6 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin new file mode 100644 index 0000000..0680a59 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2d2ae19f874eb46f4c94c8c58930a9df564bdb9205aedcc47f0daadc14ae9a5 +size 131072 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin new file mode 100644 index 0000000..8398d4c --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a808360ffa74213dcbfac40776866c50b1bc18fc3d6993e856ae2ffa92e0d0 +size 131072