From 65743ba1b98eff7837397e4b7386e3d0aeb80ed7 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sat, 18 Mar 2023 22:19:07 +0800 Subject: [PATCH 1/3] Remove useless --- converter/huggingface_gptj_convert.py | 253 ------------------ converter/testdata/1-gpu/config.ini | 86 ------ .../1-gpu/model.final_layernorm.bias.bin | 3 - .../1-gpu/model.final_layernorm.weight.bin | 3 - ...odel.layers.0.attention.dense.weight.0.bin | 3 - ...s.0.attention.query_key_value.weight.0.bin | 3 - .../model.layers.0.input_layernorm.bias.bin | 3 - .../model.layers.0.input_layernorm.weight.bin | 3 - .../model.layers.0.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.0.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.0.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.0.mlp.dense_h_to_4h.weight.0.bin | 3 - ...odel.layers.1.attention.dense.weight.0.bin | 3 - ...s.1.attention.query_key_value.weight.0.bin | 3 - .../model.layers.1.input_layernorm.bias.bin | 3 - .../model.layers.1.input_layernorm.weight.bin | 3 - .../model.layers.1.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.1.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.1.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.1.mlp.dense_h_to_4h.weight.0.bin | 3 - ...odel.layers.2.attention.dense.weight.0.bin | 3 - ...s.2.attention.query_key_value.weight.0.bin | 3 - .../model.layers.2.input_layernorm.bias.bin | 3 - .../model.layers.2.input_layernorm.weight.bin | 3 - .../model.layers.2.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.2.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.2.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.2.mlp.dense_h_to_4h.weight.0.bin | 3 - ...odel.layers.3.attention.dense.weight.0.bin | 3 - ...s.3.attention.query_key_value.weight.0.bin | 3 - .../model.layers.3.input_layernorm.bias.bin | 3 - .../model.layers.3.input_layernorm.weight.bin | 3 - .../model.layers.3.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.3.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.3.mlp.dense_h_to_4h.bias.0.bin | 3 - 
...el.layers.3.mlp.dense_h_to_4h.weight.0.bin | 3 - ...odel.layers.4.attention.dense.weight.0.bin | 3 - ...s.4.attention.query_key_value.weight.0.bin | 3 - .../model.layers.4.input_layernorm.bias.bin | 3 - .../model.layers.4.input_layernorm.weight.bin | 3 - .../model.layers.4.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.4.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.4.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.4.mlp.dense_h_to_4h.weight.0.bin | 3 - .../testdata/1-gpu/model.lm_head.bias.bin | 3 - .../testdata/1-gpu/model.lm_head.weight.bin | 3 - converter/testdata/1-gpu/model.wte.bin | 3 - 47 files changed, 474 deletions(-) delete mode 100644 converter/huggingface_gptj_convert.py delete mode 100644 converter/testdata/1-gpu/config.ini delete mode 100644 converter/testdata/1-gpu/model.final_layernorm.bias.bin delete mode 100644 converter/testdata/1-gpu/model.final_layernorm.weight.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.attention.dense.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.input_layernorm.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.input_layernorm.weight.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.1.attention.dense.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.1.input_layernorm.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.1.input_layernorm.weight.bin delete mode 100644 
converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.attention.dense.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.input_layernorm.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.input_layernorm.weight.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.attention.dense.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.input_layernorm.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.input_layernorm.weight.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.4.attention.dense.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin delete mode 100644 
converter/testdata/1-gpu/model.layers.4.input_layernorm.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.4.input_layernorm.weight.bin delete mode 100644 converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/1-gpu/model.lm_head.bias.bin delete mode 100644 converter/testdata/1-gpu/model.lm_head.weight.bin delete mode 100644 converter/testdata/1-gpu/model.wte.bin diff --git a/converter/huggingface_gptj_convert.py b/converter/huggingface_gptj_convert.py deleted file mode 100644 index c435819..0000000 --- a/converter/huggingface_gptj_convert.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. -# Modified by Brendan Dolan-Gavitt, 2022 -# Modified by Meng Zhang, 2023 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import configparser -import multiprocessing -import os -import sys -from pathlib import Path - -import numpy as np -import torch -from transformers import GPTJForCausalLM - -dir_path = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(dir_path + "/../../../..") -sys.path.append(dir_path) - - -def get_weight_data_type(data_type): - if data_type == "fp32": - return np.float32 - elif data_type == "fp16": - return np.float16 - else: - assert False, f"Invalid weight data type {data_type}" - - -def split_and_convert_process(i, saved_dir, factor, key, val): - if ( - key.find("input_layernorm.weight") != -1 - or key.find("input_layernorm.bias") != -1 - or key.find("attention.dense.bias") != -1 - or key.find("post_attention_layernorm.weight") != -1 - or key.find("post_attention_layernorm.bias") != -1 - or key.find("mlp.dense_4h_to_h.bias") != -1 - or key.find("final_layernorm.weight") != -1 - or key.find("final_layernorm.bias") != -1 - ): - - # shared weights, only need to convert the weights of rank 0 - if i == 0: - saved_path = saved_dir + "/model." + key + ".bin" - val.tofile(saved_path) - - elif ( - key.find("attention.dense.weight") != -1 - or key.find("mlp.dense_4h_to_h.weight") != -1 - ): - split_vals = np.split(val, factor, axis=0) - for j in range(factor): - saved_path = saved_dir + "/model." + key + ".%d.bin" % (i * factor + j) - split_vals[j].tofile(saved_path) - - elif ( - key.find("mlp.dense_h_to_4h.weight") != -1 - or key.find("mlp.dense_h_to_4h.bias") != -1 - ): - - split_vals = np.split(val, factor, axis=-1) - for j in range(factor): - saved_path = saved_dir + "/model." + key + ".%d.bin" % (i * factor + j) - split_vals[j].tofile(saved_path) - - elif key.find("attention.query_key_value.weight") != -1: - split_vals = np.split(val, factor, axis=-1) - - for j in range(factor): - saved_path = saved_dir + "/model." 
+ key + ".%d.bin" % (i * factor + j) - split_vals[j].tofile(saved_path) - - else: - print("[ERROR] cannot find key '{}'".format(key)) - - -def split_and_convert(args): - saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num - - if os.path.exists(saved_dir) is False: - os.makedirs(saved_dir) - - t_gpu_num = args.trained_gpu_num - i_gpu_num = args.infer_gpu_num - assert i_gpu_num % t_gpu_num == 0 - - factor = (int)(i_gpu_num / t_gpu_num) - - model = GPTJForCausalLM.from_pretrained(args.in_file) - - try: - config = configparser.ConfigParser() - config["gpt"] = {} - for key in vars(args): - config["gpt"][key] = f"{vars(args)[key]}" - for k, v in vars(model.config).items(): - config["gpt"][k] = f"{v}" - config["gpt"]["weight_data_type"] = args.weight_data_type - with open((Path(saved_dir) / "config.ini").as_posix(), "w") as configfile: - config.write(configfile) - except Exception: - print("Fail to save the config in config.ini.") - np_weight_data_type = get_weight_data_type(args.weight_data_type) - - huggingface_model_name_pattern = [ - "ln_1.bias", - "ln_1.weight", - "attn.q_proj.weight", - "attn.out_proj.weight", - "mlp.fc_in.bias", - "mlp.fc_in.weight", - "mlp.fc_out.bias", - "mlp.fc_out.weight", - ] - - ft_model_name_pattern = [ - "input_layernorm.bias", - "input_layernorm.weight", - "attention.query_key_value.weight", - "attention.dense.weight", - "mlp.dense_h_to_4h.bias", - "mlp.dense_h_to_4h.weight", - "mlp.dense_4h_to_h.bias", - "mlp.dense_4h_to_h.weight", - ] - - torch.multiprocessing.set_start_method("spawn") - pool = multiprocessing.Pool(args.processes) - for name, param in model.named_parameters(): - if name.find("weight") == -1 and name.find("bias") == -1: - continue - print(name) - if name == "transformer.wte.weight": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + "model.wte.bin" - ) - elif name == "transformer.ln_f.bias": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + 
"model.final_layernorm.bias.bin" - ) - elif name == "transformer.ln_f.weight": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + "model.final_layernorm.weight.bin" - ) - elif name == "lm_head.weight": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + "model.lm_head.weight.bin" - ) - elif name == "lm_head.bias": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + "model.lm_head.bias.bin" - ) - else: - for i in range(len(huggingface_model_name_pattern)): - if name.find(huggingface_model_name_pattern[i]) != -1: - # Special case for QKV weights - if name.find("attn.q_proj.weight") != -1: - layer = name.split(".")[2] - base_k = f"transformer.h.{layer}." - w = model.state_dict() - QKV_w = torch.stack( - [ - w[base_k + "attn.q_proj.weight"], - w[base_k + "attn.k_proj.weight"], - w[base_k + "attn.v_proj.weight"], - ] - ) # [qkv, n_heads * dim_head, latent_space] - QKV_w = QKV_w.permute(2, 0, 1) - weights = ( - QKV_w.detach().cpu().numpy().astype(np_weight_data_type) - ) - else: - weights = ( - param.detach().cpu().numpy().astype(np_weight_data_type) - ) - - # Some weights need to be transposed - if ( - name.find("mlp.fc_in.weight") != -1 - or name.find("mlp.fc_out.weight") != -1 - or name.find("attn.out_proj.weight") != -1 - ): - weights = weights.T - - new_name = name.replace("transformer.h.", "layers.").replace( - huggingface_model_name_pattern[i], ft_model_name_pattern[i] - ) - - pool.starmap( - split_and_convert_process, - [(0, saved_dir, factor, new_name, weights)], - ) - - pool.close() - pool.join() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument( - "-saved_dir", "-o", type=str, help="file name of output file", required=True - ) - parser.add_argument( - "-in_file", "-i", type=str, help="HF model name or directory", required=True - ) - parser.add_argument( - "-trained_gpu_num", - "-t_g", - 
type=int, - help="How many gpus for training", - default=1, - ) - parser.add_argument( - "-infer_gpu_num", - "-i_g", - type=int, - help="How many gpus for inference", - required=True, - ) - parser.add_argument( - "-processes", - "-p", - type=int, - help="How many processes to spawn for conversion (default: 4)", - default=4, - ) - parser.add_argument( - "-weight_data_type", - type=str, - default="fp32", - choices=["fp32", "fp16"], - help="output weight data type", - ) - - args = parser.parse_args() - print("\n=============== Argument ===============") - for key in vars(args): - print("{}: {}".format(key, vars(args)[key])) - print("========================================") - - split_and_convert(args) diff --git a/converter/testdata/1-gpu/config.ini b/converter/testdata/1-gpu/config.ini deleted file mode 100644 index 975a385..0000000 --- a/converter/testdata/1-gpu/config.ini +++ /dev/null @@ -1,86 +0,0 @@ -[gpt] -saved_dir = out -in_file = hf-internal-testing/tiny-random-gptj -trained_gpu_num = 1 -infer_gpu_num = 1 -processes = 4 -weight_data_type = fp32 -vocab_size = 1000 -n_positions = 512 -n_embd = 32 -n_layer = 5 -n_head = 4 -n_inner = None -rotary_dim = 4 -activation_function = gelu_new -resid_pdrop = 0.0 -embd_pdrop = 0.0 -attn_pdrop = 0.0 -layer_norm_epsilon = 1e-05 -initializer_range = 0.02 -use_cache = True -bos_token_id = 98 -eos_token_id = 98 -return_dict = True -output_hidden_states = False -output_attentions = False -torchscript = False -torch_dtype = None -use_bfloat16 = False -tf_legacy_loss = False -pruned_heads = {} -tie_word_embeddings = False -is_encoder_decoder = False -is_decoder = False -cross_attention_hidden_size = None -add_cross_attention = False -tie_encoder_decoder = False -max_length = 20 -min_length = 0 -do_sample = False -early_stopping = False -num_beams = 1 -num_beam_groups = 1 -diversity_penalty = 0.0 -temperature = 1.0 -top_k = 50 -top_p = 1.0 -typical_p = 1.0 -repetition_penalty = 1.0 -length_penalty = 1.0 -no_repeat_ngram_size = 0 
-encoder_no_repeat_ngram_size = 0 -bad_words_ids = None -num_return_sequences = 1 -chunk_size_feed_forward = 0 -output_scores = False -return_dict_in_generate = False -forced_bos_token_id = None -forced_eos_token_id = None -remove_invalid_values = False -exponential_decay_length_penalty = None -suppress_tokens = None -begin_suppress_tokens = None -architectures = None -finetuning_task = None -id2label = {0: 'LABEL_0', 1: 'LABEL_1'} -label2id = {'LABEL_0': 0, 'LABEL_1': 1} -tokenizer_class = None -prefix = None -pad_token_id = 98 -sep_token_id = None -decoder_start_token_id = None -task_specific_params = None -problem_type = None -_name_or_path = hf-internal-testing/tiny-random-gptj -_commit_hash = b96595a4bcdeb272096214589efa0314259853a0 -transformers_version = 4.11.0.dev0 -attention_probs_dropout_prob = 0.0 -gradient_checkpointing = False -hidden_act = gelu -hidden_dropout_prob = 0.0 -intermediate_size = 37 -model_type = gptj -n_ctx = 512 -scale_attn_weights = True -type_vocab_size = 16 diff --git a/converter/testdata/1-gpu/model.final_layernorm.bias.bin b/converter/testdata/1-gpu/model.final_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.final_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.final_layernorm.weight.bin b/converter/testdata/1-gpu/model.final_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/1-gpu/model.final_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/1-gpu/model.layers.0.attention.dense.weight.0.bin b/converter/testdata/1-gpu/model.layers.0.attention.dense.weight.0.bin deleted file mode 100644 index 
48a4419..0000000 --- a/converter/testdata/1-gpu/model.layers.0.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:125bd07dc5e7a5c6444f7689ba78df0f4c7959b6dcfbaf7c89edd0634a147ea0 -size 4096 diff --git a/converter/testdata/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin b/converter/testdata/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin deleted file mode 100644 index 33c2eca..0000000 --- a/converter/testdata/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11fd52f2b94fad5fe54e2e03c5848050703ec4d798a43a4c1813f109a6703883 -size 12288 diff --git a/converter/testdata/1-gpu/model.layers.0.input_layernorm.bias.bin b/converter/testdata/1-gpu/model.layers.0.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.0.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.0.input_layernorm.weight.bin b/converter/testdata/1-gpu/model.layers.0.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/1-gpu/model.layers.0.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin b/converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca 
-size 128 diff --git a/converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index 4da1133..0000000 --- a/converter/testdata/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c7459667751237cef87ba33e5faf46e82ea33f7f53aaae7bf95a8667b6f9e639 -size 16384 diff --git a/converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index c30ec76..0000000 --- a/converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:076a27c79e5ace2a3d47f9dd2e83e4ff6ea8872b3c2218f66c92b89b55f36560 -size 512 diff --git a/converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 668eaee..0000000 --- a/converter/testdata/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55de52e2d73e7e92962bc02673a7dce6123d77e486ac4f8ce7bead1a6727d227 -size 16384 diff --git a/converter/testdata/1-gpu/model.layers.1.attention.dense.weight.0.bin b/converter/testdata/1-gpu/model.layers.1.attention.dense.weight.0.bin deleted file mode 100644 index 97a8f58..0000000 --- a/converter/testdata/1-gpu/model.layers.1.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec12f3d7c73cbc440b476ad66bb4a1ef43881c4d740e8114dc1578bb0197a17e -size 4096 diff --git a/converter/testdata/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin b/converter/testdata/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin deleted file mode 100644 index 33b977e..0000000 --- 
a/converter/testdata/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86d3c23c240260084ac27bd98d52524f0b3559d8106839d85bc927b44750bd81 -size 12288 diff --git a/converter/testdata/1-gpu/model.layers.1.input_layernorm.bias.bin b/converter/testdata/1-gpu/model.layers.1.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.1.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.1.input_layernorm.weight.bin b/converter/testdata/1-gpu/model.layers.1.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/1-gpu/model.layers.1.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin b/converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index d6e7188..0000000 --- a/converter/testdata/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75338924b49fb652d556c260013520b58ca70c8bd782bf59732c5ca8d1de111d -size 16384 diff --git 
a/converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index c30ec76..0000000 --- a/converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:076a27c79e5ace2a3d47f9dd2e83e4ff6ea8872b3c2218f66c92b89b55f36560 -size 512 diff --git a/converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 622a346..0000000 --- a/converter/testdata/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be97681ddf43d42ee25d0439c6958edac3549b0edda8a4e3e8bce4275916bb7c -size 16384 diff --git a/converter/testdata/1-gpu/model.layers.2.attention.dense.weight.0.bin b/converter/testdata/1-gpu/model.layers.2.attention.dense.weight.0.bin deleted file mode 100644 index 9fef556..0000000 --- a/converter/testdata/1-gpu/model.layers.2.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:52bfbb09f85dba050a997ddd1869ae74aa8e76a70a63fd4b7c047531128bec07 -size 4096 diff --git a/converter/testdata/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin b/converter/testdata/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin deleted file mode 100644 index f84cc8b..0000000 --- a/converter/testdata/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:869b24c7a494832730ba41e0286966b23e700e4239d39198e78551a7ef5e8d7f -size 12288 diff --git a/converter/testdata/1-gpu/model.layers.2.input_layernorm.bias.bin b/converter/testdata/1-gpu/model.layers.2.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- 
a/converter/testdata/1-gpu/model.layers.2.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.2.input_layernorm.weight.bin b/converter/testdata/1-gpu/model.layers.2.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/1-gpu/model.layers.2.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin b/converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index a71148a..0000000 --- a/converter/testdata/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:029775363050d9e4c232301274a8085474ca7992a42cabc1112ff46938c5a178 -size 16384 diff --git a/converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index c30ec76..0000000 --- a/converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:076a27c79e5ace2a3d47f9dd2e83e4ff6ea8872b3c2218f66c92b89b55f36560 -size 512 diff --git 
a/converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 63df77d..0000000 --- a/converter/testdata/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:95beee5255e9d3d5d255ebc6d3f18465e1ab005b7e33e3d0d6495eb7a6178eed -size 16384 diff --git a/converter/testdata/1-gpu/model.layers.3.attention.dense.weight.0.bin b/converter/testdata/1-gpu/model.layers.3.attention.dense.weight.0.bin deleted file mode 100644 index dfc1df1..0000000 --- a/converter/testdata/1-gpu/model.layers.3.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7af1303e224fc1185c5d936ca8fb3ab336e46f1bd09c94f1e749936d0b023713 -size 4096 diff --git a/converter/testdata/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin b/converter/testdata/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin deleted file mode 100644 index 2603376..0000000 --- a/converter/testdata/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55961585790611913b727f6043af3589274c4b2350ea68bcacf25009e750cc37 -size 12288 diff --git a/converter/testdata/1-gpu/model.layers.3.input_layernorm.bias.bin b/converter/testdata/1-gpu/model.layers.3.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.3.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.3.input_layernorm.weight.bin b/converter/testdata/1-gpu/model.layers.3.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- 
a/converter/testdata/1-gpu/model.layers.3.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin b/converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index 01f32ad..0000000 --- a/converter/testdata/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9f37f3e28a6e716d08d29d02126854bd6e6248763b9a30af2f2e1dcf7b8fd9a5 -size 16384 diff --git a/converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index c30ec76..0000000 --- a/converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:076a27c79e5ace2a3d47f9dd2e83e4ff6ea8872b3c2218f66c92b89b55f36560 -size 512 diff --git a/converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 10fcdeb..0000000 --- a/converter/testdata/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d903dfb50cce4eae673a2d96a7814353ac1436c0c4d65490bb3ed7d8f8cfc586 -size 16384 diff --git 
a/converter/testdata/1-gpu/model.layers.4.attention.dense.weight.0.bin b/converter/testdata/1-gpu/model.layers.4.attention.dense.weight.0.bin deleted file mode 100644 index 2d07ce1..0000000 --- a/converter/testdata/1-gpu/model.layers.4.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb76390eda7af1d23d5b162446b4c5da0b791ec337d20d9a643b9716377ae514 -size 4096 diff --git a/converter/testdata/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin b/converter/testdata/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin deleted file mode 100644 index b0008a0..0000000 --- a/converter/testdata/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f54ff65f519de2db6343ec257da92091162381082fbb5dba04df50d8e12a158 -size 12288 diff --git a/converter/testdata/1-gpu/model.layers.4.input_layernorm.bias.bin b/converter/testdata/1-gpu/model.layers.4.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/1-gpu/model.layers.4.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.4.input_layernorm.weight.bin b/converter/testdata/1-gpu/model.layers.4.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/1-gpu/model.layers.4.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin b/converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- 
a/converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index 41b7c12..0000000 --- a/converter/testdata/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67e7225a2de28f7e21a72b7ea1cab517071744c53d13f4c6964cbc27f4d261e4 -size 16384 diff --git a/converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index c30ec76..0000000 --- a/converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:076a27c79e5ace2a3d47f9dd2e83e4ff6ea8872b3c2218f66c92b89b55f36560 -size 512 diff --git a/converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 975aa56..0000000 --- a/converter/testdata/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb7582613a3b40b73ac166af2e6b9773511cf6fba7bec12f340119f588c8ea48 -size 16384 diff --git a/converter/testdata/1-gpu/model.lm_head.bias.bin b/converter/testdata/1-gpu/model.lm_head.bias.bin deleted file mode 100644 index fb155d8..0000000 --- a/converter/testdata/1-gpu/model.lm_head.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc19b1997119425765295aeab72d76faa6927d4f83985d328c26f20468d6cc76 -size 4000 diff --git a/converter/testdata/1-gpu/model.lm_head.weight.bin 
b/converter/testdata/1-gpu/model.lm_head.weight.bin deleted file mode 100644 index 530ff0d..0000000 --- a/converter/testdata/1-gpu/model.lm_head.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:884fae8e52776e900d3800048825503048c95ab2850c38c1a595c6da962d3286 -size 128000 diff --git a/converter/testdata/1-gpu/model.wte.bin b/converter/testdata/1-gpu/model.wte.bin deleted file mode 100644 index c815f00..0000000 --- a/converter/testdata/1-gpu/model.wte.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d0d0fec061d1987d0f19acbd319c3471037691cebecd5aa2b00cedcdc2c6177c -size 128000 From beddfc0f7ff1ff05f9f64cdbe534bd59e76dd80f Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sat, 18 Mar 2023 22:58:53 +0800 Subject: [PATCH 2/3] Add GPTNeoX convert script --- converter/huggingface_gptneox_convert.py | 334 ++++++++++++++++++ .../fastertransformer/1/1-gpu/config.ini | 12 + .../1/1-gpu/model.final_layernorm.bias.bin | 3 + .../1/1-gpu/model.final_layernorm.weight.bin | 3 + .../model.layers.0.attention.dense.bias.bin | 3 + ...odel.layers.0.attention.dense.weight.0.bin | 3 + ...ers.0.attention.query_key_value.bias.0.bin | 3 + ...s.0.attention.query_key_value.weight.0.bin | 3 + .../model.layers.0.input_layernorm.bias.bin | 3 + .../model.layers.0.input_layernorm.weight.bin | 3 + .../model.layers.0.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.0.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.0.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.0.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.0.post_attention_layernorm.bias.bin | 3 + ...yers.0.post_attention_layernorm.weight.bin | 3 + .../model.layers.1.attention.dense.bias.bin | 3 + ...odel.layers.1.attention.dense.weight.0.bin | 3 + ...ers.1.attention.query_key_value.bias.0.bin | 3 + ...s.1.attention.query_key_value.weight.0.bin | 3 + .../model.layers.1.input_layernorm.bias.bin | 3 + .../model.layers.1.input_layernorm.weight.bin | 3 + 
.../model.layers.1.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.1.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.1.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.1.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.1.post_attention_layernorm.bias.bin | 3 + ...yers.1.post_attention_layernorm.weight.bin | 3 + .../model.layers.2.attention.dense.bias.bin | 3 + ...odel.layers.2.attention.dense.weight.0.bin | 3 + ...ers.2.attention.query_key_value.bias.0.bin | 3 + ...s.2.attention.query_key_value.weight.0.bin | 3 + .../model.layers.2.input_layernorm.bias.bin | 3 + .../model.layers.2.input_layernorm.weight.bin | 3 + .../model.layers.2.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.2.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.2.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.2.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.2.post_attention_layernorm.bias.bin | 3 + ...yers.2.post_attention_layernorm.weight.bin | 3 + .../model.layers.3.attention.dense.bias.bin | 3 + ...odel.layers.3.attention.dense.weight.0.bin | 3 + ...ers.3.attention.query_key_value.bias.0.bin | 3 + ...s.3.attention.query_key_value.weight.0.bin | 3 + .../model.layers.3.input_layernorm.bias.bin | 3 + .../model.layers.3.input_layernorm.weight.bin | 3 + .../model.layers.3.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.3.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.3.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.3.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.3.post_attention_layernorm.bias.bin | 3 + ...yers.3.post_attention_layernorm.weight.bin | 3 + .../model.layers.4.attention.dense.bias.bin | 3 + ...odel.layers.4.attention.dense.weight.0.bin | 3 + ...ers.4.attention.query_key_value.bias.0.bin | 3 + ...s.4.attention.query_key_value.weight.0.bin | 3 + .../model.layers.4.input_layernorm.bias.bin | 3 + .../model.layers.4.input_layernorm.weight.bin | 3 + .../model.layers.4.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.4.mlp.dense_4h_to_h.weight.0.bin | 3 + 
...odel.layers.4.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.4.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.4.post_attention_layernorm.bias.bin | 3 + ...yers.4.post_attention_layernorm.weight.bin | 3 + .../1/1-gpu/model.lm_head.weight.bin | 3 + .../fastertransformer/1/1-gpu/model.wte.bin | 3 + 66 files changed, 538 insertions(+) create mode 100644 converter/huggingface_gptneox_convert.py create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin create mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin create mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin create mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin create mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin
 create mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin

diff --git a/converter/huggingface_gptneox_convert.py b/converter/huggingface_gptneox_convert.py
new file mode 100644
index 0000000..02b79fb
--- /dev/null
+++ b/converter/huggingface_gptneox_convert.py
@@ -0,0 +1,317 @@
+import argparse
+import configparser
+import multiprocessing
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+import torch
+from transformers import GPTNeoXForCausalLM  # 4.21.1
+
+# Parameter-name fragments of tensors that are written once, unsplit.
+_SHARED_KEYS = (
+    "input_layernorm.weight",
+    "input_layernorm.bias",
+    "attention.dense.bias",
+    "post_attention_layernorm.weight",
+    "post_attention_layernorm.bias",
+    "mlp.dense_4h_to_h.bias",
+    "final_layernorm.weight",
+    "final_layernorm.bias",
+)
+
+
+def get_weight_data_type(data_type):
+    """Map the -weight_data_type value ("fp32" or "fp16") to a numpy dtype."""
+    if data_type == "fp32":
+        return np.float32
+    if data_type == "fp16":
+        return np.float16
+    # Raise instead of `assert False`: asserts are stripped under `python -O`,
+    # which would make this function silently return None.
+    raise ValueError(f"Invalid weight data type {data_type}")
+
+
+def prefix_prompt_convert(args, config, weight_data_type):
+    """Convert prefix-prompt weight files into per-rank binary files.
+
+    Every path in the comma-separated ``args.prompt_in_file_list`` is loaded,
+    reshaped to (num_layers, 2, num_heads, prefix_seq_len, size_per_head),
+    split over ``args.infer_gpu_num`` ranks along the head axis and written to
+    ``model.prefix_prompt.TASK.weight.RANK.bin`` in the output directory.
+
+    Returns a list of ``(task_name, prefix_prompt_len)`` tuples.
+    """
+    saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num
+
+    task_list = []
+    for prompt_in_file in args.prompt_in_file_list.split(","):
+        # NOTE(review): torch.load unpickles the file; only pass trusted files.
+        weights = torch.load(prompt_in_file)
+        # Task name = third dot-separated field from the end of the file name.
+        task_name = prompt_in_file.split("/")[-1].split(".")[-3]
+
+        total_size = weights.nelement()
+        n_layers = config["num_hidden_layers"]
+        # Was config["num_heads"]; every other lookup in this script uses
+        # "num_attention_heads".
+        n_head = config["num_attention_heads"]
+        size_per_head = config["hidden_size"] // n_head
+        prefix_prompt_len = total_size // (2 * n_layers * n_head * size_per_head)
+
+        task_list.append((task_name, prefix_prompt_len))
+        # GPT NeoX: prefix_seq_len, num_layers, 2, num_heads, size_per_head
+        weights = weights.view(prefix_prompt_len, n_layers, 2, n_head, size_per_head)
+        # -> num_layers, 2, num_heads, prefix_seq_len, size_per_head
+        weights = weights.permute(1, 2, 3, 0, 4)
+        local_head_num = n_head // args.infer_gpu_num
+        weights_split = torch.split(weights, local_head_num, dim=2)
+        for i in range(args.infer_gpu_num):
+            output_file_path = (
+                saved_dir
+                + "/model.prefix_prompt."
+                + task_name
+                + ".weight."
+                + str(i)
+                + ".bin"
+            )
+            weights_split[i].detach().cpu().numpy().astype(weight_data_type).tofile(
+                output_file_path
+            )
+
+    return task_list
+
+
+def _save_split(saved_dir, key, i, factor, val, axis):
+    """Split ``val`` into ``factor`` equal parts on ``axis``; one file per part."""
+    for j, part in enumerate(np.split(val, factor, axis=axis)):
+        part.tofile(saved_dir + "/model." + key + ".%d.bin" % (i * factor + j))
+
+
+def split_and_convert_process(i, saved_dir, factor, key, args, config, val):
+    """Write one weight array ``val`` as ``model.KEY[.RANK].bin`` files.
+
+    ``key`` decides the layout: shared tensors are written unsplit (only when
+    ``i == 0``); the others are cut into ``factor`` parts numbered from
+    ``i * factor``. Fused QKV tensors are first regrouped from
+    [..., num_heads, 3, head_hidden] to [..., 3, num_heads * head_hidden].
+    Unrecognised keys only print an error.
+    """
+    if any(pattern in key for pattern in _SHARED_KEYS):
+        # shared weights, only need to convert the weights of rank 0
+        if i == 0:
+            val.tofile(saved_dir + "/model." + key + ".bin")
+
+    elif "attention.dense.weight" in key or "mlp.dense_4h_to_h.weight" in key:
+        _save_split(saved_dir, key, i, factor, val, axis=0)
+
+    elif "mlp.dense_h_to_4h.weight" in key or "mlp.dense_h_to_4h.bias" in key:
+        _save_split(saved_dir, key, i, factor, val, axis=-1)
+
+    elif "attention.query_key_value.bias" in key:
+        local_dim = val.shape[-1] // 3
+        n_head = config["num_attention_heads"]
+
+        val = val.reshape(n_head, 3, local_dim // n_head)
+        val = np.transpose(val, [1, 0, 2]).reshape(3, local_dim)
+        _save_split(saved_dir, key, i, factor, val, axis=-1)
+
+    elif "attention.query_key_value.weight" in key:
+        hidden_dim = val.shape[0]
+        local_dim = val.shape[-1] // 3
+        n_head = config["num_attention_heads"]
+        # Note that the HF qkv weight are stored as [hidden_size, num_heads, 3, head_hidden]
+        # FT needs the shape of [hidden_size, 3, num_heads, head_hidden]
+        val = val.reshape(hidden_dim, n_head, 3, local_dim // n_head)
+        val = np.transpose(val, [0, 2, 1, 3]).reshape(hidden_dim, 3, local_dim)
+        _save_split(saved_dir, key, i, factor, val, axis=-1)
+
+    else:
+        print("[ERROR] cannot find key '{}'".format(key))
+
+
+def split_and_convert(args):
+    """Convert a HuggingFace GPT-NeoX checkpoint into FasterTransformer files.
+
+    Loads ``args.in_file`` with ``GPTNeoXForCausalLM.from_pretrained`` and
+    writes ``config.ini`` plus one ``.bin`` file per (split) parameter into
+    the ``{infer_gpu_num}-gpu`` directory under ``args.saved_dir``.
+    """
+    saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num
+    os.makedirs(saved_dir, exist_ok=True)
+
+    t_gpu_num = args.trained_gpu_num
+    i_gpu_num = args.infer_gpu_num
+    assert i_gpu_num % t_gpu_num == 0
+
+    factor = i_gpu_num // t_gpu_num
+
+    model = GPTNeoXForCausalLM.from_pretrained(args.in_file)
+    # Work on a copy so the default added below does not mutate model.config.
+    hf_config = dict(vars(model.config))
+    # NOTE(review): newer HF GPT-NeoX configs appear to name this flag
+    # "use_parallel_residual" -- confirm which key should be honoured here.
+    if "gpt_j_residual" not in hf_config:
+        hf_config["gpt_j_residual"] = 0
+
+    np_weight_data_type = get_weight_data_type(args.weight_data_type)
+
+    task_list = []
+    if args.prompt_in_file_list is not None:
+        task_list = prefix_prompt_convert(args, hf_config, np_weight_data_type)
+
+    try:
+        n_head = hf_config["num_attention_heads"]
+        n_embd = hf_config["hidden_size"]
+        config = configparser.ConfigParser()
+        config["gptneox"] = {}
+        config["gptneox"]["model_name"] = args.model_name
+        config["gptneox"]["head_num"] = str(n_head)
+        config["gptneox"]["size_per_head"] = str(n_embd // n_head)
+        # Use the checkpoint's real MLP width; 4 * hidden_size is only a
+        # fallback (the bundled tiny test model uses 37, not 128).
+        config["gptneox"]["inter_size"] = str(
+            hf_config.get("intermediate_size", n_embd * 4)
+        )
+        config["gptneox"]["num_layer"] = str(hf_config["num_hidden_layers"])
+        if "rotary_dim" in hf_config:
+            rotary_dim = hf_config["rotary_dim"]
+        else:
+            rotary_dim = n_embd // n_head
+        config["gptneox"]["rotary_embedding"] = str(rotary_dim)
+        config["gptneox"]["vocab_size"] = str(hf_config["vocab_size"])
+        config["gptneox"]["start_id"] = str(hf_config["bos_token_id"])
+        config["gptneox"]["end_id"] = str(hf_config["eos_token_id"])
+        config["gptneox"]["use_gptj_residual"] = str(int(hf_config["gpt_j_residual"]))
+        config["gptneox"]["weight_data_type"] = args.weight_data_type
+
+        if len(task_list) > 0:
+            config["gptneox"]["num_tasks"] = str(len(task_list))
+            config["gptneox"]["prompt_learning_type"] = str(2)
+            for idx, (task_name, prompt_length) in enumerate(task_list):
+                config[f"task_{idx}"] = {}
+                config[f"task_{idx}"]["task_name"] = task_name
+                config[f"task_{idx}"]["prompt_length"] = str(prompt_length)
+        with open((Path(saved_dir) / "config.ini").as_posix(), "w") as configfile:
+            config.write(configfile)
+    except Exception as e:
+        print("Fail to save the config in config.ini.", e)
+
+    ft_model_name_pattern = [
+        "input_layernorm.bias",
+        "input_layernorm.weight",
+        "attention.query_key_value.bias",
+        "attention.query_key_value.weight",
+        "attention.dense.bias",
+        "attention.dense.weight",
+        "post_attention_layernorm.bias",
+        "post_attention_layernorm.weight",
+        "mlp.dense_h_to_4h.bias",
+        "mlp.dense_h_to_4h.weight",
+        "mlp.dense_4h_to_h.bias",
+        "mlp.dense_4h_to_h.weight",
+    ]
+    # Parameters written directly (unsplit, untransposed) under a fixed name.
+    direct_files = {
+        "gpt_neox.embed_in.weight": "model.wte.bin",
+        "gpt_neox.final_layer_norm.bias": "model.final_layernorm.bias.bin",
+        "gpt_neox.final_layer_norm.weight": "model.final_layernorm.weight.bin",
+        "embed_out.weight": "model.lm_head.weight.bin",
+    }
+
+    torch.multiprocessing.set_start_method("spawn")
+    pool = multiprocessing.Pool(args.processes)
+    try:
+        for name, param in model.named_parameters():
+            if "weight" not in name and "bias" not in name:
+                continue
+            val = param.detach().cpu().numpy().astype(np_weight_data_type)
+            if name in direct_files:
+                val.tofile(saved_dir + direct_files[name])
+            elif any(pattern in name for pattern in ft_model_name_pattern):
+                new_name = name.replace("gpt_neox.", "")
+                pool.starmap(
+                    split_and_convert_process,
+                    [(0, saved_dir, factor, new_name, args, hf_config, val.T)],
+                )
+            else:
+                raise Exception("Unused layer", name)
+    finally:
+        # Release the worker processes even if a parameter could not be converted.
+        pool.close()
+        pool.join()
+
+    # Post-process biases if use_gptj_residual is True
+    if hf_config["gpt_j_residual"]:
+        # Was hf_config["n_layer"] with dtype=np.float32: use the layer-count key
+        # written to config.ini above and the dtype the files were saved with.
+        for layer_idx in range(hf_config["num_hidden_layers"]):
+            attn_bias = np.fromfile(
+                saved_dir + f"/model.layers.{layer_idx}.attention.dense.bias.bin",
+                dtype=np_weight_data_type,
+            )
+            mlp_bias = np.fromfile(
+                saved_dir + f"/model.layers.{layer_idx}.mlp.dense_4h_to_h.bias.bin",
+                dtype=np_weight_data_type,
+            )
+
+            (attn_bias + mlp_bias).tofile(
+                saved_dir + f"/model.layers.{layer_idx}.mlp.attention.bias.sum.bin"
+            )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument(
+        "-saved_dir", "-o", type=str, help="file name of output file", required=True
+    )
+    parser.add_argument(
+        "-in_file",
+        "-i",
+        type=str,
+        help="file name of input checkpoint file",
+        required=True,
+    )
+    parser.add_argument(
+        "-prompt_in_file_list",
+        "-p_i_list",
+        type=str,
+        help="list of the prompt weight file path,"
+        "separate by (,). e.g. -prompt_in_file_list prefix_prompt.task0.weight,prefix_prompt.task1.weight",
+    )
+    parser.add_argument(
+        "-trained_gpu_num",
+        "-t_g",
+        type=int,
+        help="How many gpus for training",
+        default=1,
+    )
+    parser.add_argument(
+        "-infer_gpu_num",
+        "-i_g",
+        type=int,
+        help="How many gpus for inference",
+        required=True,
+    )
+    parser.add_argument(
+        "-processes",
+        "-p",
+        type=int,
+        help="How many processes to spawn for conversion (default: 4)",
+        default=4,
+    )
+    parser.add_argument(
+        "-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"]
+    )
+    parser.add_argument(
+        "-model_name", "-m_n", type=str, help="model name", required=True
+    )
+
+    args = parser.parse_args()
+    print("\n=============== Argument ===============")
+    for key in vars(args):
+        print("{}: {}".format(key, vars(args)[key]))
+    print("========================================")
+
+    split_and_convert(args)
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini
new file mode 100644
index 0000000..364e528
--- /dev/null
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini
@@ -0,0 +1,12 @@
+[gptneox]
+model_name = tiny-random-GPTNeoX
+head_num = 4
+size_per_head = 8
+inter_size = 37
+num_layer = 5
+rotary_embedding = 8
+vocab_size = 1024
+start_id = 0
+end_id = 0
+use_gptj_residual = 0
+weight_data_type = fp32
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin
new file mode 100644
index 0000000..67415c7
--- /dev/null
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca
+size 128
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin new file mode 100644 index 0000000..8b8e259 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ec999d1d55bc4af21e7ee8101f7540ff53f73725fc332f175bac14fda1b83a +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..c2b21e0 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0af572105f74f7711069438049a1b539af19b43e4d341fd314b5c67792ce28 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..ea3d668 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aca4617a559ee69fe7c96a62087f3b18700da03d5ad974ab8c58c01d32a5a65e +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..2710ebb --- /dev/null 
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c7eae417acdf83e12125252829446e277269e3aeff2543148576a6b267934d3 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin new file mode 100644 index 0000000..49949fd --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e123291a9e860163e8e6acd45f4e46ab7f65a3da84767d9c45541ff2e61a27 +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..5bb173f --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa877cf3a9c6f51414a3773bf2036af613f999dff1d08966c84f5c0164be0bb +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin new 
file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..d7f4715 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0568057def9087e03b35fdcc8ed89ad88bcf672b9a8d3562d816e95b4de8b10f +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..8f56f64 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d439b2f0ce9e61bdc56aaea018bac52ba20f5eb6a9af39f11b920afa98a74d +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin new file mode 100644 index 0000000..8b08e52 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439187a6e4a716b263062a0393e015944688d5ade8becc855b18b53799a1b9f4 +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..9eb12ab --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff118539b509bc4d35473c1c4d2ecee86276a1b56f8b1c128fb343fbe7126b29 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..6f73bec --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373d0d807fe8142dc107c53ba616cc7735e391ccc99947143e9490abf56ab807 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..4a08f14 --- /dev/null 
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0954135db648aa7945baaa5f861b1b8012188dd199b9b8bc7c8343757ded04fc +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 
diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin new file mode 100644 index 0000000..e7a56fe --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8318d93f17e38918736e07b5b70f5148d4b28f8096190902477118700b0a762e +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..8cd8427 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5b8deb13e270dea2c77ea5a9f3d65d375bb008d8820128bcfec3c6efb4a454 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin new 
file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..82cc84e --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d1a725384598dc3cf2889e659a0b136abd98def073ec1c632f36db3803a987a2 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..d7cf885 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dccd82f3a3e32ea7619e48b972c77ff9def1b3fee58a2df924092a707e30152 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin new file mode 100644 index 0000000..73078db --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e013e9d9ddfc3f604562e7136e02ef97bcc40ecd42f94a236945b6e05e014a +size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..795c566 --- /dev/null +++ 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca +size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..3798642 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202256fec152abace918bd2da29d3eb9a9213920622756bee56f11162903f043 +size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..95febcf --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1715bf310c65c00750acacf5247186e3422f4c85fde6f056ba21b380a8097b80 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..8ecaa74 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 +size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..7cc16e7 --- /dev/null 
+++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482a3307809bfb1eb7a34fc3780ed76e2dc4ba51536eef4d9d616d846db729e6 +size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..67415c7 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..d0c1df9 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 +size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin new file mode 100644 index 0000000..0680a59 --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2d2ae19f874eb46f4c94c8c58930a9df564bdb9205aedcc47f0daadc14ae9a5 +size 131072 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin new file mode 100644 index 0000000..8398d4c --- /dev/null +++ b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a808360ffa74213dcbfac40776866c50b1bc18fc3d6993e856ae2ffa92e0d0 +size 131072 From fe8e02aec612afb17a5e90b92b75d645d08ba21c Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Mon, 20 Mar 2023 16:51:28 +0800 Subject: [PATCH 3/3] Add testdata for GPTNeoX --- converter/huggingface_gptneox_convert.py | 135 +++--------- .../fastertransformer/1/1-gpu/config.ini | 12 - .../1/1-gpu/model.final_layernorm.bias.bin | 3 - .../1/1-gpu/model.final_layernorm.weight.bin | 3 - .../model.layers.0.attention.dense.bias.bin | 3 - ...odel.layers.0.attention.dense.weight.0.bin | 3 - ...ers.0.attention.query_key_value.bias.0.bin | 3 - ...s.0.attention.query_key_value.weight.0.bin | 3 - .../model.layers.0.input_layernorm.bias.bin | 3 - .../model.layers.0.input_layernorm.weight.bin | 3 - .../model.layers.0.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.0.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.0.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.0.mlp.dense_h_to_4h.weight.0.bin | 3 - ...layers.0.post_attention_layernorm.bias.bin | 3 - ...yers.0.post_attention_layernorm.weight.bin | 3 - .../model.layers.1.attention.dense.bias.bin | 3 - ...odel.layers.1.attention.dense.weight.0.bin | 3 - ...ers.1.attention.query_key_value.bias.0.bin | 3 - ...s.1.attention.query_key_value.weight.0.bin | 3 - .../model.layers.1.input_layernorm.bias.bin | 3 - .../model.layers.1.input_layernorm.weight.bin | 3 - .../model.layers.1.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.1.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.1.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.1.mlp.dense_h_to_4h.weight.0.bin | 3 - 
...layers.1.post_attention_layernorm.bias.bin | 3 - ...yers.1.post_attention_layernorm.weight.bin | 3 - .../model.layers.2.attention.dense.bias.bin | 3 - ...odel.layers.2.attention.dense.weight.0.bin | 3 - ...ers.2.attention.query_key_value.bias.0.bin | 3 - ...s.2.attention.query_key_value.weight.0.bin | 3 - .../model.layers.2.input_layernorm.bias.bin | 3 - .../model.layers.2.input_layernorm.weight.bin | 3 - .../model.layers.2.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.2.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.2.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.2.mlp.dense_h_to_4h.weight.0.bin | 3 - ...layers.2.post_attention_layernorm.bias.bin | 3 - ...yers.2.post_attention_layernorm.weight.bin | 3 - .../model.layers.3.attention.dense.bias.bin | 3 - ...odel.layers.3.attention.dense.weight.0.bin | 3 - ...ers.3.attention.query_key_value.bias.0.bin | 3 - ...s.3.attention.query_key_value.weight.0.bin | 3 - .../model.layers.3.input_layernorm.bias.bin | 3 - .../model.layers.3.input_layernorm.weight.bin | 3 - .../model.layers.3.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.3.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.3.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.3.mlp.dense_h_to_4h.weight.0.bin | 3 - ...layers.3.post_attention_layernorm.bias.bin | 3 - ...yers.3.post_attention_layernorm.weight.bin | 3 - .../model.layers.4.attention.dense.bias.bin | 3 - ...odel.layers.4.attention.dense.weight.0.bin | 3 - ...ers.4.attention.query_key_value.bias.0.bin | 3 - ...s.4.attention.query_key_value.weight.0.bin | 3 - .../model.layers.4.input_layernorm.bias.bin | 3 - .../model.layers.4.input_layernorm.weight.bin | 3 - .../model.layers.4.mlp.dense_4h_to_h.bias.bin | 3 - ...el.layers.4.mlp.dense_4h_to_h.weight.0.bin | 3 - ...odel.layers.4.mlp.dense_h_to_4h.bias.0.bin | 3 - ...el.layers.4.mlp.dense_h_to_4h.weight.0.bin | 3 - ...layers.4.post_attention_layernorm.bias.bin | 3 - ...yers.4.post_attention_layernorm.weight.bin | 3 - 
.../1/1-gpu/model.lm_head.weight.bin | 3 - .../fastertransformer/1/1-gpu/model.wte.bin | 3 - converter/tests/gptneox/docker-compose.yaml | 17 ++ .../fastertransformer/1/1-gpu/config.ini | 12 + .../1/1-gpu/model.final_layernorm.bias.bin | 3 + .../1/1-gpu/model.final_layernorm.weight.bin | 3 + .../model.layers.0.attention.dense.bias.bin | 3 + ...odel.layers.0.attention.dense.weight.0.bin | 3 + ...ers.0.attention.query_key_value.bias.0.bin | 3 + ...s.0.attention.query_key_value.weight.0.bin | 3 + .../model.layers.0.input_layernorm.bias.bin | 3 + .../model.layers.0.input_layernorm.weight.bin | 3 + .../model.layers.0.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.0.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.0.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.0.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.0.post_attention_layernorm.bias.bin | 3 + ...yers.0.post_attention_layernorm.weight.bin | 3 + .../model.layers.1.attention.dense.bias.bin | 3 + ...odel.layers.1.attention.dense.weight.0.bin | 3 + ...ers.1.attention.query_key_value.bias.0.bin | 3 + ...s.1.attention.query_key_value.weight.0.bin | 3 + .../model.layers.1.input_layernorm.bias.bin | 3 + .../model.layers.1.input_layernorm.weight.bin | 3 + .../model.layers.1.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.1.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.1.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.1.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.1.post_attention_layernorm.bias.bin | 3 + ...yers.1.post_attention_layernorm.weight.bin | 3 + .../model.layers.10.attention.dense.bias.bin | 3 + ...del.layers.10.attention.dense.weight.0.bin | 3 + ...rs.10.attention.query_key_value.bias.0.bin | 3 + ....10.attention.query_key_value.weight.0.bin | 3 + .../model.layers.10.input_layernorm.bias.bin | 3 + ...model.layers.10.input_layernorm.weight.bin | 3 + ...model.layers.10.mlp.dense_4h_to_h.bias.bin | 3 + ...l.layers.10.mlp.dense_4h_to_h.weight.0.bin | 3 + ...del.layers.10.mlp.dense_h_to_4h.bias.0.bin | 3 + 
...l.layers.10.mlp.dense_h_to_4h.weight.0.bin | 3 + ...ayers.10.post_attention_layernorm.bias.bin | 3 + ...ers.10.post_attention_layernorm.weight.bin | 3 + .../model.layers.11.attention.dense.bias.bin | 3 + ...del.layers.11.attention.dense.weight.0.bin | 3 + ...rs.11.attention.query_key_value.bias.0.bin | 3 + ....11.attention.query_key_value.weight.0.bin | 3 + .../model.layers.11.input_layernorm.bias.bin | 3 + ...model.layers.11.input_layernorm.weight.bin | 3 + ...model.layers.11.mlp.dense_4h_to_h.bias.bin | 3 + ...l.layers.11.mlp.dense_4h_to_h.weight.0.bin | 3 + ...del.layers.11.mlp.dense_h_to_4h.bias.0.bin | 3 + ...l.layers.11.mlp.dense_h_to_4h.weight.0.bin | 3 + ...ayers.11.post_attention_layernorm.bias.bin | 3 + ...ers.11.post_attention_layernorm.weight.bin | 3 + .../model.layers.2.attention.dense.bias.bin | 3 + ...odel.layers.2.attention.dense.weight.0.bin | 3 + ...ers.2.attention.query_key_value.bias.0.bin | 3 + ...s.2.attention.query_key_value.weight.0.bin | 3 + .../model.layers.2.input_layernorm.bias.bin | 3 + .../model.layers.2.input_layernorm.weight.bin | 3 + .../model.layers.2.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.2.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.2.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.2.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.2.post_attention_layernorm.bias.bin | 3 + ...yers.2.post_attention_layernorm.weight.bin | 3 + .../model.layers.3.attention.dense.bias.bin | 3 + ...odel.layers.3.attention.dense.weight.0.bin | 3 + ...ers.3.attention.query_key_value.bias.0.bin | 3 + ...s.3.attention.query_key_value.weight.0.bin | 3 + .../model.layers.3.input_layernorm.bias.bin | 3 + .../model.layers.3.input_layernorm.weight.bin | 3 + .../model.layers.3.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.3.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.3.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.3.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.3.post_attention_layernorm.bias.bin | 3 + 
...yers.3.post_attention_layernorm.weight.bin | 3 + .../model.layers.4.attention.dense.bias.bin | 3 + ...odel.layers.4.attention.dense.weight.0.bin | 3 + ...ers.4.attention.query_key_value.bias.0.bin | 3 + ...s.4.attention.query_key_value.weight.0.bin | 3 + .../model.layers.4.input_layernorm.bias.bin | 3 + .../model.layers.4.input_layernorm.weight.bin | 3 + .../model.layers.4.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.4.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.4.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.4.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.4.post_attention_layernorm.bias.bin | 3 + ...yers.4.post_attention_layernorm.weight.bin | 3 + .../model.layers.5.attention.dense.bias.bin | 3 + ...odel.layers.5.attention.dense.weight.0.bin | 3 + ...ers.5.attention.query_key_value.bias.0.bin | 3 + ...s.5.attention.query_key_value.weight.0.bin | 3 + .../model.layers.5.input_layernorm.bias.bin | 3 + .../model.layers.5.input_layernorm.weight.bin | 3 + .../model.layers.5.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.5.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.5.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.5.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.5.post_attention_layernorm.bias.bin | 3 + ...yers.5.post_attention_layernorm.weight.bin | 3 + .../model.layers.6.attention.dense.bias.bin | 3 + ...odel.layers.6.attention.dense.weight.0.bin | 3 + ...ers.6.attention.query_key_value.bias.0.bin | 3 + ...s.6.attention.query_key_value.weight.0.bin | 3 + .../model.layers.6.input_layernorm.bias.bin | 3 + .../model.layers.6.input_layernorm.weight.bin | 3 + .../model.layers.6.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.6.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.6.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.6.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.6.post_attention_layernorm.bias.bin | 3 + ...yers.6.post_attention_layernorm.weight.bin | 3 + .../model.layers.7.attention.dense.bias.bin | 3 + 
...odel.layers.7.attention.dense.weight.0.bin | 3 + ...ers.7.attention.query_key_value.bias.0.bin | 3 + ...s.7.attention.query_key_value.weight.0.bin | 3 + .../model.layers.7.input_layernorm.bias.bin | 3 + .../model.layers.7.input_layernorm.weight.bin | 3 + .../model.layers.7.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.7.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.7.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.7.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.7.post_attention_layernorm.bias.bin | 3 + ...yers.7.post_attention_layernorm.weight.bin | 3 + .../model.layers.8.attention.dense.bias.bin | 3 + ...odel.layers.8.attention.dense.weight.0.bin | 3 + ...ers.8.attention.query_key_value.bias.0.bin | 3 + ...s.8.attention.query_key_value.weight.0.bin | 3 + .../model.layers.8.input_layernorm.bias.bin | 3 + .../model.layers.8.input_layernorm.weight.bin | 3 + .../model.layers.8.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.8.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.8.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.8.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.8.post_attention_layernorm.bias.bin | 3 + ...yers.8.post_attention_layernorm.weight.bin | 3 + .../model.layers.9.attention.dense.bias.bin | 3 + ...odel.layers.9.attention.dense.weight.0.bin | 3 + ...ers.9.attention.query_key_value.bias.0.bin | 3 + ...s.9.attention.query_key_value.weight.0.bin | 3 + .../model.layers.9.input_layernorm.bias.bin | 3 + .../model.layers.9.input_layernorm.weight.bin | 3 + .../model.layers.9.mlp.dense_4h_to_h.bias.bin | 3 + ...el.layers.9.mlp.dense_4h_to_h.weight.0.bin | 3 + ...odel.layers.9.mlp.dense_h_to_4h.bias.0.bin | 3 + ...el.layers.9.mlp.dense_h_to_4h.weight.0.bin | 3 + ...layers.9.post_attention_layernorm.bias.bin | 3 + ...yers.9.post_attention_layernorm.weight.bin | 3 + .../1/1-gpu/model.lm_head.weight.bin | 3 + .../fastertransformer/1/1-gpu/model.wte.bin | 3 + .../testdata/fastertransformer/config.pbtxt | 208 ++++++++++++++++++ 217 files changed, 710 
insertions(+), 310 deletions(-) delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin delete 
mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin delete mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin delete mode 100644 
converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin delete 
mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin delete mode 100644 converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin create mode 100644 converter/tests/gptneox/docker-compose.yaml create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/config.ini create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.bias.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.bias.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.bias.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.bias.0.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.bias.bin create mode 100644 
converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.bias.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.weight.0.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.bias.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.lm_head.weight.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.wte.bin create mode 100644 converter/tests/gptneox/testdata/fastertransformer/config.pbtxt diff --git a/converter/huggingface_gptneox_convert.py b/converter/huggingface_gptneox_convert.py index 02b79fb..7852bf1 100644 --- a/converter/huggingface_gptneox_convert.py +++ b/converter/huggingface_gptneox_convert.py @@ -2,12 +2,13 @@ import argparse import configparser import multiprocessing import os +import shutil import sys from pathlib import Path import numpy 
as np import torch -from transformers import GPTNeoXForCausalLM # 4.21.1 +from transformers import GPTNeoXForCausalLM def get_weight_data_type(data_type): @@ -19,51 +20,7 @@ def get_weight_data_type(data_type): assert False, f"Invalid weight data type {data_type}" -def prefix_prompt_convert(args, config, weight_data_type): - - saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num - - prompt_in_file_list = args.prompt_in_file_list.split(",") - - task_list = [] - for idx, prompt_in_file in enumerate(prompt_in_file_list): - weights = torch.load(prompt_in_file) - task_name = prompt_in_file.split("/")[-1].split(".")[-3] - - total_size = weights.nelement() - n_layers = config["num_hidden_layers"] - n_head = config["num_heads"] - size_per_head = config["hidden_size"] // n_head - prefix_prompt_len = total_size // (2 * n_layers * n_head * size_per_head) - - task_list.append((task_name, prefix_prompt_len)) - # GPT NeoX - weights = weights.view( - prefix_prompt_len, n_layers, 2, n_head, size_per_head - ) ## prefix_seq_len, num_layers, 2, num_heads, size_per_head - # weights=weights.view(prefix_prompt_len,28,2,16,256) ## prefix_seq_len, num_layers, 2, num_heads, size_per_head - weights = weights.permute( - 1, 2, 3, 0, 4 - ) ## num_layers, 2, num_heads, perfix_seq_len, size_per_head - local_head_num = n_head // args.infer_gpu_num - weights_split = torch.split(weights, local_head_num, dim=2) - for i in range(args.infer_gpu_num): - output_file_path = ( - saved_dir - + "/model.prefix_prompt." - + task_name - + ".weight." 
- + str(i) - + ".bin" - ) - weights_split[i].detach().cpu().numpy().astype(weight_data_type).tofile( - output_file_path - ) - - return task_list - - -def split_and_convert_process(i, saved_dir, factor, key, args, config, val): +def split_and_convert_process(saved_dir, factor, key, args, config, val): if ( key.find("input_layernorm.weight") != -1 @@ -75,11 +32,8 @@ def split_and_convert_process(i, saved_dir, factor, key, args, config, val): or key.find("final_layernorm.weight") != -1 or key.find("final_layernorm.bias") != -1 ): - - # shared weights, only need to convert the weights of rank 0 - if i == 0: - saved_path = saved_dir + "/model." + key + ".bin" - val.tofile(saved_path) + saved_path = saved_dir + f"/model.{key}.bin" + val.tofile(saved_path) elif ( key.find("attention.dense.weight") != -1 @@ -87,7 +41,7 @@ def split_and_convert_process(i, saved_dir, factor, key, args, config, val): ): split_vals = np.split(val, factor, axis=0) for j in range(factor): - saved_path = saved_dir + "/model." + key + ".%d.bin" % (i * factor + j) + saved_path = saved_dir + f"/model.{key}.{j}.bin" split_vals[j].tofile(saved_path) elif ( @@ -97,7 +51,7 @@ def split_and_convert_process(i, saved_dir, factor, key, args, config, val): split_vals = np.split(val, factor, axis=-1) for j in range(factor): - saved_path = saved_dir + "/model." + key + ".%d.bin" % (i * factor + j) + saved_path = saved_dir + f"/model.{key}.{j}.bin" split_vals[j].tofile(saved_path) elif key.find("attention.query_key_value.bias") != -1: @@ -109,7 +63,7 @@ def split_and_convert_process(i, saved_dir, factor, key, args, config, val): split_vals = np.split(val, factor, axis=-1) for j in range(factor): - saved_path = saved_dir + "/model." 
+ key + ".%d.bin" % (i * factor + j) + saved_path = saved_dir + f"/model.{key}.{j}.bin" split_vals[j].tofile(saved_path) elif key.find("attention.query_key_value.weight") != -1: @@ -125,7 +79,7 @@ def split_and_convert_process(i, saved_dir, factor, key, args, config, val): split_vals = np.split(val, factor, axis=-1) for j in range(factor): - saved_path = saved_dir + "/model." + key + ".%d.bin" % (i * factor + j) + saved_path = saved_dir + f"/model.{key}.{j}.bin" split_vals[j].tofile(saved_path) else: @@ -149,45 +103,30 @@ def split_and_convert(args): # model = torch.load(ckpt_name) model = GPTNeoXForCausalLM.from_pretrained(args.in_file) hf_config = vars(model.config) - if "gpt_j_residual" not in hf_config: - hf_config["gpt_j_residual"] = 0 np_weight_data_type = get_weight_data_type(args.weight_data_type) - task_list = [] - if args.prompt_in_file_list is not None: - task_list = prefix_prompt_convert(args, hf_config, np_weight_data_type) - try: model_name = args.model_name + n_heads = hf_config["num_attention_heads"] + head_size = hf_config["hidden_size"] // n_heads + rotary_dim = int(head_size * hf_config["rotary_pct"]) + use_gptj_residual = int(hf_config["use_parallel_residual"]) + config = configparser.ConfigParser() config["gptneox"] = {} config["gptneox"]["model_name"] = model_name - config["gptneox"]["head_num"] = str(hf_config["num_attention_heads"]) - n_embd = hf_config["hidden_size"] - config["gptneox"]["size_per_head"] = str( - n_embd // hf_config["num_attention_heads"] - ) - config["gptneox"]["inter_size"] = str(n_embd * 4) + config["gptneox"]["head_num"] = str(n_heads) + config["gptneox"]["size_per_head"] = str(head_size) + config["gptneox"]["inter_size"] = str(hf_config["intermediate_size"]) config["gptneox"]["num_layer"] = str(hf_config["num_hidden_layers"]) - if "rotary_dim" in hf_config: - rotary_dim = hf_config["rotary_dim"] - else: - rotary_dim = n_embd // hf_config["num_attention_heads"] config["gptneox"]["rotary_embedding"] = str(rotary_dim) 
config["gptneox"]["vocab_size"] = str(hf_config["vocab_size"]) config["gptneox"]["start_id"] = str(hf_config["bos_token_id"]) config["gptneox"]["end_id"] = str(hf_config["eos_token_id"]) - config["gptneox"]["use_gptj_residual"] = str(int(hf_config["gpt_j_residual"])) + config["gptneox"]["use_gptj_residual"] = str(use_gptj_residual) config["gptneox"]["weight_data_type"] = args.weight_data_type - if len(task_list) > 0: - config["gptneox"]["num_tasks"] = str(len(task_list)) - config["gptneox"]["prompt_learning_type"] = str(2) - for idx, (task_name, prompt_length) in enumerate(task_list): - config[f"task_{idx}"] = {} - config[f"task_{idx}"]["task_name"] = task_name - config[f"task_{idx}"]["prompt_length"] = str(prompt_length) with open((Path(saved_dir) / f"config.ini").as_posix(), "w") as configfile: config.write(configfile) except Exception as e: @@ -211,24 +150,19 @@ def split_and_convert(args): torch.multiprocessing.set_start_method("spawn") pool = multiprocessing.Pool(args.processes) for name, param in model.named_parameters(): + array = param.detach().cpu().numpy().astype(np_weight_data_type) + # print("input shape", name, array.shape) if name.find("weight") == -1 and name.find("bias") == -1: + print("skipped", name) continue elif name == "gpt_neox.embed_in.weight": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + "model.wte.bin" - ) + array.tofile(saved_dir + "model.wte.bin") elif name == "gpt_neox.final_layer_norm.bias": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + "model.final_layernorm.bias.bin" - ) + array.tofile(saved_dir + "model.final_layernorm.bias.bin") elif name == "gpt_neox.final_layer_norm.weight": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - saved_dir + "model.final_layernorm.weight.bin" - ) + array.tofile(saved_dir + "model.final_layernorm.weight.bin") elif name == "embed_out.weight": - param.detach().cpu().numpy().astype(np_weight_data_type).tofile( - 
saved_dir + "model.lm_head.weight.bin" - ) + array.tofile(saved_dir + "model.lm_head.weight.bin") else: processed = False for i in range(len(ft_model_name_pattern)): @@ -238,17 +172,12 @@ def split_and_convert(args): split_and_convert_process, [ ( - 0, saved_dir, factor, new_name, args, vars(model.config), - param.detach() - .cpu() - .numpy() - .astype(np_weight_data_type) - .T, + array.T, ) ], ) @@ -256,13 +185,13 @@ def split_and_convert(args): break if not processed: - raise Exception("Unused layer", name) + print("Unused layer", name) pool.close() pool.join() # Post-process biases if use_gptj_residual is True - if hf_config["gpt_j_residual"]: + if use_gptj_residual: for layer_idx in range(hf_config["n_layer"]): attn_bias = np.fromfile( saved_dir + f"/model.layers.{layer_idx}.attention.dense.bias.bin", @@ -290,13 +219,6 @@ if __name__ == "__main__": help="file name of input checkpoint file", required=True, ) - parser.add_argument( - "-prompt_in_file_list", - "-p_i_list", - type=str, - help="list of the prompt weight file path," - "separate by (,). e.g. 
-prompt_in_file_list prefix_prompt.task0.weight,prefix_prompt.task1.weight", - ) parser.add_argument( "-trained_gpu_num", "-t_g", @@ -331,4 +253,5 @@ if __name__ == "__main__": print("{}: {}".format(key, vars(args)[key])) print("========================================") + shutil.rmtree(args.saved_dir, ignore_errors=True) split_and_convert(args) diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini deleted file mode 100644 index 364e528..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/config.ini +++ /dev/null @@ -1,12 +0,0 @@ -[gptneox] -model_name = tiny-random-GPTNeoX -head_num = 4 -size_per_head = 8 -inter_size = 128 -num_layer = 5 -rotary_embedding = 8 -vocab_size = 1024 -start_id = 0 -end_id = 0 -use_gptj_residual = 0 -weight_data_type = fp32 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin deleted file mode 100644 index 8b8e259..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75ec999d1d55bc4af21e7ee8101f7540ff53f73725fc332f175bac14fda1b83a -size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin deleted file mode 100644 index 795c566..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca -size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin deleted file mode 
100644 index c2b21e0..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d0af572105f74f7711069438049a1b539af19b43e4d341fd314b5c67792ce28 -size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index ea3d668..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aca4617a559ee69fe7c96a62087f3b18700da03d5ad974ab8c58c01d32a5a65e -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index 8ecaa74..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 -size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 2710ebb..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8c7eae417acdf83e12125252829446e277269e3aeff2543148576a6b267934d3 -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin deleted file mode 100644 index 49949fd..0000000 --- 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9e123291a9e860163e8e6acd45f4e46ab7f65a3da84767d9c45541ff2e61a27 -size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin deleted file mode 100644 index 795c566..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca -size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin deleted file mode 100644 index 5bb173f..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3aa877cf3a9c6f51414a3773bf2036af613f999dff1d08966c84f5c0164be0bb -size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index d7f4715..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0568057def9087e03b35fdcc8ed89ad88bcf672b9a8d3562d816e95b4de8b10f -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index 8ecaa74..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 -size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 8f56f64..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38d439b2f0ce9e61bdc56aaea018bac52ba20f5eb6a9af39f11b920afa98a74d -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin deleted file mode 100644 index 8b08e52..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:439187a6e4a716b263062a0393e015944688d5ade8becc855b18b53799a1b9f4 -size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin deleted file mode 100644 index 795c566..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca -size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin deleted file mode 100644 index 9eb12ab..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff118539b509bc4d35473c1c4d2ecee86276a1b56f8b1c128fb343fbe7126b29 -size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index 6f73bec..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:373d0d807fe8142dc107c53ba616cc7735e391ccc99947143e9490abf56ab807 -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index 8ecaa74..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 -size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 4a08f14..0000000 --- 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0954135db648aa7945baaa5f861b1b8012188dd199b9b8bc7c8343757ded04fc -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin deleted file mode 100644 index e7a56fe..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8318d93f17e38918736e07b5b70f5148d4b28f8096190902477118700b0a762e -size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin deleted file mode 100644 index 795c566..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca -size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin deleted file mode 100644 index 8cd8427..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1d5b8deb13e270dea2c77ea5a9f3d65d375bb008d8820128bcfec3c6efb4a454 -size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index 82cc84e..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null 
@@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d1a725384598dc3cf2889e659a0b136abd98def073ec1c632f36db3803a987a2 -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index 8ecaa74..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 -size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index d7cf885..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3dccd82f3a3e32ea7619e48b972c77ff9def1b3fee58a2df924092a707e30152 -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git 
a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin deleted file mode 100644 index 73078db..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43e013e9d9ddfc3f604562e7136e02ef97bcc40ecd42f94a236945b6e05e014a -size 4096 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin deleted file mode 100644 
index 795c566..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a1a4f5721c1c4610af7f71078f3a68c330536d679803b0e0507ee8dc10c5dfca -size 384 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin deleted file mode 100644 index 3798642..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:202256fec152abace918bd2da29d3eb9a9213920622756bee56f11162903f043 -size 12288 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin deleted file mode 100644 index 95febcf..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1715bf310c65c00750acacf5247186e3422f4c85fde6f056ba21b380a8097b80 -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin deleted file mode 100644 index 8ecaa74..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b18c58c739716e76429634a61375c45b3b5cd470c22ab6d3e14cee23dd992e1 -size 148 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin 
b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin deleted file mode 100644 index 7cc16e7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:482a3307809bfb1eb7a34fc3780ed76e2dc4ba51536eef4d9d616d846db729e6 -size 4736 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin deleted file mode 100644 index 67415c7..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38723a2e5e8a17aa7950dc008209944e898f69a7bd10a23c839d341e935fd5ca -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin deleted file mode 100644 index d0c1df9..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b638277a8690e175a9137feff1e43c067f9faf4e2f600caf468fb05b0403b717 -size 128 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin deleted file mode 100644 index 0680a59..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.lm_head.weight.bin +++ /dev/null 
@@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b2d2ae19f874eb46f4c94c8c58930a9df564bdb9205aedcc47f0daadc14ae9a5 -size 131072 diff --git a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin b/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin deleted file mode 100644 index 8398d4c..0000000 --- a/converter/testdata/tiny-random-GPTNeoXModel/fastertransformer/1/1-gpu/model.wte.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51a808360ffa74213dcbfac40776866c50b1bc18fc3d6993e856ae2ffa92e0d0 -size 131072 diff --git a/converter/tests/gptneox/docker-compose.yaml b/converter/tests/gptneox/docker-compose.yaml new file mode 100644 index 0000000..7db01be --- /dev/null +++ b/converter/tests/gptneox/docker-compose.yaml @@ -0,0 +1,17 @@ +version: '3.3' +services: + triton: + image: ghcr.io/tabbyml/fastertransformer_backend:main + command: mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model + shm_size: 1gb + ports: + - "8001:8001" + volumes: + - ./testdata:/model + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/config.ini b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/config.ini new file mode 100644 index 0000000..b69e6d6 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/config.ini @@ -0,0 +1,12 @@ +[gptneox] +model_name = gptneox +head_num = 12 +size_per_head = 64 +inter_size = 3072 +num_layer = 12 +rotary_embedding = 64 +vocab_size = 50304 +start_id = 0 +end_id = 0 +use_gptj_residual = 0 +weight_data_type = fp32 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin new file mode 100644 index 
0000000..abaf167 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e8e6b531dabfeff987975cc6c937428139beb7f5699b8b938545716d8eeca7 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin new file mode 100644 index 0000000..6ce97fe --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.final_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20104b8a794c5dfa02c2847d25a2697ae60971ea296192dc88a9eaaa479f606 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin new file mode 100644 index 0000000..36d08b4 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df9dbc92f1617b447db1aa855da24d07660a12c41b22dee002696813b3ebbba +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin new file mode 100644 index 0000000..f62eebf --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eba16ee6fe2391cf2efcb7162e4662dd905a58cb8610ff32d07813e7575d6fc +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..c9cbcc8 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fb9545a9c7ba88f8927bb1929a3d1d74962d18909ee21225e6b9756c77c332 +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..5466a57 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a3c434d0af69813352edd06e29e671e8e160de1221a90abe6f308cd6b624ab +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin new file mode 100644 index 0000000..3568840 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f324b52b78c5e245f0b68559ac1b40c7c54aa1d48005ca2d12887cb53dbbc9b2 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin new file mode 100644 index 0000000..7fad597 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1c632d0534e7e7e4eab3d843a0a1f0d0f3843aa9b1aefd6b706ad71cf3de08ed +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..cc0f2c6 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2b220335cce206b3928380f069259c01e382937189f1d86d40641bea319a27 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..f01f76c --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d59fe9d26e5a6b5bf67a2d631bf012cb67e30a8aee34d5a8b5589a1a3b767d0 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..06b6608 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb0620b893fc6c19a0d99f7b0b19fcbc81262b7de48532a0cfd74a6c2ceb11e +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..5cbd120 --- /dev/null +++ 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea4b2ed74e487155f748f4ad057070db5a1ef9bbe18f155caf05f1889a7db0e4 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..4048b48 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e949431800d44dc72725fb2a14c1b6257df33a087c89a5363504b2796f033ef +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..0ed7955 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.0.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63fbed352f174592299da93e92c5652b45d4d439aea92867c30bc292d2d8891 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin new file mode 100644 index 0000000..868537a --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3de5c5d037abd4ba67b56fca3fd35e11f2d3355313684066188424579295f0 +size 3072 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin new file mode 100644 index 0000000..4ff6251 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bfab993d3877ea0dcdbb35f1bfbe371cf03dee47e23a124e1a3004c9abde182 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..d3f8ead --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2961811f9b09add00d1acf68bbb0d983b2a266415bcf4167c108cf0af2b162 +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..6e7294a --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678d7bdfff7a580935c141a3c8d56121925ad5688ca4839150f7c38aa07b3816 +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin new file mode 100644 index 0000000..a481b6d --- /dev/null +++ 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ee83ada70710e37e74558e042db460e1264b2373ad2272891f8681dd32992f +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin new file mode 100644 index 0000000..4b9001c --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7f1c9586a26d8b4c7db214c5e036691d24a6aaa0700434e34fb89789e8413d +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..6c0de70 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6d9660941b2258411fa5a95bc2efae46f9b37b2c2677dbfd15fc4bfa97b7d8 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..876f935 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df394855b5b83e6cef0b009b66d75bc6ad9536f63ea81c118821269ddb389811 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..fd5dc86 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:970ccc870e06e091619acffcd71eec44e61ea4e38490094850dea049a55ee4d4 +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..80c7de7 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669ab6458f06db05c0310bf04a4c6681b00faa5ad498d144552ddc8b0ebcad1c +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..e496c43 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d77f19d5997955ea3d639595ca6d0a1d94f16eb0a54e6ed9256d151e84a32160 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..bc9c947 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.1.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a208323db32e45ec21cb6fa92754862a2d8db9dc32a148ede730567e1b53ea6d +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.bias.bin new file mode 100644 index 0000000..bc3fa16 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a991d95e2e3a0c716990227c3fca9617a3762963404bd1bbb1649d6031476d7 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.weight.0.bin new file mode 100644 index 0000000..09f7d3f --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0948b6eb35b6e388e7ad4cdf315abdf943960d7952dfebbebb4897023cc9a278 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..e65a163 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4755cff515205a9072c5f00b4e592939e5359f7b535d447bc1f7da44830ce011 +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.weight.0.bin new file mode 100644 index 
0000000..b52e44a --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b52cdfe8cd07b6f604cd3d848c7ab44efaa5a260441b7da057c42488992b3d6 +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.bias.bin new file mode 100644 index 0000000..4485cb4 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b95c5b59d5a99b5f833825f620ddde882036901fda433046d6047a2ad90b51 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.weight.bin new file mode 100644 index 0000000..d2e677e --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e927894a15e7a284be51655cb46338d98d4ca77378d9f865dc00707ae62758b +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..7cf570c --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf8db67c06bf5b4bb5060f3ace4e5a84ddcc07dea1efc631b93a1244916da4f +size 3072 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..e743da6 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1d8f3a25a663c5a16d2f390c6db233c808ba22a5617a057f6a0d3350d70260 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..371d4be --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85917a5c6f37da43778c01ddd45b5a444e55265cb17c9156044545c8b378fbbb +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..6990861 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291116b1849ae751a730ebd574548936105cff112b22e1b21a3830640f2b3b6e +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..5289391 --- /dev/null +++ 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aedc1896553c1017fcbfa8672ba0ce8a26968f288009904e1e91966e6bb90518 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..3d13c43 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.10.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d0447f6a6765688512b646eccc2fbb742b273d7f6ba14568a813b1e6d13d4d2 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.bias.bin new file mode 100644 index 0000000..82cae7b --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9abd3af37bf3a976fa9a6c2fd499e1e852fa8e6f80af55e868dbcc31b92c6cc +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.weight.0.bin new file mode 100644 index 0000000..2057d9b --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b249f47f438a6a13ac0b10f42308dc9c84cf53bab9300129140c56d44d8d4296 +size 2359296 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..d210f41 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13dee33e6ca1356ef5415df8a755832ca0a4d546256ac9beafc954011402756 +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..7537897 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a14a2b3a7f763772f4401da40419fa7cc1caae2cd7bf416ce9b1ff78501954a +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.bias.bin new file mode 100644 index 0000000..03cb3d9 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6231f0316632a83598eee3c9b2dedd42f6086f9aa9948ffb2c2575d42f855b +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.weight.bin new file mode 100644 index 0000000..507baac --- /dev/null +++ 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:358702cdee17d0d06a44e5d4915f0cc4acd75bbe88e678d1d87e1034a823e24d +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..7aa26da --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10daf518dd3b8456cbbfcf0e97df875ac9d818559e29ffbc04b6aae9b00933f +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..e360709 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed18d4ed924f201ef49ba8c84699c81f9bfb6b470f3f58c3eda58e97a6acfb12 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..398aa50 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7240fb1faaa2309b82123f827715f6294b408e97d688d7fc514d7ce14910b8a +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.weight.0.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..02f3d10 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:511d7ea871bb241619c99331ac746dceb467be2eb690031bbaec19b9b747c337 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..7e82829 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d35391ad45043b104204e9f80c538bf30e17e538fc38b171b37d87f2e368b517 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..1844f35 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.11.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b7d344fd44e6b84f9e01c13493425a1d4d64f1b05360b2af3d62a624e0f56d +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin new file mode 100644 index 0000000..2a7cc3d --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2b56ec36e26bee33f87dcb99d1a34ad4a4ef314ae186ab0f8905f1cdb17a48fb +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin new file mode 100644 index 0000000..33d6493 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa0e24588292124f8bb75bfc2dba133a93a858389e8ec1c46eddf2831e62619 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..5a4f400 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96908969d3cb3f937afbd46580634f1fabf6008d9a67db26b9fc6d612a6646eb +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..d8ed2b5 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3533607c5762d74c194c9e9de52217b075cabd1429078f98ad125f8e355f60 +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin new file mode 100644 index 
0000000..12da824 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0700cab36c40ebca68bb21877b9d6d53d2a28e66d81715a45477da730bd52e4 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin new file mode 100644 index 0000000..7425bda --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c35503aa0ba445d5a3a462f1805351e5e8359d44eb0ab185df4c4a0637540fa +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..a0390ec --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f26d615c213bca2610bf7d4787678422e290a81ace7c6ec02fe1a7a4956e91 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..ee63c59 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11ae0330f6f55f61fd3ad9f69557b948cc93c4839e2f95779289f525cadb49c9 +size 9437184 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..25d84ef --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc26ca0d9dcb757fdedf903d125fa81f08e70ff1b2b337bd17501b034e3af2d +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..53ffbf5 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a2b4ad1773e3415b98242830c3ca6179ed557a716343af3a0ab04cce2fb4fe5 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..1b950db --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed71f49f5cc2bde762ff61cdbecea358229b71aa0de618b3c47fee4e9c15b573 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..80f5b54 --- /dev/null +++ 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.2.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ccc5ca8b79956a815cf55d3abacdd19343ebdde6063c57c024e4370693fef82 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin new file mode 100644 index 0000000..bd9f5e0 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69192041d0fbdcefd4842aee8f096c9c8be4a7101eb17c832e161f920522dc67 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin new file mode 100644 index 0000000..b43519f --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5b3d7db619c79fd4295d764a428e21be531ab150fb7c609fcac5ffdb44371a +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..9941681 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d340631496cfb8e39db607703096b0de94985b19fc0fdc481a59797c8cf490a +size 9216 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..2e7b4ec --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e665f2a175bb8d31c013752b165eba2b55d56b1c7f3e9acb67b3a7bdea0fac2d +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin new file mode 100644 index 0000000..68de542 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0257d8bd6309e82e3f7187dafbd6addc9906e4cec2d29efc0f124b5502f87963 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin new file mode 100644 index 0000000..3b39a4a --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef2513c61a003dadce2563c7a736f9f4b3ec8d80b6e38191da19cf68203a068 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..c71ab96 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.bias.bin @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b05a1e5131cb6551f826503d57c4c8b5440caefc6e4d8779181930027cea6c +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..d4589af --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf79be083d770af551b7058283cc9980b0d96e14dd02d4454986bdde8a2b94f +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..43732f3 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed59e44938c8f312abc8a4170cd6549951a2c03a72b9cf66d91065636e086c35 +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..623262d --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6249375d6a73f511df02e42db4e575515a36998030c41007b852ea36460675 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..1cd1080 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60bf353c91d711ee3e66d54ae42948c63c014b3f55b655ce791b559a2deebc1c +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..aca0517 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.3.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b46c9094b353c558b3f33921b93d490059d831f6f53936064e1a1810ce804868 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin new file mode 100644 index 0000000..7e33acb --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df14280a937099176da15dd35d6c0ec033e804568dbafc0e4d478cc46bf606c +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin new file mode 100644 index 0000000..a885299 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:303c2535ff82c446017c17a22ce3396d31f904ad03fe8dad96a3624deb92d314 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..d571d99 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6c5b0c270d0829bee70acb8754937a350af1a58391da691846df9f7257ec2b +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..0cbb587 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb35a15bf51a44fecb5e3a91fe92ce34d2d4b624cb8b8cfb66e758ec47326a45 +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin new file mode 100644 index 0000000..b970644 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff06b36e63d7d4a99047b6e60dc1eb8ad3285b4c392f8f3b7d670a11e665c3b6 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin new file mode 100644 index 0000000..96d343f 
--- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2d76abb0655d8ac2637f5b21d67d1cb1732fc2461716a74b0104edc935d489 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..b551ef2 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78da9207b62dd15e7d2a7311238cb467931f4aaaf8c302285308a92399d5eb44 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..7ed3a12 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fad341cc8c6e3e13f9eca3589f06cd7ab65a44a910efe1788ca71a7b038339c +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..fe61a7a --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eafc33d1ac4361a999b10482dbd696c463d247f8b5d7072bc138af947819fcd +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..a09d906 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb67e4351c871670554e29d6435e6840168f92891fc57f567c5ed668b841b31 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..197f9d6 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2198459a24e2116aa42c3697ed193e35c3c5efd8fa0f8e73e20bf4055c5f2263 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..1dc9158 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.4.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768a5c3681076f607b3d8cf4fe4302f139da630759df9ea4d7e2354b3d955f25 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.bias.bin new file mode 100644 index 0000000..6d25d17 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6aafc8226bb1a02ec0e7b6d8758798f68d92dfccad1ae33a70ba10f61531659f +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.weight.0.bin new file mode 100644 index 0000000..f0426fd --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2764a3bc9493eb3015fdb1fb800341b939509291caefcbd170f549d4553cdcb +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..096207b --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae56c34e09700a3bba5a9f9867dd58df341165c1b29db89702a4cc4f06ef0ce9 +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..8439a90 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51fa1f7a59ab7f1aa4030c2561a5ac29e540ffadc4664c848e2e408351543cd +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.bias.bin new file mode 100644 index 
0000000..7c10d14 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54bf65ac4e921772d05fdbfc438c8d97afb02b342582cdc703b1cc890da3e54 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.weight.bin new file mode 100644 index 0000000..1582b24 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217aab7e0e0a9963f2be641b59aba8ed1cfff8b3074acd59866a74b524e8c474 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..43033bc --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ea893467874708c4db13b747b0befe3a8487240112170422b92e9ac5f35286 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..bfa9cc4 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94305ee6204bdb33fb75857cb8709068f30b4d2ca9be1265e44fb89f723dc996 +size 9437184 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..a793d1b --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac52c892d6832a7181ea2dc21dbf97140619adb9e6277b6b1a04d092ac89828 +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..9506cfa --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997863f2b1e615af8539a8fe3896e223502190fb6e09c0f4922f4d7b67f3cda5 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..dea85ee --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37df9fe8f4e9ef808d3774dbef3296633ac1da50bb0a4c57cf5c6f49417471ea +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..c0fbfea --- /dev/null +++ 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.5.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9b36d8f6ef212096e1330c60492b2e3dc1b2035b77d5668a78ca918de04cae +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.bias.bin new file mode 100644 index 0000000..4f8a16f --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:413987eda9e1774ea4c0810e25b6e0e9bc8758da0b13eb793af8abe7b89ba546 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.weight.0.bin new file mode 100644 index 0000000..f168525 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ac648b97a81d2c79672c594712914620f1aaf261cf0674bcd9b348eb3c1ae2 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..5e7ac28 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:795ca8593dd5eb754ab013968dcd776ed8c7d08ef6bacdcb46233f372bf0507b +size 9216 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..328095f --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c14c338a109edf5e82f6cfaa1ec2aa63f2ced4cf4b7ae6634935de5eb71401 +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.bias.bin new file mode 100644 index 0000000..16b5b4e --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5e918f85890dc1aaece6adca71db03336418680dfa63112df81062629b6d8c9 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.weight.bin new file mode 100644 index 0000000..f169cc4 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18eb5b74a7fb03b4ab6f39b1476178b9b94821f48a4c8b70e27378b04fcfa070 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..b475bab --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.bias.bin @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c65960adf2cd5c52ed1dac3db4bb268ef6c878de5ec1d1ae703d3875cec068 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..bbde47a --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e73200851b95cc9e27b2a396cda6cb0914c1b2304e31eafeaa269b52e479f9 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..46f4bab --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b2098eb0598a051ef9b34ae350b01a38199b056bf393eb3a0c29a37042fddb +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..074da08 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef34409817da80e18f1cca62673701f24e1a66784bdaf8b5411f265aaf39d5e +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.bias.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..8e89dbe --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb80a857a8179e66a996502299a55b51ecfde543e89b8da462560fe4b8c392b2 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..f7266b6 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.6.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de8d95e7ef46488423d8e407f78a75eba1af0e8e4b4016b8da69df0da4303fac +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.bias.bin new file mode 100644 index 0000000..d4b0241 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf99b215a519cb481254b153412b7fc67c6d785886bb7bd3afc930a4835c5c63 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.weight.0.bin new file mode 100644 index 0000000..3e5b52c --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bc06b2d29b3faa8cb0ff66fcf473a3b30c59d4945de4b8aa475809e30bf37f35 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..727026e --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:446dde58856317e610b65609022fe762762f48033258931e7ee84b507322a221 +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..bc65c79 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf15e97e7ce6ff48bc6f6121c14e8ed7d419e4bc228893402e9ae248c24625bc +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.bias.bin new file mode 100644 index 0000000..4702e04 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a60f3596d7783b279b94a53b061804e063500b617b7b7000dd19d1c0a770e27e +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.weight.bin new file mode 100644 index 0000000..bbbfbdc 
--- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ff9a1c95b86f744de8d03d3a85f9a6a7c3f9d9f76571aeaef1d6d5625bc7ce +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..f272377 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d30ec692831f0e1e47328ac53cb7d2d96997730be97b2e68ead09c056782da8 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..0f7e73a --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dfd974035000c2465cd5e0802e6cece4472dba738af717ca118b77881fc267 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..28bb0d2 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a253cf4b1169c83d08ebce2556a25e4d8c1f1807d0b0f1ee2d39718921eaaa +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.weight.0.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..18d3337 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9ee3287a10fc140fa1e8cd1be9202518078d74e9d09de7a49c2df899516acc +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..178e075 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a83cd1b65b0e965211e8dc5601256fd2957cdf6e6736f23b6cc96b60a4789f89 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..373e1f8 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.7.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b17f348c41fdbf47606dcf58c837bc6cd0ec3aababc033f57611ad91c69c994 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.bias.bin new file mode 100644 index 0000000..9076e72 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:cce237dbeed06f08e365b0cfd3a6f728be563ef0087e0cd8ccf33f268aa7a8d1 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.weight.0.bin new file mode 100644 index 0000000..541019f --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f444f7e01873011cfd1ae14227da6fb1c6887e74f66cef3ef3aa436a524171 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..ab088cb --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8873735c79a17779e8cd36673b42a39925e0cd7d97b50927f5a919324abee7 +size 9216 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..ad7a066 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2637ac9882d7a60516cd6f1c5d7064fa961ba7ecb6923e22b2584a13b50647c7 +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.bias.bin new file mode 100644 index 
0000000..2e5c5d6 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d895d096ee63e0b4bf729d8df15e860c628070e9dda827bb4f9875358adc86 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.weight.bin new file mode 100644 index 0000000..6daa6bd --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e556fc076c508eca6375581e99ae52cb61f149d27150156fe73d29b13c0bd149 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..7118f9d --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b984f38ed58a14453a4521dc9bd925d0f14ca80998acd258d1e4650dddf67b +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..60d7469 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce31e6019f9015b1c4fb698dfc066d6b730456b3fc9871c5c60ed8310ed62fd +size 9437184 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..5f29630 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067ae798cdba7dc16c8640c8043754745d3e27aecdc2a5b8c373a8c0f1a5c609 +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..1e4703c --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb45659e522a32451a7509f231585530f26973ba795cb3a608a612c94febde1a +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..8304590 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673c0b5ff96f02f62abe1930acb3d3277849b628a3c72314185c9a968dacd721 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..5b84adb --- /dev/null +++ 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.8.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f6509db93c4bb53d4be022ea3ce066b71fc9aa05297709c6c38b57f1dc4f6b +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.bias.bin new file mode 100644 index 0000000..df5f5ea --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ac4f60ccf50fa0f7e026284aa873e59b9aaec9e4a697f3081b5d3ebac4047d +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.weight.0.bin new file mode 100644 index 0000000..5209880 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.dense.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a41cb1149d3247d25c6d04ce66864a481ee4478585e9e4bc6079a410e4ff7262 +size 2359296 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.bias.0.bin new file mode 100644 index 0000000..31d1158 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:828dc51bbbeac3525163c1967815e50d490951e533fc6f6f8e7e5ff0d87187eb +size 9216 diff --git 
a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.weight.0.bin new file mode 100644 index 0000000..70e71d7 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.attention.query_key_value.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f2a5750ae7221668ff2694d6c328a0edbd3bf617a28be56f849ad21f24d4b5 +size 7077888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.bias.bin new file mode 100644 index 0000000..2f5a3e1 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3127f553f223b43f566b77fea92c44e8bc8279550113b8c5d650db2c2c354b +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.weight.bin new file mode 100644 index 0000000..22a83a9 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.input_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c11c738308e73c57912f047bd77b0e6d1536b8b05148837c783fd89dc87079 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.bias.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.bias.bin new file mode 100644 index 0000000..f837131 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.bias.bin @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf06c186b8b5b4c234ae4935047ad59bdc3c4a7255d2b9aa8f54f2f842f8128 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.weight.0.bin new file mode 100644 index 0000000..272f6a5 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_4h_to_h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86e15305ceac9db5846abcec4776105917a0bb600676954f0f6255e4a7658a66 +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.bias.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.bias.0.bin new file mode 100644 index 0000000..0b3658f --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.bias.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea2b9650a9d96c201a6def28acbfc9853fb3240bb73d9cd63697e563f9e56ea +size 12288 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.weight.0.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.weight.0.bin new file mode 100644 index 0000000..3ef44cb --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.mlp.dense_h_to_4h.weight.0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3fbe719c5b73808761e5c181264eedd91a02ed6002d7db5744e73d0df7fbfa +size 9437184 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.bias.bin 
b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.bias.bin new file mode 100644 index 0000000..f455f02 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.bias.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ced59efa97925981b976a6aab664f26915307a0582d01466e71097590d85f10 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.weight.bin new file mode 100644 index 0000000..4244ecb --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.layers.9.post_attention_layernorm.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea4fb1e87a8188caf70323627bfe1b3259340211c187a38bd722bc602335ed66 +size 3072 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.lm_head.weight.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.lm_head.weight.bin new file mode 100644 index 0000000..bcb6818 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.lm_head.weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261562ff303321c74453e564203d5bdaec4fbc82f54501f2bd404625a3ee7dd9 +size 154533888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.wte.bin b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.wte.bin new file mode 100644 index 0000000..b40f2e6 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/1/1-gpu/model.wte.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad91439286d48b9a3c9e08f1d054c847271e00baec02e9f1dab30b2f48528e6 +size 154533888 diff --git a/converter/tests/gptneox/testdata/fastertransformer/config.pbtxt 
b/converter/tests/gptneox/testdata/fastertransformer/config.pbtxt new file mode 100644 index 0000000..2099c45 --- /dev/null +++ b/converter/tests/gptneox/testdata/fastertransformer/config.pbtxt @@ -0,0 +1,208 @@ +name: "fastertransformer" +backend: "fastertransformer" +default_model_filename: "gptneox" +max_batch_size: 1024 + +model_transaction_policy { + decoupled: False +} + +input [ + { + name: "input_ids" + data_type: TYPE_UINT32 + dims: [ -1 ] + }, + { + name: "start_id" + data_type: TYPE_UINT32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "end_id" + data_type: TYPE_UINT32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "input_lengths" + data_type: TYPE_UINT32 + dims: [ 1 ] + reshape: { shape: [ ] } + }, + { + name: "request_output_len" + data_type: TYPE_UINT32 + dims: [ -1 ] + }, + { + name: "runtime_top_k" + data_type: TYPE_UINT32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "runtime_top_p" + data_type: TYPE_FP32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "beam_search_diversity_rate" + data_type: TYPE_FP32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "temperature" + data_type: TYPE_FP32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "len_penalty" + data_type: TYPE_FP32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "repetition_penalty" + data_type: TYPE_FP32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "random_seed" + data_type: TYPE_UINT64 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "is_return_log_probs" + data_type: TYPE_BOOL + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "beam_width" + data_type: TYPE_UINT32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "bad_words_list" + data_type: TYPE_INT32 + dims: [ 2, -1 ] + optional: true + }, + { + name: 
"stop_words_list" + data_type: TYPE_INT32 + dims: [ 2, -1 ] + optional: true + }, + { + name: "prompt_learning_task_name_ids" + data_type: TYPE_UINT32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "top_p_decay" + data_type: TYPE_FP32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "top_p_min" + data_type: TYPE_FP32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + }, + { + name: "top_p_reset_ids" + data_type: TYPE_UINT32 + dims: [ 1 ] + reshape: { shape: [ ] } + optional: true + } +] +output [ + { + name: "output_ids" + data_type: TYPE_UINT32 + dims: [ -1, -1 ] + }, + { + name: "sequence_length" + data_type: TYPE_UINT32 + dims: [ -1 ] + }, + { + name: "cum_log_probs" + data_type: TYPE_FP32 + dims: [ -1 ] + }, + { + name: "output_log_probs" + data_type: TYPE_FP32 + dims: [ -1, -1 ] + } +] +instance_group [ + { + count: 1 + kind: KIND_CPU + } +] +parameters { + key: "tensor_para_size" + value: { + string_value: "1" + } +} +parameters { + key: "pipeline_para_size" + value: { + string_value: "1" + } +} +parameters { + key: "data_type" + value: { + string_value: "fp32" + } +} +parameters { + key: "model_type" + value: { + string_value: "GPT-NeoX" + } +} +parameters { + key: "model_checkpoint_path" + value: { + string_value: "/model/fastertransformer/1/1-gpu" + } +} +parameters { + key: "enable_custom_all_reduce" + value: { + string_value: "0" + } +}