Add gptj converter (#19)

* Rename deployment-next to development
* Add GPTJ converter

parent d796476013
commit 9d92821cf5

@@ -1,7 +1,3 @@
-data
-deployment
-deployment-next
-
 **/.git
 **/node_modules
 **/__pycache__

@@ -28,4 +28,4 @@ RUN poetry export --without-hashes -o requirements.txt
 RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple --no-dependencies -r requirements.txt
 
-COPY . .
+COPY tabby ./tabby

@@ -1,6 +1,9 @@
 #!/bin/bash
 set -e
 
+if [ -d "$MODEL_NAME" ]; then
+  MODEL_DIR="$MODEL_NAME"
+else
 # Get model dir.
 MODEL_DIR=$(python3 <<EOF
 from huggingface_hub import snapshot_download

@@ -8,6 +11,7 @@ from huggingface_hub import snapshot_download
 print(snapshot_download(repo_id='$MODEL_NAME', allow_patterns='triton/**/*', local_files_only=True))
 EOF
 )
+fi
 
 # Set model dir in triton config.
 sed -i 's@${MODEL_DIR}@'$MODEL_DIR'@g' $MODEL_DIR/triton/fastertransformer/config.pbtxt
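
The sed call above rewrites the Triton config in place, swapping every literal ${MODEL_DIR} placeholder for the resolved path (the @ delimiter keeps the slashes in $MODEL_DIR from clashing with sed's usual / separator). A minimal Python sketch of the same substitution, with a hypothetical model path:

# Sketch only: mirrors the sed substitution above; the path is hypothetical.
from pathlib import Path

model_dir = "/data/models/gpt-j-6B"
cfg = Path(model_dir) / "triton/fastertransformer/config.pbtxt"
cfg.write_text(cfg.read_text().replace("${MODEL_DIR}", model_dir))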

@@ -0,0 +1,181 @@
import configparser
from argparse import ArgumentParser
from os import makedirs
from pathlib import Path

import numpy as np
import torch
from transformers import PretrainedConfig

torch.set_printoptions(linewidth=130, sci_mode=False)
np.set_printoptions(linewidth=130, suppress=True)

# This converter converts the huggingface gpt-j-6B checkpoint at
# https://huggingface.co/EleutherAI/gpt-j-6B/blob/main/pytorch_model.bin
# to the binary format expected by FasterTransformer.

def savebin(param, save_path):
    # Squeeze singleton dims, cast to fp16, and dump raw bytes to "<save_path>.bin".
    if isinstance(param, torch.Tensor):
        param = param.cpu().float().numpy()
    np.squeeze(param).astype(np.float16).tofile(save_path + ".bin")


def param2file(pt_param, layer_id, save_dir, dest_key):
    # Save a per-layer parameter that is replicated on every GPU.
    base_n = save_dir + "/model.layers." + str(layer_id) + "."
    save_path = base_n + dest_key
    savebin(pt_param, save_path)


def param2distributed(
    pt_param,
    layer_id,
    save_dir,
    dest_key,
    n_inference_gpus,
    split_axis,
):
    # Save a per-layer parameter that is sharded for tensor parallelism:
    # split into n_inference_gpus equal slices along split_axis and write
    # one ".{rank}" file per slice.
    np_param = pt_param.cpu().float().numpy()
    base_n = save_dir + "/model.layers." + str(layer_id) + "."
    save_path = base_n + dest_key
    split_param = np.split(np_param, n_inference_gpus, axis=split_axis)
    for i, p in enumerate(split_param):
        savebin(p, save_path + f".{i}")
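
param2distributed implements plain tensor parallelism: each sharded weight is cut into n_inference_gpus equal slices along one axis, and the ".{i}" file is presumably loaded by GPU rank i at runtime. A toy sketch with made-up sizes:

# Sketch only (hypothetical shapes): a [4096, 16384] fc_in-style weight split
# across 2 GPUs on the fast (last) axis yields two [4096, 8192] shards.
import numpy as np

w = np.zeros((4096, 16384), dtype=np.float32)
shards = np.split(w, 2, axis=-1)
assert all(s.shape == (4096, 8192) for s in shards)
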
def save(w, save_dir, n_inference_gpus, n_layers, layer_id):
    makedirs(save_dir, exist_ok=True)

    # Not layer-specific; rewritten with identical contents on every call,
    # which is redundant but harmless.
    savebin(w["transformer.wte.weight"], save_dir + "/model.wte")
    l = layer_id
    print(f"Saving layer {l + 1} / {n_layers}")
    base_k = "transformer.h." + str(l) + "."
    param2file(w[base_k + "ln_1.bias"], l, save_dir, "input_layernorm.bias")
    param2file(w[base_k + "ln_1.weight"], l, save_dir, "input_layernorm.weight")
    param2distributed(
        w[base_k + "mlp.fc_in.weight"].T,
        l,
        save_dir,
        "mlp.dense_h_to_4h.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast index
    )
    param2distributed(
        w[base_k + "mlp.fc_in.bias"],
        l,
        save_dir,
        "mlp.dense_h_to_4h.bias",
        n_inference_gpus,
        split_axis=-1,  # split fast index
    )

    param2distributed(
        w[base_k + "mlp.fc_out.weight"].T,
        l,
        save_dir,
        "mlp.dense_4h_to_h.weight",
        n_inference_gpus,
        split_axis=0,  # split slow index
    )
    param2file(w[base_k + "mlp.fc_out.bias"], l, save_dir, "mlp.dense_4h_to_h.bias")
    param2distributed(
        w[base_k + "attn.out_proj.weight"].T,
        l,
        save_dir,
        "attention.dense.weight",
        n_inference_gpus,
        split_axis=0,  # split slow index
    )
    QKV_w = torch.stack(
        [
            w[base_k + "attn.q_proj.weight"],
            w[base_k + "attn.k_proj.weight"],
            w[base_k + "attn.v_proj.weight"],
        ]
    )  # [qkv, n_heads * dim_head, latent_space]
    QKV_w = QKV_w.permute(2, 0, 1)  # [latent_space, qkv, n_heads * dim_head]
    param2distributed(
        QKV_w,
        l,
        save_dir,
        "attention.query_key_value.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast index
    )
    # Other unneeded per-layer params:
    # attn.attention.masked_bias = torch.tensor(-1e9)
    # attn.attention.bias = torch.tril(torch.ones(1, 1, 2048, 2048))
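
The QKV handling is the one non-obvious reshape above: stacking gives [qkv, n_heads * dim_head, latent_space], and permute(2, 0, 1) moves the input (latent) dimension first, so the fast-axis split in param2distributed shards the per-head output columns across GPUs. A toy walk-through with a hypothetical n_embd of 8:

# Sketch only: GPT-J's q/k/v projections are square [n_embd, n_embd] weights.
import torch

n_embd = 8
q = k = v = torch.zeros(n_embd, n_embd)
qkv = torch.stack([q, k, v])  # [3, 8, 8] = [qkv, heads * dim_head, latent]
qkv = qkv.permute(2, 0, 1)    # [8, 3, 8] = [latent, qkv, heads * dim_head]
assert qkv.shape == (n_embd, 3, n_embd)
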
if __name__ == "__main__":
    parser = ArgumentParser(
        description="Convert a GPT-J slim checkpoint to FasterTransformer",
    )
    parser.add_argument(
        "--output-dir",
        help="Folder where the binary files are stored",
    )
    parser.add_argument(
        "--ckpt-dir",
        help="Folder of the GPT-J huggingface checkpoint",
    )
    parser.add_argument(
        "--n-inference-gpus",
        help="Number of GPUs used at inference time",
        default=1,
        type=int,
    )
    args = parser.parse_args()

    ckpt_file = args.ckpt_dir + "/pytorch_model.bin"
    print(f"loading from {ckpt_file}")
    checkpoint = torch.load(ckpt_file)

    out_path = args.output_dir
    output_dir = out_path + f"/{args.n_inference_gpus}-gpu/"
    print(f"saving to {output_dir}")

    config_file = args.ckpt_dir + "/config.json"
    hf_config = PretrainedConfig.from_json_file(config_file).to_dict()

    # NOTE: save parameters to config.ini (loaded by the triton backends)
    config = configparser.ConfigParser()
    config["gptj"] = {}
    try:
        config["gptj"]["model_name"] = (
            "gptj" if hf_config["_name_or_path"] == "" else hf_config["_name_or_path"]
        )
        config["gptj"]["head_num"] = str(hf_config["n_head"])
        n_embd = hf_config["n_embd"]
        config["gptj"]["size_per_head"] = str(n_embd // hf_config["n_head"])
        config["gptj"]["inter_size"] = str(n_embd * 4)
        config["gptj"]["num_layer"] = str(hf_config["n_layer"])
        # Fall back to the per-head size when rotary_dim is unset.
        rotary_dim = (
            n_embd // hf_config["n_head"]
            if hf_config["rotary_dim"] is None
            else hf_config["rotary_dim"]
        )
        config["gptj"]["rotary_embedding"] = str(rotary_dim)
        config["gptj"]["vocab_size"] = str(hf_config["vocab_size"])
        config["gptj"]["start_id"] = str(hf_config["bos_token_id"])
        config["gptj"]["end_id"] = str(hf_config["eos_token_id"])
        config["gptj"]["weight_data_type"] = "fp16"
        Path(output_dir).mkdir(exist_ok=True, parents=True)
        with open(output_dir + "/config.ini", "w") as configfile:
            config.write(configfile)
    except Exception as e:
        print(f"Failed to save the config in config.ini: {e}")

    n_layers = hf_config["n_layer"]
    for i in range(n_layers):
        save(checkpoint, output_dir, args.n_inference_gpus, n_layers, i)
    savebin(
        checkpoint["transformer.ln_f.weight"],
        output_dir + "/model.final_layernorm.weight",
    )
    savebin(
        checkpoint["transformer.ln_f.bias"], output_dir + "/model.final_layernorm.bias"
    )
    savebin(checkpoint["lm_head.weight"], output_dir + "/model.lm_head.weight")
    savebin(checkpoint["lm_head.bias"], output_dir + "/model.lm_head.bias")

    print("done")