From 0f72788d82dcd5694dbb532155e2d5285008e037 Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Mon, 12 Jun 2023 16:14:12 -0700
Subject: [PATCH] refactor: remove unused code

---
 tabby/tools/analytic/main.sh                  |  74 -----
 tabby/tools/build_dataset/__main__.py         | 126 ---------
 tabby/tools/build_dataset/args.py             |  14 -
 tabby/tools/build_dataset/metrics.py          |  27 --
 ...gramming-languages-to-file-extensions.json |  28 --
 .../converter/huggingface_gptj_convert.py     | 181 -------------
 .../converter/huggingface_gptneox_convert.py  | 256 ------------------
 tabby/tools/download_models.py                |  47 ----
 tabby/tools/repository/Makefile               |  11 -
 tabby/tools/repository/testdata/config.toml   |   4 -
 .../testdata/repositories/.gitignore          |   1 -
 .../repository/testdata/repositories/.gitkeep |   0
 tabby/tools/repository/updater.py             |  43 ---
 13 files changed, 812 deletions(-)
 delete mode 100755 tabby/tools/analytic/main.sh
 delete mode 100644 tabby/tools/build_dataset/__main__.py
 delete mode 100644 tabby/tools/build_dataset/args.py
 delete mode 100644 tabby/tools/build_dataset/metrics.py
 delete mode 100644 tabby/tools/build_dataset/programming-languages-to-file-extensions.json
 delete mode 100644 tabby/tools/converter/huggingface_gptj_convert.py
 delete mode 100644 tabby/tools/converter/huggingface_gptneox_convert.py
 delete mode 100644 tabby/tools/download_models.py
 delete mode 100644 tabby/tools/repository/Makefile
 delete mode 100644 tabby/tools/repository/testdata/config.toml
 delete mode 100644 tabby/tools/repository/testdata/repositories/.gitignore
 delete mode 100644 tabby/tools/repository/testdata/repositories/.gitkeep
 delete mode 100644 tabby/tools/repository/updater.py

diff --git a/tabby/tools/analytic/main.sh b/tabby/tools/analytic/main.sh
deleted file mode 100755
index bb6bdd1..0000000
--- a/tabby/tools/analytic/main.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-set -e
-
-DB_FILE=${DB_FILE:-"/data/logs/duckdb/duck.db"}
-LOGS_DIR=${LOGS_DIR:-"/data/logs"}
-TABBY_SERVER_LOGS="${LOGS_DIR}/events/tabby-server/*.json"
-
-# Init schema
-function init_scheme() {
-mkdir -p $(dirname $DB_FILE)
-cat << EOF | duckdb
-CREATE TABLE IF NOT EXISTS completion_events (...);
-EOF
-}
-
-function completion_events() {
-if ls ${TABBY_SERVER_LOGS} &> /dev/null; then
-
-cat << EOF | duckdb
-CREATE TEMP TABLE t AS
-SELECT lhs.*, rhs.view, rhs.select
-FROM events lhs
-LEFT JOIN (
-  SELECT
-    completion_id,
-    (SUM(IF(type == 'view', 1, 0)) > 0) AS view,
-    (SUM(IF(type == 'select', 1, 0)) > 0) AS select
-  FROM events
-  WHERE completion_id IS NOT NULL
-  GROUP BY 1
-) rhs ON (lhs.id = rhs.completion_id);
-
-INSERT INTO completion_events SELECT t.* FROM t LEFT JOIN completion_events rhs ON (t.id = rhs.id) WHERE rhs.id IS NULL;
-EOF
-
-else
-  echo "No files match ${TABBY_SERVER_LOGS}"
-fi
-}
-
-function duckdb() {
-  local SQL=$(tee)
-  cat << EOF | python3 -
-import sys
-import duckdb
-conn = duckdb.connect('$DB_FILE')
-print(conn.sql("""
-$SQL
-"""))
-EOF
-}
-
-init_scheme
-"$@"
diff --git a/tabby/tools/build_dataset/__main__.py b/tabby/tools/build_dataset/__main__.py
deleted file mode 100644
index e76cf5f..0000000
--- a/tabby/tools/build_dataset/__main__.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import base64
-import glob
-import json
-import os
-
-import pandas as pd
-from datasets import Dataset
-from transformers import HfArgumentParser
-
-from . 
import metrics -from .args import PreprocessProjectArgs - - -def parse_args(): - parser = HfArgumentParser(PreprocessProjectArgs) - return parser.parse_args() - - -def read_languages_to_file_extensions(): - path = os.path.abspath(os.path.dirname(__file__)) - path = os.path.join(path, "programming-languages-to-file-extensions.json") - with open(path) as f: - return json.load(f) - - -def read_valid_extensions(): - content = read_languages_to_file_extensions() - extensions = [] - for k, exts in content.items(): - extensions += exts - return set(extensions) - - -def read_extension_to_language_mappings(): - content = read_languages_to_file_extensions() - mappings = dict() - for k, exts in content.items(): - for x in exts: - mappings[x] = k - return mappings - - -def dataset_iter(project_dir, files): - def gen(): - mappings = read_extension_to_language_mappings() - for x in files: - _, extname = os.path.splitext(x) - - with open(x) as f: - try: - content = f.read() - except UnicodeDecodeError: - print("Cannot decode unicode", x) - continue - - segments = x.removeprefix(project_dir).split(os.sep) - project = segments[1] - file = os.path.join(*segments[2:]) - yield dict( - id=to_id(project, file), - project=project, - file=file, - language=mappings[extname], - content=content, - **metrics.compute(content), - ) - - return gen - - -def count_by_language(dataset): - key = "language" - df = ( - pd.DataFrame(dataset[key], columns=[key]) - .groupby([key]) - .size() - .to_frame("count") - ) - return df - - -def to_id(*args): - token = ":".join(args) - return base64.urlsafe_b64encode(token.encode("utf-8")).decode("utf-8").rstrip("=") - - -def basic_filters(line_max=100, line_mean=100, alpha_frac=0.25): - def fn(example): - """Filter files based on line length and % alphanumeric characters""" - if example["max_line_length"] > line_max: - return False - elif example["avg_line_length"] > line_mean: - return False - elif example["alphanum_fraction"] < alpha_frac: - return False - return True - - return fn - - -if __name__ == "__main__": - valid_extensions = read_valid_extensions() - - def is_valid_file(x): - if not os.path.isfile(x): - return False - - _, extname = os.path.splitext(x) - if not extname in valid_extensions: - return False - - return True - - args = parse_args() - files = list( - filter(is_valid_file, glob.glob(args.project_dir + "/**/*", recursive=True)) - ) - - ds = Dataset.from_generator(dataset_iter(os.path.abspath(args.project_dir), files)) - ds = ds.filter(basic_filters()) - ds.save_to_disk(args.output_dir) - ds.to_json(os.path.join(args.output_dir, "dumps.json")) - - print("\n## Summary") - print("Number of source files", len(ds)) - print("Number of source files by languages", count_by_language(ds).to_json()) diff --git a/tabby/tools/build_dataset/args.py b/tabby/tools/build_dataset/args.py deleted file mode 100644 index 99c4af3..0000000 --- a/tabby/tools/build_dataset/args.py +++ /dev/null @@ -1,14 +0,0 @@ -from dataclasses import dataclass, field -from typing import Optional - - -@dataclass -class PreprocessProjectArgs: - # add arguments in the following format - project_dir: Optional[str] = field( - metadata={"help": "Project directory."}, - ) - - output_dir: Optional[str] = field( - metadata={"help": "Output save path directory."}, - ) diff --git a/tabby/tools/build_dataset/metrics.py b/tabby/tools/build_dataset/metrics.py deleted file mode 100644 index 8fd82ab..0000000 --- a/tabby/tools/build_dataset/metrics.py +++ /dev/null @@ -1,27 +0,0 @@ -def max_line_length(content): - return 
max([0] + [len(x) for x in content.splitlines()]) - - -def avg_line_length(content): - lines = [len(x) for x in content.splitlines()] - total = sum(lines) - if len(lines) != 0: - return total / len(lines) - else: - return 0 - - -def alphanum_fraction(content): - alphanum = [x for x in content if x.isalpha() or x.isnumeric()] - if len(content) != 0: - return len(alphanum) / len(content) - else: - return 0 - - -def compute(content): - return dict( - max_line_length=max_line_length(content), - avg_line_length=avg_line_length(content), - alphanum_fraction=alphanum_fraction(content), - ) diff --git a/tabby/tools/build_dataset/programming-languages-to-file-extensions.json b/tabby/tools/build_dataset/programming-languages-to-file-extensions.json deleted file mode 100644 index 721615f..0000000 --- a/tabby/tools/build_dataset/programming-languages-to-file-extensions.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "c": [".c", ".h"], - "csharp": [".cs"], - "cpp": [".cpp", ".hpp", ".c++", ".h++", ".cc", ".hh", ".C", ".H"], - "css": [".css"], - "dockerfile": ["Dockerfile"], - "go": [".go"], - "haskell": [".hs"], - "html": [".html"], - "java": [".java"], - "javascript": [".js"], - "julia": [".jl"], - "lua": [".lua"], - "makefile": ["Makefile"], - "markdown": [".md", ".markdown"], - "php": [".php", ".php3", ".php4", ".php5", ".phps", ".phpt"], - "perl": [".pl", ".pm", ".pod", ".perl"], - "powershell": [".ps1", ".psd1", ".psm1"], - "python": [".py"], - "ruby": [".rb"], - "rust": [".rs"], - "sql": [".sql"], - "scala": [".scala"], - "shellscript": [".sh", ".bash", ".command", ".zsh"], - "typescript": [".ts", ".tsx"], - "tex": [".tex"], - "vb": [".vb"] -} diff --git a/tabby/tools/converter/huggingface_gptj_convert.py b/tabby/tools/converter/huggingface_gptj_convert.py deleted file mode 100644 index ddd568e..0000000 --- a/tabby/tools/converter/huggingface_gptj_convert.py +++ /dev/null @@ -1,181 +0,0 @@ -import configparser -from argparse import ArgumentParser -from os import makedirs -from pathlib import Path - -import numpy as np -import torch -from transformers import PretrainedConfig - -torch.set_printoptions(linewidth=130, sci_mode=False) -np.set_printoptions(linewidth=130, suppress=True) - -# This converter is used to convert the huggingface gpt-j-6B model -# in https://huggingface.co/EleutherAI/gpt-j-6B/blob/main/pytorch_model.bin. - - -def savebin(param, save_path): - if isinstance(param, torch.Tensor): - param = param.cpu().float().numpy() - np.squeeze(param).astype(np.float16).tofile(save_path + ".bin") - - -def param2file(pt_param, layer_id, save_dir, dest_key): - base_n = save_dir + "/model.layers." + str(layer_id) + "." - save_path = base_n + dest_key - savebin(pt_param, save_path) - - -def param2distributed( - pt_param, - layer_id, - save_dir, - dest_key, - n_inference_gpus, - split_axis, -): - np_param = pt_param.cpu().float().numpy() - base_n = save_dir + "/model.layers." + str(layer_id) + "." - save_path = base_n + dest_key - split_param = np.split(np_param, n_inference_gpus, axis=split_axis) - for i, p in enumerate(split_param): - savebin(p, save_path + f".{i}") - - -def save(w, save_dir, n_inference_gpus, n_layers, layer_id): - makedirs(save_dir, exist_ok=True) - - savebin(w["transformer.wte.weight"], save_dir + "/model.wte") - l = layer_id - print(f"Saving layer {l + 1} / {n_layers}") - base_k = "transformer.h." + str(l) + "." 
-    param2file(w[base_k + "ln_1.bias"], l, save_dir, "input_layernorm.bias")
-    param2file(w[base_k + "ln_1.weight"], l, save_dir, "input_layernorm.weight")
-    param2distributed(
-        w[base_k + "mlp.fc_in.weight"].T,
-        l,
-        save_dir,
-        "mlp.dense_h_to_4h.weight",
-        n_inference_gpus,
-        split_axis=-1,  # split fast index
-    )
-    param2distributed(
-        w[base_k + "mlp.fc_in.bias"],
-        l,
-        save_dir,
-        "mlp.dense_h_to_4h.bias",
-        n_inference_gpus,
-        split_axis=-1,  # split fast index
-    )
-
-    param2distributed(
-        w[base_k + "mlp.fc_out.weight"].T,
-        l,
-        save_dir,
-        "mlp.dense_4h_to_h.weight",
-        n_inference_gpus,
-        split_axis=0,  # split slow index
-    )
-    param2file(w[base_k + "mlp.fc_out.bias"], l, save_dir, "mlp.dense_4h_to_h.bias")
-    param2distributed(
-        w[base_k + "attn.out_proj.weight"].T,
-        l,
-        save_dir,
-        "attention.dense.weight",
-        n_inference_gpus,
-        split_axis=0,  # split slow index
-    )
-    QKV_w = torch.stack(
-        [
-            w[base_k + "attn.q_proj.weight"],
-            w[base_k + "attn.k_proj.weight"],
-            w[base_k + "attn.v_proj.weight"],
-        ]
-    )  # [qkv, n_heads * dim_head, latent_space]
-    QKV_w = QKV_w.permute(2, 0, 1)
-    param2distributed(
-        QKV_w,
-        l,
-        save_dir,
-        "attention.query_key_value.weight",
-        n_inference_gpus,
-        split_axis=-1,  # split fast index
-    )
-    # Other unneeded per-layer params:
-    # attn.attention.masked_bias = torch.tensor(-1e9)
-    # attn.attention.bias = torch.tril(torch.ones(1, 1, 2048, 2048))
-
-
-if __name__ == "__main__":
-    parser = ArgumentParser(
-        description="Convert GPT-J slim checkpoint to FasterTransformer",
-    )
-    parser.add_argument(
-        "--output-dir",
-        help="Folder where binary files are stored",
-    )
-    parser.add_argument(
-        "--ckpt-dir",
-        help="Folder of the GPT-J huggingface checkpoint",
-    )
-    parser.add_argument(
-        "--n-inference-gpus",
-        help="Number of GPUs used for inference runtime",
-        default=1,
-        type=int,
-    )
-    args = parser.parse_args()
-
-    ckpt_file = args.ckpt_dir + "/pytorch_model.bin"
-    checkpoint = torch.load(ckpt_file)
-    print(f"loading from {ckpt_file}")
-
-    out_path = args.output_dir
-    output_dir = out_path + f"/{args.n_inference_gpus}-gpu/"
-    print(f"saving to {output_dir}")
-
-    config_file = args.ckpt_dir + "/config.json"
-    hf_config = PretrainedConfig.from_json_file(config_file).to_dict()
-
-    # NOTE: save parameters to config files (loaded by triton backends)
-    config = configparser.ConfigParser()
-    config["gptj"] = {}
-    try:
-        config["gptj"]["model_name"] = (
-            "gptj" if hf_config["_name_or_path"] == "" else hf_config["_name_or_path"]
-        )
-        config["gptj"]["head_num"] = str(hf_config["n_head"])
-        n_embd = hf_config["n_embd"]
-        config["gptj"]["size_per_head"] = str(n_embd // hf_config["n_head"])
-        config["gptj"]["inter_size"] = str(n_embd * 4)
-        config["gptj"]["num_layer"] = str(hf_config["n_layer"])
-        rotary_dim = (
-            n_embd // hf_config["n_head"]
-            if hf_config["rotary_dim"] is None
-            else hf_config["rotary_dim"]
-        )
-        config["gptj"]["rotary_embedding"] = str(rotary_dim)
-        config["gptj"]["vocab_size"] = str(hf_config["vocab_size"])
-        config["gptj"]["start_id"] = str(hf_config["bos_token_id"])
-        config["gptj"]["end_id"] = str(hf_config["eos_token_id"])
-        config["gptj"]["weight_data_type"] = "fp16"
-        Path(output_dir).mkdir(exist_ok=True, parents=True)
-        with open(output_dir + "/config.ini", "w") as configfile:
-            config.write(configfile)
-    except Exception:
-        print("Failed to save the config in config.ini.")
-
-    n_layers = hf_config["n_layer"]
-    for i in range(n_layers):
-        save(checkpoint, output_dir, args.n_inference_gpus, n_layers, i)
-    savebin(
checkpoint["transformer.ln_f.weight"], - output_dir + "/model.final_layernorm.weight", - ) - savebin( - checkpoint["transformer.ln_f.bias"], output_dir + "/model.final_layernorm.bias" - ) - savebin(checkpoint["lm_head.weight"], output_dir + "/model.lm_head.weight") - savebin(checkpoint["lm_head.bias"], output_dir + "/model.lm_head.bias") - - print("done") diff --git a/tabby/tools/converter/huggingface_gptneox_convert.py b/tabby/tools/converter/huggingface_gptneox_convert.py deleted file mode 100644 index 636b669..0000000 --- a/tabby/tools/converter/huggingface_gptneox_convert.py +++ /dev/null @@ -1,256 +0,0 @@ -import argparse -import configparser -import multiprocessing -import os -from pathlib import Path - -import numpy as np -import torch -from transformers import GPTNeoXForCausalLM - - -def get_weight_data_type(data_type): - if data_type == "fp32": - return np.float32 - elif data_type == "fp16": - return np.float16 - else: - assert False, f"Invalid weight data type {data_type}" - - -def split_and_convert_process(saved_dir, factor, key, args, config, val): - - if ( - key.find("input_layernorm.weight") != -1 - or key.find("input_layernorm.bias") != -1 - or key.find("post_attention_layernorm.weight") != -1 - or key.find("post_attention_layernorm.bias") != -1 - or key.find("final_layernorm.weight") != -1 - or key.find("final_layernorm.bias") != -1 - ): - saved_path = saved_dir + f"/model.{key}.bin" - val.tofile(saved_path) - - elif ( - key.find("attention.dense.bias") != -1 - or key.find("mlp.dense_4h_to_h.bias") != -1 - ): - saved_path = saved_dir + f"/model.{key}.bin" - val = (val / factor) if factor > 1 else val - val.tofile(saved_path) - - else: - if ( - key.find("attention.dense.weight") != -1 - or key.find("mlp.dense_4h_to_h.weight") != -1 - ): - split_vals = np.split(val, factor, axis=0) - - elif ( - key.find("mlp.dense_h_to_4h.weight") != -1 - or key.find("mlp.dense_h_to_4h.bias") != -1 - ): - split_vals = np.split(val, factor, axis=-1) - - elif key.find("attention.query_key_value.bias") != -1: - local_dim = (int)(val.shape[-1] / 3) - n_head = config["num_attention_heads"] - - val = val.reshape(n_head, 3, local_dim // n_head) - val = np.transpose(val, [1, 0, 2]).reshape(3, local_dim) - split_vals = np.split(val, factor, axis=-1) - - elif key.find("attention.query_key_value.weight") != -1: - hidden_dim = val.shape[0] - local_dim = (int)(val.shape[-1] / 3) - n_head = config["num_attention_heads"] - # Note that the HF qkv weight are stored as [hidden_size, num_heads, 3, head_hidden] - # FT needs the shape of [hidden_size, 3, num_heads, head_hidden] - val = val.reshape(hidden_dim, n_head, 3, local_dim // n_head) - val = np.transpose(val, [0, 2, 1, 3]).reshape(hidden_dim, 3, local_dim) - - # print(np.mean(np.abs(val[:, 0, :]))) - split_vals = np.split(val, factor, axis=-1) - - else: - print("[ERROR] cannot find key '{}'".format(key)) - return - - for j in range(factor): - saved_path = saved_dir + f"/model.{key}.{j}.bin" - split_vals[j].tofile(saved_path) - - -def split_and_convert(args): - saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num - - if os.path.exists(saved_dir) == False: - os.makedirs(saved_dir) - - factor = args.infer_gpu_num - - # load position_embedding from rank 0 - # model = torch.load(ckpt_name) - model = GPTNeoXForCausalLM.from_pretrained(args.in_file) - hf_config = vars(model.config) - - np_weight_data_type = get_weight_data_type(args.weight_data_type) - - try: - model_name = args.model_name - n_heads = hf_config["num_attention_heads"] - head_size = 
hf_config["hidden_size"] // n_heads - rotary_dim = int(head_size * hf_config["rotary_pct"]) - use_gptj_residual = int(hf_config["use_parallel_residual"]) - - config = configparser.ConfigParser() - config["gptneox"] = {} - config["gptneox"]["model_name"] = model_name - config["gptneox"]["head_num"] = str(n_heads) - config["gptneox"]["size_per_head"] = str(head_size) - config["gptneox"]["inter_size"] = str(hf_config["intermediate_size"]) - config["gptneox"]["num_layer"] = str(hf_config["num_hidden_layers"]) - config["gptneox"]["rotary_embedding"] = str(rotary_dim) - config["gptneox"]["vocab_size"] = str(hf_config["vocab_size"]) - config["gptneox"]["start_id"] = str(hf_config["bos_token_id"]) - config["gptneox"]["end_id"] = str(hf_config["eos_token_id"]) - config["gptneox"]["use_gptj_residual"] = str(use_gptj_residual) - config["gptneox"]["weight_data_type"] = args.weight_data_type - - with open((Path(saved_dir) / f"config.ini").as_posix(), "w") as configfile: - config.write(configfile) - except Exception as e: - print(f"Fail to save the config in config.ini.", e) - - ft_model_name_pattern = [ - "input_layernorm.bias", - "input_layernorm.weight", - "attention.query_key_value.bias", - "attention.query_key_value.weight", - "attention.dense.bias", - "attention.dense.weight", - "post_attention_layernorm.bias", - "post_attention_layernorm.weight", - "mlp.dense_h_to_4h.bias", - "mlp.dense_h_to_4h.weight", - "mlp.dense_4h_to_h.bias", - "mlp.dense_4h_to_h.weight", - ] - - huggingface_model_file_list = [ - hf_file_name - for hf_file_name in os.listdir(args.in_file) - if hf_file_name.endswith(".bin") - ] - if len(huggingface_model_file_list) > 1: - multiprocessing_context = multiprocessing.get_context() - pool_fn = multiprocessing_context.Pool - else: - torch.multiprocessing.set_start_method("spawn") - pool_fn = multiprocessing.Pool - - pool = pool_fn(args.processes) - - for name, param in model.named_parameters(): - array = param.detach().cpu().numpy().astype(np_weight_data_type) - # print("input shape", name, array.shape) - if name.find("weight") == -1 and name.find("bias") == -1: - print("skipped", name) - continue - elif name == "gpt_neox.embed_in.weight": - array.tofile(saved_dir + "model.wte.bin") - elif name == "gpt_neox.final_layer_norm.bias": - array.tofile(saved_dir + "model.final_layernorm.bias.bin") - elif name == "gpt_neox.final_layer_norm.weight": - array.tofile(saved_dir + "model.final_layernorm.weight.bin") - elif name == "embed_out.weight": - array.tofile(saved_dir + "model.lm_head.weight.bin") - else: - processed = False - for i in range(len(ft_model_name_pattern)): - if name.find(ft_model_name_pattern[i]) != -1: - new_name = name.replace("gpt_neox.", "") - pool.starmap( - split_and_convert_process, - [ - ( - saved_dir, - factor, - new_name, - args, - vars(model.config), - array.T, - ) - ], - ) - processed = True - break - - if not processed: - print("Unused layer", name) - - pool.close() - pool.join() - - # Post-process biases if use_gptj_residual is True - if use_gptj_residual: - for layer_idx in range(hf_config["num_hidden_layers"]): - attn_bias = np.fromfile( - saved_dir + f"/model.layers.{layer_idx}.attention.dense.bias.bin", - dtype=np_weight_data_type, - ) - mlp_bias = np.fromfile( - saved_dir + f"/model.layers.{layer_idx}.mlp.dense_4h_to_h.bias.bin", - dtype=np_weight_data_type, - ) - - (attn_bias + mlp_bias).astype(np_weight_data_type).tofile( - saved_dir + f"/model.layers.{layer_idx}.mlp.attention.bias.sum.bin" - ) - - -if __name__ == "__main__": - parser = 
argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
-    parser.add_argument(
-        "-saved_dir", "-o", type=str, help="directory name of the output files", required=True
-    )
-    parser.add_argument(
-        "-in_file",
-        "-i",
-        type=str,
-        help="directory name of the input (huggingface) checkpoint",
-        required=True,
-    )
-    parser.add_argument(
-        "-infer_gpu_num",
-        "-i_g",
-        type=int,
-        help="How many gpus for inference",
-        required=True,
-    )
-    parser.add_argument(
-        "-processes",
-        "-p",
-        type=int,
-        help="How many processes to spawn for conversion (default: 4)",
-        default=4,
-    )
-    parser.add_argument(
-        "-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"]
-    )
-    parser.add_argument(
-        "-model_name", "-m_n", type=str, help="model name", required=True
-    )
-
-    args = parser.parse_args()
-    print("\n=============== Argument ===============")
-    for key in vars(args):
-        print("{}: {}".format(key, vars(args)[key]))
-    print("========================================")
-
-    target_dir_path = os.path.join(args.saved_dir, "%d-gpu" % args.infer_gpu_num)
-    assert not os.path.exists(target_dir_path), (
-        "target path already exists, please remove %s first." % target_dir_path
-    )
-
-    split_and_convert(args)
diff --git a/tabby/tools/download_models.py b/tabby/tools/download_models.py
deleted file mode 100644
index 26a065b..0000000
--- a/tabby/tools/download_models.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from dataclasses import dataclass, field
-
-from huggingface_hub import snapshot_download
-from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser
-
-
-@dataclass
-class Arguments:
-    repo_id: str = field(
-        metadata={"help": "Huggingface model repository id, e.g. TabbyML/NeoX-160M"}
-    )
-    prefer_local_files: bool = field(
-        metadata={
-            "help": "Whether to prefer loading local files (skip the remote version check if local files are valid)."
- }, - default=True, - ) - - -def parse_args(): - parser = HfArgumentParser(Arguments) - return parser.parse_args() - - -def preload(local_files_only=False): - AutoTokenizer.from_pretrained(args.repo_id, local_files_only=local_files_only) - AutoModelForCausalLM.from_pretrained( - args.repo_id, local_files_only=local_files_only - ) - snapshot_download( - repo_id=args.repo_id, - allow_patterns="triton/**/*", - local_files_only=local_files_only, - ) - - -if __name__ == "__main__": - args = parse_args() - print(f"Loading {args.repo_id}, this will take a while...") - try: - preload(local_files_only=args.prefer_local_files) - except Exception as e: - if "offline" in str(e) or "local_files_only" in str(e): - preload(local_files_only=False) - else: - raise e - print(f"Loaded {args.repo_id} !") diff --git a/tabby/tools/repository/Makefile b/tabby/tools/repository/Makefile deleted file mode 100644 index 94d910e..0000000 --- a/tabby/tools/repository/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -all: - -CMD := poetry run python updater.py --data_dir=./testdata/repositories --config_file=testdata/config.toml - -test: - # Cleanup - rm -rf testdata/repositories/quickjs - # Initialize - $(CMD) - # Updating - $(CMD) diff --git a/tabby/tools/repository/testdata/config.toml b/tabby/tools/repository/testdata/config.toml deleted file mode 100644 index dc681fe..0000000 --- a/tabby/tools/repository/testdata/config.toml +++ /dev/null @@ -1,4 +0,0 @@ -data_dir = "/repositories" - -[repositories.quickjs] -url = "https://gitee.com/vsf-linux/quickjs.git" diff --git a/tabby/tools/repository/testdata/repositories/.gitignore b/tabby/tools/repository/testdata/repositories/.gitignore deleted file mode 100644 index 0d17186..0000000 --- a/tabby/tools/repository/testdata/repositories/.gitignore +++ /dev/null @@ -1 +0,0 @@ -quickjs diff --git a/tabby/tools/repository/testdata/repositories/.gitkeep b/tabby/tools/repository/testdata/repositories/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tabby/tools/repository/updater.py b/tabby/tools/repository/updater.py deleted file mode 100644 index 6060500..0000000 --- a/tabby/tools/repository/updater.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -import pathlib -import shutil -from dataclasses import dataclass, field - -import toml -from git import Repo -from transformers import HfArgumentParser - - -@dataclass -class Arguments: - data_dir: str = field(metadata={"help": "Base dir for repositories"}) - config_file: str = field(metadata={"help": "Configuration file for tabby updater"}) - - -def parse_args(): - parser = HfArgumentParser(Arguments) - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - config = toml.load(args.config_file) - repositories = config["projects"] - - for x in pathlib.Path(args.data_dir).glob("*"): - if x.is_dir() and x.name not in repositories: - print("Remove unused dir:", x) - shutil.rmtree(str(x)) - elif x.is_file(): - print("Remove unused file:", x) - x.unlink() - - for name, config in repositories.items(): - path = pathlib.Path(args.data_dir, name) - if path.is_dir(): - repo = Repo(path) - else: - Repo.clone_from(config["git_url"], path.absolute(), depth=1) - - os.system(f"gitup {args.data_dir}")