refactor: remove unused code
parent
a8a145d9b3
commit
0f72788d82
|
|
@ -1,74 +0,0 @@
|
|||
#!/bin/bash
#
# Aggregate Tabby server completion events (JSON logs) into a DuckDB database.
# Invoked as: this_script <function>, e.g.
#   ./collect.sh collect_tabby_server_logs
#
# Environment overrides:
#   DB_FILE  - DuckDB database file (default: /data/logs/duckdb/duck.db)
#   LOGS_DIR - root log directory    (default: /data/logs)
set -e

DB_FILE=${DB_FILE:-"/data/logs/duckdb/duck.db"}
LOGS_DIR=${LOGS_DIR:-"/data/logs"}
TABBY_SERVER_LOGS="${LOGS_DIR}/events/tabby-server/*.json"

# Init schema
function init_scheme() {
    # Quote both expansions so a DB path containing whitespace still works
    # (was: mkdir -p $(dirname $DB_FILE), which word-splits the result).
    mkdir -p "$(dirname "$DB_FILE")"
    cat <<EOF | duckdb
CREATE TABLE IF NOT EXISTS completion_events (
id STRING,
created uint64,
prompt STRING,
choices STRUCT(index UINT64, text STRING)[],
view BOOLEAN,
"select" BOOLEAN
);
CREATE UNIQUE INDEX IF NOT EXISTS completion_events_id ON completion_events (id);
EOF
}

# Update table
function collect_tabby_server_logs() {
    # Executing collect job only when files exists.
    if compgen -G "${TABBY_SERVER_LOGS}" > /dev/null; then

        cat <<EOF | duckdb
CREATE TEMP TABLE events AS
SELECT data.* FROM '${TABBY_SERVER_LOGS}';

CREATE TEMP TABLE t AS
SELECT id, created, prompt, choices, IFNULL(rhs.view, false) AS view, IFNULL(rhs.select, false) AS select
FROM
(
SELECT
id,
FIRST(created) AS created,
FIRST(prompt) AS prompt,
FIRST(choices) AS choices
FROM events WHERE id IS NOT NULL GROUP BY 1) lhs
LEFT JOIN (
SELECT
completion_id,
(SUM(IF(type == 'view', 1, 0)) > 0) AS view,
(SUM(IF(type == 'select', 1, 0)) > 0) AS select
FROM events
WHERE completion_id IS NOT NULL
GROUP BY 1
) rhs ON (lhs.id = rhs.completion_id);

INSERT INTO completion_events SELECT t.* FROM t LEFT JOIN completion_events rhs ON (t.id = rhs.id) WHERE rhs.id IS NULL;
EOF

    else
        echo "No files match ${TABBY_SERVER_LOGS}"
    fi
}

# Shell wrapper named `duckdb`: reads SQL from stdin and runs it against
# $DB_FILE via the Python duckdb package. NOTE: this function shadows any
# `duckdb` binary on PATH for the heredocs above (functions win over PATH).
function duckdb() {
    local SQL=$(tee)
    cat << EOF | python3 -
import sys
import duckdb
conn = duckdb.connect('$DB_FILE')
print(conn.sql("""
$SQL
"""))
EOF
}

init_scheme
# Dispatch: run the function named by the first CLI argument.
"$@"
|
||||
|
|
@ -1,126 +0,0 @@
|
|||
import base64
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
from datasets import Dataset
|
||||
from transformers import HfArgumentParser
|
||||
|
||||
from . import metrics
|
||||
from .args import PreprocessProjectArgs
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line into a PreprocessProjectArgs instance."""
    return HfArgumentParser(PreprocessProjectArgs).parse_args()
|
||||
|
||||
|
||||
def read_languages_to_file_extensions():
    """Load the language -> file extensions mapping shipped next to this module."""
    module_dir = os.path.abspath(os.path.dirname(__file__))
    mapping_path = os.path.join(
        module_dir, "programming-languages-to-file-extensions.json"
    )
    with open(mapping_path) as fp:
        return json.load(fp)
|
||||
|
||||
|
||||
def read_valid_extensions():
    """Return the set of all file extensions that map to a known language.

    Flattens every language's extension list into one set; the original
    built an intermediate list with an unused loop variable.
    """
    content = read_languages_to_file_extensions()
    return {ext for exts in content.values() for ext in exts}
|
||||
|
||||
|
||||
def read_extension_to_language_mappings():
    """Return a mapping from file extension (e.g. ".py") to language name.

    When an extension is listed under several languages, the last one
    encountered wins (same as the original loop's overwrite behavior).
    """
    content = read_languages_to_file_extensions()
    return {
        ext: language
        for language, exts in content.items()
        for ext in exts
    }
|
||||
|
||||
|
||||
def dataset_iter(project_dir, files):
    """Build a generator factory that yields one record per source file.

    Each record carries the project name, relative file path, language
    (looked up by file extension), raw content, and computed text metrics.
    Files that fail to decode as text are reported and skipped.
    """

    def gen():
        ext_to_language = read_extension_to_language_mappings()
        for path in files:
            _, extname = os.path.splitext(path)

            with open(path) as fp:
                try:
                    content = fp.read()
                except UnicodeDecodeError:
                    print("Cannot decode unicode", path)
                    continue

            # Path layout: <project_dir>/<project>/<relative file path...>
            parts = path.removeprefix(project_dir).split(os.sep)
            project = parts[1]
            file = os.path.join(*parts[2:])
            yield dict(
                id=to_id(project, file),
                project=project,
                file=file,
                language=ext_to_language[extname],
                content=content,
                **metrics.compute(content),
            )

    return gen
|
||||
|
||||
|
||||
def count_by_language(dataset):
    """Return a DataFrame indexed by language with a single "count" column."""
    key = "language"
    frame = pd.DataFrame(dataset[key], columns=[key])
    counts = frame.groupby([key]).size()
    return counts.to_frame("count")
|
||||
|
||||
|
||||
def to_id(*args):
    """Derive a deterministic, URL-safe id from the given string parts.

    Parts are joined with ":" and base64url-encoded; the "=" padding is
    stripped so the id is filesystem/URL friendly.
    """
    joined = ":".join(args)
    encoded = base64.urlsafe_b64encode(joined.encode("utf-8"))
    return encoded.decode("utf-8").rstrip("=")
|
||||
|
||||
|
||||
def basic_filters(line_max=100, line_mean=100, alpha_frac=0.25):
    """Build a predicate over metric dicts for dataset filtering.

    An example passes when its max line length and average line length do
    not exceed the limits and its alphanumeric fraction is high enough.
    """

    def fn(example):
        """Filter files based on line length and % alphanumeric characters"""
        return (
            example["max_line_length"] <= line_max
            and example["avg_line_length"] <= line_mean
            and example["alphanum_fraction"] >= alpha_frac
        )

    return fn
|
||||
|
||||
|
||||
if __name__ == "__main__":
    valid_extensions = read_valid_extensions()

    def is_valid_file(path):
        """Keep only regular files whose extension maps to a known language."""
        if not os.path.isfile(path):
            return False
        _, extname = os.path.splitext(path)
        return extname in valid_extensions

    args = parse_args()
    candidates = glob.glob(args.project_dir + "/**/*", recursive=True)
    files = [x for x in candidates if is_valid_file(x)]

    # Build, filter, and persist the dataset (both arrow and JSON dumps).
    ds = Dataset.from_generator(dataset_iter(os.path.abspath(args.project_dir), files))
    ds = ds.filter(basic_filters())
    ds.save_to_disk(args.output_dir)
    ds.to_json(os.path.join(args.output_dir, "dumps.json"))

    print("\n## Summary")
    print("Number of source files", len(ds))
    print("Number of source files by languages", count_by_language(ds).to_json())
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class PreprocessProjectArgs:
    """Command-line arguments for the project preprocessing step."""

    # add arguments in the following format
    # Directory containing the source project(s) to preprocess (required).
    project_dir: Optional[str] = field(
        metadata={"help": "Project directory."},
    )
    # Directory the processed dataset is written to (required).
    output_dir: Optional[str] = field(
        metadata={"help": "Output save path directory."},
    )
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
def max_line_length(content):
    """Return the length of the longest line in *content* (0 for empty text)."""
    longest = 0
    for line in content.splitlines():
        longest = max(longest, len(line))
    return longest
|
||||
|
||||
|
||||
def avg_line_length(content):
    """Return the mean line length of *content*, or 0 when there are no lines."""
    lengths = [len(line) for line in content.splitlines()]
    if not lengths:
        return 0
    return sum(lengths) / len(lengths)
|
||||
|
||||
|
||||
def alphanum_fraction(content):
    """Return the fraction of characters that are alphabetic or numeric.

    Empty content yields 0 rather than dividing by zero.
    """
    if not content:
        return 0
    matching = sum(1 for ch in content if ch.isalpha() or ch.isnumeric())
    return matching / len(content)
|
||||
|
||||
|
||||
def compute(content):
    """Compute all text-quality metrics for one file's content as a dict."""
    return {
        "max_line_length": max_line_length(content),
        "avg_line_length": avg_line_length(content),
        "alphanum_fraction": alphanum_fraction(content),
    }
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
{
|
||||
"c": [".c", ".h"],
|
||||
"csharp": [".cs"],
|
||||
"cpp": [".cpp", ".hpp", ".c++", ".h++", ".cc", ".hh", ".C", ".H"],
|
||||
"css": [".css"],
|
||||
"dockerfile": ["Dockerfile"],
|
||||
"go": [".go"],
|
||||
"haskell": [".hs"],
|
||||
"html": [".html"],
|
||||
"java": [".java"],
|
||||
"javascript": [".js"],
|
||||
"julia": [".jl"],
|
||||
"lua": [".lua"],
|
||||
"makefile": ["Makefile"],
|
||||
"markdown": [".md", ".markdown"],
|
||||
"php": [".php", ".php3", ".php4", ".php5", ".phps", ".phpt"],
|
||||
"perl": [".pl", ".pm", ".pod", ".perl"],
|
||||
"powershell": [".ps1", ".psd1", ".psm1"],
|
||||
"python": [".py"],
|
||||
"ruby": [".rb"],
|
||||
"rust": [".rs"],
|
||||
"sql": [".sql"],
|
||||
"scala": [".scala"],
|
||||
"shellscript": [".sh", ".bash", ".command", ".zsh"],
|
||||
"typescript": [".ts", ".tsx"],
|
||||
"tex": [".tex"],
|
||||
"vb": [".vb"]
|
||||
}
|
||||
|
|
@ -1,181 +0,0 @@
|
|||
import configparser
|
||||
from argparse import ArgumentParser
|
||||
from os import makedirs
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
torch.set_printoptions(linewidth=130, sci_mode=False)
|
||||
np.set_printoptions(linewidth=130, suppress=True)
|
||||
|
||||
# This converter is used to convert the huggingface gpt-j-6B model
|
||||
# in https://huggingface.co/EleutherAI/gpt-j-6B/blob/main/pytorch_model.bin.
|
||||
|
||||
|
||||
def savebin(param, save_path):
    """Write *param* to "<save_path>.bin" as a squeezed raw fp16 array.

    Accepts either a torch.Tensor (moved to CPU and converted) or a
    numpy array.
    """
    array = param
    if isinstance(array, torch.Tensor):
        array = array.cpu().float().numpy()
    out = np.squeeze(array).astype(np.float16)
    out.tofile(save_path + ".bin")
|
||||
|
||||
|
||||
def param2file(pt_param, layer_id, save_dir, dest_key):
    """Save one replicated (non-split) layer parameter under the FT naming scheme."""
    prefix = save_dir + "/model.layers." + str(layer_id) + "."
    savebin(pt_param, prefix + dest_key)
|
||||
|
||||
|
||||
def param2distributed(
    pt_param,
    layer_id,
    save_dir,
    dest_key,
    n_inference_gpus,
    split_axis,
):
    """Split a parameter along *split_axis* into one shard per inference GPU.

    Shard i is written as
    "<save_dir>/model.layers.<layer_id>.<dest_key>.<i>.bin".
    """
    np_param = pt_param.cpu().float().numpy()
    save_path = save_dir + "/model.layers." + str(layer_id) + "." + dest_key
    shards = np.split(np_param, n_inference_gpus, axis=split_axis)
    for rank, shard in enumerate(shards):
        savebin(shard, save_path + f".{rank}")
|
||||
|
||||
|
||||
def save(w, save_dir, n_inference_gpus, n_layers, layer_id):
    """Export one GPT-J transformer layer from state dict *w* to FT binaries.

    Replicated params go through param2file; tensor-parallel params are
    sharded across n_inference_gpus via param2distributed.
    """
    makedirs(save_dir, exist_ok=True)

    # NOTE(review): the token embedding is re-written on every per-layer
    # call; harmless but redundant — confirm before relying on it.
    savebin(w["transformer.wte.weight"], save_dir + "/model.wte")
    l = layer_id
    print(f"Saving layer {l + 1} / {n_layers}")
    base_k = "transformer.h." + str(l) + "."
    param2file(w[base_k + "ln_1.bias"], l, save_dir, "input_layernorm.bias")
    param2file(w[base_k + "ln_1.weight"], l, save_dir, "input_layernorm.weight")
    # MLP input projection: transposed, split along the output (fast) axis.
    param2distributed(
        w[base_k + "mlp.fc_in.weight"].T,
        l,
        save_dir,
        "mlp.dense_h_to_4h.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast indx
    )
    param2distributed(
        w[base_k + "mlp.fc_in.bias"],
        l,
        save_dir,
        "mlp.dense_h_to_4h.bias",
        n_inference_gpus,
        split_axis=-1,  # split fast indx
    )

    # MLP output projection: transposed, split along the input (slow) axis.
    param2distributed(
        w[base_k + "mlp.fc_out.weight"].T,
        l,
        save_dir,
        "mlp.dense_4h_to_h.weight",
        n_inference_gpus,
        split_axis=0,  # split slow indx
    )
    param2file(w[base_k + "mlp.fc_out.bias"], l, save_dir, "mlp.dense_4h_to_h.bias")
    param2distributed(
        w[base_k + "attn.out_proj.weight"].T,
        l,
        save_dir,
        "attention.dense.weight",
        n_inference_gpus,
        split_axis=0,  # split slow indx
    )
    # Fuse separate Q/K/V projections into one QKV tensor for FT.
    QKV_w = torch.stack(
        [
            w[base_k + "attn.q_proj.weight"],
            w[base_k + "attn.k_proj.weight"],
            w[base_k + "attn.v_proj.weight"],
        ]
    )  # [qkv, n_heads * dim_head, latent_space]
    QKV_w = QKV_w.permute(2, 0, 1)
    param2distributed(
        QKV_w,
        l,
        save_dir,
        "attention.query_key_value.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast indx
    )
    # Other unneeded per-layer params:
    # attn.attention.masked_bias = torch.tensor(-1e9)
    # attn.attention.bias = torch.tril(torch.ones(1, 1, 2048, 2048))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Convert a huggingface GPT-J checkpoint to FasterTransformer binaries
    # plus the config.ini consumed by the triton backend.
    parser = ArgumentParser(
        description="Convert GPT-J slim checkpoint to FasterTransformer",
    )
    parser.add_argument(
        "--output-dir",
        help="Folder where binary files are stored",
    )
    parser.add_argument(
        "--ckpt-dir",
        help="File of GPT-J huggingface checkpoint",
    )
    parser.add_argument(
        "--n-inference-gpus",
        help="Number of GPUs used for inference runtime",
        default=1,
        type=int,
    )
    args = parser.parse_args()

    ckpt_file = args.ckpt_dir + "/pytorch_model.bin"
    checkpoint = torch.load(ckpt_file)
    print(f"loading from {ckpt_file}")

    out_path = args.output_dir
    output_dir = out_path + f"/{args.n_inference_gpus}-gpu/"
    print(f"saving to {output_dir}")

    config_file = args.ckpt_dir + "/config.json"
    hf_config = PretrainedConfig.from_json_file(config_file).to_dict()

    # NOTE: save parameters to config files (loaded by triton backends)
    config = configparser.ConfigParser()
    config["gptj"] = {}
    try:
        config["gptj"]["model_name"] = (
            "gptj" if hf_config["_name_or_path"] == "" else hf_config["_name_or_path"]
        )
        config["gptj"]["head_num"] = str(hf_config["n_head"])
        n_embd = hf_config["n_embd"]
        config["gptj"]["size_per_head"] = str(n_embd // hf_config["n_head"])
        config["gptj"]["inter_size"] = str(n_embd * 4)
        config["gptj"]["num_layer"] = str(hf_config["n_layer"])
        # Fall back to the per-head size when rotary_dim is unset.
        rotary_dim = (
            n_embd // hf_config["n_head"]
            if hf_config["rotary_dim"] is None
            else hf_config["rotary_dim"]
        )
        # BUG FIX: previously wrote str(hf_config["rotary_dim"]) directly,
        # which stored the literal string "None" whenever rotary_dim was
        # unset; use the computed fallback value instead.
        config["gptj"]["rotary_embedding"] = str(rotary_dim)
        config["gptj"]["vocab_size"] = str(hf_config["vocab_size"])
        config["gptj"]["start_id"] = str(hf_config["bos_token_id"])
        config["gptj"]["end_id"] = str(hf_config["eos_token_id"])
        config["gptj"]["weight_data_type"] = "fp16"
        Path(output_dir).mkdir(exist_ok=True, parents=True)
        with open(output_dir + "/config.ini", "w") as configfile:
            config.write(configfile)
    except Exception as e:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); report the cause.
        print("Fail to save the config in config.ini.", e)

    n_layers = hf_config["n_layer"]
    for i in range(n_layers):
        save(checkpoint, output_dir, args.n_inference_gpus, n_layers, i)
    savebin(
        checkpoint["transformer.ln_f.weight"],
        output_dir + "/model.final_layernorm.weight",
    )
    savebin(
        checkpoint["transformer.ln_f.bias"], output_dir + "/model.final_layernorm.bias"
    )
    savebin(checkpoint["lm_head.weight"], output_dir + "/model.lm_head.weight")
    savebin(checkpoint["lm_head.bias"], output_dir + "/model.lm_head.bias")

    print("done")
|
||||
|
|
@ -1,256 +0,0 @@
|
|||
import argparse
|
||||
import configparser
|
||||
import multiprocessing
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from transformers import GPTNeoXForCausalLM
|
||||
|
||||
|
||||
def get_weight_data_type(data_type):
    """Map a CLI dtype name ("fp32"/"fp16") to the numpy dtype used for export.

    Raises ValueError for unsupported names. (The original used
    `assert False`, which is silently stripped under `python -O`.)
    """
    if data_type == "fp32":
        return np.float32
    if data_type == "fp16":
        return np.float16
    raise ValueError(f"Invalid weight data type {data_type}")
|
||||
|
||||
|
||||
def split_and_convert_process(saved_dir, factor, key, args, config, val):
    """Write one weight tensor *val* to FT binary files under *saved_dir*.

    *factor* is the tensor-parallel degree (number of inference GPUs);
    depending on the key the tensor is either replicated or split into
    *factor* shards along the appropriate axis.
    """

    # Layernorm parameters are replicated on every rank: single file.
    if (
        key.find("input_layernorm.weight") != -1
        or key.find("input_layernorm.bias") != -1
        or key.find("post_attention_layernorm.weight") != -1
        or key.find("post_attention_layernorm.bias") != -1
        or key.find("final_layernorm.weight") != -1
        or key.find("final_layernorm.bias") != -1
    ):
        saved_path = saved_dir + f"/model.{key}.bin"
        val.tofile(saved_path)

    elif (
        key.find("attention.dense.bias") != -1
        or key.find("mlp.dense_4h_to_h.bias") != -1
    ):
        # NOTE(review): bias is divided by the GPU count — presumably each
        # rank adds its share after the all-reduce; confirm against the FT
        # loader before changing.
        saved_path = saved_dir + f"/model.{key}.bin"
        val = (val / factor) if factor > 1 else val
        val.tofile(saved_path)

    else:
        # Tensor-parallel weights: pick the split axis by parameter kind.
        if (
            key.find("attention.dense.weight") != -1
            or key.find("mlp.dense_4h_to_h.weight") != -1
        ):
            split_vals = np.split(val, factor, axis=0)

        elif (
            key.find("mlp.dense_h_to_4h.weight") != -1
            or key.find("mlp.dense_h_to_4h.bias") != -1
        ):
            split_vals = np.split(val, factor, axis=-1)

        elif key.find("attention.query_key_value.bias") != -1:
            local_dim = (int)(val.shape[-1] / 3)
            n_head = config["num_attention_heads"]

            # Regroup from per-head [q,k,v] interleaving to [3, local_dim].
            val = val.reshape(n_head, 3, local_dim // n_head)
            val = np.transpose(val, [1, 0, 2]).reshape(3, local_dim)
            split_vals = np.split(val, factor, axis=-1)

        elif key.find("attention.query_key_value.weight") != -1:
            hidden_dim = val.shape[0]
            local_dim = (int)(val.shape[-1] / 3)
            n_head = config["num_attention_heads"]
            # Note that the HF qkv weight are stored as [hidden_size, num_heads, 3, head_hidden]
            # FT needs the shape of [hidden_size, 3, num_heads, head_hidden]
            val = val.reshape(hidden_dim, n_head, 3, local_dim // n_head)
            val = np.transpose(val, [0, 2, 1, 3]).reshape(hidden_dim, 3, local_dim)

            # print(np.mean(np.abs(val[:, 0, :])))
            split_vals = np.split(val, factor, axis=-1)

        else:
            print("[ERROR] cannot find key '{}'".format(key))
            return

        # One file per tensor-parallel rank.
        for j in range(factor):
            saved_path = saved_dir + f"/model.{key}.{j}.bin"
            split_vals[j].tofile(saved_path)
|
||||
|
||||
|
||||
def split_and_convert(args):
    """Convert a huggingface GPT-NeoX checkpoint to FasterTransformer binaries.

    Writes config.ini plus one .bin file per (possibly sharded) parameter
    into "<args.saved_dir>/<args.infer_gpu_num>-gpu/".
    """
    saved_dir = args.saved_dir + "/%d-gpu/" % args.infer_gpu_num

    if os.path.exists(saved_dir) == False:
        os.makedirs(saved_dir)

    # Tensor-parallel degree: each split parameter becomes `factor` shards.
    factor = args.infer_gpu_num

    # load position_embedding from rank 0
    # model = torch.load(ckpt_name)
    model = GPTNeoXForCausalLM.from_pretrained(args.in_file)
    hf_config = vars(model.config)

    np_weight_data_type = get_weight_data_type(args.weight_data_type)

    try:
        model_name = args.model_name
        n_heads = hf_config["num_attention_heads"]
        head_size = hf_config["hidden_size"] // n_heads
        rotary_dim = int(head_size * hf_config["rotary_pct"])
        use_gptj_residual = int(hf_config["use_parallel_residual"])

        # Config consumed by the triton/FT runtime.
        config = configparser.ConfigParser()
        config["gptneox"] = {}
        config["gptneox"]["model_name"] = model_name
        config["gptneox"]["head_num"] = str(n_heads)
        config["gptneox"]["size_per_head"] = str(head_size)
        config["gptneox"]["inter_size"] = str(hf_config["intermediate_size"])
        config["gptneox"]["num_layer"] = str(hf_config["num_hidden_layers"])
        config["gptneox"]["rotary_embedding"] = str(rotary_dim)
        config["gptneox"]["vocab_size"] = str(hf_config["vocab_size"])
        config["gptneox"]["start_id"] = str(hf_config["bos_token_id"])
        config["gptneox"]["end_id"] = str(hf_config["eos_token_id"])
        config["gptneox"]["use_gptj_residual"] = str(use_gptj_residual)
        config["gptneox"]["weight_data_type"] = args.weight_data_type

        with open((Path(saved_dir) / f"config.ini").as_posix(), "w") as configfile:
            config.write(configfile)
    except Exception as e:
        # NOTE(review): if this fails, use_gptj_residual below is undefined
        # and the tail of this function raises NameError — confirm intended.
        print(f"Fail to save the config in config.ini.", e)

    # Per-layer parameter suffixes handled by split_and_convert_process.
    ft_model_name_pattern = [
        "input_layernorm.bias",
        "input_layernorm.weight",
        "attention.query_key_value.bias",
        "attention.query_key_value.weight",
        "attention.dense.bias",
        "attention.dense.weight",
        "post_attention_layernorm.bias",
        "post_attention_layernorm.weight",
        "mlp.dense_h_to_4h.bias",
        "mlp.dense_h_to_4h.weight",
        "mlp.dense_4h_to_h.bias",
        "mlp.dense_4h_to_h.weight",
    ]

    huggingface_model_file_list = [
        hf_file_name
        for hf_file_name in os.listdir(args.in_file)
        if hf_file_name.endswith(".bin")
    ]
    # Multi-shard checkpoints use the default context; single-shard uses
    # spawn (presumably to avoid forking with CUDA state — confirm).
    if len(huggingface_model_file_list) > 1:
        multiprocessing_context = multiprocessing.get_context()
        pool_fn = multiprocessing_context.Pool
    else:
        torch.multiprocessing.set_start_method("spawn")
        pool_fn = multiprocessing.Pool

    pool = pool_fn(args.processes)

    for name, param in model.named_parameters():
        array = param.detach().cpu().numpy().astype(np_weight_data_type)
        # print("input shape", name, array.shape)
        if name.find("weight") == -1 and name.find("bias") == -1:
            print("skipped", name)
            continue
        elif name == "gpt_neox.embed_in.weight":
            # Token embedding: replicated, single file.
            array.tofile(saved_dir + "model.wte.bin")
        elif name == "gpt_neox.final_layer_norm.bias":
            array.tofile(saved_dir + "model.final_layernorm.bias.bin")
        elif name == "gpt_neox.final_layer_norm.weight":
            array.tofile(saved_dir + "model.final_layernorm.weight.bin")
        elif name == "embed_out.weight":
            array.tofile(saved_dir + "model.lm_head.weight.bin")
        else:
            processed = False
            for i in range(len(ft_model_name_pattern)):
                if name.find(ft_model_name_pattern[i]) != -1:
                    new_name = name.replace("gpt_neox.", "")
                    # Dispatch the (transposed) tensor to a worker for
                    # splitting and writing.
                    pool.starmap(
                        split_and_convert_process,
                        [
                            (
                                saved_dir,
                                factor,
                                new_name,
                                args,
                                vars(model.config),
                                array.T,
                            )
                        ],
                    )
                    processed = True
                    break

            if not processed:
                print("Unused layer", name)

    pool.close()
    pool.join()

    # Post-process biases if use_gptj_residual is True
    if use_gptj_residual:
        for layer_idx in range(hf_config["num_hidden_layers"]):
            attn_bias = np.fromfile(
                saved_dir + f"/model.layers.{layer_idx}.attention.dense.bias.bin",
                dtype=np_weight_data_type,
            )
            mlp_bias = np.fromfile(
                saved_dir + f"/model.layers.{layer_idx}.mlp.dense_4h_to_h.bias.bin",
                dtype=np_weight_data_type,
            )

            # Parallel-residual models add one combined bias per layer.
            (attn_bias + mlp_bias).astype(np_weight_data_type).tofile(
                saved_dir + f"/model.layers.{layer_idx}.mlp.attention.bias.sum.bin"
            )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: parse flags, refuse to clobber an existing output
    # directory, then run the conversion.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        "-saved_dir", "-o", type=str, help="file name of output file", required=True
    )
    parser.add_argument(
        "-in_file",
        "-i",
        type=str,
        help="file name of input checkpoint file",
        required=True,
    )
    parser.add_argument(
        "-infer_gpu_num",
        "-i_g",
        type=int,
        help="How many gpus for inference",
        required=True,
    )
    parser.add_argument(
        "-processes",
        "-p",
        type=int,
        help="How many processes to spawn for conversion (default: 4)",
        default=4,
    )
    parser.add_argument(
        "-weight_data_type", type=str, default="fp32", choices=["fp32", "fp16"]
    )
    parser.add_argument(
        "-model_name", "-m_n", type=str, help="model name", required=True
    )

    args = parser.parse_args()
    print("\n=============== Argument ===============")
    for key in vars(args):
        print("{}: {}".format(key, vars(args)[key]))
    print("========================================")

    target_dir_path = os.path.join(args.saved_dir, "%d-gpu" % args.infer_gpu_num)
    # Refuse to overwrite a previous conversion. This was an `assert`,
    # which is stripped under `python -O`; raise explicitly instead.
    if os.path.exists(target_dir_path):
        raise SystemExit(
            "target path has exist, please remove %s first." % target_dir_path
        )

    split_and_convert(args)
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
from dataclasses import dataclass, field
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser
|
||||
|
||||
|
||||
@dataclass
class Arguments:
    """CLI arguments for pre-downloading a Huggingface model."""

    # Model repository to fetch, e.g. "TabbyML/NeoX-160M" (required).
    repo_id: str = field(
        metadata={"help": "Huggingface model repository id, e.g TabbyML/NeoX-160M"}
    )
    # When True, try the local cache first and skip the remote version check.
    prefer_local_files: bool = field(
        default=True,
        metadata={
            "help": "Whether prefer loading local files (skip remote version check if local files are valid)."
        },
    )
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line into an Arguments instance."""
    return HfArgumentParser(Arguments).parse_args()
|
||||
|
||||
|
||||
def preload(local_files_only=False):
    """Download (or validate the local cache of) the configured model.

    With local_files_only=True, only the local cache is consulted and an
    exception is raised when it is missing/invalid.

    NOTE(review): reads the module-global `args` set in the __main__ block
    rather than taking repo_id as a parameter — confirm before reusing
    this as a library function.
    """
    AutoTokenizer.from_pretrained(args.repo_id, local_files_only=local_files_only)
    AutoModelForCausalLM.from_pretrained(
        args.repo_id, local_files_only=local_files_only
    )
    # Also fetch the triton inference assets bundled with the repo.
    snapshot_download(
        repo_id=args.repo_id,
        allow_patterns="triton/**/*",
        local_files_only=local_files_only,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Try the local cache first; fall back to the network only for
    # cache-related failures.
    args = parse_args()
    print(f"Loading {args.repo_id}, this will take a while...")
    try:
        preload(local_files_only=args.prefer_local_files)
    except Exception as e:
        if "offline" in str(e) or "local_files_only" in str(e):
            preload(local_files_only=False)
        else:
            # Bare `raise` (was `raise e`) preserves the original traceback.
            raise
    print(f"Loaded {args.repo_id} !")
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
# Test harness for the repository updater (updater.py).
all:

# Command under test: run the updater against the checked-in fixtures.
CMD := poetry run python updater.py --data_dir=./testdata/repositories --config_file=testdata/config.toml

# Runs the updater twice: first to clone the fixture repo, then to update it.
test:
	# Cleanup
	rm -rf testdata/repositories/quickjs
	# Initialize
	$(CMD)
	# Updating
	$(CMD)
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
data_dir = "/repositories"

# NOTE(review): updater.py reads config["projects"] and each entry's
# "git_url", but this fixture declares [repositories.*] with "url" —
# one of the two schemas is stale; confirm which is current.
[repositories.quickjs]
url = "https://gitee.com/vsf-linux/quickjs.git"
|
||||
|
|
@ -1 +0,0 @@
|
|||
quickjs
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import toml
|
||||
from git import Repo
|
||||
from transformers import HfArgumentParser
|
||||
|
||||
|
||||
@dataclass
class Arguments:
    """CLI arguments for the repository updater."""

    # Base directory under which repositories are cloned/updated.
    data_dir: str = field(metadata={"help": "Base dir for repositories"})
    # Path to the TOML file listing the repositories to mirror.
    config_file: str = field(metadata={"help": "Configuration file for tabby updater"})
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line into an Arguments instance."""
    return HfArgumentParser(Arguments).parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    args = parse_args()

    config = toml.load(args.config_file)
    # NOTE(review): expects a [projects.<name>] schema with "git_url" per
    # entry; testdata/config.toml uses [repositories.*] with "url" —
    # confirm which schema is current.
    repositories = config["projects"]

    # Remove anything under data_dir that is not a configured repository.
    for x in pathlib.Path(args.data_dir).glob("*"):
        if x.is_dir() and x.name not in repositories:
            print("Remove unused dir:", x)
            shutil.rmtree(str(x))
        elif x.is_file():
            print("Remove unused file:", x)
            x.unlink()

    # Shallow-clone any configured repository that is not present yet.
    for name, config in repositories.items():
        path = pathlib.Path(args.data_dir, name)
        if path.is_dir():
            # NOTE(review): handle is created but never used; the actual
            # update happens via `gitup` below — confirm intended.
            repo = Repo(path)
        else:
            Repo.clone_from(config["git_url"], path.absolute(), depth=1)

    # Update all repositories with the external `gitup` tool.
    os.system(f"gitup {args.data_dir}")
|
||||
Loading…
Reference in New Issue