feat: integrate projects / dataset information in admin. (#38)
* feat: add projects page in admin
* feat: integrate update_dataset job
* feat: display dataset info in projects
add-more-languages
parent
a241c08fc3
commit
1c61ef3944
|
|
@ -7,8 +7,6 @@ services:
|
||||||
environment:
|
environment:
|
||||||
MODEL_NAME: ${MODEL_NAME}
|
MODEL_NAME: ${MODEL_NAME}
|
||||||
MODEL_BACKEND: triton
|
MODEL_BACKEND: triton
|
||||||
LOGS_DIR: /data/logs
|
|
||||||
DAGU_DAGS: /app/tabby/tasks
|
|
||||||
ports:
|
ports:
|
||||||
- "5000:5000"
|
- "5000:5000"
|
||||||
- "8080:8080"
|
- "8080:8080"
|
||||||
|
|
|
||||||
|
|
@ -4,15 +4,28 @@ set -e
|
||||||
# Shared environment variables
|
# Shared environment variables
|
||||||
export LOGS_DIR="${LOGS_DIR:-/data/logs}"
|
export LOGS_DIR="${LOGS_DIR:-/data/logs}"
|
||||||
export DB_FILE="${DB_FILE:-/data/logs/duckdb/duck.db}"
|
export DB_FILE="${DB_FILE:-/data/logs/duckdb/duck.db}"
|
||||||
|
export CONFIG_FILE=${CONFIG_FILE:-/data/config/tabby.toml}
|
||||||
|
|
||||||
# server
|
# server
|
||||||
export MODEL_NAME="${MODEL_NAME:-TabbyML/J-350M}"
|
export MODEL_NAME="${MODEL_NAME:-TabbyML/J-350M}"
|
||||||
export MODEL_BACKEND="${MODEL_BACKEND:-python}"
|
export MODEL_BACKEND="${MODEL_BACKEND:-python}"
|
||||||
|
|
||||||
|
# projects
# Directory handed to the repository updater as its data dir. An explicit
# GIT_REPOSITORIES_DIR wins; REPOSITORIES_DIR is still honoured as a fallback
# so deployments that already set it keep their location.
export GIT_REPOSITORIES_DIR="${GIT_REPOSITORIES_DIR:-${REPOSITORIES_DIR:-/data/repositories}}"
# Directory handed to the dataset builder as its output dir. It must have its
# own override: deriving it from REPOSITORIES_DIR would make the dataset output
# and the repository checkouts share one directory whenever that is set.
export DATASET_DIR="${DATASET_DIR:-/data/dataset}"
|
||||||
|
|
||||||
# dagu
|
# dagu
|
||||||
export DAGU_DAGS="tabby/tasks"
|
export DAGU_DAGS="tabby/tasks"
|
||||||
|
|
||||||
init() {
|
init() {
|
||||||
|
if [ ! -f $CONFIG_FILE ]; then
|
||||||
|
mkdir -p $(dirname $CONFIG_FILE)
|
||||||
|
touch $CONFIG_FILE
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Disable safe directory check
|
||||||
|
git config --global --add safe.directory '*'
|
||||||
|
|
||||||
python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
[repositories.quickjs]
|
|
||||||
url = "https://gitee.com/vsf-linux/quickjs.git"
|
|
||||||
|
|
||||||
[repositories.transformers]
|
|
||||||
url = "https://gitee.com/angzhao/transformers.git"
|
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import streamlit as st
|
||||||
|
import toml
|
||||||
|
from datasets import load_from_disk
|
||||||
|
from git import Repo
|
||||||
|
|
||||||
|
st.set_page_config(page_title="Tabby Admin - Projects")
|
||||||
|
|
||||||
|
dataset_dir = os.environ.get("DATASET_DIR", None)
|
||||||
|
git_repositories_dir = os.environ.get("GIT_REPOSITORIES_DIR", None)
|
||||||
|
config_file = os.environ.get("CONFIG_FILE", None)
|
||||||
|
config = toml.load(config_file)
|
||||||
|
|
||||||
|
|
||||||
|
def dataset_info():
    """Render the "Dataset" section of the page.

    Writes a "Not populated" placeholder when ``DATASET_DIR`` is unset or does
    not name an existing directory. Otherwise loads the dataset from that
    directory and displays its length as the number of source files.
    """
    st.subheader("Dataset")

    # dataset_dir is None when DATASET_DIR is not exported; Path(None) raises
    # TypeError, so treat the unset case the same as "nothing generated yet".
    if not dataset_dir or not Path(dataset_dir).is_dir():
        st.write("*Not populated*")
        return

    info = load_from_disk(dataset_dir)
    st.write("Source files: ", len(info))
|
||||||
|
|
||||||
|
|
||||||
|
def project_list():
    """Render one section per entry of the ``projects`` table in the config.

    When the table is missing or empty, writes a hint naming the config file
    to edit. For every project, shows its ``git_url`` and a status line: the
    checked-out commit SHA when a directory named after the project exists
    under ``GIT_REPOSITORIES_DIR``, or "Before Initialization" otherwise.

    Raises:
        KeyError: if a project entry has no ``git_url`` key.
    """
    projects = config.get("projects", {})

    if not projects:
        st.write("Your project list is empty")
        st.write(f"Edit `{config_file}` to add projects")
        return

    for name, project in projects.items():
        st.subheader(name)
        st.write(f'Git: {project["git_url"]}')

        # git_repositories_dir is None when GIT_REPOSITORIES_DIR is not
        # exported; Path(None, name) raises TypeError, so report the project
        # as not yet initialized instead of failing the page.
        checkout = Path(git_repositories_dir, name) if git_repositories_dir else None
        if checkout is None or not checkout.is_dir():
            st.write("Status: *Before Initialization*")
            continue

        # Resolve the commit through HEAD rather than active_branch:
        # active_branch raises TypeError when HEAD is detached, while
        # head.commit yields the same commit on a branch and also works
        # for a detached checkout.
        sha = Repo(checkout).head.commit.hexsha
        st.write(f"Status: `{sha}`")
|
||||||
|
|
||||||
|
|
||||||
|
dataset_info()
|
||||||
|
st.write("---")
|
||||||
|
project_list()
|
||||||
|
|
@ -10,7 +10,7 @@ def random_completion_id():
|
||||||
|
|
||||||
def trim_with_stop_words(output: str, stopwords: list) -> str:
|
def trim_with_stop_words(output: str, stopwords: list) -> str:
|
||||||
for w in sorted(stopwords, key=len, reverse=True):
|
for w in sorted(stopwords, key=len, reverse=True):
|
||||||
if output.endswith(w):
|
index = output.find(w)
|
||||||
output = output[: -len(w)]
|
if index != -1:
|
||||||
break
|
output = output[:index]
|
||||||
return output
|
return output
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from . import models
|
||||||
|
|
||||||
def setup_logging(logdir):
|
def setup_logging(logdir):
|
||||||
try:
|
try:
|
||||||
shutil.rmtree(logdir + "/*")
|
shutil.rmtree(logdir)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,18 @@
|
||||||
# schedule: "5 4 * * *" # Run at 04:05.
|
schedule: "5 4 * * *" # Run daily at 04:05.
|
||||||
|
|
||||||
env:
|
env:
|
||||||
- PATH: "/opt/conda/bin:$PATH"
|
- PATH: "$PATH"
|
||||||
- APP_DIR: /app
|
- APP_DIR: /app
|
||||||
|
- CONFIG_FILE: "$CONFIG_FILE"
|
||||||
|
- GIT_REPOSITORIES_DIR: "$GIT_REPOSITORIES_DIR"
|
||||||
|
- DATASET_DIR: "$DATASET_DIR"
|
||||||
steps:
|
steps:
|
||||||
- name: Update repositories
|
- name: Update repositories
|
||||||
dir: $APP_DIR
|
dir: $APP_DIR
|
||||||
command: python -m tabby.tools.repository.updater --data_dir=/repositories --config_file=/config/repository.toml
|
command: python -m tabby.tools.repository.updater --data_dir=$GIT_REPOSITORIES_DIR --config_file=$CONFIG_FILE
|
||||||
|
|
||||||
- name: Generate dataset
|
- name: Generate dataset
|
||||||
dir: $APP_DIR
|
dir: $APP_DIR
|
||||||
command: python -m tabby.tools.preprocess.build_dataset --project_dir /repositories --output_dir /dataset
|
command: python -m tabby.tools.preprocess.build_dataset --project_dir=$GIT_REPOSITORIES_DIR --output_dir=$DATASET_DIR
|
||||||
depends:
|
depends:
|
||||||
- Update repositories
|
- Update repositories
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import shutil
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
import toml
|
import toml
|
||||||
|
|
@ -22,13 +23,21 @@ if __name__ == "__main__":
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
config = toml.load(args.config_file)
|
config = toml.load(args.config_file)
|
||||||
repositories = config["repositories"]
|
repositories = config["projects"]
|
||||||
|
|
||||||
|
for x in pathlib.Path(args.data_dir).glob("*"):
|
||||||
|
if x.is_dir() and x.name not in repositories:
|
||||||
|
print("Remove unused dir:", x)
|
||||||
|
shutil.rmtree(str(x))
|
||||||
|
elif x.is_file():
|
||||||
|
print("Remove unused file:", x)
|
||||||
|
x.unlink()
|
||||||
|
|
||||||
for name, config in repositories.items():
|
for name, config in repositories.items():
|
||||||
path = pathlib.Path(args.data_dir, name)
|
path = pathlib.Path(args.data_dir, name)
|
||||||
if path.is_dir():
|
if path.is_dir():
|
||||||
repo = Repo(path)
|
repo = Repo(path)
|
||||||
else:
|
else:
|
||||||
Repo.clone_from(config["url"], path.absolute(), depth=1)
|
Repo.clone_from(config["git_url"], path.absolute(), depth=1)
|
||||||
|
|
||||||
os.system(f"gitup {args.data_dir}")
|
os.system(f"gitup {args.data_dir}")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue