feat: support single container (#46)

* docs: update readme

* fix: do not exclude peft

* Free disk space before building the Docker image

* fix: fix docker-compose

* fix: set docker-compose user to 1000

* fix: dockerfile

* fix: cachedir ownership
add-more-languages
Meng Zhang 2023-04-05 20:19:43 +08:00 committed by GitHub
parent 0d89a1221a
commit db77d7f267
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 114 additions and 71 deletions

View File

@ -21,6 +21,20 @@ jobs:
id-token: write id-token: write
steps: steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# setting this to "true" frees about 6 GB,
# but might remove tools that are actually needed
tool-cache: false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v3 uses: actions/checkout@v3

View File

@ -1,47 +1,68 @@
# syntax = docker/dockerfile:1.5 # syntax = docker/dockerfile:1.5
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime FROM tabbyml/fastertransformer_backend
# Install utilities RUN apt update && apt -y install build-essential libssl-dev zlib1g-dev \
libbz2-dev libreadline-dev libsqlite3-dev curl \
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
RUN mkdir -p /home/app
RUN chown 1000 /home/app
USER 1000
WORKDIR /home/app
ENV HOME /home/app
# Setup pyenv
RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
ENV PATH="$HOME/.pyenv/shims:/home/app/.pyenv/bin:$PATH"
ARG PYTHON_VERSION=3.10.10
RUN pyenv install ${PYTHON_VERSION}
RUN pyenv global ${PYTHON_VERSION}
ARG PYPI_INDEX_URL=https://pypi.org/simple
ARG POETRY_VERSION=1.4.0
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
# vector
RUN <<EOF RUN <<EOF
apt-get -y update curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
apt-get -y install git curl
EOF EOF
ENV PATH "$HOME/.vector/bin:$PATH"
# Supervisord
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL supervisor
RUN mkdir -p ~/.bin
ENV PATH "$HOME/.bin:$PATH"
# Install dagu # Install dagu
RUN <<EOF RUN <<EOF
curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz
tar zxvf dagu.tar.gz tar zxvf dagu.tar.gz
mv dagu /usr/local/bin mv dagu ~/.bin/
rm dagu.tar.gz LICENSE.md README.md rm dagu.tar.gz LICENSE.md README.md
EOF EOF
ARG PYPI_INDEX_URL=https://pypi.org/simple # Install tabby dependencies
ARG POETRY_VERSION=1.4.0 COPY poetry.lock pyproject.toml ./
RUN poetry export --without-hashes > requirements.txt
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL --no-dependencies -r requirements.txt
WORKDIR /app
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
COPY poetry.lock pyproject.toml /app/
RUN poetry export --without-hashes -o requirements.txt
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple --no-dependencies -r requirements.txt
## FIX bitsandbytes
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
# vector
RUN <<EOF
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
mkdir -p /var/lib/vector
EOF
ENV PATH "$PATH:/root/.vector/bin"
# Supervisord
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
COPY tabby ./tabby COPY tabby ./tabby
COPY deployment/scripts/tabby.sh /usr/bin COPY deployment/scripts/tabby.sh ./.bin/
COPY deployment/scripts/triton.sh ./.bin/
# Setup file permissions
USER root
RUN mkdir -p /var/lib/vector
RUN chown 1000 /var/lib/vector
RUN mkdir -p $HOME/.cache
RUN chown 1000 $HOME/.cache
USER 1000
CMD ["tabby.sh"] CMD ["tabby.sh"]

View File

@ -2,4 +2,4 @@ MODEL_NAME=TabbyML/J-350M
# Volumes # Volumes
DATA_VOLUME="../data:/data" DATA_VOLUME="../data:/data"
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface" HF_VOLUME="../data/hf_cache:/home/app/.cache/huggingface"

View File

@ -0,0 +1,20 @@
version: '3.3'
services:
tabby:
image: tabbyml/tabby
container_name: tabby
environment:
MODEL_NAME: ${MODEL_NAME}
ports:
- "5000:5000"
- "8080:8080"
- "8501:8501"
volumes:
- ${DATA_VOLUME}
- ${HF_VOLUME}
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5000"]
interval: 2s
timeout: 2s
start_period: 1200s

View File

@ -4,9 +4,11 @@ services:
tabby: tabby:
image: tabbyml/tabby image: tabbyml/tabby
container_name: tabby container_name: tabby
shm_size: 1gb
environment: environment:
MODEL_NAME: ${MODEL_NAME} MODEL_NAME: ${MODEL_NAME}
MODEL_BACKEND: triton MODEL_BACKEND: triton
MODEL_REPLICA: ${MODEL_REPLICA:-1}
ports: ports:
- "5000:5000" - "5000:5000"
- "8080:8080" - "8080:8080"
@ -19,15 +21,6 @@ services:
interval: 2s interval: 2s
timeout: 2s timeout: 2s
start_period: 1200s start_period: 1200s
triton:
image: tabbyml/fastertransformer_backend
container_name: tabby-triton
command: triton.sh
shm_size: 1gb
volumes:
- ./scripts/triton.sh:/usr/bin/triton.sh:ro
- ${HF_VOLUME}
deploy: deploy:
resources: resources:
reservations: reservations:
@ -35,9 +28,3 @@ services:
- driver: nvidia - driver: nvidia
count: all count: all
capabilities: [gpu] capabilities: [gpu]
environment:
MODEL_NAME: ${MODEL_NAME}
MODEL_REPLICA: ${MODEL_REPLICA:-1}
depends_on:
tabby:
condition: service_healthy

View File

@ -35,9 +35,20 @@ python -m tabby.tools.download_models --repo_id=$MODEL_NAME
supervisor() { supervisor() {
if [[ "$MODEL_BACKEND" == "triton" ]]
then
local TRITON_SERVER=$(cat <<EOF
[program:triton]
command=triton.sh
EOF
)
fi
supervisord -n -c <(cat <<EOF supervisord -n -c <(cat <<EOF
[supervisord] [supervisord]
logfile = /var/log/supervisord.log logfile = ${LOGS_DIR}/supervisord.log
loglevel = debug loglevel = debug
[program:server] [program:server]
@ -54,6 +65,8 @@ command=dagu scheduler
[program:dagu_server] [program:dagu_server]
command=dagu server --host 0.0.0.0 --port 8080 command=dagu server --host 0.0.0.0 --port 8080
$TRITON_SERVER
EOF EOF
) )
} }

View File

@ -23,5 +23,5 @@ sed -i "s/count: [[:digit:]]\+/count: $MODEL_REPLICA/g" $MODEL_DIR/triton/faster
# Start triton server. # Start triton server.
mpirun -n 1 \ mpirun -n 1 \
--allow-run-as-root /opt/tritonserver/bin/tritonserver \ /opt/tritonserver/bin/tritonserver \
--model-repository=$MODEL_DIR/triton --model-repository=$MODEL_DIR/triton

View File

@ -1,14 +1,11 @@
DEFAULT := docker-compose -f ../deployment/docker-compose.yml DEFAULT :=
UP_FLAGS := up --remove-orphans UP_FLAGS := up --remove-orphans
build: build:
$(DEFAULT) -f docker-compose.dev.yml build docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml build
dev: dev:
$(DEFAULT) -f docker-compose.dev.yml $(UP_FLAGS) docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)
dev-python: dev-python:
$(DEFAULT) -f docker-compose.dev.yml -f docker-compose.python.yml $(UP_FLAGS) docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS)
clean:
$(DEFAULT) -f docker-compose.dev.yml down

View File

@ -5,11 +5,13 @@ services:
build: build:
context: .. context: ..
args: args:
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple PYPI_INDEX_URL: https://mirrors.aliyun.com/pypi/simple
PYTHON_BUILD_MIRROR_URL: https://repo.huaweicloud.com/python
environment: environment:
UVICORN_RELOAD: true UVICORN_RELOAD: true
VECTOR_WATCH_CONFIG: true VECTOR_WATCH_CONFIG: true
STREAMLIT_RUN_ON_SAVE: true STREAMLIT_RUN_ON_SAVE: true
volumes: volumes:
- ../:/app - ../tabby:/home/app/tabby
- ../deployment/scripts/tabby.sh:/usr/bin/tabby.sh:ro - ../deployment/scripts/tabby.sh:/home/app/.bin/tabby.sh:ro
- ../deployment/scripts/triton.sh:/home/app/.bin/triton.sh:ro

View File

@ -1,11 +0,0 @@
version: '3.3'
services:
tabby:
image: tabbyml/tabby
environment:
MODEL_BACKEND: python
triton:
profiles:
- donotstart

View File

@ -3,7 +3,7 @@ from components import monaco
from utils.service_info import ServiceInfo from utils.service_info import ServiceInfo
SERVICES = [ SERVICES = [
ServiceInfo(label="triton", health_url="http://triton:8002/metrics"), ServiceInfo(label="triton", health_url="http://localhost:8002/metrics"),
ServiceInfo(label="vector", health_url="http://localhost:8686/health"), ServiceInfo(label="vector", health_url="http://localhost:8686/health"),
ServiceInfo( ServiceInfo(
label="dagu", health_url="http://localhost:8080", url="http://localhost:8080" label="dagu", health_url="http://localhost:8080", url="http://localhost:8080"

View File

@ -35,7 +35,7 @@ MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "python")
if MODEL_BACKEND == "triton": if MODEL_BACKEND == "triton":
model_backend = TritonService( model_backend = TritonService(
tokenizer_name=MODEL_NAME, tokenizer_name=MODEL_NAME,
host=os.environ.get("TRITON_HOST", "triton"), host=os.environ.get("TRITON_HOST", "localhost"),
port=os.environ.get("TRITON_PORT", "8001"), port=os.environ.get("TRITON_PORT", "8001"),
) )
else: else: