feat: support single container (#46)

* docs: update readme

* fix: do not exclude peft

* Free disk space before docker building

* fix: fix docker-compose

* fix: dockercompose user to 1000

* fix dockerfile

* fix: cachedir ownership
add-more-languages
Meng Zhang 2023-04-05 20:19:43 +08:00 committed by GitHub
parent 0d89a1221a
commit db77d7f267
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 114 additions and 71 deletions

View File

@ -21,6 +21,20 @@ jobs:
id-token: write
steps:
# Reclaims disk space on the runner before the later steps of this job;
# the commit notes describe this as freeing space ahead of the Docker build.
- name: Free Disk Space (Ubuntu)
# NOTE(review): the action is referenced by branch (`@main`) rather than a
# release tag or commit SHA, so its behavior can change between runs —
# consider pinning.
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: Checkout repository
uses: actions/checkout@v3

View File

@ -1,47 +1,68 @@
# syntax = docker/dockerfile:1.5
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
FROM tabbyml/fastertransformer_backend
# Install utilities
# NOTE(review): this list looks like the compiler toolchain and -dev headers
# needed to build CPython from source for the `pyenv install` step below —
# confirm before trimming it.
RUN apt update && apt -y install build-essential libssl-dev zlib1g-dev \
libbz2-dev libreadline-dev libsqlite3-dev curl \
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
# Create the application home, hand it to UID 1000, and switch to that user
# so the following instructions run as UID 1000 inside /home/app.
RUN mkdir -p /home/app
RUN chown 1000 /home/app
USER 1000
WORKDIR /home/app
# HOME is referenced by the PATH entry and the cache-mount target below.
ENV HOME /home/app
# Setup pyenv
# Shallow clone (--depth=1) into .pyenv, relative to WORKDIR (/home/app).
RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
# Prepend the pyenv shims and the pyenv executable directory to PATH.
ENV PATH="$HOME/.pyenv/shims:/home/app/.pyenv/bin:$PATH"
# Python version to install; overridable with --build-arg PYTHON_VERSION=...
ARG PYTHON_VERSION=3.10.10
RUN pyenv install ${PYTHON_VERSION}
# Select the freshly installed version as pyenv's global default.
RUN pyenv global ${PYTHON_VERSION}
# Package index URL and Poetry version; both are build arguments with defaults.
ARG PYPI_INDEX_URL=https://pypi.org/simple
ARG POETRY_VERSION=1.4.0
# Install the pinned Poetry release from the configured index; a BuildKit
# cache mount is attached at $HOME/.cache for this step only.
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
# vector
RUN <<EOF
apt-get -y update
apt-get -y install git curl
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
EOF
ENV PATH "$HOME/.vector/bin:$PATH"
# Supervisord
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL supervisor
RUN mkdir -p ~/.bin
ENV PATH "$HOME/.bin:$PATH"
# Install dagu
RUN <<EOF
curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz
tar zxvf dagu.tar.gz
mv dagu /usr/local/bin
mv dagu ~/.bin/
rm dagu.tar.gz LICENSE.md README.md
EOF
ARG PYPI_INDEX_URL=https://pypi.org/simple
ARG POETRY_VERSION=1.4.0
# Install tabby dependencies
COPY poetry.lock pyproject.toml ./
RUN poetry export --without-hashes > requirements.txt
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL --no-dependencies -r requirements.txt
WORKDIR /app
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
COPY poetry.lock pyproject.toml /app/
RUN poetry export --without-hashes -o requirements.txt
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple --no-dependencies -r requirements.txt
## FIX bitsandbytes
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
# vector
RUN <<EOF
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
mkdir -p /var/lib/vector
EOF
ENV PATH "$PATH:/root/.vector/bin"
# Supervisord
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
COPY tabby ./tabby
COPY deployment/scripts/tabby.sh /usr/bin
COPY deployment/scripts/tabby.sh ./.bin/
COPY deployment/scripts/triton.sh ./.bin/
# Setup file permissions
USER root
RUN mkdir -p /var/lib/vector
RUN chown 1000 /var/lib/vector
RUN mkdir -p $HOME/.cache
RUN chown 1000 $HOME/.cache
USER 1000
CMD ["tabby.sh"]

View File

@ -2,4 +2,4 @@ MODEL_NAME=TabbyML/J-350M
# Volumes
DATA_VOLUME="../data:/data"
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
HF_VOLUME="../data/hf_cache:/home/app/.cache/huggingface"

View File

@ -0,0 +1,20 @@
# Compose definition with a single service, `tabby`.
# ${MODEL_NAME}, ${DATA_VOLUME} and ${HF_VOLUME} are not defined here; they are
# substituted by Compose from the environment / env file.
version: '3.3'
services:
tabby:
image: tabbyml/tabby
container_name: tabby
environment:
MODEL_NAME: ${MODEL_NAME}
# Host:container port mappings. Port 5000 is the one probed by the
# healthcheck below.
# NOTE(review): 8080 appears to be the dagu web server and 8501 presumably
# the Streamlit UI — confirm against the startup script.
ports:
- "5000:5000"
- "8080:8080"
- "8501:8501"
# Each variable is expected to hold a complete volume spec (source:target).
volumes:
- ${DATA_VOLUME}
- ${HF_VOLUME}
# Probe http://localhost:5000 with `curl -f` every 2s, with a 2s timeout.
# NOTE(review): the 1200s start period presumably leaves room for a model
# download on first start — confirm.
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5000"]
interval: 2s
timeout: 2s
start_period: 1200s

View File

@ -4,9 +4,11 @@ services:
tabby:
image: tabbyml/tabby
container_name: tabby
shm_size: 1gb
environment:
MODEL_NAME: ${MODEL_NAME}
MODEL_BACKEND: triton
MODEL_REPLICA: ${MODEL_REPLICA:-1}
ports:
- "5000:5000"
- "8080:8080"
@ -19,15 +21,6 @@ services:
interval: 2s
timeout: 2s
start_period: 1200s
triton:
image: tabbyml/fastertransformer_backend
container_name: tabby-triton
command: triton.sh
shm_size: 1gb
volumes:
- ./scripts/triton.sh:/usr/bin/triton.sh:ro
- ${HF_VOLUME}
deploy:
resources:
reservations:
@ -35,9 +28,3 @@ services:
- driver: nvidia
count: all
capabilities: [gpu]
environment:
MODEL_NAME: ${MODEL_NAME}
MODEL_REPLICA: ${MODEL_REPLICA:-1}
depends_on:
tabby:
condition: service_healthy

View File

@ -35,9 +35,20 @@ python -m tabby.tools.download_models --repo_id=$MODEL_NAME
supervisor() {
if [[ "$MODEL_BACKEND" == "triton" ]]
then
local TRITON_SERVER=$(cat <<EOF
[program:triton]
command=triton.sh
EOF
)
fi
supervisord -n -c <(cat <<EOF
[supervisord]
logfile = /var/log/supervisord.log
logfile = ${LOGS_DIR}/supervisord.log
loglevel = debug
[program:server]
@ -54,6 +65,8 @@ command=dagu scheduler
[program:dagu_server]
command=dagu server --host 0.0.0.0 --port 8080
$TRITON_SERVER
EOF
)
}

View File

@ -23,5 +23,5 @@ sed -i "s/count: [[:digit:]]\+/count: $MODEL_REPLICA/g" $MODEL_DIR/triton/faster
# Start triton server.
mpirun -n 1 \
--allow-run-as-root /opt/tritonserver/bin/tritonserver \
/opt/tritonserver/bin/tritonserver \
--model-repository=$MODEL_DIR/triton

View File

@ -1,14 +1,11 @@
DEFAULT := docker-compose -f ../deployment/docker-compose.yml
DEFAULT :=
UP_FLAGS := up --remove-orphans
build:
$(DEFAULT) -f docker-compose.dev.yml build
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml build
dev:
$(DEFAULT) -f docker-compose.dev.yml $(UP_FLAGS)
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)
dev-python:
$(DEFAULT) -f docker-compose.dev.yml -f docker-compose.python.yml $(UP_FLAGS)
clean:
$(DEFAULT) -f docker-compose.dev.yml down
docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS)

View File

@ -5,11 +5,13 @@ services:
build:
context: ..
args:
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
PYPI_INDEX_URL: https://mirrors.aliyun.com/pypi/simple
PYTHON_BUILD_MIRROR_URL: https://repo.huaweicloud.com/python
environment:
UVICORN_RELOAD: true
VECTOR_WATCH_CONFIG: true
STREAMLIT_RUN_ON_SAVE: true
volumes:
- ../:/app
- ../deployment/scripts/tabby.sh:/usr/bin/tabby.sh:ro
- ../tabby:/home/app/tabby
- ../deployment/scripts/tabby.sh:/home/app/.bin/tabby.sh:ro
- ../deployment/scripts/triton.sh:/home/app/.bin/triton.sh:ro

View File

@ -1,11 +0,0 @@
version: '3.3'
services:
tabby:
image: tabbyml/tabby
environment:
MODEL_BACKEND: python
triton:
profiles:
- donotstart

View File

@ -3,7 +3,7 @@ from components import monaco
from utils.service_info import ServiceInfo
SERVICES = [
ServiceInfo(label="triton", health_url="http://triton:8002/metrics"),
ServiceInfo(label="triton", health_url="http://localhost:8002/metrics"),
ServiceInfo(label="vector", health_url="http://localhost:8686/health"),
ServiceInfo(
label="dagu", health_url="http://localhost:8080", url="http://localhost:8080"

View File

@ -35,7 +35,7 @@ MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "python")
if MODEL_BACKEND == "triton":
model_backend = TritonService(
tokenizer_name=MODEL_NAME,
host=os.environ.get("TRITON_HOST", "triton"),
host=os.environ.get("TRITON_HOST", "localhost"),
port=os.environ.get("TRITON_PORT", "8001"),
)
else: