feat: support single container (#46)
* docs: update readme * fix: do not exclude peft * Free disk space before docker building * fix: fix docker-compose * fix: dockercompose user to 1000 * fix dockerfile * fix: cachedir ownership
add-more-languages
parent
0d89a1221a
commit
db77d7f267
|
|
@ -21,6 +21,20 @@ jobs:
|
|||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: false
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
swap-storage: true
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
|
|
|
|||
81
Dockerfile
81
Dockerfile
|
|
@ -1,47 +1,68 @@
|
|||
# syntax = docker/dockerfile:1.5
|
||||
|
||||
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
|
||||
FROM tabbyml/fastertransformer_backend
|
||||
|
||||
# Install utilities
|
||||
RUN apt update && apt -y install build-essential libssl-dev zlib1g-dev \
|
||||
libbz2-dev libreadline-dev libsqlite3-dev curl \
|
||||
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
|
||||
|
||||
RUN mkdir -p /home/app
|
||||
RUN chown 1000 /home/app
|
||||
|
||||
USER 1000
|
||||
WORKDIR /home/app
|
||||
ENV HOME /home/app
|
||||
|
||||
# Setup pyenv
|
||||
RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
|
||||
ENV PATH="$HOME/.pyenv/shims:/home/app/.pyenv/bin:$PATH"
|
||||
|
||||
ARG PYTHON_VERSION=3.10.10
|
||||
RUN pyenv install ${PYTHON_VERSION}
|
||||
RUN pyenv global ${PYTHON_VERSION}
|
||||
|
||||
ARG PYPI_INDEX_URL=https://pypi.org/simple
|
||||
ARG POETRY_VERSION=1.4.0
|
||||
|
||||
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
|
||||
|
||||
# vector
|
||||
RUN <<EOF
|
||||
apt-get -y update
|
||||
apt-get -y install git curl
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
|
||||
EOF
|
||||
ENV PATH "$HOME/.vector/bin:$PATH"
|
||||
|
||||
# Supervisord
|
||||
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL supervisor
|
||||
|
||||
RUN mkdir -p ~/.bin
|
||||
ENV PATH "$HOME/.bin:$PATH"
|
||||
|
||||
# Install dagu
|
||||
RUN <<EOF
|
||||
curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz
|
||||
tar zxvf dagu.tar.gz
|
||||
mv dagu /usr/local/bin
|
||||
mv dagu ~/.bin/
|
||||
rm dagu.tar.gz LICENSE.md README.md
|
||||
EOF
|
||||
|
||||
ARG PYPI_INDEX_URL=https://pypi.org/simple
|
||||
ARG POETRY_VERSION=1.4.0
|
||||
# Install tabby dependencies
|
||||
COPY poetry.lock pyproject.toml ./
|
||||
RUN poetry export --without-hashes > requirements.txt
|
||||
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL --no-dependencies -r requirements.txt
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
|
||||
|
||||
COPY poetry.lock pyproject.toml /app/
|
||||
RUN poetry export --without-hashes -o requirements.txt
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple --no-dependencies -r requirements.txt
|
||||
|
||||
## FIX bitsandbytes
|
||||
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
|
||||
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
|
||||
|
||||
# vector
|
||||
RUN <<EOF
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
|
||||
mkdir -p /var/lib/vector
|
||||
EOF
|
||||
ENV PATH "$PATH:/root/.vector/bin"
|
||||
|
||||
# Supervisord
|
||||
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
|
||||
|
||||
COPY tabby ./tabby
|
||||
COPY deployment/scripts/tabby.sh /usr/bin
|
||||
COPY deployment/scripts/tabby.sh ./.bin/
|
||||
COPY deployment/scripts/triton.sh ./.bin/
|
||||
|
||||
# Setup file permissions
|
||||
USER root
|
||||
RUN mkdir -p /var/lib/vector
|
||||
RUN chown 1000 /var/lib/vector
|
||||
|
||||
RUN mkdir -p $HOME/.cache
|
||||
RUN chown 1000 $HOME/.cache
|
||||
|
||||
USER 1000
|
||||
CMD ["tabby.sh"]
|
||||
|
|
|
|||
|
|
@ -2,4 +2,4 @@ MODEL_NAME=TabbyML/J-350M
|
|||
|
||||
# Volumes
|
||||
DATA_VOLUME="../data:/data"
|
||||
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
|
||||
HF_VOLUME="../data/hf_cache:/home/app/.cache/huggingface"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,20 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
tabby:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby
|
||||
environment:
|
||||
MODEL_NAME: ${MODEL_NAME}
|
||||
ports:
|
||||
- "5000:5000"
|
||||
- "8080:8080"
|
||||
- "8501:8501"
|
||||
volumes:
|
||||
- ${DATA_VOLUME}
|
||||
- ${HF_VOLUME}
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:5000"]
|
||||
interval: 2s
|
||||
timeout: 2s
|
||||
start_period: 1200s
|
||||
|
|
@ -4,9 +4,11 @@ services:
|
|||
tabby:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby
|
||||
shm_size: 1gb
|
||||
environment:
|
||||
MODEL_NAME: ${MODEL_NAME}
|
||||
MODEL_BACKEND: triton
|
||||
MODEL_REPLICA: ${MODEL_REPLICA:-1}
|
||||
ports:
|
||||
- "5000:5000"
|
||||
- "8080:8080"
|
||||
|
|
@ -19,15 +21,6 @@ services:
|
|||
interval: 2s
|
||||
timeout: 2s
|
||||
start_period: 1200s
|
||||
|
||||
triton:
|
||||
image: tabbyml/fastertransformer_backend
|
||||
container_name: tabby-triton
|
||||
command: triton.sh
|
||||
shm_size: 1gb
|
||||
volumes:
|
||||
- ./scripts/triton.sh:/usr/bin/triton.sh:ro
|
||||
- ${HF_VOLUME}
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
|
|
@ -35,9 +28,3 @@ services:
|
|||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
environment:
|
||||
MODEL_NAME: ${MODEL_NAME}
|
||||
MODEL_REPLICA: ${MODEL_REPLICA:-1}
|
||||
depends_on:
|
||||
tabby:
|
||||
condition: service_healthy
|
||||
|
|
|
|||
|
|
@ -35,9 +35,20 @@ python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
|||
|
||||
|
||||
supervisor() {
|
||||
if [[ "$MODEL_BACKEND" == "triton" ]]
|
||||
then
|
||||
|
||||
local TRITON_SERVER=$(cat <<EOF
|
||||
[program:triton]
|
||||
command=triton.sh
|
||||
EOF
|
||||
)
|
||||
|
||||
fi
|
||||
|
||||
supervisord -n -c <(cat <<EOF
|
||||
[supervisord]
|
||||
logfile = /var/log/supervisord.log
|
||||
logfile = ${LOGS_DIR}/supervisord.log
|
||||
loglevel = debug
|
||||
|
||||
[program:server]
|
||||
|
|
@ -54,6 +65,8 @@ command=dagu scheduler
|
|||
|
||||
[program:dagu_server]
|
||||
command=dagu server --host 0.0.0.0 --port 8080
|
||||
|
||||
$TRITON_SERVER
|
||||
EOF
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,5 +23,5 @@ sed -i "s/count: [[:digit:]]\+/count: $MODEL_REPLICA/g" $MODEL_DIR/triton/faster
|
|||
|
||||
# Start triton server.
|
||||
mpirun -n 1 \
|
||||
--allow-run-as-root /opt/tritonserver/bin/tritonserver \
|
||||
/opt/tritonserver/bin/tritonserver \
|
||||
--model-repository=$MODEL_DIR/triton
|
||||
|
|
|
|||
|
|
@ -1,14 +1,11 @@
|
|||
DEFAULT := docker-compose -f ../deployment/docker-compose.yml
|
||||
DEFAULT :=
|
||||
UP_FLAGS := up --remove-orphans
|
||||
|
||||
build:
|
||||
$(DEFAULT) -f docker-compose.dev.yml build
|
||||
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml build
|
||||
|
||||
dev:
|
||||
$(DEFAULT) -f docker-compose.dev.yml $(UP_FLAGS)
|
||||
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
||||
|
||||
dev-python:
|
||||
$(DEFAULT) -f docker-compose.dev.yml -f docker-compose.python.yml $(UP_FLAGS)
|
||||
|
||||
clean:
|
||||
$(DEFAULT) -f docker-compose.dev.yml down
|
||||
docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
||||
|
|
|
|||
|
|
@ -5,11 +5,13 @@ services:
|
|||
build:
|
||||
context: ..
|
||||
args:
|
||||
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
PYPI_INDEX_URL: https://mirrors.aliyun.com/pypi/simple
|
||||
PYTHON_BUILD_MIRROR_URL: https://repo.huaweicloud.com/python
|
||||
environment:
|
||||
UVICORN_RELOAD: true
|
||||
VECTOR_WATCH_CONFIG: true
|
||||
STREAMLIT_RUN_ON_SAVE: true
|
||||
volumes:
|
||||
- ../:/app
|
||||
- ../deployment/scripts/tabby.sh:/usr/bin/tabby.sh:ro
|
||||
- ../tabby:/home/app/tabby
|
||||
- ../deployment/scripts/tabby.sh:/home/app/.bin/tabby.sh:ro
|
||||
- ../deployment/scripts/triton.sh:/home/app/.bin/triton.sh:ro
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
tabby:
|
||||
image: tabbyml/tabby
|
||||
environment:
|
||||
MODEL_BACKEND: python
|
||||
|
||||
triton:
|
||||
profiles:
|
||||
- donotstart
|
||||
|
|
@ -3,7 +3,7 @@ from components import monaco
|
|||
from utils.service_info import ServiceInfo
|
||||
|
||||
SERVICES = [
|
||||
ServiceInfo(label="triton", health_url="http://triton:8002/metrics"),
|
||||
ServiceInfo(label="triton", health_url="http://localhost:8002/metrics"),
|
||||
ServiceInfo(label="vector", health_url="http://localhost:8686/health"),
|
||||
ServiceInfo(
|
||||
label="dagu", health_url="http://localhost:8080", url="http://localhost:8080"
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "python")
|
|||
if MODEL_BACKEND == "triton":
|
||||
model_backend = TritonService(
|
||||
tokenizer_name=MODEL_NAME,
|
||||
host=os.environ.get("TRITON_HOST", "triton"),
|
||||
host=os.environ.get("TRITON_HOST", "localhost"),
|
||||
port=os.environ.get("TRITON_PORT", "8001"),
|
||||
)
|
||||
else:
|
||||
|
|
|
|||
Loading…
Reference in New Issue