feat: support single container (#46)
* docs: update readme * fix: do not exclude peft * Free disk space before docker building * fix: fix docker-compose * fix: dockercompose user to 1000 * fix dockerfile * fix: cachedir ownershipadd-more-languages
parent
0d89a1221a
commit
db77d7f267
|
|
@ -21,6 +21,20 @@ jobs:
|
||||||
id-token: write
|
id-token: write
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
- name: Free Disk Space (Ubuntu)
|
||||||
|
uses: jlumbroso/free-disk-space@main
|
||||||
|
with:
|
||||||
|
# this might remove tools that are actually needed,
|
||||||
|
# if set to "true" but frees about 6 GB
|
||||||
|
tool-cache: false
|
||||||
|
# all of these default to true, but feel free to set to
|
||||||
|
# "false" if necessary for your workflow
|
||||||
|
android: true
|
||||||
|
dotnet: true
|
||||||
|
haskell: true
|
||||||
|
large-packages: true
|
||||||
|
swap-storage: true
|
||||||
|
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
|
|
||||||
81
Dockerfile
81
Dockerfile
|
|
@ -1,47 +1,68 @@
|
||||||
# syntax = docker/dockerfile:1.5
|
# syntax = docker/dockerfile:1.5
|
||||||
|
|
||||||
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
|
FROM tabbyml/fastertransformer_backend
|
||||||
|
|
||||||
# Install utilities
|
RUN apt update && apt -y install build-essential libssl-dev zlib1g-dev \
|
||||||
|
libbz2-dev libreadline-dev libsqlite3-dev curl \
|
||||||
|
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
|
||||||
|
|
||||||
|
RUN mkdir -p /home/app
|
||||||
|
RUN chown 1000 /home/app
|
||||||
|
|
||||||
|
USER 1000
|
||||||
|
WORKDIR /home/app
|
||||||
|
ENV HOME /home/app
|
||||||
|
|
||||||
|
# Setup pyenv
|
||||||
|
RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
|
||||||
|
ENV PATH="$HOME/.pyenv/shims:/home/app/.pyenv/bin:$PATH"
|
||||||
|
|
||||||
|
ARG PYTHON_VERSION=3.10.10
|
||||||
|
RUN pyenv install ${PYTHON_VERSION}
|
||||||
|
RUN pyenv global ${PYTHON_VERSION}
|
||||||
|
|
||||||
|
ARG PYPI_INDEX_URL=https://pypi.org/simple
|
||||||
|
ARG POETRY_VERSION=1.4.0
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
|
||||||
|
|
||||||
|
# vector
|
||||||
RUN <<EOF
|
RUN <<EOF
|
||||||
apt-get -y update
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
|
||||||
apt-get -y install git curl
|
|
||||||
EOF
|
EOF
|
||||||
|
ENV PATH "$HOME/.vector/bin:$PATH"
|
||||||
|
|
||||||
|
# Supervisord
|
||||||
|
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL supervisor
|
||||||
|
|
||||||
|
RUN mkdir -p ~/.bin
|
||||||
|
ENV PATH "$HOME/.bin:$PATH"
|
||||||
|
|
||||||
# Install dagu
|
# Install dagu
|
||||||
RUN <<EOF
|
RUN <<EOF
|
||||||
curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz
|
curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz
|
||||||
tar zxvf dagu.tar.gz
|
tar zxvf dagu.tar.gz
|
||||||
mv dagu /usr/local/bin
|
mv dagu ~/.bin/
|
||||||
rm dagu.tar.gz LICENSE.md README.md
|
rm dagu.tar.gz LICENSE.md README.md
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
ARG PYPI_INDEX_URL=https://pypi.org/simple
|
# Install tabby dependencies
|
||||||
ARG POETRY_VERSION=1.4.0
|
COPY poetry.lock pyproject.toml ./
|
||||||
|
RUN poetry export --without-hashes > requirements.txt
|
||||||
|
RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL --no-dependencies -r requirements.txt
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
|
|
||||||
|
|
||||||
COPY poetry.lock pyproject.toml /app/
|
|
||||||
RUN poetry export --without-hashes -o requirements.txt
|
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple --no-dependencies -r requirements.txt
|
|
||||||
|
|
||||||
## FIX bitandsands
|
|
||||||
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
|
|
||||||
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
|
|
||||||
|
|
||||||
# vector
|
|
||||||
RUN <<EOF
|
|
||||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
|
|
||||||
mkdir -p /var/lib/vector
|
|
||||||
EOF
|
|
||||||
ENV PATH "$PATH:/root/.vector/bin"
|
|
||||||
|
|
||||||
# Supervisord
|
|
||||||
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
|
|
||||||
|
|
||||||
COPY tabby ./tabby
|
COPY tabby ./tabby
|
||||||
COPY deployment/scripts/tabby.sh /usr/bin
|
COPY deployment/scripts/tabby.sh ./.bin/
|
||||||
|
COPY deployment/scripts/triton.sh ./.bin/
|
||||||
|
|
||||||
|
# Setup file permissions
|
||||||
|
USER root
|
||||||
|
RUN mkdir -p /var/lib/vector
|
||||||
|
RUN chown 1000 /var/lib/vector
|
||||||
|
|
||||||
|
RUN mkdir -p $HOME/.cache
|
||||||
|
RUN chown 1000 $HOME/.cache
|
||||||
|
|
||||||
|
USER 1000
|
||||||
CMD ["tabby.sh"]
|
CMD ["tabby.sh"]
|
||||||
|
|
|
||||||
|
|
@ -2,4 +2,4 @@ MODEL_NAME=TabbyML/J-350M
|
||||||
|
|
||||||
# Volumes
|
# Volumes
|
||||||
DATA_VOLUME="../data:/data"
|
DATA_VOLUME="../data:/data"
|
||||||
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
|
HF_VOLUME="../data/hf_cache:/home/app/.cache/huggingface"
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
version: '3.3'
|
||||||
|
|
||||||
|
services:
|
||||||
|
tabby:
|
||||||
|
image: tabbyml/tabby
|
||||||
|
container_name: tabby
|
||||||
|
environment:
|
||||||
|
MODEL_NAME: ${MODEL_NAME}
|
||||||
|
ports:
|
||||||
|
- "5000:5000"
|
||||||
|
- "8080:8080"
|
||||||
|
- "8501:8501"
|
||||||
|
volumes:
|
||||||
|
- ${DATA_VOLUME}
|
||||||
|
- ${HF_VOLUME}
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:5000"]
|
||||||
|
interval: 2s
|
||||||
|
timeout: 2s
|
||||||
|
start_period: 1200s
|
||||||
|
|
@ -4,9 +4,11 @@ services:
|
||||||
tabby:
|
tabby:
|
||||||
image: tabbyml/tabby
|
image: tabbyml/tabby
|
||||||
container_name: tabby
|
container_name: tabby
|
||||||
|
shm_size: 1gb
|
||||||
environment:
|
environment:
|
||||||
MODEL_NAME: ${MODEL_NAME}
|
MODEL_NAME: ${MODEL_NAME}
|
||||||
MODEL_BACKEND: triton
|
MODEL_BACKEND: triton
|
||||||
|
MODEL_REPLICA: ${MODEL_REPLICA:-1}
|
||||||
ports:
|
ports:
|
||||||
- "5000:5000"
|
- "5000:5000"
|
||||||
- "8080:8080"
|
- "8080:8080"
|
||||||
|
|
@ -19,15 +21,6 @@ services:
|
||||||
interval: 2s
|
interval: 2s
|
||||||
timeout: 2s
|
timeout: 2s
|
||||||
start_period: 1200s
|
start_period: 1200s
|
||||||
|
|
||||||
triton:
|
|
||||||
image: tabbyml/fastertransformer_backend
|
|
||||||
container_name: tabby-triton
|
|
||||||
command: triton.sh
|
|
||||||
shm_size: 1gb
|
|
||||||
volumes:
|
|
||||||
- ./scripts/triton.sh:/usr/bin/triton.sh:ro
|
|
||||||
- ${HF_VOLUME}
|
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
|
|
@ -35,9 +28,3 @@ services:
|
||||||
- driver: nvidia
|
- driver: nvidia
|
||||||
count: all
|
count: all
|
||||||
capabilities: [gpu]
|
capabilities: [gpu]
|
||||||
environment:
|
|
||||||
MODEL_NAME: ${MODEL_NAME}
|
|
||||||
MODEL_REPLICA: ${MODEL_REPLICA:-1}
|
|
||||||
depends_on:
|
|
||||||
tabby:
|
|
||||||
condition: service_healthy
|
|
||||||
|
|
|
||||||
|
|
@ -35,9 +35,20 @@ python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
||||||
|
|
||||||
|
|
||||||
supervisor() {
|
supervisor() {
|
||||||
|
if [[ "$MODEL_BACKEND" == "triton" ]]
|
||||||
|
then
|
||||||
|
|
||||||
|
local TRITON_SERVER=$(cat <<EOF
|
||||||
|
[program:triton]
|
||||||
|
command=triton.sh
|
||||||
|
EOF
|
||||||
|
)
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
supervisord -n -c <(cat <<EOF
|
supervisord -n -c <(cat <<EOF
|
||||||
[supervisord]
|
[supervisord]
|
||||||
logfile = /var/log/supervisord.log
|
logfile = ${LOGS_DIR}/supervisord.log
|
||||||
loglevel = debug
|
loglevel = debug
|
||||||
|
|
||||||
[program:server]
|
[program:server]
|
||||||
|
|
@ -54,6 +65,8 @@ command=dagu scheduler
|
||||||
|
|
||||||
[program:dagu_server]
|
[program:dagu_server]
|
||||||
command=dagu server --host 0.0.0.0 --port 8080
|
command=dagu server --host 0.0.0.0 --port 8080
|
||||||
|
|
||||||
|
$TRITON_SERVER
|
||||||
EOF
|
EOF
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,5 +23,5 @@ sed -i "s/count: [[:digit:]]\+/count: $MODEL_REPLICA/g" $MODEL_DIR/triton/faster
|
||||||
|
|
||||||
# Start triton server.
|
# Start triton server.
|
||||||
mpirun -n 1 \
|
mpirun -n 1 \
|
||||||
--allow-run-as-root /opt/tritonserver/bin/tritonserver \
|
/opt/tritonserver/bin/tritonserver \
|
||||||
--model-repository=$MODEL_DIR/triton
|
--model-repository=$MODEL_DIR/triton
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,11 @@
|
||||||
DEFAULT := docker-compose -f ../deployment/docker-compose.yml
|
DEFAULT :=
|
||||||
UP_FLAGS := up --remove-orphans
|
UP_FLAGS := up --remove-orphans
|
||||||
|
|
||||||
build:
|
build:
|
||||||
$(DEFAULT) -f docker-compose.dev.yml build
|
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml build
|
||||||
|
|
||||||
dev:
|
dev:
|
||||||
$(DEFAULT) -f docker-compose.dev.yml $(UP_FLAGS)
|
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
||||||
|
|
||||||
dev-python:
|
dev-python:
|
||||||
$(DEFAULT) -f docker-compose.dev.yml -f docker-compose.python.yml $(UP_FLAGS)
|
docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
||||||
|
|
||||||
clean:
|
|
||||||
$(DEFAULT) -f docker-compose.dev.yml down
|
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,13 @@ services:
|
||||||
build:
|
build:
|
||||||
context: ..
|
context: ..
|
||||||
args:
|
args:
|
||||||
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
PYPI_INDEX_URL: https://mirrors.aliyun.com/pypi/simple
|
||||||
|
PYTHON_BUILD_MIRROR_URL: https://repo.huaweicloud.com/python
|
||||||
environment:
|
environment:
|
||||||
UVICORN_RELOAD: true
|
UVICORN_RELOAD: true
|
||||||
VECTOR_WATCH_CONFIG: true
|
VECTOR_WATCH_CONFIG: true
|
||||||
STREAMLIT_RUN_ON_SAVE: true
|
STREAMLIT_RUN_ON_SAVE: true
|
||||||
volumes:
|
volumes:
|
||||||
- ../:/app
|
- ../tabby:/home/app/tabby
|
||||||
- ../deployment/scripts/tabby.sh:/usr/bin/tabby.sh:ro
|
- ../deployment/scripts/tabby.sh:/home/app/.bin/tabby.sh:ro
|
||||||
|
- ../deployment/scripts/triton.sh:/home/app/.bin/triton.sh:ro
|
||||||
|
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
version: '3.3'
|
|
||||||
|
|
||||||
services:
|
|
||||||
tabby:
|
|
||||||
image: tabbyml/tabby
|
|
||||||
environment:
|
|
||||||
MODEL_BACKEND: python
|
|
||||||
|
|
||||||
triton:
|
|
||||||
profiles:
|
|
||||||
- donotstart
|
|
||||||
|
|
@ -3,7 +3,7 @@ from components import monaco
|
||||||
from utils.service_info import ServiceInfo
|
from utils.service_info import ServiceInfo
|
||||||
|
|
||||||
SERVICES = [
|
SERVICES = [
|
||||||
ServiceInfo(label="triton", health_url="http://triton:8002/metrics"),
|
ServiceInfo(label="triton", health_url="http://localhost:8002/metrics"),
|
||||||
ServiceInfo(label="vector", health_url="http://localhost:8686/health"),
|
ServiceInfo(label="vector", health_url="http://localhost:8686/health"),
|
||||||
ServiceInfo(
|
ServiceInfo(
|
||||||
label="dagu", health_url="http://localhost:8080", url="http://localhost:8080"
|
label="dagu", health_url="http://localhost:8080", url="http://localhost:8080"
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "python")
|
||||||
if MODEL_BACKEND == "triton":
|
if MODEL_BACKEND == "triton":
|
||||||
model_backend = TritonService(
|
model_backend = TritonService(
|
||||||
tokenizer_name=MODEL_NAME,
|
tokenizer_name=MODEL_NAME,
|
||||||
host=os.environ.get("TRITON_HOST", "triton"),
|
host=os.environ.get("TRITON_HOST", "localhost"),
|
||||||
port=os.environ.get("TRITON_PORT", "8001"),
|
port=os.environ.get("TRITON_PORT", "8001"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue