feat: support single container (#46)

* docs: update readme
* fix: do not exclude peft
* Free disk space before docker building
* fix: fix docker-compose
* fix: dockercompose user to 1000
* fix dockerfile
* fix: cachedir ownership
2023-04-05 20:19:43 +08:00 · 2023-04-05 20:19:43 +08:00 · db77d7f267
parent 0d89a1221a
commit db77d7f267
12 changed files with 114 additions and 71 deletions
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -21,6 +21,20 @@ jobs:
      id-token: write
    steps:
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: true
      - name: Checkout repository
        uses: actions/checkout@v3
--- a/Dockerfile
+++ b/Dockerfile
@ -1,47 +1,68 @@
 # syntax = docker/dockerfile:1.5
-FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
+FROM tabbyml/fastertransformer_backend
-# Install utilities
+RUN apt update && apt -y install build-essential libssl-dev zlib1g-dev \
  libbz2-dev libreadline-dev libsqlite3-dev curl \
  libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
 RUN mkdir -p /home/app
 RUN chown 1000 /home/app
 USER 1000
 WORKDIR /home/app
 ENV HOME /home/app
 # Setup pyenv
 RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
 ENV PATH="$HOME/.pyenv/shims:/home/app/.pyenv/bin:$PATH"
 ARG PYTHON_VERSION=3.10.10
 RUN pyenv install ${PYTHON_VERSION}
 RUN pyenv global ${PYTHON_VERSION}
 ARG PYPI_INDEX_URL=https://pypi.org/simple
 ARG POETRY_VERSION=1.4.0
 RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
 # vector
 RUN <<EOF
-  apt-get -y update
+curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
  apt-get -y install git curl
 EOF
 ENV PATH "$HOME/.vector/bin:$PATH"
 # Supervisord
 RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL supervisor
 RUN mkdir -p ~/.bin
 ENV PATH "$HOME/.bin:$PATH"
 # Install dagu
 RUN <<EOF
  curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz
  tar zxvf dagu.tar.gz
-  mv dagu /usr/local/bin
+  mv dagu ~/.bin/
  rm dagu.tar.gz LICENSE.md README.md
 EOF
-ARG PYPI_INDEX_URL=https://pypi.org/simple
+# Install tabby dependencies
-ARG POETRY_VERSION=1.4.0
+COPY poetry.lock pyproject.toml ./
 RUN poetry export --without-hashes > requirements.txt
 RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL --no-dependencies -r requirements.txt
 WORKDIR /app
 RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
 COPY poetry.lock pyproject.toml /app/
 RUN poetry export --without-hashes -o requirements.txt
 RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple --no-dependencies -r requirements.txt
 ## FIX bitsandbytes
 ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
 RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
 # vector
 RUN <<EOF
 curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
 mkdir -p /var/lib/vector
 EOF
 ENV PATH "$PATH:/root/.vector/bin"
 # Supervisord
 RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
 COPY tabby ./tabby
-COPY deployment/scripts/tabby.sh /usr/bin
+COPY deployment/scripts/tabby.sh ./.bin/
 COPY deployment/scripts/triton.sh ./.bin/
 # Setup file permissions
 USER root
 RUN mkdir -p /var/lib/vector
 RUN chown 1000 /var/lib/vector
 RUN mkdir -p $HOME/.cache
 RUN chown 1000 $HOME/.cache
 USER 1000
 CMD ["tabby.sh"]
--- a/deployment/.env
+++ b/deployment/.env
@ -2,4 +2,4 @@ MODEL_NAME=TabbyML/J-350M
 # Volumes
 DATA_VOLUME="../data:/data"
-HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
+HF_VOLUME="../data/hf_cache:/home/app/.cache/huggingface"
--- a/deployment/docker-compose.python.yml
+++ b/deployment/docker-compose.python.yml
@ -0,0 +1,20 @@
 version: '3.3'
 services:
  tabby:
    image: tabbyml/tabby
    container_name: tabby
    environment:
      MODEL_NAME: ${MODEL_NAME}
    ports:
      - "5000:5000"
      - "8080:8080"
      - "8501:8501"
    volumes:
      - ${DATA_VOLUME}
      - ${HF_VOLUME}
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:5000"]
      interval: 2s
      timeout: 2s
      start_period: 1200s
--- a/deployment/docker-compose.yml
+++ b/deployment/docker-compose.yml
@ -4,9 +4,11 @@ services:
  tabby:
    image: tabbyml/tabby
    container_name: tabby
    shm_size: 1gb
    environment:
      MODEL_NAME: ${MODEL_NAME}
      MODEL_BACKEND: triton
      MODEL_REPLICA: ${MODEL_REPLICA:-1}
    ports:
      - "5000:5000"
      - "8080:8080"
@ -19,15 +21,6 @@ services:
      interval: 2s
      timeout: 2s
      start_period: 1200s
  triton:
    image: tabbyml/fastertransformer_backend
    container_name: tabby-triton
    command: triton.sh
    shm_size: 1gb
    volumes:
      - ./scripts/triton.sh:/usr/bin/triton.sh:ro
      - ${HF_VOLUME}
    deploy:
      resources:
        reservations:
@ -35,9 +28,3 @@ services:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      MODEL_NAME: ${MODEL_NAME}
      MODEL_REPLICA: ${MODEL_REPLICA:-1}
    depends_on:
      tabby:
        condition: service_healthy
--- a/deployment/scripts/tabby.sh
+++ b/deployment/scripts/tabby.sh
@ -35,9 +35,20 @@ python -m tabby.tools.download_models --repo_id=$MODEL_NAME
 supervisor() {
 if [[ "$MODEL_BACKEND" == "triton" ]]
 then
 local TRITON_SERVER=$(cat <<EOF
 [program:triton]
 command=triton.sh
 EOF
 )
 fi
 supervisord -n -c <(cat <<EOF
 [supervisord]
-logfile = /var/log/supervisord.log
+logfile = ${LOGS_DIR}/supervisord.log
 loglevel = debug
 [program:server]
@ -54,6 +65,8 @@ command=dagu scheduler
 [program:dagu_server]
 command=dagu server --host 0.0.0.0 --port 8080
 $TRITON_SERVER
 EOF
 )
 }
--- a/deployment/scripts/triton.sh
+++ b/deployment/scripts/triton.sh
@ -23,5 +23,5 @@ sed -i "s/count: [[:digit:]]\+/count: $MODEL_REPLICA/g" $MODEL_DIR/triton/faster
 # Start triton server.
 mpirun -n 1 \
-  --allow-run-as-root /opt/tritonserver/bin/tritonserver \
+  /opt/tritonserver/bin/tritonserver \
  --model-repository=$MODEL_DIR/triton
--- a/development/Makefile
+++ b/development/Makefile
@ -1,14 +1,11 @@
-DEFAULT := docker-compose -f ../deployment/docker-compose.yml
+DEFAULT :=
 UP_FLAGS := up --remove-orphans
 build:
-	$(DEFAULT) -f docker-compose.dev.yml build
+	docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml build
 dev:
-	$(DEFAULT) -f docker-compose.dev.yml $(UP_FLAGS)
+	docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)
 dev-python:
-	$(DEFAULT) -f docker-compose.dev.yml -f docker-compose.python.yml $(UP_FLAGS)
+	docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS)
 clean:
 	$(DEFAULT) -f docker-compose.dev.yml down
--- a/development/docker-compose.dev.yml
+++ b/development/docker-compose.dev.yml
@ -5,11 +5,13 @@ services:
    build:
      context: ..
      args:
-        PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
+        PYPI_INDEX_URL: https://mirrors.aliyun.com/pypi/simple
        PYTHON_BUILD_MIRROR_URL: https://repo.huaweicloud.com/python
    environment:
      UVICORN_RELOAD: true
      VECTOR_WATCH_CONFIG: true
      STREAMLIT_RUN_ON_SAVE: true
    volumes:
-      - ../:/app
+      - ../tabby:/home/app/tabby
-      - ../deployment/scripts/tabby.sh:/usr/bin/tabby.sh:ro
+      - ../deployment/scripts/tabby.sh:/home/app/.bin/tabby.sh:ro
      - ../deployment/scripts/triton.sh:/home/app/.bin/triton.sh:ro
--- a/development/docker-compose.python.yml
+++ b/development/docker-compose.python.yml
@ -1,11 +0,0 @@
 version: '3.3'
 services:
  tabby:
    image: tabbyml/tabby
    environment:
      MODEL_BACKEND: python
  triton:
    profiles:
      - donotstart
--- a/tabby/admin/Home.py
+++ b/tabby/admin/Home.py
@ -3,7 +3,7 @@ from components import monaco
 from utils.service_info import ServiceInfo
 SERVICES = [
-    ServiceInfo(label="triton", health_url="http://triton:8002/metrics"),
+    ServiceInfo(label="triton", health_url="http://localhost:8002/metrics"),
    ServiceInfo(label="vector", health_url="http://localhost:8686/health"),
    ServiceInfo(
        label="dagu", health_url="http://localhost:8080", url="http://localhost:8080"
--- a/tabby/server/init.py
+++ b/tabby/server/init.py
@ -35,7 +35,7 @@ MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "python")
 if MODEL_BACKEND == "triton":
    model_backend = TritonService(
        tokenizer_name=MODEL_NAME,
-        host=os.environ.get("TRITON_HOST", "triton"),
+        host=os.environ.get("TRITON_HOST", "localhost"),
        port=os.environ.get("TRITON_PORT", "8001"),
    )
 else: