feat: support single container (#46)

* docs: update readme
* fix: do not exclude peft
* Free disk space before docker building
* fix: fix docker-compose
* fix: dockercompose user to 1000
* fix dockerfile
* fix: cachedir ownership
2023-04-05 20:19:43 +08:00 · 2023-04-05 20:19:43 +08:00 · db77d7f267
parent 0d89a1221a
commit db77d7f267
12 changed files with 114 additions and 71 deletions
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -21,6 +21,20 @@ jobs:
      id-token: write

    steps:
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: false
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
      - name: Checkout repository
        uses: actions/checkout@v3

--- a/Dockerfile
+++ b/Dockerfile
@@ -1,47 +1,68 @@
 # syntax = docker/dockerfile:1.5

-FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
+FROM tabbyml/fastertransformer_backend

-# Install utilities
+RUN apt update && apt -y install build-essential libssl-dev zlib1g-dev \
+  libbz2-dev libreadline-dev libsqlite3-dev curl \
+  libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
+
+RUN mkdir -p /home/app
+RUN chown 1000 /home/app
+
+USER 1000
+WORKDIR /home/app
+ENV HOME /home/app
+
+# Setup pyenv
+RUN git clone --depth=1 https://github.com/pyenv/pyenv.git .pyenv
+ENV PATH="$HOME/.pyenv/shims:/home/app/.pyenv/bin:$PATH"
+
+ARG PYTHON_VERSION=3.10.10
+RUN pyenv install ${PYTHON_VERSION}
+RUN pyenv global ${PYTHON_VERSION}
+
+ARG PYPI_INDEX_URL=https://pypi.org/simple
+ARG POETRY_VERSION=1.4.0
+
+RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
+
+# vector
 RUN <<EOF
-  apt-get -y update
-  apt-get -y install git curl
+curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
 EOF
+ENV PATH "$HOME/.vector/bin:$PATH"
+
+# Supervisord
+RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL supervisor
+
+RUN mkdir -p ~/.bin
+ENV PATH "$HOME/.bin:$PATH"

 # Install dagu
 RUN <<EOF
  curl -L https://github.com/yohamta/dagu/releases/download/v1.10.2/dagu_1.10.2_Linux_x86_64.tar.gz > dagu.tar.gz
  tar zxvf dagu.tar.gz
-  mv dagu /usr/local/bin
+  mv dagu ~/.bin/
  rm dagu.tar.gz LICENSE.md README.md
 EOF

-ARG PYPI_INDEX_URL=https://pypi.org/simple
-ARG POETRY_VERSION=1.4.0
+# Install tabby dependencies
+COPY poetry.lock pyproject.toml ./
+RUN poetry export --without-hashes > requirements.txt
+RUN --mount=type=cache,target=$HOME/.cache pip install -i $PYPI_INDEX_URL --no-dependencies -r requirements.txt

-WORKDIR /app
-
-RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL "poetry==$POETRY_VERSION"
-
-COPY poetry.lock pyproject.toml /app/
-RUN poetry export --without-hashes -o requirements.txt
-
-RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple --no-dependencies -r requirements.txt
-
-## FIX bitandsands
-ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
-RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
-
-# vector
-RUN <<EOF
-curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
-mkdir -p /var/lib/vector
-EOF
-ENV PATH "$PATH:/root/.vector/bin"
-
-# Supervisord
-RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor

 COPY tabby ./tabby
-COPY deployment/scripts/tabby.sh /usr/bin
+COPY deployment/scripts/tabby.sh ./.bin/
+COPY deployment/scripts/triton.sh ./.bin/
+
+# Setup file permissions
+USER root
+RUN mkdir -p /var/lib/vector
+RUN chown 1000 /var/lib/vector
+
+RUN mkdir -p $HOME/.cache
+RUN chown 1000 $HOME/.cache
+
+USER 1000
 CMD ["tabby.sh"]
--- a/deployment/.env
+++ b/deployment/.env
@@ -2,4 +2,4 @@ MODEL_NAME=TabbyML/J-350M

 # Volumes
 DATA_VOLUME="../data:/data"
-HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
+HF_VOLUME="../data/hf_cache:/home/app/.cache/huggingface"
--- a/deployment/docker-compose.python.yml
+++ b/deployment/docker-compose.python.yml
@@ -0,0 +1,20 @@
+version: '3.3'
+
+services:
+  tabby:
+    image: tabbyml/tabby
+    container_name: tabby
+    environment:
+      MODEL_NAME: ${MODEL_NAME}
+    ports:
+      - "5000:5000"
+      - "8080:8080"
+      - "8501:8501"
+    volumes:
+      - ${DATA_VOLUME}
+      - ${HF_VOLUME}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:5000"]
+      interval: 2s
+      timeout: 2s
+      start_period: 1200s
--- a/deployment/docker-compose.yml
+++ b/deployment/docker-compose.yml
@@ -4,9 +4,11 @@ services:
  tabby:
    image: tabbyml/tabby
    container_name: tabby
+    shm_size: 1gb
    environment:
      MODEL_NAME: ${MODEL_NAME}
      MODEL_BACKEND: triton
+      MODEL_REPLICA: ${MODEL_REPLICA:-1}
    ports:
      - "5000:5000"
      - "8080:8080"
@@ -19,15 +21,6 @@ services:
      interval: 2s
      timeout: 2s
      start_period: 1200s
-
-  triton:
-    image: tabbyml/fastertransformer_backend
-    container_name: tabby-triton
-    command: triton.sh
-    shm_size: 1gb
-    volumes:
-      - ./scripts/triton.sh:/usr/bin/triton.sh:ro
-      - ${HF_VOLUME}
    deploy:
      resources:
        reservations:
@@ -35,9 +28,3 @@ services:
            - driver: nvidia
              count: all
              capabilities: [gpu]
-    environment:
-      MODEL_NAME: ${MODEL_NAME}
-      MODEL_REPLICA: ${MODEL_REPLICA:-1}
-    depends_on:
-      tabby:
-        condition: service_healthy
--- a/deployment/scripts/tabby.sh
+++ b/deployment/scripts/tabby.sh
@@ -35,9 +35,20 @@ python -m tabby.tools.download_models --repo_id=$MODEL_NAME


 supervisor() {
+if [[ "$MODEL_BACKEND" == "triton" ]]
+then
+
+local TRITON_SERVER=$(cat <<EOF
+[program:triton]
+command=triton.sh
+EOF
+)
+
+fi
+
 supervisord -n -c <(cat <<EOF
 [supervisord]
-logfile = /var/log/supervisord.log
+logfile = ${LOGS_DIR}/supervisord.log
 loglevel = debug

 [program:server]
@@ -54,6 +65,8 @@ command=dagu scheduler

 [program:dagu_server]
 command=dagu server --host 0.0.0.0 --port 8080
+
+$TRITON_SERVER
 EOF
 )
 }
--- a/deployment/scripts/triton.sh
+++ b/deployment/scripts/triton.sh
@@ -23,5 +23,5 @@ sed -i "s/count: [[:digit:]]\+/count: $MODEL_REPLICA/g" $MODEL_DIR/triton/faster

 # Start triton server.
 mpirun -n 1 \
-  --allow-run-as-root /opt/tritonserver/bin/tritonserver \
+  /opt/tritonserver/bin/tritonserver \
  --model-repository=$MODEL_DIR/triton
--- a/development/Makefile
+++ b/development/Makefile
@@ -1,14 +1,11 @@
-DEFAULT := docker-compose -f ../deployment/docker-compose.yml
+DEFAULT :=
 UP_FLAGS := up --remove-orphans

 build:
-	$(DEFAULT) -f docker-compose.dev.yml build
+	docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml build

 dev:
-	$(DEFAULT) -f docker-compose.dev.yml $(UP_FLAGS)
+	docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)

 dev-python:
-	$(DEFAULT) -f docker-compose.dev.yml -f docker-compose.python.yml $(UP_FLAGS)
-
-clean:
-	$(DEFAULT) -f docker-compose.dev.yml down
+	docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS)
--- a/development/docker-compose.dev.yml
+++ b/development/docker-compose.dev.yml
@@ -5,11 +5,13 @@ services:
    build:
      context: ..
      args:
-        PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
+        PYPI_INDEX_URL: https://mirrors.aliyun.com/pypi/simple
+        PYTHON_BUILD_MIRROR_URL: https://repo.huaweicloud.com/python
    environment:
      UVICORN_RELOAD: true
      VECTOR_WATCH_CONFIG: true
      STREAMLIT_RUN_ON_SAVE: true
    volumes:
-      - ../:/app
-      - ../deployment/scripts/tabby.sh:/usr/bin/tabby.sh:ro
+      - ../tabby:/home/app/tabby
+      - ../deployment/scripts/tabby.sh:/home/app/.bin/tabby.sh:ro
+      - ../deployment/scripts/triton.sh:/home/app/.bin/triton.sh:ro
--- a/development/docker-compose.python.yml
+++ b/development/docker-compose.python.yml
@@ -1,11 +0,0 @@
-version: '3.3'
-
-services:
-  tabby:
-    image: tabbyml/tabby
-    environment:
-      MODEL_BACKEND: python
-
-  triton:
-    profiles:
-      - donotstart
--- a/tabby/admin/Home.py
+++ b/tabby/admin/Home.py
@@ -3,7 +3,7 @@ from components import monaco
 from utils.service_info import ServiceInfo

 SERVICES = [
-    ServiceInfo(label="triton", health_url="http://triton:8002/metrics"),
+    ServiceInfo(label="triton", health_url="http://localhost:8002/metrics"),
    ServiceInfo(label="vector", health_url="http://localhost:8686/health"),
    ServiceInfo(
        label="dagu", health_url="http://localhost:8080", url="http://localhost:8080"
--- a/tabby/server/__init__.py
+++ b/tabby/server/__init__.py
@@ -35,7 +35,7 @@ MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "python")
 if MODEL_BACKEND == "triton":
    model_backend = TritonService(
        tokenizer_name=MODEL_NAME,
-        host=os.environ.get("TRITON_HOST", "triton"),
+        host=os.environ.get("TRITON_HOST", "localhost"),
        port=os.environ.get("TRITON_PORT", "8001"),
    )
 else: