Add supervisord to support a single docker run deployment (#29)

* Add supervisord in Dockerfile

* Create supervisord

* Update README.md

* Update README.md
add-more-languages
Meng Zhang 2023-03-29 12:57:03 +08:00 committed by GitHub
parent 07a3cff13a
commit bf7d149a27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 104 additions and 93 deletions

View File

@ -32,4 +32,17 @@ RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extr
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
# vector
RUN <<EOF
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
mkdir -p /var/lib/vector
EOF
ENV PATH "$PATH:/root/.vector/bin"
COPY deployment/config/vector.toml /etc/vector/vector.toml
# Supervisord
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
COPY deployment/scripts/supervisord.sh /usr/bin
COPY tabby ./tabby
CMD ["supervisord.sh"]

View File

@ -26,11 +26,16 @@ An opensource / on-prem alternative to GitHub Copilot.
### Docker
The easiest way of getting started is using the `deployment/docker-compose.yml`:
The easiest way of getting started is using the official docker image:
```bash
docker-compose up
docker run \
-it --rm \
-v ./data:/data \
-v ./data/hf_cache:/root/.cache/huggingface \
-p 5000:5000 \
-p 8501:8501 \
-e MODEL_NAME=TabbyML/J-350M tabbyml/tabby
```
Note: To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
You can then query the server using `/v1/completions` endpoint:
```bash
@ -39,6 +44,12 @@ curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/
}'
```
To use the GPU backend (triton) for a faster inference experience, use `deployment/docker-compose.yml`:
```bash
docker-compose up
```
Note: To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
We also provide an interactive playground in the admin panel at [localhost:8501](http://localhost:8501).
![image](https://user-images.githubusercontent.com/388154/227792390-ec19e9b9-ebbb-4a94-99ca-8a142ffb5e46.png)

View File

@ -1,6 +1,5 @@
MODEL_NAME=TabbyML/J-350M
# Volumes
DATA_VOLUME="../data:/data"
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
LOGS_VOLUME="../data/logs:/logs"

View File

@ -4,7 +4,7 @@ address = "0.0.0.0:8686"
[sources.tabby_server_logs]
type = "file"
include = ["/logs/tabby-server/events.*.log"]
include = ["/data/logs/tabby-server/events.*.log"]
[transforms.process_tabby_server_logs]
type = "remap"
@ -16,4 +16,4 @@ type = "file"
inputs = [ "process_tabby_server_logs" ]
encoding = { codec = "json" }
framing = { method = "newline_delimited" }
path = "/logs/tabby-server/events.%Y-%m-%d.json"
path = "/data/logs/tabby-server/events.%Y-%m-%d.json"

View File

@ -1,40 +1,26 @@
version: '3.3'
services:
init:
tabby:
image: tabbyml/tabby
container_name: tabby-init
command: dagu start --params=MODEL_NAME=${MODEL_NAME} ./tabby/tasks/init.yaml
volumes:
- ${HF_VOLUME}
server:
image: tabbyml/tabby
container_name: tabby-server
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
container_name: tabby
environment:
MODEL_NAME: ${MODEL_NAME}
MODEL_BACKEND: triton
EVENTS_LOG_DIR: /logs/tabby-server
EVENTS_LOG_DIR: /data/logs/tabby-server
DAGU_DAGS: /app/tabby/tasks
ports:
- "5000:5000"
volumes:
- ${HF_VOLUME}
- ${LOGS_VOLUME}
depends_on:
init:
condition: service_completed_successfully
triton:
condition: service_healthy
admin:
image: tabbyml/tabby
container_name: tabby-admin
command: streamlit run tabby/admin/Home.py
ports:
- "8080:8080"
- "8501:8501"
volumes:
- ${LOGS_VOLUME}
- ${DATA_VOLUME}
- ${HF_VOLUME}
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5000"]
interval: 2s
timeout: 2s
start_period: 120s
triton:
image: tabbyml/fastertransformer_backend
@ -54,29 +40,5 @@ services:
environment:
MODEL_NAME: ${MODEL_NAME}
depends_on:
init:
condition: service_completed_successfully
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
interval: 2s
timeout: 2s
start_period: 120s
vector:
image: timberio/vector:0.28.1-alpine
container_name: tabby-vector
volumes:
- ./config/vector.toml:/etc/vector/vector.toml:ro
- ${LOGS_VOLUME}
dagu:
image: tabbyml/tabby
container_name: tabby-dagu
command: dagu scheduler
volumes:
- ${LOGS_VOLUME}
environment:
DAGU_DAGS: tabby/tasks
depends_on:
init:
condition: service_completed_successfully
tabby:
condition: service_healthy

View File

@ -0,0 +1,46 @@
#!/bin/bash
# Entrypoint script: exports the environment shared by the supervised
# processes before the model download and supervisord are started below.
# Abort on the first failing command.
set -e

# Shared environment variables (caller-supplied values take precedence).
export LOGS_DIR="${LOGS_DIR:-/data/logs}"
export DB_FILE="${DB_FILE:-/data/logs/duckdb/duck.db}"

# server
export MODEL_NAME="${MODEL_NAME:-TabbyML/J-350M}"
export MODEL_BACKEND="${MODEL_BACKEND:-python}"
# Always derived from LOGS_DIR, so it follows any LOGS_DIR override.
export EVENTS_LOG_DIR="${LOGS_DIR}/tabby-server"

# dagu
# Keep a DAGU_DAGS value passed in by the caller (for example through the
# container environment) instead of overwriting it; the default is unchanged.
export DAGU_DAGS="${DAGU_DAGS:-tabby/tasks}"
# Run the tabby model download tool for the repository named by MODEL_NAME.
init() {
  # Quote the expansion so a value containing whitespace or glob characters
  # reaches the tool as one unmodified --repo_id argument.
  python -m tabby.tools.download_models --repo_id="$MODEL_NAME"
}
# Start supervisord in the foreground (-n) with a configuration generated
# inline: the heredoc below is passed to `-c` through process substitution,
# so this script writes no configuration file itself.
# The configuration declares five programs: server (uvicorn, port 5000),
# admin (streamlit, port 8501), vector, dagu_scheduler and dagu_server
# (port 8080).
# NOTE: every line between the EOF markers is configuration text read by
# supervisord at runtime, not shell code; keep it flush with the left margin.
supervisor() {
supervisord -n -c <(cat <<EOF
[supervisord]
logfile = /var/log/supervisord.log
loglevel = debug
[program:server]
command=uvicorn tabby.server:app --host 0.0.0.0 --port 5000
[program:admin]
command=streamlit run tabby/admin/Home.py --server.port 8501
[program:vector]
command=vector
[program:dagu_scheduler]
command=dagu scheduler
[program:dagu_server]
command=dagu server --host 0.0.0.0 --port 8080
EOF
)
}
# Entry point: run init first, then hand control to supervisord.
# Because of `set -e`, a failing init stops the script before supervisord starts.
init
supervisor

View File

@ -1,30 +1,16 @@
version: '3.3'
services:
init:
tabby:
build:
context: ..
args:
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
volumes:
- ../:/app
server:
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000 --reload
volumes:
- ../:/app
admin:
command: streamlit run --server.runOnSave=true tabby/admin/Home.py
volumes:
- ../:/app
vector:
environment:
- VECTOR_WATCH_CONFIG=true
dagu:
ports:
- 8080:8080
UVICORN_RELOAD: true
VECTOR_WATCH_CONFIG: true
STREAMLIT_RUN_ON_SAVE: true
volumes:
- ../:/app
- ../deployment/config/vector.toml:/etc/vector/vector.toml:ro
- ../deployment/scripts/supervisord.sh:/usr/bin/supervisord.sh:ro

View File

@ -1,7 +1,7 @@
version: '3.3'
services:
server:
tabby:
image: tabbyml/tabby
environment:
MODEL_BACKEND: python

View File

@ -3,7 +3,7 @@ from components import monaco
from utils.service_info import ServiceInfo
SERVICES = [
ServiceInfo(label="server", url="http://server:5000"),
ServiceInfo(label="server", url="http://localhost:5000"),
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
]

View File

@ -7,7 +7,7 @@ st.set_page_config(page_title="Tabby Admin - Metrics")
def query_data():
filepath = os.environ.get("DB_FILE", "/logs/duckdb/duck.db")
filepath = os.environ.get("DB_FILE", "/data/logs/duckdb/duck.db")
if not os.path.isfile(filepath):
return []

View File

@ -1,7 +1,9 @@
schedule: "*/3 * * * *" # Run every 3rd minute
env:
- PATH: "/opt/conda/bin:$PATH"
- PATH: "$PATH"
- LOGS_DIR: "$LOGS_DIR"
- DB_FILE: "$DB_FILE"
- APP_DIR: /app
steps:
- name: Collect Tabby

View File

@ -1,8 +0,0 @@
params: MODEL_NAME
env:
- PATH: "/opt/conda/bin:$PATH"
- APP_DIR: /app
steps:
- name: Download models
dir: $APP_DIR
command: python -m tabby.tools.download_models --repo_id=$MODEL_NAME

View File

@ -1,8 +1,8 @@
#!/bin/bash
set -e
DB_FILE=${DB_FILE:-"/logs/duckdb/duck.db"}
LOGS_DIR=${LOGS_DIR:-"/logs"}
DB_FILE=${DB_FILE:-"/data/logs/duckdb/duck.db"}
LOGS_DIR=${LOGS_DIR:-"/data/logs"}
TABBY_SERVER_LOGS="${LOGS_DIR}/tabby-server/events.*.json"
# Init schema