Add supervisord to support a single docker run deployment (#29)
* Add supervisord in Dockerfile * Create supervisord * Update README.md * Update README.md [add-more-languages]
parent
07a3cff13a
commit
bf7d149a27
13
Dockerfile
13
Dockerfile
|
|
@ -32,4 +32,17 @@ RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extr
|
||||||
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
|
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
|
||||||
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
|
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
|
||||||
|
|
||||||
|
# vector
|
||||||
|
RUN <<EOF
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
|
||||||
|
mkdir -p /var/lib/vector
|
||||||
|
EOF
|
||||||
|
ENV PATH "$PATH:/root/.vector/bin"
|
||||||
|
COPY deployment/config/vector.toml /etc/vector/vector.toml
|
||||||
|
|
||||||
|
# Supervisord
|
||||||
|
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
|
||||||
|
COPY deployment/scripts/supervisord.sh /usr/bin
|
||||||
|
|
||||||
COPY tabby ./tabby
|
COPY tabby ./tabby
|
||||||
|
CMD ["supervisord.sh"]
|
||||||
|
|
|
||||||
17
README.md
17
README.md
|
|
@ -26,11 +26,16 @@ An opensource / on-prem alternative to GitHub Copilot.
|
||||||
|
|
||||||
### Docker
|
### Docker
|
||||||
|
|
||||||
The easiest way of getting started is using the `deployment/docker-compose.yml`:
|
The easiest way of getting started is using the official docker image:
|
||||||
```bash
|
```bash
|
||||||
docker-compose up
|
docker run \
|
||||||
|
-it --rm \
|
||||||
|
-v ./data:/data \
|
||||||
|
-v ./data/hf_cache:/root/.cache/huggingface \
|
||||||
|
-p 5000:5000 \
|
||||||
|
-p 8501:8501 \
|
||||||
|
-e MODEL_NAME=TabbyML/J-350M tabbyml/tabby
|
||||||
```
|
```
|
||||||
Note: To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
|
|
||||||
|
|
||||||
You can then query the server using `/v1/completions` endpoint:
|
You can then query the server using `/v1/completions` endpoint:
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -39,6 +44,12 @@ curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To use the GPU backend (triton) for a faster inference experience, use `deployment/docker-compose.yml`:
|
||||||
|
```bash
|
||||||
|
docker-compose up
|
||||||
|
```
|
||||||
|
Note: To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
|
||||||
|
|
||||||
We also provide an interactive playground in the admin panel: [localhost:8501](http://localhost:8501)
|
We also provide an interactive playground in the admin panel: [localhost:8501](http://localhost:8501)
|
||||||
|
|
||||||

|

|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
MODEL_NAME=TabbyML/J-350M
|
MODEL_NAME=TabbyML/J-350M
|
||||||
|
|
||||||
# Volumes
|
# Volumes
|
||||||
|
DATA_VOLUME="../data:/data"
|
||||||
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
|
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
|
||||||
|
|
||||||
LOGS_VOLUME="../data/logs:/logs"
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ address = "0.0.0.0:8686"
|
||||||
|
|
||||||
[sources.tabby_server_logs]
|
[sources.tabby_server_logs]
|
||||||
type = "file"
|
type = "file"
|
||||||
include = ["/logs/tabby-server/events.*.log"]
|
include = ["/data/logs/tabby-server/events.*.log"]
|
||||||
|
|
||||||
[transforms.process_tabby_server_logs]
|
[transforms.process_tabby_server_logs]
|
||||||
type = "remap"
|
type = "remap"
|
||||||
|
|
@ -16,4 +16,4 @@ type = "file"
|
||||||
inputs = [ "process_tabby_server_logs" ]
|
inputs = [ "process_tabby_server_logs" ]
|
||||||
encoding = { codec = "json" }
|
encoding = { codec = "json" }
|
||||||
framing = { method = "newline_delimited" }
|
framing = { method = "newline_delimited" }
|
||||||
path = "/logs/tabby-server/events.%Y-%m-%d.json"
|
path = "/data/logs/tabby-server/events.%Y-%m-%d.json"
|
||||||
|
|
|
||||||
|
|
@ -1,40 +1,26 @@
|
||||||
version: '3.3'
|
version: '3.3'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
init:
|
tabby:
|
||||||
image: tabbyml/tabby
|
image: tabbyml/tabby
|
||||||
container_name: tabby-init
|
container_name: tabby
|
||||||
command: dagu start --params=MODEL_NAME=${MODEL_NAME} ./tabby/tasks/init.yaml
|
|
||||||
volumes:
|
|
||||||
- ${HF_VOLUME}
|
|
||||||
|
|
||||||
server:
|
|
||||||
image: tabbyml/tabby
|
|
||||||
container_name: tabby-server
|
|
||||||
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
|
||||||
environment:
|
environment:
|
||||||
MODEL_NAME: ${MODEL_NAME}
|
MODEL_NAME: ${MODEL_NAME}
|
||||||
MODEL_BACKEND: triton
|
MODEL_BACKEND: triton
|
||||||
EVENTS_LOG_DIR: /logs/tabby-server
|
EVENTS_LOG_DIR: /data/logs/tabby-server
|
||||||
|
DAGU_DAGS: /app/tabby/tasks
|
||||||
ports:
|
ports:
|
||||||
- "5000:5000"
|
- "5000:5000"
|
||||||
volumes:
|
- "8080:8080"
|
||||||
- ${HF_VOLUME}
|
|
||||||
- ${LOGS_VOLUME}
|
|
||||||
depends_on:
|
|
||||||
init:
|
|
||||||
condition: service_completed_successfully
|
|
||||||
triton:
|
|
||||||
condition: service_healthy
|
|
||||||
|
|
||||||
admin:
|
|
||||||
image: tabbyml/tabby
|
|
||||||
container_name: tabby-admin
|
|
||||||
command: streamlit run tabby/admin/Home.py
|
|
||||||
ports:
|
|
||||||
- "8501:8501"
|
- "8501:8501"
|
||||||
volumes:
|
volumes:
|
||||||
- ${LOGS_VOLUME}
|
- ${DATA_VOLUME}
|
||||||
|
- ${HF_VOLUME}
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:5000"]
|
||||||
|
interval: 2s
|
||||||
|
timeout: 2s
|
||||||
|
start_period: 120s
|
||||||
|
|
||||||
triton:
|
triton:
|
||||||
image: tabbyml/fastertransformer_backend
|
image: tabbyml/fastertransformer_backend
|
||||||
|
|
@ -54,29 +40,5 @@ services:
|
||||||
environment:
|
environment:
|
||||||
MODEL_NAME: ${MODEL_NAME}
|
MODEL_NAME: ${MODEL_NAME}
|
||||||
depends_on:
|
depends_on:
|
||||||
init:
|
tabby:
|
||||||
condition: service_completed_successfully
|
condition: service_healthy
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
|
|
||||||
interval: 2s
|
|
||||||
timeout: 2s
|
|
||||||
start_period: 120s
|
|
||||||
|
|
||||||
vector:
|
|
||||||
image: timberio/vector:0.28.1-alpine
|
|
||||||
container_name: tabby-vector
|
|
||||||
volumes:
|
|
||||||
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
|
||||||
- ${LOGS_VOLUME}
|
|
||||||
|
|
||||||
dagu:
|
|
||||||
image: tabbyml/tabby
|
|
||||||
container_name: tabby-dagu
|
|
||||||
command: dagu scheduler
|
|
||||||
volumes:
|
|
||||||
- ${LOGS_VOLUME}
|
|
||||||
environment:
|
|
||||||
DAGU_DAGS: tabby/tasks
|
|
||||||
depends_on:
|
|
||||||
init:
|
|
||||||
condition: service_completed_successfully
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Shared environment variables
|
||||||
|
export LOGS_DIR="${LOGS_DIR:-/data/logs}"
|
||||||
|
export DB_FILE="${DB_FILE:-/data/logs/duckdb/duck.db}"
|
||||||
|
|
||||||
|
# server
|
||||||
|
export MODEL_NAME="${MODEL_NAME:-TabbyML/J-350M}"
|
||||||
|
export MODEL_BACKEND="${MODEL_BACKEND:-python}"
|
||||||
|
export EVENTS_LOG_DIR="${LOGS_DIR}/tabby-server"
|
||||||
|
|
||||||
|
# dagu
|
||||||
|
export DAGU_DAGS="tabby/tasks"
|
||||||
|
|
||||||
|
init() {
|
||||||
|
python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
supervisor() {
|
||||||
|
supervisord -n -c <(cat <<EOF
|
||||||
|
[supervisord]
|
||||||
|
logfile = /var/log/supervisord.log
|
||||||
|
loglevel = debug
|
||||||
|
|
||||||
|
[program:server]
|
||||||
|
command=uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
||||||
|
|
||||||
|
[program:admin]
|
||||||
|
command=streamlit run tabby/admin/Home.py --server.port 8501
|
||||||
|
|
||||||
|
[program:vector]
|
||||||
|
command=vector
|
||||||
|
|
||||||
|
[program:dagu_scheduler]
|
||||||
|
command=dagu scheduler
|
||||||
|
|
||||||
|
[program:dagu_server]
|
||||||
|
command=dagu server --host 0.0.0.0 --port 8080
|
||||||
|
EOF
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
init
|
||||||
|
supervisor
|
||||||
|
|
@ -1,30 +1,16 @@
|
||||||
version: '3.3'
|
version: '3.3'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
init:
|
tabby:
|
||||||
build:
|
build:
|
||||||
context: ..
|
context: ..
|
||||||
args:
|
args:
|
||||||
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
||||||
volumes:
|
|
||||||
- ../:/app
|
|
||||||
|
|
||||||
server:
|
|
||||||
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000 --reload
|
|
||||||
volumes:
|
|
||||||
- ../:/app
|
|
||||||
|
|
||||||
admin:
|
|
||||||
command: streamlit run --server.runOnSave=true tabby/admin/Home.py
|
|
||||||
volumes:
|
|
||||||
- ../:/app
|
|
||||||
|
|
||||||
vector:
|
|
||||||
environment:
|
environment:
|
||||||
- VECTOR_WATCH_CONFIG=true
|
UVICORN_RELOAD: true
|
||||||
|
VECTOR_WATCH_CONFIG: true
|
||||||
dagu:
|
STREAMLIT_RUN_ON_SAVE: true
|
||||||
ports:
|
|
||||||
- 8080:8080
|
|
||||||
volumes:
|
volumes:
|
||||||
- ../:/app
|
- ../:/app
|
||||||
|
- ../deployment/config/vector.toml:/etc/vector/vector.toml:ro
|
||||||
|
- ../deployment/scripts/supervisord.sh:/usr/bin/supervisord.sh:ro
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
version: '3.3'
|
version: '3.3'
|
||||||
|
|
||||||
services:
|
services:
|
||||||
server:
|
tabby:
|
||||||
image: tabbyml/tabby
|
image: tabbyml/tabby
|
||||||
environment:
|
environment:
|
||||||
MODEL_BACKEND: python
|
MODEL_BACKEND: python
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ from components import monaco
|
||||||
from utils.service_info import ServiceInfo
|
from utils.service_info import ServiceInfo
|
||||||
|
|
||||||
SERVICES = [
|
SERVICES = [
|
||||||
ServiceInfo(label="server", url="http://server:5000"),
|
ServiceInfo(label="server", url="http://localhost:5000"),
|
||||||
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
|
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ st.set_page_config(page_title="Tabby Admin - Metrics")
|
||||||
|
|
||||||
|
|
||||||
def query_data():
|
def query_data():
|
||||||
filepath = os.environ.get("DB_FILE", "/logs/duckdb/duck.db")
|
filepath = os.environ.get("DB_FILE", "/data/logs/duckdb/duck.db")
|
||||||
if not os.path.isfile(filepath):
|
if not os.path.isfile(filepath):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
schedule: "*/3 * * * *" # Run every 3rd minute
|
schedule: "*/3 * * * *" # Run every 3rd minute
|
||||||
|
|
||||||
env:
|
env:
|
||||||
- PATH: "/opt/conda/bin:$PATH"
|
- PATH: "$PATH"
|
||||||
|
- LOGS_DIR: "$LOGS_DIR"
|
||||||
|
- DB_FILE: "$DB_FILE"
|
||||||
- APP_DIR: /app
|
- APP_DIR: /app
|
||||||
steps:
|
steps:
|
||||||
- name: Collect Tabby
|
- name: Collect Tabby
|
||||||
|
|
|
||||||
|
|
@ -1,8 +0,0 @@
|
||||||
params: MODEL_NAME
|
|
||||||
env:
|
|
||||||
- PATH: "/opt/conda/bin:$PATH"
|
|
||||||
- APP_DIR: /app
|
|
||||||
steps:
|
|
||||||
- name: Download models
|
|
||||||
dir: $APP_DIR
|
|
||||||
command: python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
DB_FILE=${DB_FILE:-"/logs/duckdb/duck.db"}
|
DB_FILE=${DB_FILE:-"/data/logs/duckdb/duck.db"}
|
||||||
LOGS_DIR=${LOGS_DIR:-"/logs"}
|
LOGS_DIR=${LOGS_DIR:-"/data/logs"}
|
||||||
TABBY_SERVER_LOGS="${LOGS_DIR}/tabby-server/events.*.json"
|
TABBY_SERVER_LOGS="${LOGS_DIR}/tabby-server/events.*.json"
|
||||||
|
|
||||||
# Init schema
|
# Init schema
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue