Add supervisord to support a single docker run deployment (#29)
* Add supervisord in Dockerfile * Create supervisord * Update README.md * Update README.md
add-more-languages
parent
07a3cff13a
commit
bf7d149a27
13
Dockerfile
13
Dockerfile
|
|
@ -32,4 +32,17 @@ RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extr
|
|||
ENV LD_LIBRARY_PATH "$LD_LIBRARY_PATH:/opt/conda/lib"
|
||||
RUN ln -s /opt/conda/lib/libcudart.so.11.7.99 /opt/conda/lib/libcudart.so
|
||||
|
||||
# vector
|
||||
RUN <<EOF
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.vector.dev | bash -s -- -y
|
||||
mkdir -p /var/lib/vector
|
||||
EOF
|
||||
ENV PATH "$PATH:/root/.vector/bin"
|
||||
COPY deployment/config/vector.toml /etc/vector/vector.toml
|
||||
|
||||
# Supervisord
|
||||
RUN --mount=type=cache,target=/root/.cache pip install -i $PYPI_INDEX_URL --extra-index-url https://pypi.org/simple supervisor
|
||||
COPY deployment/scripts/supervisord.sh /usr/bin
|
||||
|
||||
COPY tabby ./tabby
|
||||
CMD ["supervisord.sh"]
|
||||
|
|
|
|||
17
README.md
17
README.md
|
|
@ -26,11 +26,16 @@ An opensource / on-prem alternative to GitHub Copilot.
|
|||
|
||||
### Docker
|
||||
|
||||
The easiest way of getting started is using the `deployment/docker-compose.yml`:
|
||||
The easiest way of getting started is using the official docker image:
|
||||
```bash
|
||||
docker-compose up
|
||||
docker run \
|
||||
-it --rm \
|
||||
-v ./data:/data \
|
||||
-v ./data/hf_cache:/root/.cache/huggingface \
|
||||
-p 5000:5000 \
|
||||
-p 8501:8501 \
|
||||
-e MODEL_NAME=TabbyML/J-350M tabbyml/tabby
|
||||
```
|
||||
Note: To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
|
||||
|
||||
You can then query the server using `/v1/completions` endpoint:
|
||||
```bash
|
||||
|
|
@ -39,6 +44,12 @@ curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/
|
|||
}'
|
||||
```
|
||||
|
||||
To use the GPU backend (triton) for a faster inference experience, use `deployment/docker-compose.yml`:
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
Note: To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
|
||||
|
||||
We also provide an interactive playground in the admin panel at [localhost:8501](http://localhost:8501).
|
||||
|
||||

|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
MODEL_NAME=TabbyML/J-350M
|
||||
|
||||
# Volumes
|
||||
DATA_VOLUME="../data:/data"
|
||||
HF_VOLUME="../data/hf_cache:/root/.cache/huggingface"
|
||||
|
||||
LOGS_VOLUME="../data/logs:/logs"
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ address = "0.0.0.0:8686"
|
|||
|
||||
[sources.tabby_server_logs]
|
||||
type = "file"
|
||||
include = ["/logs/tabby-server/events.*.log"]
|
||||
include = ["/data/logs/tabby-server/events.*.log"]
|
||||
|
||||
[transforms.process_tabby_server_logs]
|
||||
type = "remap"
|
||||
|
|
@ -16,4 +16,4 @@ type = "file"
|
|||
inputs = [ "process_tabby_server_logs" ]
|
||||
encoding = { codec = "json" }
|
||||
framing = { method = "newline_delimited" }
|
||||
path = "/logs/tabby-server/events.%Y-%m-%d.json"
|
||||
path = "/data/logs/tabby-server/events.%Y-%m-%d.json"
|
||||
|
|
|
|||
|
|
@ -1,40 +1,26 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
init:
|
||||
tabby:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-init
|
||||
command: dagu start --params=MODEL_NAME=${MODEL_NAME} ./tabby/tasks/init.yaml
|
||||
volumes:
|
||||
- ${HF_VOLUME}
|
||||
|
||||
server:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-server
|
||||
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
||||
container_name: tabby
|
||||
environment:
|
||||
MODEL_NAME: ${MODEL_NAME}
|
||||
MODEL_BACKEND: triton
|
||||
EVENTS_LOG_DIR: /logs/tabby-server
|
||||
EVENTS_LOG_DIR: /data/logs/tabby-server
|
||||
DAGU_DAGS: /app/tabby/tasks
|
||||
ports:
|
||||
- "5000:5000"
|
||||
volumes:
|
||||
- ${HF_VOLUME}
|
||||
- ${LOGS_VOLUME}
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
triton:
|
||||
condition: service_healthy
|
||||
|
||||
admin:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-admin
|
||||
command: streamlit run tabby/admin/Home.py
|
||||
ports:
|
||||
- "8080:8080"
|
||||
- "8501:8501"
|
||||
volumes:
|
||||
- ${LOGS_VOLUME}
|
||||
- ${DATA_VOLUME}
|
||||
- ${HF_VOLUME}
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:5000"]
|
||||
interval: 2s
|
||||
timeout: 2s
|
||||
start_period: 120s
|
||||
|
||||
triton:
|
||||
image: tabbyml/fastertransformer_backend
|
||||
|
|
@ -54,29 +40,5 @@ services:
|
|||
environment:
|
||||
MODEL_NAME: ${MODEL_NAME}
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
|
||||
interval: 2s
|
||||
timeout: 2s
|
||||
start_period: 120s
|
||||
|
||||
vector:
|
||||
image: timberio/vector:0.28.1-alpine
|
||||
container_name: tabby-vector
|
||||
volumes:
|
||||
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
||||
- ${LOGS_VOLUME}
|
||||
|
||||
dagu:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-dagu
|
||||
command: dagu scheduler
|
||||
volumes:
|
||||
- ${LOGS_VOLUME}
|
||||
environment:
|
||||
DAGU_DAGS: tabby/tasks
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
tabby:
|
||||
condition: service_healthy
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Shared environment variables
|
||||
export LOGS_DIR="${LOGS_DIR:-/data/logs}"
|
||||
export DB_FILE="${DB_FILE:-/data/logs/duckdb/duck.db}"
|
||||
|
||||
# server
|
||||
export MODEL_NAME="${MODEL_NAME:-TabbyML/J-350M}"
|
||||
export MODEL_BACKEND="${MODEL_BACKEND:-python}"
|
||||
export EVENTS_LOG_DIR="${LOGS_DIR}/tabby-server"
|
||||
|
||||
# dagu
|
||||
export DAGU_DAGS="tabby/tasks"
|
||||
|
||||
init() {
|
||||
python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
||||
}
|
||||
|
||||
|
||||
supervisor() {
|
||||
supervisord -n -c <(cat <<EOF
|
||||
[supervisord]
|
||||
logfile = /var/log/supervisord.log
|
||||
loglevel = debug
|
||||
|
||||
[program:server]
|
||||
command=uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
||||
|
||||
[program:admin]
|
||||
command=streamlit run tabby/admin/Home.py --server.port 8501
|
||||
|
||||
[program:vector]
|
||||
command=vector
|
||||
|
||||
[program:dagu_scheduler]
|
||||
command=dagu scheduler
|
||||
|
||||
[program:dagu_server]
|
||||
command=dagu server --host 0.0.0.0 --port 8080
|
||||
EOF
|
||||
)
|
||||
}
|
||||
|
||||
init
|
||||
supervisor
|
||||
|
|
@ -1,30 +1,16 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
init:
|
||||
tabby:
|
||||
build:
|
||||
context: ..
|
||||
args:
|
||||
PYPI_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
volumes:
|
||||
- ../:/app
|
||||
|
||||
server:
|
||||
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000 --reload
|
||||
volumes:
|
||||
- ../:/app
|
||||
|
||||
admin:
|
||||
command: streamlit run --server.runOnSave=true tabby/admin/Home.py
|
||||
volumes:
|
||||
- ../:/app
|
||||
|
||||
vector:
|
||||
environment:
|
||||
- VECTOR_WATCH_CONFIG=true
|
||||
|
||||
dagu:
|
||||
ports:
|
||||
- 8080:8080
|
||||
UVICORN_RELOAD: true
|
||||
VECTOR_WATCH_CONFIG: true
|
||||
STREAMLIT_RUN_ON_SAVE: true
|
||||
volumes:
|
||||
- ../:/app
|
||||
- ../deployment/config/vector.toml:/etc/vector/vector.toml:ro
|
||||
- ../deployment/scripts/supervisord.sh:/usr/bin/supervisord.sh:ro
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
server:
|
||||
tabby:
|
||||
image: tabbyml/tabby
|
||||
environment:
|
||||
MODEL_BACKEND: python
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from components import monaco
|
|||
from utils.service_info import ServiceInfo
|
||||
|
||||
SERVICES = [
|
||||
ServiceInfo(label="server", url="http://server:5000"),
|
||||
ServiceInfo(label="server", url="http://localhost:5000"),
|
||||
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ st.set_page_config(page_title="Tabby Admin - Metrics")
|
|||
|
||||
|
||||
def query_data():
|
||||
filepath = os.environ.get("DB_FILE", "/logs/duckdb/duck.db")
|
||||
filepath = os.environ.get("DB_FILE", "/data/logs/duckdb/duck.db")
|
||||
if not os.path.isfile(filepath):
|
||||
return []
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
schedule: "*/3 * * * *" # Run every 3rd minute
|
||||
|
||||
env:
|
||||
- PATH: "/opt/conda/bin:$PATH"
|
||||
- PATH: "$PATH"
|
||||
- LOGS_DIR: "$LOGS_DIR"
|
||||
- DB_FILE: "$DB_FILE"
|
||||
- APP_DIR: /app
|
||||
steps:
|
||||
- name: Collect Tabby
|
||||
|
|
|
|||
|
|
@ -1,8 +0,0 @@
|
|||
params: MODEL_NAME
|
||||
env:
|
||||
- PATH: "/opt/conda/bin:$PATH"
|
||||
- APP_DIR: /app
|
||||
steps:
|
||||
- name: Download models
|
||||
dir: $APP_DIR
|
||||
command: python -m tabby.tools.download_models --repo_id=$MODEL_NAME
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
DB_FILE=${DB_FILE:-"/logs/duckdb/duck.db"}
|
||||
LOGS_DIR=${LOGS_DIR:-"/logs"}
|
||||
DB_FILE=${DB_FILE:-"/data/logs/duckdb/duck.db"}
|
||||
LOGS_DIR=${LOGS_DIR:-"/data/logs"}
|
||||
TABBY_SERVER_LOGS="${LOGS_DIR}/tabby-server/events.*.json"
|
||||
|
||||
# Init schema
|
||||
|
|
|
|||
Loading…
Reference in New Issue