Prepare public release with a minimal deployment setup (#16)
* Move deployment to deployment-next * Add deployment setup * Update deployment-next * Remove vector label * Update README.md add-more-languages
parent
562b8d9e7e
commit
1c3ec20f93
|
|
@ -1,4 +1,6 @@
|
|||
data
|
||||
deployment
|
||||
deployment-next
|
||||
|
||||
**/.git
|
||||
**/node_modules
|
||||
|
|
|
|||
|
|
@ -1 +1,2 @@
|
|||
__pycache__
|
||||
data
|
||||
|
|
|
|||
16
Makefile
16
Makefile
|
|
@ -14,19 +14,3 @@ $(PRE_COMMIT_HOOK):
|
|||
poetry run pre-commit install --install-hooks
|
||||
|
||||
setup-development-environment: install-poetry $(PRE_COMMIT_HOOK)
|
||||
|
||||
|
||||
# Shared `docker-compose` sub-command and flags: start the stack and remove
# containers for services that are no longer defined in the compose files.
UP_FLAGS := up --remove-orphans
|
||||
DEV_FLAGS := $(UP_FLAGS) --build
|
||||
|
||||
up:
|
||||
docker-compose -f deployment/docker-compose.yml $(UP_FLAGS)
|
||||
|
||||
up-triton:
|
||||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml $(UP_FLAGS)
|
||||
|
||||
dev:
|
||||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.dev.yml $(DEV_FLAGS)
|
||||
|
||||
dev-triton:
|
||||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml -f deployment/docker-compose.dev.yml $(DEV_FLAGS)
|
||||
|
|
|
|||
|
|
@ -18,8 +18,5 @@ Assuming Linux workstation with:
|
|||
Use `make setup-development-environment` to set up a basic dev environment, and `make dev` to start a local development server.
|
||||
|
||||
## Deployment
|
||||
1. `make up-triton`
|
||||
2. Open Admin Panel [http://localhost:8501](http://localhost:8501)
|
||||
3. Type some code in the editor!
|
||||
|
||||

|
||||
See [deployment](./deployment/README.md)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,14 @@
|
|||
# Shared `docker-compose` sub-command and flags: start the stack and remove
# containers for services that are no longer defined in the compose files.
UP_FLAGS := up --remove-orphans
|
||||
DEV_FLAGS := $(UP_FLAGS) --build
|
||||
|
||||
up:
|
||||
docker-compose -f docker-compose.yml $(UP_FLAGS)
|
||||
|
||||
up-triton:
|
||||
docker-compose -f docker-compose.yml -f docker-compose.triton.yml $(UP_FLAGS)
|
||||
|
||||
dev:
|
||||
docker-compose -f docker-compose.yml -f docker-compose.dev.yml $(DEV_FLAGS)
|
||||
|
||||
dev-triton:
|
||||
docker-compose -f docker-compose.yml -f docker-compose.triton.yml -f docker-compose.dev.yml $(DEV_FLAGS)
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
## Deployment
|
||||
|
||||
1. Start service
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
2. Test API endpoint with curl
|
||||
```bash
|
||||
curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/json' --data '{
|
||||
"prompt": "def binarySearch(arr, left, right, x):\n mid = (left +"
|
||||
}'
|
||||
```
|
||||
|
|
@ -16,7 +16,7 @@ services:
|
|||
shm_size: 1gb
|
||||
volumes:
|
||||
- ./scripts:/scripts
|
||||
- ./data/hf_cache:/root/.cache/huggingface
|
||||
- ../data/hf_cache:/root/.cache/huggingface
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
init:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-init
|
||||
command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
|
||||
volumes:
|
||||
- ../data/hf_cache:/root/.cache/huggingface
|
||||
|
||||
server:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-server
|
||||
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
||||
environment:
|
||||
- MODEL_NAME=TabbyML/NeoX-70M
|
||||
- EVENTS_LOG_DIR=/logs
|
||||
ports:
|
||||
- "5000:5000"
|
||||
volumes:
|
||||
- ../data/logs/tabby-server:/logs
|
||||
- ../data/hf_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
|
||||
admin:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-admin
|
||||
command: streamlit run tabby/admin/Home.py
|
||||
ports:
|
||||
- "8501:8501"
|
||||
|
||||
vector:
|
||||
image: timberio/vector:0.28.1-alpine
|
||||
container_name: tabby-vector
|
||||
volumes:
|
||||
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
||||
- ../data/logs:/logs
|
||||
|
||||
dagu:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-dagu
|
||||
command: dagu server --host 0.0.0.0 --port 8080
|
||||
volumes:
|
||||
- ./config:/config:ro
|
||||
- ../data/repositories:/repositories
|
||||
- ../data/dataset:/dataset
|
||||
environment:
|
||||
DAGU_DAGS: tabby/tasks
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Get model dir.
|
||||
MODEL_DIR=$(python3 <<EOF
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
print(snapshot_download(repo_id='$MODEL_NAME', allow_patterns='triton/**/*', local_files_only=True))
|
||||
EOF
|
||||
)
|
||||
|
||||
# Set model dir in triton config.
|
||||
# Replace the literal ${MODEL_DIR} placeholder in the config with the resolved path.
# The expansions are double-quoted so a path containing whitespace or glob
# characters is passed to sed as a single word.
sed -i 's@${MODEL_DIR}@'"$MODEL_DIR"'@g' "$MODEL_DIR/triton/fastertransformer/config.pbtxt"
|
||||
|
||||
# Start triton server.
|
||||
mpirun -n 1 \
|
||||
--allow-run-as-root /opt/tritonserver/bin/tritonserver \
|
||||
--model-repository="$MODEL_DIR/triton"
|
||||
|
|
@ -1,12 +1,7 @@
|
|||
## Deployment
|
||||
|
||||
1. Start service
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
2. Test API endpoint with curl
|
||||
```bash
|
||||
curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/json' --data '{
|
||||
"prompt": "def binarySearch(arr, left, right, x):\n mid = (left +"
|
||||
}'
|
||||
```
|
||||
1. `docker-compose up`
|
||||
2. Open Admin Panel [http://localhost:8501](http://localhost:8501)
|
||||
3. Type some code in the editor!
|
||||
|
||||

|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ services:
|
|||
container_name: tabby-init
|
||||
command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
|
||||
volumes:
|
||||
- ./data/hf_cache:/root/.cache/huggingface
|
||||
- ../data/hf_cache:/root/.cache/huggingface
|
||||
|
||||
server:
|
||||
image: tabbyml/tabby
|
||||
|
|
@ -14,15 +14,16 @@ services:
|
|||
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
||||
environment:
|
||||
- MODEL_NAME=TabbyML/NeoX-70M
|
||||
- EVENTS_LOG_DIR=/logs
|
||||
- MODEL_BACKEND=triton
|
||||
ports:
|
||||
- "5000:5000"
|
||||
volumes:
|
||||
- ./data/logs/tabby-server:/logs
|
||||
- ./data/hf_cache:/root/.cache/huggingface
|
||||
- ../data/hf_cache:/root/.cache/huggingface
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
triton:
|
||||
condition: service_healthy
|
||||
|
||||
admin:
|
||||
image: tabbyml/tabby
|
||||
|
|
@ -31,20 +32,28 @@ services:
|
|||
ports:
|
||||
- "8501:8501"
|
||||
|
||||
vector:
|
||||
image: timberio/vector:0.28.1-alpine
|
||||
container_name: tabby-vector
|
||||
triton:
|
||||
image: tabbyml/fastertransformer_backend
|
||||
container_name: tabby-triton
|
||||
command: /scripts/triton.sh
|
||||
shm_size: 1gb
|
||||
volumes:
|
||||
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
||||
- ./data/logs:/logs
|
||||
|
||||
dagu:
|
||||
image: tabbyml/tabby
|
||||
container_name: tabby-dagu
|
||||
command: dagu server --host 0.0.0.0 --port 8080
|
||||
volumes:
|
||||
- ./config:/config:ro
|
||||
- ./data/repositories:/repositories
|
||||
- ./data/dataset:/dataset
|
||||
- ./scripts:/scripts
|
||||
- ../data/hf_cache:/root/.cache/huggingface
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
environment:
|
||||
DAGU_DAGS: tabby/tasks
|
||||
- MODEL_NAME=TabbyML/NeoX-70M
|
||||
depends_on:
|
||||
init:
|
||||
condition: service_completed_successfully
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
|
||||
interval: 2s
|
||||
timeout: 2s
|
||||
start_period: 120s
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ from utils.service_info import ServiceInfo
|
|||
SERVICES = [
|
||||
ServiceInfo(label="server", url="http://server:5000"),
|
||||
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
|
||||
ServiceInfo(label="vector", url="http://vector:8686/health"),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue