Prepare public release with a minimal deployment setup (#16)

* Move deployment to deployment-next

* Add deployment setup

* Update deployment-next

* Remove vector label

* update README.md
add-more-languages
Meng Zhang 2023-03-26 22:44:15 +08:00 committed by GitHub
parent 562b8d9e7e
commit 1c3ec20f93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 132 additions and 51 deletions

View File

@ -1,4 +1,6 @@
data
deployment
deployment-next
**/.git
**/node_modules

1
.gitignore vendored
View File

@ -1 +1,2 @@
__pycache__
data

View File

@ -14,19 +14,3 @@ $(PRE_COMMIT_HOOK):
poetry run pre-commit install --install-hooks
setup-development-environment: install-poetry $(PRE_COMMIT_HOOK)
# Flags passed to every `docker-compose up` invocation below.
# NOTE(review): --remove-orphans appears twice — one occurrence is redundant;
# docker-compose accepts the repetition but it should be listed once.
UP_FLAGS := up --remove-orphans --remove-orphans
# Dev variants additionally rebuild images before starting.
DEV_FLAGS := $(UP_FLAGS) --build
# Start the stack with the default (CPU) inference backend.
up:
docker-compose -f deployment/docker-compose.yml $(UP_FLAGS)
# Start the stack with the Triton inference backend overlay.
up-triton:
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml $(UP_FLAGS)
# Development stack: default backend plus the dev overlay, images rebuilt.
dev:
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.dev.yml $(DEV_FLAGS)
# Development stack with the Triton backend overlay, images rebuilt.
dev-triton:
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml -f deployment/docker-compose.dev.yml $(DEV_FLAGS)

View File

@ -18,8 +18,5 @@ Assuming Linux workstation with:
Use `make setup-development-environment` to setup basic dev environment, and `make dev` to start local development server.
## Deployment
1. `make up-triton`
2. Open Admin Panel [http://localhost:8501](http://localhost:8501)
3. Type some code in the editor!
![Screenshot from 2023-03-25 21-31-24](https://user-images.githubusercontent.com/388154/227720212-7e7480a1-abb1-4baf-b971-d391ea8136c8.png)
See [deployment](./deployment/README.md)

14
deployment-next/Makefile Normal file
View File

@ -0,0 +1,14 @@
# Flags passed to every `docker-compose up` invocation below.
# Fixed: --remove-orphans was listed twice; once is sufficient.
UP_FLAGS := up --remove-orphans
# Dev variants additionally rebuild images before starting.
DEV_FLAGS := $(UP_FLAGS) --build

# All targets run commands rather than produce files — declare them phony so
# a stray file named `up`/`dev` can never mask them.
.PHONY: up up-triton dev dev-triton

# Start the stack with the default inference backend.
up:
	docker-compose -f docker-compose.yml $(UP_FLAGS)

# Start the stack with the Triton inference backend overlay.
up-triton:
	docker-compose -f docker-compose.yml -f docker-compose.triton.yml $(UP_FLAGS)

# Development stack: default backend plus the dev overlay, images rebuilt.
dev:
	docker-compose -f docker-compose.yml -f docker-compose.dev.yml $(DEV_FLAGS)

# Development stack with the Triton backend overlay, images rebuilt.
dev-triton:
	docker-compose -f docker-compose.yml -f docker-compose.triton.yml -f docker-compose.dev.yml $(DEV_FLAGS)

12
deployment-next/README.md Normal file
View File

@ -0,0 +1,12 @@
## Deployment
1. Start service
```bash
docker-compose up
```
2. Test API endpoint with curl
```bash
curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/json' --data '{
"prompt": "def binarySearch(arr, left, right, x):\n mid = (left +"
}'
```

View File

@ -16,7 +16,7 @@ services:
shm_size: 1gb
volumes:
- ./scripts:/scripts
- ./data/hf_cache:/root/.cache/huggingface
- ../data/hf_cache:/root/.cache/huggingface
deploy:
resources:
reservations:

View File

@ -0,0 +1,50 @@
version: '3.3'
services:
init:
image: tabbyml/tabby
container_name: tabby-init
command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
volumes:
- ../data/hf_cache:/root/.cache/huggingface
server:
image: tabbyml/tabby
container_name: tabby-server
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
environment:
- MODEL_NAME=TabbyML/NeoX-70M
- EVENTS_LOG_DIR=/logs
ports:
- "5000:5000"
volumes:
- ../data/logs/tabby-server:/logs
- ../data/hf_cache:/root/.cache/huggingface
depends_on:
init:
condition: service_completed_successfully
admin:
image: tabbyml/tabby
container_name: tabby-admin
command: streamlit run tabby/admin/Home.py
ports:
- "8501:8501"
vector:
image: timberio/vector:0.28.1-alpine
container_name: tabby-vector
volumes:
- ./config/vector.toml:/etc/vector/vector.toml:ro
- ../data/logs:/logs
dagu:
image: tabbyml/tabby
container_name: tabby-dagu
command: dagu server --host 0.0.0.0 --port 8080
volumes:
- ./config:/config:ro
- ../data/repositories:/repositories
- ../data/dataset:/dataset
environment:
DAGU_DAGS: tabby/tasks

View File

@ -0,0 +1,18 @@
#!/bin/bash
# Container entrypoint: resolve the locally cached Triton model artifacts,
# patch the model's Triton config with their absolute path, then launch
# tritonserver under mpirun.
set -e
# Get model dir.
# Resolve the HuggingFace cache path for $MODEL_NAME. local_files_only=True
# means the triton/ artifacts must already be present in the cache
# (pre-populated by a separate preload step) — no download is attempted here.
MODEL_DIR=$(python3 <<EOF
from huggingface_hub import snapshot_download
print(snapshot_download(repo_id='$MODEL_NAME', allow_patterns='triton/**/*', local_files_only=True))
EOF
)
# Set model dir in triton config.
# The single-quoted '${MODEL_DIR}' is a literal placeholder inside
# config.pbtxt, replaced with the resolved path; '@' is used as the sed
# delimiter because the path contains '/'.
# NOTE(review): $MODEL_DIR is unquoted — would break on paths with spaces.
sed -i 's@${MODEL_DIR}@'$MODEL_DIR'@g' $MODEL_DIR/triton/fastertransformer/config.pbtxt
# Start triton server.
# Single-rank MPI launch — presumably required by the fastertransformer
# backend; confirm against its deployment docs.
mpirun -n 1 \
--allow-run-as-root /opt/tritonserver/bin/tritonserver \
--model-repository=$MODEL_DIR/triton

View File

@ -1,12 +1,7 @@
## Deployment
1. Start service
```bash
docker-compose up
```
2. Test API endpoint with curl
```bash
curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/json' --data '{
"prompt": "def binarySearch(arr, left, right, x):\n mid = (left +"
}'
```
1. `docker-compose up`
2. Open Admin Panel [http://localhost:8501](http://localhost:8501)
3. Type some code in the editor!
![Screenshot from 2023-03-25 21-31-24](https://user-images.githubusercontent.com/388154/227720212-7e7480a1-abb1-4baf-b971-d391ea8136c8.png)

View File

@ -6,7 +6,7 @@ services:
container_name: tabby-init
command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
volumes:
- ./data/hf_cache:/root/.cache/huggingface
- ../data/hf_cache:/root/.cache/huggingface
server:
image: tabbyml/tabby
@ -14,15 +14,16 @@ services:
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
environment:
- MODEL_NAME=TabbyML/NeoX-70M
- EVENTS_LOG_DIR=/logs
- MODEL_BACKEND=triton
ports:
- "5000:5000"
volumes:
- ./data/logs/tabby-server:/logs
- ./data/hf_cache:/root/.cache/huggingface
- ../data/hf_cache:/root/.cache/huggingface
depends_on:
init:
condition: service_completed_successfully
triton:
condition: service_healthy
admin:
image: tabbyml/tabby
@ -31,20 +32,28 @@ services:
ports:
- "8501:8501"
vector:
image: timberio/vector:0.28.1-alpine
container_name: tabby-vector
triton:
image: tabbyml/fastertransformer_backend
container_name: tabby-triton
command: /scripts/triton.sh
shm_size: 1gb
volumes:
- ./config/vector.toml:/etc/vector/vector.toml:ro
- ./data/logs:/logs
dagu:
image: tabbyml/tabby
container_name: tabby-dagu
command: dagu server --host 0.0.0.0 --port 8080
volumes:
- ./config:/config:ro
- ./data/repositories:/repositories
- ./data/dataset:/dataset
- ./scripts:/scripts
- ../data/hf_cache:/root/.cache/huggingface
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
environment:
DAGU_DAGS: tabby/tasks
- MODEL_NAME=TabbyML/NeoX-70M
depends_on:
init:
condition: service_completed_successfully
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
interval: 2s
timeout: 2s
start_period: 120s

View File

@ -5,7 +5,6 @@ from utils.service_info import ServiceInfo
SERVICES = [
ServiceInfo(label="server", url="http://server:5000"),
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
ServiceInfo(label="vector", url="http://vector:8686/health"),
]