Prepare public release with a minimal deployment setup (#16)
* Move deployment to deployment-next * Add deployment setup * Update deployment-next * Remove vector label * Update README.md
add-more-languages
parent
562b8d9e7e
commit
1c3ec20f93
|
|
@ -1,4 +1,6 @@
|
||||||
|
data
|
||||||
deployment
|
deployment
|
||||||
|
deployment-next
|
||||||
|
|
||||||
**/.git
|
**/.git
|
||||||
**/node_modules
|
**/node_modules
|
||||||
|
|
|
||||||
|
|
@ -1 +1,2 @@
|
||||||
__pycache__
|
__pycache__
|
||||||
|
data
|
||||||
|
|
|
||||||
16
Makefile
16
Makefile
|
|
@ -14,19 +14,3 @@ $(PRE_COMMIT_HOOK):
|
||||||
poetry run pre-commit install --install-hooks
|
poetry run pre-commit install --install-hooks
|
||||||
|
|
||||||
setup-development-environment: install-poetry $(PRE_COMMIT_HOOK)
|
setup-development-environment: install-poetry $(PRE_COMMIT_HOOK)
|
||||||
|
|
||||||
|
|
||||||
UP_FLAGS := up --remove-orphans --remove-orphans
|
|
||||||
DEV_FLAGS := $(UP_FLAGS) --build
|
|
||||||
|
|
||||||
up:
|
|
||||||
docker-compose -f deployment/docker-compose.yml $(UP_FLAGS)
|
|
||||||
|
|
||||||
up-triton:
|
|
||||||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml $(UP_FLAGS)
|
|
||||||
|
|
||||||
dev:
|
|
||||||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.dev.yml $(DEV_FLAGS)
|
|
||||||
|
|
||||||
dev-triton:
|
|
||||||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml -f deployment/docker-compose.dev.yml $(DEV_FLAGS)
|
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,5 @@ Assuming Linux workstation with:
|
||||||
Use `make setup-development-environment` to set up a basic dev environment, and `make dev` to start a local development server.
|
Use `make setup-development-environment` to set up a basic dev environment, and `make dev` to start a local development server.
|
||||||
|
|
||||||
## Deployment
|
## Deployment
|
||||||
1. `make up-triton`
|
|
||||||
2. Open Admin Panel [http://localhost:8501](http://localhost:8501)
|
|
||||||
3. Type some code in the editor!
|
|
||||||
|
|
||||||

|
See [deployment](./deployment/README.md)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
UP_FLAGS := up --remove-orphans --remove-orphans
|
||||||
|
DEV_FLAGS := $(UP_FLAGS) --build
|
||||||
|
|
||||||
|
up:
|
||||||
|
docker-compose -f docker-compose.yml $(UP_FLAGS)
|
||||||
|
|
||||||
|
up-triton:
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.triton.yml $(UP_FLAGS)
|
||||||
|
|
||||||
|
dev:
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.dev.yml $(DEV_FLAGS)
|
||||||
|
|
||||||
|
dev-triton:
|
||||||
|
docker-compose -f docker-compose.yml -f docker-compose.triton.yml -f docker-compose.dev.yml $(DEV_FLAGS)
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
1. Start service
|
||||||
|
```bash
|
||||||
|
docker-compose up
|
||||||
|
```
|
||||||
|
2. Test API endpoint with curl
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/json' --data '{
|
||||||
|
"prompt": "def binarySearch(arr, left, right, x):\n mid = (left +"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
@ -16,7 +16,7 @@ services:
|
||||||
shm_size: 1gb
|
shm_size: 1gb
|
||||||
volumes:
|
volumes:
|
||||||
- ./scripts:/scripts
|
- ./scripts:/scripts
|
||||||
- ./data/hf_cache:/root/.cache/huggingface
|
- ../data/hf_cache:/root/.cache/huggingface
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
|
|
@ -0,0 +1,50 @@
|
||||||
|
version: '3.3'
|
||||||
|
|
||||||
|
services:
|
||||||
|
init:
|
||||||
|
image: tabbyml/tabby
|
||||||
|
container_name: tabby-init
|
||||||
|
command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
|
||||||
|
volumes:
|
||||||
|
- ../data/hf_cache:/root/.cache/huggingface
|
||||||
|
|
||||||
|
server:
|
||||||
|
image: tabbyml/tabby
|
||||||
|
container_name: tabby-server
|
||||||
|
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
||||||
|
environment:
|
||||||
|
- MODEL_NAME=TabbyML/NeoX-70M
|
||||||
|
- EVENTS_LOG_DIR=/logs
|
||||||
|
ports:
|
||||||
|
- "5000:5000"
|
||||||
|
volumes:
|
||||||
|
- ../data/logs/tabby-server:/logs
|
||||||
|
- ../data/hf_cache:/root/.cache/huggingface
|
||||||
|
depends_on:
|
||||||
|
init:
|
||||||
|
condition: service_completed_successfully
|
||||||
|
|
||||||
|
admin:
|
||||||
|
image: tabbyml/tabby
|
||||||
|
container_name: tabby-admin
|
||||||
|
command: streamlit run tabby/admin/Home.py
|
||||||
|
ports:
|
||||||
|
- "8501:8501"
|
||||||
|
|
||||||
|
vector:
|
||||||
|
image: timberio/vector:0.28.1-alpine
|
||||||
|
container_name: tabby-vector
|
||||||
|
volumes:
|
||||||
|
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
||||||
|
- ../data/logs:/logs
|
||||||
|
|
||||||
|
dagu:
|
||||||
|
image: tabbyml/tabby
|
||||||
|
container_name: tabby-dagu
|
||||||
|
command: dagu server --host 0.0.0.0 --port 8080
|
||||||
|
volumes:
|
||||||
|
- ./config:/config:ro
|
||||||
|
- ../data/repositories:/repositories
|
||||||
|
- ../data/dataset:/dataset
|
||||||
|
environment:
|
||||||
|
DAGU_DAGS: tabby/tasks
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Get model dir.
|
||||||
|
MODEL_DIR=$(python3 <<EOF
|
||||||
|
from huggingface_hub import snapshot_download
|
||||||
|
|
||||||
|
print(snapshot_download(repo_id='$MODEL_NAME', allow_patterns='triton/**/*', local_files_only=True))
|
||||||
|
EOF
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set model dir in triton config.
|
||||||
|
sed -i 's@${MODEL_DIR}@'$MODEL_DIR'@g' $MODEL_DIR/triton/fastertransformer/config.pbtxt
|
||||||
|
|
||||||
|
# Start triton server.
|
||||||
|
mpirun -n 1 \
|
||||||
|
--allow-run-as-root /opt/tritonserver/bin/tritonserver \
|
||||||
|
--model-repository=$MODEL_DIR/triton
|
||||||
|
|
@ -1,12 +1,7 @@
|
||||||
## Deployment
|
## Deployment
|
||||||
|
|
||||||
1. Start service
|
1. `docker-compose up`
|
||||||
```bash
|
2. Open Admin Panel [http://localhost:8501](http://localhost:8501)
|
||||||
docker-compose up
|
3. Type some code in the editor!
|
||||||
```
|
|
||||||
2. Test API endpoint with curl
|

|
||||||
```bash
|
|
||||||
curl -X POST http://localhost:5000/v1/completions -H 'Content-Type: application/json' --data '{
|
|
||||||
"prompt": "def binarySearch(arr, left, right, x):\n mid = (left +"
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ services:
|
||||||
container_name: tabby-init
|
container_name: tabby-init
|
||||||
command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
|
command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
|
||||||
volumes:
|
volumes:
|
||||||
- ./data/hf_cache:/root/.cache/huggingface
|
- ../data/hf_cache:/root/.cache/huggingface
|
||||||
|
|
||||||
server:
|
server:
|
||||||
image: tabbyml/tabby
|
image: tabbyml/tabby
|
||||||
|
|
@ -14,15 +14,16 @@ services:
|
||||||
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
|
||||||
environment:
|
environment:
|
||||||
- MODEL_NAME=TabbyML/NeoX-70M
|
- MODEL_NAME=TabbyML/NeoX-70M
|
||||||
- EVENTS_LOG_DIR=/logs
|
- MODEL_BACKEND=triton
|
||||||
ports:
|
ports:
|
||||||
- "5000:5000"
|
- "5000:5000"
|
||||||
volumes:
|
volumes:
|
||||||
- ./data/logs/tabby-server:/logs
|
- ../data/hf_cache:/root/.cache/huggingface
|
||||||
- ./data/hf_cache:/root/.cache/huggingface
|
|
||||||
depends_on:
|
depends_on:
|
||||||
init:
|
init:
|
||||||
condition: service_completed_successfully
|
condition: service_completed_successfully
|
||||||
|
triton:
|
||||||
|
condition: service_healthy
|
||||||
|
|
||||||
admin:
|
admin:
|
||||||
image: tabbyml/tabby
|
image: tabbyml/tabby
|
||||||
|
|
@ -31,20 +32,28 @@ services:
|
||||||
ports:
|
ports:
|
||||||
- "8501:8501"
|
- "8501:8501"
|
||||||
|
|
||||||
vector:
|
triton:
|
||||||
image: timberio/vector:0.28.1-alpine
|
image: tabbyml/fastertransformer_backend
|
||||||
container_name: tabby-vector
|
container_name: tabby-triton
|
||||||
|
command: /scripts/triton.sh
|
||||||
|
shm_size: 1gb
|
||||||
volumes:
|
volumes:
|
||||||
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
- ./scripts:/scripts
|
||||||
- ./data/logs:/logs
|
- ../data/hf_cache:/root/.cache/huggingface
|
||||||
|
deploy:
|
||||||
dagu:
|
resources:
|
||||||
image: tabbyml/tabby
|
reservations:
|
||||||
container_name: tabby-dagu
|
devices:
|
||||||
command: dagu server --host 0.0.0.0 --port 8080
|
- driver: nvidia
|
||||||
volumes:
|
count: all
|
||||||
- ./config:/config:ro
|
capabilities: [gpu]
|
||||||
- ./data/repositories:/repositories
|
|
||||||
- ./data/dataset:/dataset
|
|
||||||
environment:
|
environment:
|
||||||
DAGU_DAGS: tabby/tasks
|
- MODEL_NAME=TabbyML/NeoX-70M
|
||||||
|
depends_on:
|
||||||
|
init:
|
||||||
|
condition: service_completed_successfully
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
|
||||||
|
interval: 2s
|
||||||
|
timeout: 2s
|
||||||
|
start_period: 120s
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@ from utils.service_info import ServiceInfo
|
||||||
SERVICES = [
|
SERVICES = [
|
||||||
ServiceInfo(label="server", url="http://server:5000"),
|
ServiceInfo(label="server", url="http://server:5000"),
|
||||||
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
|
ServiceInfo(label="triton", url="http://triton:8002/metrics"),
|
||||||
ServiceInfo(label="vector", url="http://vector:8686/health"),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue