# Docker Compose stack: the Tabby server plus its Triton model backend.
version: '3.3'

services:
  # Front-end service; configured to use the Triton backend.
  server:
    image: tabbyml/tabby
    environment:
      - MODEL_BACKEND=triton
    # Start only after the triton service's healthcheck reports healthy.
    depends_on:
      triton:
        condition: service_healthy

  # Model-serving backend, launched through the mounted startup script.
  triton:
    image: tabbyml/fastertransformer_backend
    container_name: tabby-triton
    command: /scripts/triton.sh
    shm_size: 1gb
    volumes:
      # Provides /scripts/triton.sh used by `command` above.
      - ./scripts:/scripts
      # Keeps the Hugging Face cache directory on the host.
      - ./data/hf_cache:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this container.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - MODEL_NAME=TabbyML/NeoX-70M
    # NOTE(review): `init` is not defined in the visible part of this file;
    # confirm that the service exists further down.
    depends_on:
      init:
        condition: service_completed_successfully
    # Healthy once the endpoint on port 8002 answers; `curl -f` fails on
    # HTTP error statuses.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
      interval: 2s
      timeout: 2s
      start_period: 2s