# File: tabby/deployment/docker-compose.triton.yml
# Original listing metadata: 37 lines, 835 B, YAML

# Compose definition that runs the Tabby server against a Triton
# (FasterTransformer) backend container.
#
# NOTE(review): the `triton` service depends on an `init` service that is not
# defined in this file; presumably this file is meant to be merged with a base
# compose file that declares it (e.g. `docker compose -f <base> -f <this>`).
# Confirm before using this file standalone.
version: '3.3'

services:
  # Tabby server, configured via MODEL_BACKEND to use the Triton backend.
  server:
    image: tabbyml/tabby
    environment:
      - MODEL_BACKEND=triton
    depends_on:
      # Start only after the triton healthcheck below reports healthy.
      triton:
        condition: service_healthy

  # Inference backend container.
  triton:
    image: tabbyml/fastertransformer_backend
    container_name: tabby-triton
    # The script is provided by the ./scripts bind mount declared below.
    command: /scripts/triton.sh
    shm_size: 1gb
    volumes:
      - ./scripts:/scripts
      # Host directory mapped onto the Hugging Face cache path in the
      # container; presumably so downloaded models persist across restarts.
      - ./data/hf_cache:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this container.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - MODEL_NAME=TabbyML/NeoX-70M
    depends_on:
      # Wait for the one-shot `init` service to exit successfully
      # (defined outside this file; see the note at the top).
      init:
        condition: service_completed_successfully
    healthcheck:
      # Healthy once the metrics endpoint on port 8002 answers without an
      # HTTP error (`curl -f` exits non-zero on HTTP failures).
      # NOTE(review): assumes curl is available inside the image; confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
      interval: 2s
      timeout: 2s
      # Probe failures during the first 120s do not count against the
      # retry limit, giving the backend time to start.
      start_period: 120s