---
# Compose stack with two services: `tabby` and `triton`.
# `triton` is only started once `tabby` reports healthy (see `depends_on`).
#
# Variables substituted by Compose at load time:
#   MODEL_NAME  - passed to both containers as the MODEL_NAME env variable
#   DATA_VOLUME - volume spec mounted into `tabby`
#   HF_VOLUME   - volume spec mounted into both `tabby` and `triton`
#
# NOTE(review): `healthcheck.start_period` and the long `depends_on` form
# with `condition` are, as far as I know, not part of the legacy 3.3 file
# format; recent Compose releases ignore `version` entirely. Confirm which
# Compose tooling this file targets before changing the version string.
version: "3.3"

services:
  tabby:
    container_name: tabby
    image: tabbyml/tabby
    environment:
      MODEL_NAME: ${MODEL_NAME}
      MODEL_BACKEND: triton
      LOGS_DIR: /data/logs
      DAGU_DAGS: /app/tabby/tasks
    # host:container port publications; quoted so they always stay strings.
    ports:
      - '5000:5000'
      - '8080:8080'
      - '8501:8501'
    volumes:
      - ${DATA_VOLUME}
      - ${HF_VOLUME}
    # Healthy once an HTTP request to port 5000 inside the container
    # succeeds (`curl -f` exits non-zero on HTTP error responses).
    healthcheck:
      test:
        - CMD
        - curl
        - '-f'
        - 'http://localhost:5000'
      interval: 2s
      timeout: 2s
      # Long grace period before failed probes count against the container;
      # presumably to cover a slow first start - confirm.
      start_period: 1200s

  triton:
    container_name: tabby-triton
    image: tabbyml/fastertransformer_backend
    # Runs the script that is bind-mounted (read-only) under /usr/bin below.
    command: triton.sh
    environment:
      MODEL_NAME: ${MODEL_NAME}
    shm_size: 1gb
    volumes:
      - ./scripts/triton.sh:/usr/bin/triton.sh:ro
      - ${HF_VOLUME}
    # Reserve every available NVIDIA GPU for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    # Hold back start-up until the `tabby` healthcheck passes.
    depends_on:
      tabby:
        condition: service_healthy