# Docker Compose definition for two services: tabby and triton.
# NOTE(review): the services below use `depends_on.condition` and
# `healthcheck.start_period`; confirm the targeted Compose release accepts
# both under this declared file-format version.
version: '3.3'

services:
  # Service run from the tabbyml/tabby image. Publishes host ports 5000, 8080
  # and 8501 on the same container ports and mounts the volumes named by the
  # DATA_VOLUME and HF_VOLUME variables.
  tabby:
    container_name: tabby
    image: tabbyml/tabby
    ports:
      - '5000:5000'
      - '8080:8080'
      - '8501:8501'
    volumes:
      # Full volume specs are supplied through Compose variable substitution.
      - '${DATA_VOLUME}'
      - '${HF_VOLUME}'
    environment:
      MODEL_NAME: '${MODEL_NAME}'
      # NOTE(review): presumably points tabby at the triton service defined in
      # this file - confirm against the image's documentation.
      MODEL_BACKEND: triton
      LOGS_DIR: /data/logs
      DAGU_DAGS: /app/tabby/tasks
    healthcheck:
      # Exec-form probe: curl against port 5000 inside the container; `-f`
      # makes curl exit non-zero on HTTP error responses.
      test:
        - CMD
        - curl
        - '-f'
        - 'http://localhost:5000'
      interval: 2s
      timeout: 2s
      # Long grace window before failed probes count toward unhealthy.
      start_period: 1200s

  # Service run from the tabbyml/fastertransformer_backend image with all
  # NVIDIA GPUs reserved. It is started only after the tabby service's
  # healthcheck reports healthy.
  triton:
    container_name: tabby-triton
    image: tabbyml/fastertransformer_backend
    depends_on:
      tabby:
        condition: service_healthy
    # NOTE(review): presumably resolved via PATH to the script bind-mounted at
    # /usr/bin/triton.sh below - confirm /usr/bin is on the image's PATH and
    # that the host script is executable.
    command: triton.sh
    environment:
      MODEL_NAME: '${MODEL_NAME}'
    shm_size: 1gb
    volumes:
      # Host start-up script, mounted read-only.
      - './scripts/triton.sh:/usr/bin/triton.sh:ro'
      - '${HF_VOLUME}'
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu