# Docker Compose stack for Tabby: one-shot model downloader (init),
# the API server, a Streamlit admin UI, and a Triton inference backend.
# NOTE(review): bumped from '3.3' — `healthcheck.start_period` requires
# file format >= 3.4, and long-form `depends_on` conditions need a
# Compose-Spec-aware `docker compose` (v2), which accepts 3.8.
version: '3.8'

services:
  # One-shot job: pre-downloads the model into the shared HF cache, then exits.
  # Other services gate on its successful completion via depends_on.
  init:
    image: tabbyml/tabby
    container_name: tabby-init
    command: python -m tabby.tools.model_preload --repo_id TabbyML/NeoX-70M
    volumes:
      # Shared Hugging Face cache so the model is downloaded only once.
      - ../data/hf_cache:/root/.cache/huggingface

  # Main API server; talks to the Triton backend for inference.
  server:
    image: tabbyml/tabby
    container_name: tabby-server
    command: uvicorn tabby.server:app --host 0.0.0.0 --port 5000
    environment:
      - MODEL_NAME=TabbyML/NeoX-70M
      - MODEL_BACKEND=triton
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of host:container pairs.
      - "5000:5000"
    volumes:
      - ../data/hf_cache:/root/.cache/huggingface
    depends_on:
      init:
        condition: service_completed_successfully
      triton:
        # Wait until Triton's healthcheck (below) reports healthy.
        condition: service_healthy

  # Streamlit-based admin UI.
  admin:
    image: tabbyml/tabby
    container_name: tabby-admin
    command: streamlit run tabby/admin/Home.py
    ports:
      - "8501:8501"

  # Triton inference server (FasterTransformer backend) on NVIDIA GPUs.
  triton:
    image: tabbyml/fastertransformer_backend
    container_name: tabby-triton
    command: /scripts/triton.sh
    # Triton needs more shared memory than Docker's 64 MB default.
    shm_size: 1gb
    volumes:
      - ./scripts:/scripts
      - ../data/hf_cache:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all available NVIDIA GPUs for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - MODEL_NAME=TabbyML/NeoX-70M
    depends_on:
      init:
        condition: service_completed_successfully
    healthcheck:
      # Triton exposes Prometheus metrics on 8002; used as a liveness probe.
      test: ["CMD", "curl", "-f", "http://localhost:8002/metrics"]
      interval: 2s
      timeout: 2s
      # Generous grace period: model loading can take a while before
      # failed probes start counting against the container.
      start_period: 120s