---
# Docker Compose definition for a single GPU-backed Triton inference server.
version: '3.3'

services:
  # Triton server built from the FasterTransformer backend image.
  triton:
    image: ghcr.io/tabbyml/fastertransformer_backend:main

    # Start one tritonserver process under mpirun (root allowed inside the
    # container), serving the model repository mounted at /model.
    # The folded scalar below collapses to a single-line command string.
    command: >-
      mpirun -n 1 --allow-run-as-root
      /opt/tritonserver/bin/tritonserver
      --model-repository=/model

    # Size of the container's shared-memory segment (/dev/shm).
    shm_size: 1gb

    # Publish container port 8001 on the same host port.
    # NOTE(review): presumably Triton's gRPC endpoint - confirm.
    ports:
      - '8001:8001'

    # Bind-mount the local ./testdata directory as the model repository
    # referenced by --model-repository above.
    volumes:
      - ./testdata:/model

    # Reserve every NVIDIA GPU on the host for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu