# Docker Compose stack: a Python API server fronting a Triton inference
# server (FasterTransformer backend) serving a GPT-NeoX model.
version: '3.3'

services:
  # HTTP API: runs server/app.py on port 5000, talks to the `triton`
  # service for inference and reads the tokenizer from a mounted volume.
  server:
    build: .
    working_dir: /app
    command: /opt/conda/bin/python server/app.py
    environment:
      # Paths/hostnames the app reads at startup; /tokenizer is the
      # volume mount below, `triton` resolves to the triton service.
      - TOKENIZER_NAME=/tokenizer
      - TRITON_HOST=triton
    ports:
      # Quoted to avoid YAML's sexagesimal-number trap on port mappings.
      - "5000:5000"
    volumes:
      - ./testdata/gptneox/tokenizer:/tokenizer
    links:
      # NOTE(review): `links` is legacy — services on the default compose
      # network already resolve each other by name. Kept for compatibility.
      - triton

  # Triton inference server loading models from the mounted /model repo.
  triton:
    image: tabbyml/fastertransformer_backend
    command: mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model
    # FasterTransformer/MPI needs more shared memory than the 64MB default.
    shm_size: 1gb
    volumes:
      - ./testdata/gptneox/models:/model
    deploy:
      resources:
        reservations:
          devices:
            # Expose all host NVIDIA GPUs to the container
            # (requires nvidia-container-toolkit; honored by
            # docker compose / docker-compose >= 1.28).
            - driver: nvidia
              count: all
              capabilities: [gpu]