diff --git a/README.md b/README.md index be94d47..318699c 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ make dev ``` or ```bash -make dev-python # Turn off triton backend (for non-cuda env developers) +make dev-triton # Turn on triton backend (for cuda env developers) ``` ## TODOs diff --git a/deployment/docker-compose.python.yml b/deployment/docker-compose.python.yml deleted file mode 100644 index dd48a5e..0000000 --- a/deployment/docker-compose.python.yml +++ /dev/null @@ -1,31 +0,0 @@ -version: '3.3' - -services: - init: - image: tabbyml/tabby - container_name: init - user: root - volumes: - - ${DATA_VOLUME} - - ${HF_VOLUME} - command: chown -R 1000 /data - - tabby: - image: tabbyml/tabby - container_name: tabby - environment: - MODEL_NAME: ${MODEL_NAME} - ports: - - "5000:5000" - - "8080:8080" - - "8501:8501" - volumes: - - ${DATA_VOLUME} - - ${HF_VOLUME} - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:5000"] - interval: 2s - timeout: 2s - start_period: 1200s - depends_on: - - init diff --git a/deployment/docker-compose.triton.yml b/deployment/docker-compose.triton.yml new file mode 100644 index 0000000..e52a79b --- /dev/null +++ b/deployment/docker-compose.triton.yml @@ -0,0 +1,15 @@ +version: '3.3' + +services: + tabby: + shm_size: 1gb + environment: + MODEL_BACKEND: triton + MODEL_REPLICA: ${MODEL_REPLICA:-1} + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] diff --git a/deployment/docker-compose.yml b/deployment/docker-compose.yml index 771e08a..dd48a5e 100644 --- a/deployment/docker-compose.yml +++ b/deployment/docker-compose.yml @@ -13,11 +13,8 @@ services: tabby: image: tabbyml/tabby container_name: tabby - shm_size: 1gb environment: MODEL_NAME: ${MODEL_NAME} - MODEL_BACKEND: triton - MODEL_REPLICA: ${MODEL_REPLICA:-1} ports: - "5000:5000" - "8080:8080" @@ -30,12 +27,5 @@ services: interval: 2s timeout: 2s start_period: 1200s - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] depends_on: - init diff --git a/deployment/skypilot/default.yml b/deployment/skypilot/default.yml index e1bd987..6e1aa06 100644 --- a/deployment/skypilot/default.yml +++ b/deployment/skypilot/default.yml @@ -22,4 +22,4 @@ setup: | run: | cd tabby/deployment - sudo MODEL_REPLICA=${MODEL_REPLICA:-8} docker-compose up + sudo MODEL_REPLICA=${MODEL_REPLICA:-8} docker-compose -f docker-compose.yml -f docker-compose.triton.yml up diff --git a/development/Makefile b/development/Makefile index b88b065..770edf0 100644 --- a/development/Makefile +++ b/development/Makefile @@ -7,5 +7,5 @@ build: dev: docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS) -dev-python: - docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS) +dev-triton: + docker-compose -f ../deployment/docker-compose.yml -f ../deployment/docker-compose.triton.yml -f docker-compose.dev.yml $(UP_FLAGS)