refactor: default docker-compose w/ python backend
parent
e36ddbac6b
commit
93e4f5b3ca
|
|
@ -84,7 +84,7 @@ make dev
|
||||||
```
|
```
|
||||||
or
|
or
|
||||||
```bash
|
```bash
|
||||||
make dev-python # Turn off triton backend (for non-cuda env developers)
|
make dev-triton # Turn on triton backend (for cuda env developers)
|
||||||
```
|
```
|
||||||
|
|
||||||
## TODOs
|
## TODOs
|
||||||
|
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
||||||
version: '3.3'
|
|
||||||
|
|
||||||
services:
|
|
||||||
init:
|
|
||||||
image: tabbyml/tabby
|
|
||||||
container_name: init
|
|
||||||
user: root
|
|
||||||
volumes:
|
|
||||||
- ${DATA_VOLUME}
|
|
||||||
- ${HF_VOLUME}
|
|
||||||
command: chown -R 1000 /data
|
|
||||||
|
|
||||||
tabby:
|
|
||||||
image: tabbyml/tabby
|
|
||||||
container_name: tabby
|
|
||||||
environment:
|
|
||||||
MODEL_NAME: ${MODEL_NAME}
|
|
||||||
ports:
|
|
||||||
- "5000:5000"
|
|
||||||
- "8080:8080"
|
|
||||||
- "8501:8501"
|
|
||||||
volumes:
|
|
||||||
- ${DATA_VOLUME}
|
|
||||||
- ${HF_VOLUME}
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:5000"]
|
|
||||||
interval: 2s
|
|
||||||
timeout: 2s
|
|
||||||
start_period: 1200s
|
|
||||||
depends_on:
|
|
||||||
- init
|
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
version: '3.3'
|
||||||
|
|
||||||
|
services:
|
||||||
|
tabby:
|
||||||
|
shm_size: 1gb
|
||||||
|
environment:
|
||||||
|
MODEL_BACKEND: triton
|
||||||
|
MODEL_REPLICA: ${MODEL_REPLICA:-1}
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: all
|
||||||
|
capabilities: [gpu]
|
||||||
|
|
@ -13,11 +13,8 @@ services:
|
||||||
tabby:
|
tabby:
|
||||||
image: tabbyml/tabby
|
image: tabbyml/tabby
|
||||||
container_name: tabby
|
container_name: tabby
|
||||||
shm_size: 1gb
|
|
||||||
environment:
|
environment:
|
||||||
MODEL_NAME: ${MODEL_NAME}
|
MODEL_NAME: ${MODEL_NAME}
|
||||||
MODEL_BACKEND: triton
|
|
||||||
MODEL_REPLICA: ${MODEL_REPLICA:-1}
|
|
||||||
ports:
|
ports:
|
||||||
- "5000:5000"
|
- "5000:5000"
|
||||||
- "8080:8080"
|
- "8080:8080"
|
||||||
|
|
@ -30,12 +27,5 @@ services:
|
||||||
interval: 2s
|
interval: 2s
|
||||||
timeout: 2s
|
timeout: 2s
|
||||||
start_period: 1200s
|
start_period: 1200s
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
count: all
|
|
||||||
capabilities: [gpu]
|
|
||||||
depends_on:
|
depends_on:
|
||||||
- init
|
- init
|
||||||
|
|
|
||||||
|
|
@ -22,4 +22,4 @@ setup: |
|
||||||
|
|
||||||
run: |
|
run: |
|
||||||
cd tabby/deployment
|
cd tabby/deployment
|
||||||
sudo MODEL_REPLICA=${MODEL_REPLICA:-8} docker-compose up
|
sudo MODEL_REPLICA=${MODEL_REPLICA:-8} docker-compose -f docker-compose.yml -f docker-compose.triton.yml up
|
||||||
|
|
|
||||||
|
|
@ -7,5 +7,5 @@ build:
|
||||||
dev:
|
dev:
|
||||||
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
docker-compose -f ../deployment/docker-compose.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
||||||
|
|
||||||
dev-python:
|
dev-triton:
|
||||||
docker-compose -f ../deployment/docker-compose.python.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
docker-compose -f ../deployment/docker-compose.yml -f ../deployment/docker-compose.triton.yml -f docker-compose.dev.yml $(UP_FLAGS)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue