feat: support MODEL_REPLICA to control number of model instances in triton
parent
2cda4fd07b
commit
0a66a9d498
|
|
@ -39,6 +39,7 @@ services:
|
|||
capabilities: [gpu]
|
||||
environment:
|
||||
MODEL_NAME: ${MODEL_NAME}
|
||||
MODEL_REPLICA: ${MODEL_REPLICA:-1}
|
||||
depends_on:
|
||||
tabby:
|
||||
condition: service_healthy
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
MODEL_REPLICA=${MODEL_REPLICA:-1}
|
||||
|
||||
if [ -d "$MODEL_NAME" ]; then
|
||||
MODEL_DIR="$MODEL_NAME"
|
||||
else
|
||||
|
|
@ -16,6 +18,9 @@ fi
|
|||
# Set model dir in triton config.
|
||||
sed -i 's@${MODEL_DIR}@'$MODEL_DIR'@g' $MODEL_DIR/triton/fastertransformer/config.pbtxt
|
||||
|
||||
# Set model replica count in triton config.
|
||||
sed -i "s/count: 1/count: $MODEL_REPLICA/g" $MODEL_DIR/triton/fastertransformer/config.pbtxt
|
||||
|
||||
# Start triton server.
|
||||
mpirun -n 1 \
|
||||
--allow-run-as-root /opt/tritonserver/bin/tritonserver \
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ setup: |
|
|||
git clone https://github.com/TabbyML/tabby.git || true
|
||||
sudo curl -L "https://github.com/docker/compose/releases/download/v2.12.1/docker-compose-linux-x86_64" -o /usr/local/bin/docker-compose
|
||||
sudo chmod +x /usr/local/bin/docker-compose
|
||||
cd tabby && docker-compose pull
|
||||
cd tabby/deployment && docker-compose pull
|
||||
|
||||
run: |
|
||||
cd tabby/deployment
|
||||
docker-compose up
|
||||
MODEL_REPLICA=8 docker-compose up
|
||||
|
|
|
|||
Loading…
Reference in New Issue