feat: support MODEL_REPLICA to control number of model instances in triton

add-more-languages
Meng Zhang 2023-04-02 13:19:09 +08:00
parent 2cda4fd07b
commit 0a66a9d498
3 changed files with 8 additions and 2 deletions

View File

@ -39,6 +39,7 @@ services:
capabilities: [gpu]
environment:
MODEL_NAME: ${MODEL_NAME}
MODEL_REPLICA: ${MODEL_REPLICA:-1}
depends_on:
tabby:
condition: service_healthy

View File

@ -1,6 +1,8 @@
#!/bin/bash
# Abort the script as soon as any command exits with a non-zero status.
set -o errexit
# Number of model instances to configure in triton; falls back to 1 when
# MODEL_REPLICA is unset or empty.
: "${MODEL_REPLICA:=1}"
if [ -d "$MODEL_NAME" ]; then
MODEL_DIR="$MODEL_NAME"
else
@ -16,6 +18,9 @@ fi
# Set model dir in triton config.
# The pattern is single-quoted so the shell leaves the literal ${MODEL_DIR}
# placeholder intact; the replacement and the file path are double-quoted so a
# model directory containing spaces or glob characters is passed through whole.
sed -i 's@${MODEL_DIR}@'"$MODEL_DIR"'@g' "$MODEL_DIR/triton/fastertransformer/config.pbtxt"
# Set model replica count in triton config.
# MODEL_REPLICA is spliced into the sed program, so reject anything that is not
# a plain non-negative integer before it can be interpreted as sed syntax.
if ! [[ "$MODEL_REPLICA" =~ ^[0-9]+$ ]]; then
  printf 'MODEL_REPLICA must be a non-negative integer, got: %s\n' "$MODEL_REPLICA" >&2
  exit 1
fi
sed -i "s/count: 1/count: $MODEL_REPLICA/g" "$MODEL_DIR/triton/fastertransformer/config.pbtxt"
# Start triton server.
mpirun -n 1 \
--allow-run-as-root /opt/tritonserver/bin/tritonserver \

View File

@ -7,8 +7,8 @@ setup: |
git clone https://github.com/TabbyML/tabby.git || true
sudo curl -L "https://github.com/docker/compose/releases/download/v2.12.1/docker-compose-linux-x86_64" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
cd tabby && docker-compose pull
cd tabby/deployment && docker-compose pull
run: |
cd tabby/deployment
docker-compose up
MODEL_REPLICA=8 docker-compose up