Add a MODEL_REPLICA setting to the deployment.

* deployment/docker-compose.yml: pass MODEL_REPLICA (default 1) through the
  service's environment block, next to MODEL_NAME.
* deployment/scripts/triton.sh: default MODEL_REPLICA to 1 and write it into
  the `count:` field of triton/fastertransformer/config.pbtxt before the
  server starts. The pattern matches any existing numeric count, not just
  `count: 1`, because `sed -i` edits the file in place: after a first run has
  rewritten the value, a literal `count: 1` pattern would no longer match and
  a changed MODEL_REPLICA would be silently ignored.
  NOTE(review): this assumes the replica count is the only `count:` field in
  config.pbtxt - confirm against the config template.
* deployment/skypilot.yml: run `docker-compose pull` from tabby/deployment
  (where `run` already changes to before `docker-compose up`), and start the
  stack with MODEL_REPLICA defaulting to 8 unless the caller's environment
  already sets it.

NOTE(review): the indentation of context lines below was reconstructed from a
whitespace-collapsed copy of this patch; confirm it against the target files,
or apply with `git apply --ignore-whitespace`. The `index` lines are carried
over unchanged from the original patch.

diff --git a/deployment/docker-compose.yml b/deployment/docker-compose.yml
index 442b82f..2891207 100644
--- a/deployment/docker-compose.yml
+++ b/deployment/docker-compose.yml
@@ -39,6 +39,7 @@ services:
               capabilities: [gpu]
     environment:
       MODEL_NAME: ${MODEL_NAME}
+      MODEL_REPLICA: ${MODEL_REPLICA:-1}
     depends_on:
       tabby:
         condition: service_healthy
diff --git a/deployment/scripts/triton.sh b/deployment/scripts/triton.sh
index bb0b05c..28ac33a 100755
--- a/deployment/scripts/triton.sh
+++ b/deployment/scripts/triton.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 set -e
 
+MODEL_REPLICA=${MODEL_REPLICA:-1}
+
 if [ -d "$MODEL_NAME" ]; then
   MODEL_DIR="$MODEL_NAME"
 else
@@ -16,6 +18,9 @@ fi
 # Set model dir in triton config.
 sed -i 's@${MODEL_DIR}@'$MODEL_DIR'@g' $MODEL_DIR/triton/fastertransformer/config.pbtxt
 
+# Set model replica count in triton config.
+sed -i "s/count: [0-9][0-9]*/count: $MODEL_REPLICA/g" "$MODEL_DIR/triton/fastertransformer/config.pbtxt"
+
 # Start triton server.
 mpirun -n 1 \
   --allow-run-as-root /opt/tritonserver/bin/tritonserver \
diff --git a/deployment/skypilot.yml b/deployment/skypilot.yml
index 319c6f7..169a596 100644
--- a/deployment/skypilot.yml
+++ b/deployment/skypilot.yml
@@ -7,8 +7,8 @@ setup: |
   git clone https://github.com/TabbyML/tabby.git || true
   sudo curl -L "https://github.com/docker/compose/releases/download/v2.12.1/docker-compose-linux-x86_64" -o /usr/local/bin/docker-compose
   sudo chmod +x /usr/local/bin/docker-compose
-  cd tabby && docker-compose pull
+  cd tabby/deployment && docker-compose pull
 
 run: |
   cd tabby/deployment
-  docker-compose up
+  MODEL_REPLICA=${MODEL_REPLICA:-8} docker-compose up