107 lines
1.7 KiB
Bash
107 lines
1.7 KiB
Bash
|
|
#!/bin/bash
|
||
|
|
|
||
|
|
#######################################
# Append one result row to record.csv in the current directory.
# Globals:   GPU_CONFIG, MODEL_ID, PARALLELISM (read)
# Arguments: $1 - the remaining CSV fields, already comma-joined by the caller
# Outputs:   appends "GPU_CONFIG,MODEL_ID,PARALLELISM,$1" to record.csv
# Returns:   status of the write to record.csv
#######################################
record() {
  # printf with quoted expansions writes each field verbatim. An unquoted
  # echo would word-split and glob-expand the values, collapse runs of
  # whitespace, and could treat a leading "-n"/"-e" as an option.
  printf '%s,%s,%s,%s\n' "$GPU_CONFIG" "$MODEL_ID" "$PARALLELISM" "$1" >> record.csv
}
|
||
|
|
|
||
|
|
#######################################
# Stop every deployed Modal app whose `modal app list` row mentions
# "tabby-server-loadtest".
# Arguments: none
# Returns:   0 when nothing is deployed or every stop succeeded; otherwise
#            the status of the last failing `modal app stop`.
#######################################
cleanup() {
  local app_ids app_id rc=0

  # The second whitespace-separated column of a matching row is taken as the
  # app ID (same extraction as before).
  app_ids=$(modal app list | grep tabby-server-loadtest | grep deployed | awk '{print $2}')

  # Nothing deployed: nothing to stop. The previous test used -z, which
  # skipped the stop exactly when an ID had been found.
  if [[ -z "$app_ids" ]]; then
    return 0
  fi

  # Read the IDs on fd 3 so `modal app stop` keeps the caller's stdin.
  while IFS= read -r app_id <&3; do
    if [[ -n "$app_id" ]]; then
      modal app stop "$app_id" || rc=$?
    fi
  done 3<<< "$app_ids"

  return "$rc"
}
|
||
|
|
|
||
|
|
#######################################
# Deploy the server with the given configuration, wait for it to report
# healthy, run the k6 load test once, record the outcome and tear the
# deployment down again.
# Globals:   GPU_CONFIG, MODEL_ID, PARALLELISM, MODAL_PROCESS_ID,
#            TABBY_API_HOST (exported)
# Arguments: $1 - GPU configuration
#            $2 - model ID
#            $3 - parallelism
# Outputs:   progress messages and tool output on stderr; one row appended
#            through record() when the load test actually ran.
# Returns:   1 if the deploy failed or the health endpoint never answered
#            200; otherwise the exit status of `k6 run`.
#######################################
loadtest() {
  local -r max_retries=5 retry_delay=10
  local attempt=0 rc=0 metrics=""

  export GPU_CONFIG="$1"
  export MODEL_ID="$2"
  export PARALLELISM="$3"

  # Do not go on to probe the endpoint if the deploy itself failed: a health
  # check could otherwise succeed against an older deployment.
  if ! modal deploy server.py >&2; then
    >&2 echo "modal deploy failed"
    cleanup
    return 1
  fi

  # NOTE(review): $! is the PID of the most recent *background* job, and the
  # deploy above runs in the foreground, so this is normally empty. Kept only
  # so the exported environment is unchanged.
  export MODAL_PROCESS_ID=$!
  export TABBY_API_HOST=https://wsxiaoys--tabby-server-loadtest-app.modal.run

  # wait for warmup
  >&2 echo "Waiting for warmup..."

  while [[ "$(curl -s -o /dev/null -w '%{http_code}' "$TABBY_API_HOST/v1/health")" != "200" ]]; do
    if (( attempt >= max_retries )); then
      # error after 5 retries; stop the deployment instead of leaving it up.
      cleanup
      return 1
    fi

    sleep "$retry_delay"
    attempt=$((attempt + 1))
  done

  >&2 echo "Start load testing..."

  >&2 k6 run loadtest.js
  rc=$?

  # metrics.txt is expected in the current directory after the k6 run
  # (presumably written by loadtest.js -- confirm). A run that produced no
  # file is recorded with empty metrics.
  if [[ -f metrics.txt ]]; then
    metrics=$(<metrics.txt)
    rm -f -- metrics.txt
  else
    >&2 echo "metrics.txt not found; recording empty metrics"
  fi

  # Quoted so the metrics and status always reach record() as one argument,
  # even when the metrics contain whitespace.
  if (( rc != 0 )); then
    record "${metrics},FAILED"
  else
    record "${metrics},SUCCESS"
  fi

  cleanup

  return "$rc"
}
|
||
|
|
|
||
|
|
#######################################
# Binary search over the integer range [min, max] for the largest value at
# which a command still succeeds.
# Globals:   min, max, command, current (written). When the function
#            returns, min holds the search result.
# Arguments: $1 - lower bound (never probed itself; it is the result when
#                 every probe fails)
#            $2 - upper bound
#            $3 - command to run with the candidate value appended as its
#                 last argument; exit status 0 means "try higher"
#######################################
dichotomic_search() {
  min=$1
  max=$2
  command=$3

  while (( $min < $max )); do
    # Probe the upper midpoint so the range always shrinks, even when
    # max == min + 1.
    current=$(( (min + max + 1) / 2 ))

    if ! $command $current; then
      # Candidate failed: the answer lies strictly below it.
      max=$(( current - 1 ))
      continue
    fi

    # Candidate passed: it becomes the new lower bound.
    min=$current
  done
}
|
||
|
|
|
||
|
|
#######################################
# Run one load test with "T4" as the GPU configuration.
# Globals:   MODEL_ID (read)
# Arguments: $1 - parallelism to test
# Returns:   exit status of loadtest
#######################################
test_t4() {
  # Quoted so a model ID containing whitespace or glob characters is passed
  # to loadtest as exactly one argument.
  loadtest T4 "$MODEL_ID" "$1"
}
|
||
|
|
|
||
|
|
#######################################
# Run one load test with "A10G" as the GPU configuration.
# Globals:   MODEL_ID (read)
# Arguments: $1 - parallelism to test
# Returns:   exit status of loadtest
#######################################
test_a10g() {
  # Quoted so a model ID containing whitespace or glob characters is passed
  # to loadtest as exactly one argument.
  loadtest A10G "$MODEL_ID" "$1"
}
|
||
|
|
|
||
|
|
#######################################
# Run one load test with "A100" as the GPU configuration.
# Globals:   MODEL_ID (read)
# Arguments: $1 - parallelism to test
# Returns:   exit status of loadtest
#######################################
test_a100() {
  # Quoted so a model ID containing whitespace or glob characters is passed
  # to loadtest as exactly one argument.
  loadtest A100 "$MODEL_ID" "$1"
}
|
||
|
|
|
||
|
|
#######################################
# Sweep one model across the T4, A10G and A100 test functions, searching
# parallelism from 1 up to a per-GPU ceiling (12, 32 and 64 respectively).
# Globals:   MODEL_ID (exported)
# Arguments: $1 - model ID
# Returns:   exit status of the last dichotomic_search call
#######################################
test_1b3b_model() {
  local sweep

  MODEL_ID=$1
  export MODEL_ID

  # Each entry is "<upper bound>:<test function>".
  for sweep in 12:test_t4 32:test_a10g 64:test_a100; do
    dichotomic_search 1 "${sweep%%:*}" "${sweep#*:}"
  done
}
|
||
|
|
|
||
|
|
#######################################
# Search parallelism 1..8 for one model using the A100 test function.
# Globals:   MODEL_ID (exported)
# Arguments: $1 - model ID
# Returns:   exit status of dichotomic_search
#######################################
test_7b_model() {
  MODEL_ID=$1
  export MODEL_ID

  dichotomic_search 1 8 test_a100
}
|
||
|
|
|
||
|
|
#######################################
# Search parallelism 1..8 for one model using the A100 test function.
# Globals:   MODEL_ID (exported)
# Arguments: $1 - model ID
# Returns:   exit status of dichotomic_search
#######################################
test_13b_model() {
  MODEL_ID=$1
  export MODEL_ID

  dichotomic_search 1 8 test_a100
}
|
||
|
|
|
||
|
|
# Entry point. Only the 13B sweep is active; the 7B sweep is left here
# commented out so it can be re-enabled by hand.
# test_7b_model TabbyML/CodeLlama-7B
test_13b_model 'TabbyML/CodeLlama-13B'
|