chore: adjust loadtest value based on release v0.3.0 (#572)

* chore: adjust loadtest value based on release v0.3.0

* Update default.loadtest.js
dedup-snippet-at-index
Meng Zhang 2023-10-16 15:13:26 -07:00 committed by GitHub
parent 28b5ea3c09
commit 98761c2a79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 83 deletions

49
Cargo.lock generated
View File

@ -869,12 +869,6 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "digest"
version = "0.10.7"
@ -1219,21 +1213,6 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "goldie"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd1d4b95ae93c6d91591a2998aa7363113e51130ede293b3c92ac89b63e13914"
dependencies = [
"anyhow",
"once_cell",
"pretty_assertions",
"serde",
"serde_json",
"upon",
"yansi",
]
[[package]]
name = "h2"
version = "0.3.19"
@ -2335,16 +2314,6 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "pretty_assertions"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "prettyplease"
version = "0.1.25"
@ -3146,7 +3115,6 @@ dependencies = [
"clap",
"ctranslate2-bindings",
"futures",
"goldie",
"http-api-bindings",
"hyper",
"lazy_static",
@ -4093,17 +4061,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "upon"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b77ce40602cb1a7dfcdd6336f6d8baa2803c898aafbc0d46156b59727f2e7135"
dependencies = [
"serde",
"unicode-ident",
"unicode-width",
]
[[package]]
name = "url"
version = "2.3.1"
@ -4592,12 +4549,6 @@ dependencies = [
"libc",
]
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]]
name = "zip"
version = "0.6.6"

View File

@ -1,8 +1,9 @@
# smoke: run every k6 script matching tests/*.smoke.js.
smoke:
k6 run tests/*.smoke.js
# loadtest: run every k6 script matching tests/*.loadtest.js.
# Requires TABBY_API_HOST to be defined; when it is not, make stops with
# "TABBY_API_HOST is undefined" instead of invoking k6.
loadtest:
ifdef TABBY_API_HOST
k6 run tests/*.loadtest.js
else
$(error TABBY_API_HOST is undefined)
endif
# fix: apply clippy's automatic fixes (allowed even with dirty or staged
# changes in the working tree), then, if that succeeds, format with the
# nightly toolchain's cargo fmt.
fix:
cargo clippy --fix --allow-dirty --allow-staged && cargo +nightly fmt

View File

@ -1,23 +1,28 @@
import http from "k6/http";
import { check, group, sleep } from "k6";
import { check, group, sleep, abortTest } from "k6";
// k6 run configuration for the load test: load profile (stages), host
// override, and pass/fail thresholds.
// NOTE(review): several entries below appear twice in different forms (two
// groups of three stages, a repeated "api.tabbyml.com" key, a repeated
// http_req_duration key) — confirm which set is intended. For a repeated key
// in an object literal, the later entry is the one that takes effect.
export const options = {
stages: [
{ duration: "5s", target: 10 }, // ramp up to 10 users over 5s
{ duration: "30s", target: 10 }, // stay at 10 users for 30s
{ duration: "5s", target: 0 }, // ramp down to 0 users over 5s
{ duration: "5s", target: 8 }, // ramp up to 8 users over 5s
{ duration: "20s", target: 8 }, // stay at 8 users for 20s
{ duration: "5s", target: 0 }, // ramp down to 0 users over 5s
],
// Point requests for api.tabbyml.com at the host named by the TABBY_API_HOST
// environment variable.
hosts: {
// NOTE(review): overridden by the identical key on the next line, so the
// "localhost:5000" fallback never takes effect.
"api.tabbyml.com": __ENV.TABBY_API_HOST || "localhost:5000",
"api.tabbyml.com": __ENV.TABBY_API_HOST,
},
// The thresholds below are tested against TabbyML/StarCoder-1B served by an NVIDIA T4 GPU.
thresholds: {
// NOTE(review): overridden by the second http_req_duration entry below.
http_req_duration: ["p(99)<1000"], // 99% of requests must complete below 1000ms
http_req_failed: ['rate<0.01'], // http errors should be less than 1%
// Median below 400ms, average below 1500ms, 95th percentile below 3000ms.
http_req_duration: ["med<400", "avg<1500", "p(95)<3000"],
},
};
export default () => {
const payload = JSON.stringify({
prompt: "def binarySearch(arr, left, right, x):\n mid = (left +",
language: "python",
segments: {
prefix: "def binarySearch(arr, left, right, x):\n mid = (left +"
},
});
const headers = { "Content-Type": "application/json" };
const res = http.post("http://api.tabbyml.com/v1/completions", payload, {

View File

@ -1,24 +0,0 @@
import http from "k6/http";
import { check, sleep } from "k6";
// Address that "api.tabbyml.com" is mapped to when TABBY_API_HOST is unset or empty.
const DEFAULT_API_HOST = "localhost:5000";

/**
 * k6 options for the smoke test: a single 3-second stage targeting 5 users,
 * with api.tabbyml.com redirected to TABBY_API_HOST (or the local default).
 */
export const options = {
  stages: [{ duration: "3s", target: 5 }],
  hosts: {
    "api.tabbyml.com": __ENV.TABBY_API_HOST || DEFAULT_API_HOST,
  },
};
// Argument passed to k6's sleep() at the end of every iteration.
const SLEEP_DURATION = 1;

/**
 * Smoke-test iteration: POST one completion request as JSON, record a
 * "success" check that passes when the response status is 200, then sleep.
 */
export default function () {
  const requestParams = {
    headers: { "Content-Type": "application/json" },
  };
  const body = JSON.stringify({
    prompt: "def binarySearch(arr, left, right, x):\n mid = (left +",
  });

  const response = http.post(
    "http://api.tabbyml.com/v1/completions",
    body,
    requestParams
  );

  check(response, { success: (reply) => reply.status === 200 });
  sleep(SLEEP_DURATION);
}