diff --git a/Cargo.lock b/Cargo.lock index 6cffadc..44855c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -869,12 +869,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "diff" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" - [[package]] name = "digest" version = "0.10.7" @@ -1219,21 +1213,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" -[[package]] -name = "goldie" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd1d4b95ae93c6d91591a2998aa7363113e51130ede293b3c92ac89b63e13914" -dependencies = [ - "anyhow", - "once_cell", - "pretty_assertions", - "serde", - "serde_json", - "upon", - "yansi", -] - [[package]] name = "h2" version = "0.3.19" @@ -2335,16 +2314,6 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "pretty_assertions" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" -dependencies = [ - "diff", - "yansi", -] - [[package]] name = "prettyplease" version = "0.1.25" @@ -3146,7 +3115,6 @@ dependencies = [ "clap", "ctranslate2-bindings", "futures", - "goldie", "http-api-bindings", "hyper", "lazy_static", @@ -4093,17 +4061,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" -[[package]] -name = "upon" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b77ce40602cb1a7dfcdd6336f6d8baa2803c898aafbc0d46156b59727f2e7135" -dependencies = [ - "serde", - "unicode-ident", - "unicode-width", -] - [[package]] name = "url" version = "2.3.1" @@ -4592,12 +4549,6 @@ dependencies = [ "libc", ] -[[package]] -name = "yansi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" - [[package]] name = "zip" version = "0.6.6" diff --git a/Makefile b/Makefile index 25e7ca1..9aa672c 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ -smoke: - k6 run tests/*.smoke.js - loadtest: +ifdef TABBY_API_HOST k6 run tests/*.loadtest.js +else + $(error TABBY_API_HOST is undefined) +endif fix: cargo clippy --fix --allow-dirty --allow-staged && cargo +nightly fmt diff --git a/tests/default.loadtest.js b/tests/default.loadtest.js index 3280376..f961632 100644 --- a/tests/default.loadtest.js +++ b/tests/default.loadtest.js @@ -1,23 +1,28 @@ import http from "k6/http"; -import { check, group, sleep } from "k6"; +import { check, group, sleep, abortTest } from "k6"; export const options = { stages: [ - { duration: "5s", target: 10 }, // simulate ramp-up of traffic from 1 to 10 users over 30s. - { duration: "30s", target: 10 }, // stay at 10 users for 10 minutes - { duration: "5s", target: 0 }, // ramp-down to 0 users + { duration: "5s", target: 8 }, + { duration: "20s", target: 8 }, + { duration: "5s", target: 0 }, ], hosts: { - "api.tabbyml.com": __ENV.TABBY_API_HOST || "localhost:5000", + "api.tabbyml.com": __ENV.TABBY_API_HOST, }, + // Below thresholds are tested against TabbyML/StarCoder-1B served by NVIDIA T4 GPU. thresholds: { - http_req_duration: ["p(99)<1000"], // 99% of requests must complete below 1000ms + http_req_failed: ['rate<0.01'], // http errors should be less than 1% + http_req_duration: ["med<400", "avg<1500", "p(95)<3000"], }, }; export default () => { const payload = JSON.stringify({ - prompt: "def binarySearch(arr, left, right, x):\n mid = (left +", + language: "python", + segments: { + prefix: "def binarySearch(arr, left, right, x):\n mid = (left +" + }, }); const headers = { "Content-Type": "application/json" }; const res = http.post("http://api.tabbyml.com/v1/completions", payload, { diff --git a/tests/default.smoke.js b/tests/default.smoke.js deleted file mode 100644 index 30697d9..0000000 --- a/tests/default.smoke.js +++ /dev/null @@ -1,24 +0,0 @@ -import http from "k6/http"; -import { check, sleep } from "k6"; - -export const options = { - stages: [ - {duration: '3s', target: 5}, - ], - hosts: { - 'api.tabbyml.com': __ENV.TABBY_API_HOST || "localhost:5000" - }, -}; -const SLEEP_DURATION = 1; - -export default function () { - const payload = JSON.stringify({ - prompt: "def binarySearch(arr, left, right, x):\n mid = (left +", - }); - const headers = { "Content-Type": "application/json" }; - const res = http.post("http://api.tabbyml.com/v1/completions", payload, { - headers, - }); - check(res, { success: (r) => r.status === 200 }); - sleep(SLEEP_DURATION) -}