chore: release 0.6.0 (#882)

* add loadtest

* release 0.6.0

* Release 0.6.0-rc.0

http-api-bindings@0.6.0-rc.0
juniper-axum@0.6.0-rc.0
llama-cpp-bindings@0.6.0-rc.0
tabby@0.6.0-rc.0
tabby-common@0.6.0-rc.0
tabby-download@0.6.0-rc.0
tabby-inference@0.6.0-rc.0
tabby-scheduler@0.6.0-rc.0
tabby-webserver@0.6.0-rc.0

Generated by cargo-workspaces

* Release 0.6.0-rc.1

http-api-bindings@0.6.0-rc.1
juniper-axum@0.6.0-rc.1
llama-cpp-bindings@0.6.0-rc.1
tabby@0.6.0-rc.1
tabby-common@0.6.0-rc.1
tabby-download@0.6.0-rc.1
tabby-inference@0.6.0-rc.1
tabby-scheduler@0.6.0-rc.1
tabby-webserver@0.6.0-rc.1

Generated by cargo-workspaces

* Release 0.6.0-rc.2

http-api-bindings@0.6.0-rc.2
juniper-axum@0.6.0-rc.2
llama-cpp-bindings@0.6.0-rc.2
tabby@0.6.0-rc.2
tabby-common@0.6.0-rc.2
tabby-download@0.6.0-rc.2
tabby-inference@0.6.0-rc.2
tabby-scheduler@0.6.0-rc.2
tabby-webserver@0.6.0-rc.2

Generated by cargo-workspaces

* Release 0.6.0

http-api-bindings@0.6.0
juniper-axum@0.6.0
llama-cpp-bindings@0.6.0
tabby@0.6.0
tabby-common@0.6.0
tabby-download@0.6.0
tabby-inference@0.6.0
tabby-scheduler@0.6.0
tabby-webserver@0.6.0

Generated by cargo-workspaces
add-prompt-lookup
Meng Zhang 2023-11-27 14:57:45 +08:00 committed by GitHub
parent 7d8bc3d488
commit b1481b0e2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 30 additions and 25 deletions

View File

@ -1,4 +1,10 @@
# v0.6.0 [Unreleased] # v0.7.0 [Unreleased]
## Features
## Fixes and Improvements
# v0.6.0
## Features ## Features

18
Cargo.lock generated
View File

@ -1951,7 +1951,7 @@ dependencies = [
[[package]] [[package]]
name = "http-api-bindings" name = "http-api-bindings"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"futures", "futures",
@ -2254,7 +2254,7 @@ dependencies = [
[[package]] [[package]]
name = "juniper-axum" name = "juniper-axum"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"axum", "axum",
"juniper", "juniper",
@ -2410,7 +2410,7 @@ checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f"
[[package]] [[package]]
name = "llama-cpp-bindings" name = "llama-cpp-bindings"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
@ -4554,7 +4554,7 @@ dependencies = [
[[package]] [[package]]
name = "tabby" name = "tabby"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"assert-json-diff", "assert-json-diff",
@ -4604,7 +4604,7 @@ dependencies = [
[[package]] [[package]]
name = "tabby-common" name = "tabby-common"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",
@ -4624,7 +4624,7 @@ dependencies = [
[[package]] [[package]]
name = "tabby-download" name = "tabby-download"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"aim", "aim",
"anyhow", "anyhow",
@ -4636,7 +4636,7 @@ dependencies = [
[[package]] [[package]]
name = "tabby-inference" name = "tabby-inference"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
@ -4649,7 +4649,7 @@ dependencies = [
[[package]] [[package]]
name = "tabby-scheduler" name = "tabby-scheduler"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cargo-lock", "cargo-lock",
@ -4680,7 +4680,7 @@ dependencies = [
[[package]] [[package]]
name = "tabby-webserver" name = "tabby-webserver"
version = "0.6.0-dev" version = "0.6.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-trait", "async-trait",

View File

@ -13,7 +13,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
authors = ["Meng Zhang"] authors = ["Meng Zhang"]
homepage = "https://github.com/TabbyML/tabby" homepage = "https://github.com/TabbyML/tabby"

View File

@ -1,6 +1,6 @@
[package] [package]
name = "http-api-bindings" name = "http-api-bindings"
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "llama-cpp-bindings" name = "llama-cpp-bindings"
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
[features] [features]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "tabby-common" name = "tabby-common"
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "tabby-download" name = "tabby-download"
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "tabby-inference" name = "tabby-inference"
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,6 @@
[package] [package]
name = "tabby-scheduler" name = "tabby-scheduler"
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,6 @@
[package] [package]
name = "tabby" name = "tabby"
version = "0.6.0-dev" version = "0.6.0"
edition = "2021" edition = "2021"
[features] [features]

View File

@ -7,13 +7,10 @@ export const options = {
{ duration: "20s", target: 8 }, { duration: "20s", target: 8 },
{ duration: "5s", target: 0 }, { duration: "5s", target: 0 },
], ],
hosts: {
"api.tabbyml.com": __ENV.TABBY_API_HOST,
},
// Below thresholds are tested against TabbyML/StarCoder-1B served by NVIDIA T4 GPU. // Below thresholds are tested against TabbyML/StarCoder-1B served by NVIDIA T4 GPU.
thresholds: { thresholds: {
http_req_failed: ['rate<0.01'], // http errors should be less than 1% http_req_failed: ['rate<0.01'], // http errors should be less than 1%
http_req_duration: ["med<2500", "avg<2200", "p(95)<2500"], http_req_duration: ["med<1800", "avg<1800", "p(95)<2000"],
}, },
}; };
@ -25,7 +22,7 @@ export default () => {
}, },
}); });
const headers = { "Content-Type": "application/json" }; const headers = { "Content-Type": "application/json" };
const res = http.post("http://api.tabbyml.com/v1/completions", payload, { const res = http.post(`${__ENV.TABBY_API_HOST}/v1/completions`, payload, {
headers, headers,
}); });
check(res, { success: (r) => r.status === 200 }); check(res, { success: (r) => r.status === 200 });

View File

@ -4,7 +4,7 @@ modal serve app.py
from modal import Image, Stub, asgi_app, gpu from modal import Image, Stub, asgi_app, gpu
IMAGE_NAME = "tabbyml/tabby:0.5.5" IMAGE_NAME = "tabbyml/tabby:0.6.0"
MODEL_ID = "TabbyML/StarCoder-1B" MODEL_ID = "TabbyML/StarCoder-1B"
GPU_CONFIG = gpu.T4() GPU_CONFIG = gpu.T4()
@ -58,6 +58,8 @@ def app():
"8000", "8000",
"--device", "--device",
"cuda", "cuda",
"--parallelism",
"4",
] ]
) )

View File

@ -13,7 +13,7 @@ from modal import Image, Stub, asgi_app, gpu
Next, we set the base docker image version, which model to serve, taking care to specify the GPU configuration required to fit the model into VRAM. Next, we set the base docker image version, which model to serve, taking care to specify the GPU configuration required to fit the model into VRAM.
```python ```python
IMAGE_NAME = "tabbyml/tabby:0.5.5" IMAGE_NAME = "tabbyml/tabby:0.6.0"
MODEL_ID = "TabbyML/StarCoder-1B" MODEL_ID = "TabbyML/StarCoder-1B"
GPU_CONFIG = gpu.T4() GPU_CONFIG = gpu.T4()
``` ```