chore: release 0.6.0 (#882)

* add loadtest

* release 0.6.0

* Release 0.6.0-rc.0

http-api-bindings@0.6.0-rc.0
juniper-axum@0.6.0-rc.0
llama-cpp-bindings@0.6.0-rc.0
tabby@0.6.0-rc.0
tabby-common@0.6.0-rc.0
tabby-download@0.6.0-rc.0
tabby-inference@0.6.0-rc.0
tabby-scheduler@0.6.0-rc.0
tabby-webserver@0.6.0-rc.0

Generated by cargo-workspaces

* Release 0.6.0-rc.1

http-api-bindings@0.6.0-rc.1
juniper-axum@0.6.0-rc.1
llama-cpp-bindings@0.6.0-rc.1
tabby@0.6.0-rc.1
tabby-common@0.6.0-rc.1
tabby-download@0.6.0-rc.1
tabby-inference@0.6.0-rc.1
tabby-scheduler@0.6.0-rc.1
tabby-webserver@0.6.0-rc.1

Generated by cargo-workspaces

* Release 0.6.0-rc.2

http-api-bindings@0.6.0-rc.2
juniper-axum@0.6.0-rc.2
llama-cpp-bindings@0.6.0-rc.2
tabby@0.6.0-rc.2
tabby-common@0.6.0-rc.2
tabby-download@0.6.0-rc.2
tabby-inference@0.6.0-rc.2
tabby-scheduler@0.6.0-rc.2
tabby-webserver@0.6.0-rc.2

Generated by cargo-workspaces

* Release 0.6.0

http-api-bindings@0.6.0
juniper-axum@0.6.0
llama-cpp-bindings@0.6.0
tabby@0.6.0
tabby-common@0.6.0
tabby-download@0.6.0
tabby-inference@0.6.0
tabby-scheduler@0.6.0
tabby-webserver@0.6.0

Generated by cargo-workspaces
add-prompt-lookup
Meng Zhang 2023-11-27 14:57:45 +08:00 committed by GitHub
parent 7d8bc3d488
commit b1481b0e2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 30 additions and 25 deletions

View File

@ -1,4 +1,10 @@
# v0.6.0 [Unreleased]
# v0.7.0 [Unreleased]
## Features
## Fixes and Improvements
# v0.6.0
## Features

18
Cargo.lock generated
View File

@ -1951,7 +1951,7 @@ dependencies = [
[[package]]
name = "http-api-bindings"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"async-trait",
"futures",
@ -2254,7 +2254,7 @@ dependencies = [
[[package]]
name = "juniper-axum"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"axum",
"juniper",
@ -2410,7 +2410,7 @@ checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f"
[[package]]
name = "llama-cpp-bindings"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"async-stream",
"async-trait",
@ -4554,7 +4554,7 @@ dependencies = [
[[package]]
name = "tabby"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"anyhow",
"assert-json-diff",
@ -4604,7 +4604,7 @@ dependencies = [
[[package]]
name = "tabby-common"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"anyhow",
"async-trait",
@ -4624,7 +4624,7 @@ dependencies = [
[[package]]
name = "tabby-download"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"aim",
"anyhow",
@ -4636,7 +4636,7 @@ dependencies = [
[[package]]
name = "tabby-inference"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"async-stream",
"async-trait",
@ -4649,7 +4649,7 @@ dependencies = [
[[package]]
name = "tabby-scheduler"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"anyhow",
"cargo-lock",
@ -4680,7 +4680,7 @@ dependencies = [
[[package]]
name = "tabby-webserver"
version = "0.6.0-dev"
version = "0.6.0"
dependencies = [
"anyhow",
"async-trait",

View File

@ -13,7 +13,7 @@ members = [
]
[workspace.package]
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
authors = ["Meng Zhang"]
homepage = "https://github.com/TabbyML/tabby"

View File

@ -1,6 +1,6 @@
[package]
name = "http-api-bindings"
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
[dependencies]

View File

@ -1,6 +1,6 @@
[package]
name = "llama-cpp-bindings"
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
[features]

View File

@ -1,6 +1,6 @@
[package]
name = "tabby-common"
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
[dependencies]

View File

@ -1,6 +1,6 @@
[package]
name = "tabby-download"
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
[dependencies]

View File

@ -1,6 +1,6 @@
[package]
name = "tabby-inference"
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,6 @@
[package]
name = "tabby-scheduler"
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,6 @@
[package]
name = "tabby"
version = "0.6.0-dev"
version = "0.6.0"
edition = "2021"
[features]

View File

@ -7,13 +7,10 @@ export const options = {
{ duration: "20s", target: 8 },
{ duration: "5s", target: 0 },
],
hosts: {
"api.tabbyml.com": __ENV.TABBY_API_HOST,
},
// Below thresholds are tested against TabbyML/StarCoder-1B served by NVIDIA T4 GPU.
thresholds: {
http_req_failed: ['rate<0.01'], // http errors should be less than 1%
http_req_duration: ["med<2500", "avg<2200", "p(95)<2500"],
http_req_duration: ["med<1800", "avg<1800", "p(95)<2000"],
},
};
@ -25,7 +22,7 @@ export default () => {
},
});
const headers = { "Content-Type": "application/json" };
const res = http.post("http://api.tabbyml.com/v1/completions", payload, {
const res = http.post(`${__ENV.TABBY_API_HOST}/v1/completions`, payload, {
headers,
});
check(res, { success: (r) => r.status === 200 });

View File

@ -4,7 +4,7 @@ modal serve app.py
from modal import Image, Stub, asgi_app, gpu
IMAGE_NAME = "tabbyml/tabby:0.5.5"
IMAGE_NAME = "tabbyml/tabby:0.6.0"
MODEL_ID = "TabbyML/StarCoder-1B"
GPU_CONFIG = gpu.T4()
@ -58,6 +58,8 @@ def app():
"8000",
"--device",
"cuda",
"--parallelism",
"4",
]
)

View File

@ -13,7 +13,7 @@ from modal import Image, Stub, asgi_app, gpu
Next, we set the base Docker image version and the model to serve, taking care to specify a GPU configuration with enough VRAM to fit the model.
```python
IMAGE_NAME = "tabbyml/tabby:0.5.5"
IMAGE_NAME = "tabbyml/tabby:0.6.0"
MODEL_ID = "TabbyML/StarCoder-1B"
GPU_CONFIG = gpu.T4()
```