chore: release 0.6.0 (#882)
* add loadtest * release 0.6.0 * Release 0.6.0-rc.0 http-api-bindings@0.6.0-rc.0 juniper-axum@0.6.0-rc.0 llama-cpp-bindings@0.6.0-rc.0 tabby@0.6.0-rc.0 tabby-common@0.6.0-rc.0 tabby-download@0.6.0-rc.0 tabby-inference@0.6.0-rc.0 tabby-scheduler@0.6.0-rc.0 tabby-webserver@0.6.0-rc.0 Generated by cargo-workspaces * Release 0.6.0-rc.1 http-api-bindings@0.6.0-rc.1 juniper-axum@0.6.0-rc.1 llama-cpp-bindings@0.6.0-rc.1 tabby@0.6.0-rc.1 tabby-common@0.6.0-rc.1 tabby-download@0.6.0-rc.1 tabby-inference@0.6.0-rc.1 tabby-scheduler@0.6.0-rc.1 tabby-webserver@0.6.0-rc.1 Generated by cargo-workspaces * Release 0.6.0-rc.2 http-api-bindings@0.6.0-rc.2 juniper-axum@0.6.0-rc.2 llama-cpp-bindings@0.6.0-rc.2 tabby@0.6.0-rc.2 tabby-common@0.6.0-rc.2 tabby-download@0.6.0-rc.2 tabby-inference@0.6.0-rc.2 tabby-scheduler@0.6.0-rc.2 tabby-webserver@0.6.0-rc.2 Generated by cargo-workspaces * Release 0.6.0 http-api-bindings@0.6.0 juniper-axum@0.6.0 llama-cpp-bindings@0.6.0 tabby@0.6.0 tabby-common@0.6.0 tabby-download@0.6.0 tabby-inference@0.6.0 tabby-scheduler@0.6.0 tabby-webserver@0.6.0 Generated by cargo-workspaces
add-prompt-lookup
parent
7d8bc3d488
commit
b1481b0e2e
|
|
@ -1,4 +1,10 @@
|
||||||
# v0.6.0 [Unreleased]
|
# v0.7.0 [Unreleased]
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
## Fixes and Improvements
|
||||||
|
|
||||||
|
# v0.6.0
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1951,7 +1951,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "http-api-bindings"
|
name = "http-api-bindings"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"futures",
|
"futures",
|
||||||
|
|
@ -2254,7 +2254,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "juniper-axum"
|
name = "juniper-axum"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"axum",
|
"axum",
|
||||||
"juniper",
|
"juniper",
|
||||||
|
|
@ -2410,7 +2410,7 @@ checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "llama-cpp-bindings"
|
name = "llama-cpp-bindings"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
|
@ -4554,7 +4554,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tabby"
|
name = "tabby"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"assert-json-diff",
|
"assert-json-diff",
|
||||||
|
|
@ -4604,7 +4604,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tabby-common"
|
name = "tabby-common"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
|
@ -4624,7 +4624,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tabby-download"
|
name = "tabby-download"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aim",
|
"aim",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
|
@ -4636,7 +4636,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tabby-inference"
|
name = "tabby-inference"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
|
@ -4649,7 +4649,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tabby-scheduler"
|
name = "tabby-scheduler"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"cargo-lock",
|
"cargo-lock",
|
||||||
|
|
@ -4680,7 +4680,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tabby-webserver"
|
name = "tabby-webserver"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ members = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["Meng Zhang"]
|
authors = ["Meng Zhang"]
|
||||||
homepage = "https://github.com/TabbyML/tabby"
|
homepage = "https://github.com/TabbyML/tabby"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "http-api-bindings"
|
name = "http-api-bindings"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "llama-cpp-bindings"
|
name = "llama-cpp-bindings"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "tabby-common"
|
name = "tabby-common"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "tabby-download"
|
name = "tabby-download"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "tabby-inference"
|
name = "tabby-inference"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "tabby-scheduler"
|
name = "tabby-scheduler"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "tabby"
|
name = "tabby"
|
||||||
version = "0.6.0-dev"
|
version = "0.6.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
|
|
||||||
|
|
@ -7,13 +7,10 @@ export const options = {
|
||||||
{ duration: "20s", target: 8 },
|
{ duration: "20s", target: 8 },
|
||||||
{ duration: "5s", target: 0 },
|
{ duration: "5s", target: 0 },
|
||||||
],
|
],
|
||||||
hosts: {
|
|
||||||
"api.tabbyml.com": __ENV.TABBY_API_HOST,
|
|
||||||
},
|
|
||||||
// Below thresholds are tested against TabbyML/StarCoder-1B served by NVIDIA T4 GPU.
|
// Below thresholds are tested against TabbyML/StarCoder-1B served by NVIDIA T4 GPU.
|
||||||
thresholds: {
|
thresholds: {
|
||||||
http_req_failed: ['rate<0.01'], // http errors should be less than 1%
|
http_req_failed: ['rate<0.01'], // http errors should be less than 1%
|
||||||
http_req_duration: ["med<2500", "avg<2200", "p(95)<2500"],
|
http_req_duration: ["med<1800", "avg<1800", "p(95)<2000"],
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -25,7 +22,7 @@ export default () => {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
const headers = { "Content-Type": "application/json" };
|
const headers = { "Content-Type": "application/json" };
|
||||||
const res = http.post("http://api.tabbyml.com/v1/completions", payload, {
|
const res = http.post(`${__ENV.TABBY_API_HOST}/v1/completions`, payload, {
|
||||||
headers,
|
headers,
|
||||||
});
|
});
|
||||||
check(res, { success: (r) => r.status === 200 });
|
check(res, { success: (r) => r.status === 200 });
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ modal serve app.py
|
||||||
|
|
||||||
from modal import Image, Stub, asgi_app, gpu
|
from modal import Image, Stub, asgi_app, gpu
|
||||||
|
|
||||||
IMAGE_NAME = "tabbyml/tabby:0.5.5"
|
IMAGE_NAME = "tabbyml/tabby:0.6.0"
|
||||||
MODEL_ID = "TabbyML/StarCoder-1B"
|
MODEL_ID = "TabbyML/StarCoder-1B"
|
||||||
GPU_CONFIG = gpu.T4()
|
GPU_CONFIG = gpu.T4()
|
||||||
|
|
||||||
|
|
@ -58,6 +58,8 @@ def app():
|
||||||
"8000",
|
"8000",
|
||||||
"--device",
|
"--device",
|
||||||
"cuda",
|
"cuda",
|
||||||
|
"--parallelism",
|
||||||
|
"4",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from modal import Image, Stub, asgi_app, gpu
|
||||||
Next, we set the base docker image version, which model to serve, taking care to specify the GPU configuration required to fit the model into VRAM.
|
Next, we set the base docker image version, which model to serve, taking care to specify the GPU configuration required to fit the model into VRAM.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
IMAGE_NAME = "tabbyml/tabby:0.5.5"
|
IMAGE_NAME = "tabbyml/tabby:0.6.0"
|
||||||
MODEL_ID = "TabbyML/StarCoder-1B"
|
MODEL_ID = "TabbyML/StarCoder-1B"
|
||||||
GPU_CONFIG = gpu.T4()
|
GPU_CONFIG = gpu.T4()
|
||||||
```
|
```
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue