feat: add Prometheus support to Tabby. (#838)
* Added Prometheus support to Tabby.
  1) Added `axum-prometheus` to Cargo.toml
  2) Added `metrics.rs` as a simple route to produce a metrics endpoint with Prometheus-formatted events
  3) Added `/v1/metrics` endpoint with API doc entry
  4) Added the `PrometheusMetricLayer` to the root layers.
  This change allows an external Prometheus instance to scrape metrics from Tabby during execution in order to monitor operations (resources, timings and overall usage).
* Changed application/text to text/plain to adhere to RFC 1341
* Update Makefile
* Update Makefile
* Update Makefile
* Update Makefile
* Update Makefile
* Update Makefile
* - Reworked code to fit upstream changes
  - Added Prometheus layer and handle to worker.rs
* Update CHANGELOG.md
* [autofix.ci] apply automated fixes
* Simplified worker metrics route definition
* [autofix.ci] apply automated fixes
* [autofix.ci] apply automated fixes (attempt 2/3)
---------
Co-authored-by: Meng Zhang <meng@tabbyml.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
release-fix-intellij-update-support-version-range
parent
7dbbfc39c2
commit
6dabecc3ee
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
* Add distribution support (running completion / chat model on different process / machine).
|
* Add distribution support (running completion / chat model on different process / machine).
|
||||||
* Add conversation history in chat playground.
|
* Add conversation history in chat playground.
|
||||||
|
* Add `/v1/metrics` endpoint for prometheus metrics collection.
|
||||||
|
|
||||||
## Fixes and Improvements
|
## Fixes and Improvements
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -486,6 +486,29 @@ dependencies = [
|
||||||
"tower-service",
|
"tower-service",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "axum-prometheus"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "97def327c5481791abb57ac295bfc70f2e1a0727675b7dbf74bd1b27a72b6fd8"
|
||||||
|
dependencies = [
|
||||||
|
"axum",
|
||||||
|
"axum-core",
|
||||||
|
"bytes",
|
||||||
|
"futures",
|
||||||
|
"futures-core",
|
||||||
|
"http",
|
||||||
|
"http-body",
|
||||||
|
"matchit",
|
||||||
|
"metrics",
|
||||||
|
"metrics-exporter-prometheus",
|
||||||
|
"once_cell",
|
||||||
|
"pin-project",
|
||||||
|
"tokio",
|
||||||
|
"tower",
|
||||||
|
"tower-http 0.4.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "axum-streams"
|
name = "axum-streams"
|
||||||
version = "0.9.1"
|
version = "0.9.1"
|
||||||
|
|
@ -1789,6 +1812,15 @@ dependencies = [
|
||||||
"ahash 0.7.7",
|
"ahash 0.7.7",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.14.0"
|
version = "0.14.0"
|
||||||
|
|
@ -2433,6 +2465,15 @@ version = "0.11.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
|
checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mach2"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matchers"
|
name = "matchers"
|
||||||
version = "0.0.1"
|
version = "0.0.1"
|
||||||
|
|
@ -2534,6 +2575,61 @@ dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics"
|
||||||
|
version = "0.21.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fde3af1a009ed76a778cb84fdef9e7dbbdf5775ae3e4cc1f434a6a307f6f76c5"
|
||||||
|
dependencies = [
|
||||||
|
"ahash 0.8.3",
|
||||||
|
"metrics-macros",
|
||||||
|
"portable-atomic",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-exporter-prometheus"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8a4964177ddfdab1e3a2b37aec7cf320e14169abb0ed73999f558136409178d5"
|
||||||
|
dependencies = [
|
||||||
|
"base64 0.21.2",
|
||||||
|
"hyper",
|
||||||
|
"indexmap 1.9.3",
|
||||||
|
"ipnet",
|
||||||
|
"metrics",
|
||||||
|
"metrics-util",
|
||||||
|
"quanta",
|
||||||
|
"thiserror",
|
||||||
|
"tokio",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-macros"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.28",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "metrics-util"
|
||||||
|
version = "0.15.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4de2ed6e491ed114b40b732e4d1659a9d53992ebd87490c44a6ffe23739d973e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"hashbrown 0.13.1",
|
||||||
|
"metrics",
|
||||||
|
"num_cpus",
|
||||||
|
"quanta",
|
||||||
|
"sketches-ddsketch",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mime"
|
name = "mime"
|
||||||
version = "0.3.17"
|
version = "0.3.17"
|
||||||
|
|
@ -3293,6 +3389,22 @@ dependencies = [
|
||||||
"checked_int_cast",
|
"checked_int_cast",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quanta"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
"libc",
|
||||||
|
"mach2",
|
||||||
|
"once_cell",
|
||||||
|
"raw-cpuid",
|
||||||
|
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||||
|
"web-sys",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "question"
|
name = "question"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
|
|
@ -3398,6 +3510,15 @@ dependencies = [
|
||||||
"rand_core 0.5.1",
|
"rand_core 0.5.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "raw-cpuid"
|
||||||
|
version = "10.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 1.3.2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rayon"
|
name = "rayon"
|
||||||
version = "1.7.0"
|
version = "1.7.0"
|
||||||
|
|
@ -4424,6 +4545,7 @@ dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"axum",
|
"axum",
|
||||||
|
"axum-prometheus",
|
||||||
"axum-streams",
|
"axum-streams",
|
||||||
"axum-tracing-opentelemetry",
|
"axum-tracing-opentelemetry",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
|
|
||||||
1
Makefile
1
Makefile
|
|
@ -26,6 +26,7 @@ bump-release-version:
|
||||||
update-openapi-doc:
|
update-openapi-doc:
|
||||||
curl http://localhost:8080/api-docs/openapi.json | jq ' \
|
curl http://localhost:8080/api-docs/openapi.json | jq ' \
|
||||||
delpaths([ \
|
delpaths([ \
|
||||||
|
["paths", "/v1/metrics"], \
|
||||||
["paths", "/v1beta/chat/completions"], \
|
["paths", "/v1beta/chat/completions"], \
|
||||||
["paths", "/v1beta/search"], \
|
["paths", "/v1beta/search"], \
|
||||||
["components", "schemas", "CompletionRequest", "properties", "prompt"], \
|
["components", "schemas", "CompletionRequest", "properties", "prompt"], \
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,7 @@ async-trait.workspace = true
|
||||||
tabby-webserver = { path = "../../ee/tabby-webserver", optional = true }
|
tabby-webserver = { path = "../../ee/tabby-webserver", optional = true }
|
||||||
thiserror.workspace = true
|
thiserror.workspace = true
|
||||||
chrono = "0.4.31"
|
chrono = "0.4.31"
|
||||||
|
axum-prometheus = "0.4.0"
|
||||||
|
|
||||||
[dependencies.uuid]
|
[dependencies.uuid]
|
||||||
version = "1.3.3"
|
version = "1.3.3"
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::extract::State;
|
||||||
|
use axum_prometheus::metrics_exporter_prometheus::PrometheusHandle;
|
||||||
|
|
||||||
|
#[utoipa::path(
|
||||||
|
get,
|
||||||
|
path = "/v1/metrics",
|
||||||
|
tag = "v1",
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "Success", body = String, content_type = "text/plain"),
|
||||||
|
)
|
||||||
|
)]
|
||||||
|
pub async fn metrics(State(state): State<Arc<PrometheusHandle>>) -> String {
|
||||||
|
state.render()
|
||||||
|
}
|
||||||
|
|
@ -2,10 +2,12 @@ mod chat;
|
||||||
mod completions;
|
mod completions;
|
||||||
mod events;
|
mod events;
|
||||||
mod health;
|
mod health;
|
||||||
|
mod metrics;
|
||||||
mod search;
|
mod search;
|
||||||
|
|
||||||
pub use chat::*;
|
pub use chat::*;
|
||||||
pub use completions::*;
|
pub use completions::*;
|
||||||
pub use events::*;
|
pub use events::*;
|
||||||
pub use health::*;
|
pub use health::*;
|
||||||
|
pub use metrics::*;
|
||||||
pub use search::*;
|
pub use search::*;
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ use std::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use axum::{routing, Router, Server};
|
use axum::{routing, Router, Server};
|
||||||
|
use axum_prometheus::{metrics_exporter_prometheus::PrometheusHandle, PrometheusMetricLayer};
|
||||||
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use tabby_common::{
|
use tabby_common::{
|
||||||
|
|
@ -49,7 +50,7 @@ Install following IDE / Editor extensions to get started with [Tabby](https://gi
|
||||||
servers(
|
servers(
|
||||||
(url = "/", description = "Server"),
|
(url = "/", description = "Server"),
|
||||||
),
|
),
|
||||||
paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search),
|
paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search, routes::metrics),
|
||||||
components(schemas(
|
components(schemas(
|
||||||
api::event::LogEventRequest,
|
api::event::LogEventRequest,
|
||||||
completion::CompletionRequest,
|
completion::CompletionRequest,
|
||||||
|
|
@ -108,9 +109,11 @@ pub async fn main(config: &Config, args: &ServeArgs) {
|
||||||
|
|
||||||
let logger = Arc::new(create_logger());
|
let logger = Arc::new(create_logger());
|
||||||
let code = Arc::new(create_code_search());
|
let code = Arc::new(create_code_search());
|
||||||
|
let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair();
|
||||||
|
let metrics_handle = Arc::new(prometheus_handle);
|
||||||
|
|
||||||
let app = Router::new()
|
let app = Router::new()
|
||||||
.merge(api_router(args, config, logger.clone(), code.clone()).await)
|
.merge(api_router(args, config, logger.clone(), code.clone(), metrics_handle).await)
|
||||||
.merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi()));
|
.merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi()));
|
||||||
|
|
||||||
#[cfg(feature = "ee")]
|
#[cfg(feature = "ee")]
|
||||||
|
|
@ -121,7 +124,8 @@ pub async fn main(config: &Config, args: &ServeArgs) {
|
||||||
|
|
||||||
let app = app
|
let app = app
|
||||||
.layer(CorsLayer::permissive())
|
.layer(CorsLayer::permissive())
|
||||||
.layer(opentelemetry_tracing_layer());
|
.layer(opentelemetry_tracing_layer())
|
||||||
|
.layer(prometheus_layer);
|
||||||
|
|
||||||
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
|
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
|
||||||
info!("Listening at {}", address);
|
info!("Listening at {}", address);
|
||||||
|
|
@ -148,6 +152,7 @@ async fn api_router(
|
||||||
config: &Config,
|
config: &Config,
|
||||||
logger: Arc<dyn EventLogger>,
|
logger: Arc<dyn EventLogger>,
|
||||||
code: Arc<dyn CodeSearch>,
|
code: Arc<dyn CodeSearch>,
|
||||||
|
metrics_handle: Arc<PrometheusHandle>,
|
||||||
) -> Router {
|
) -> Router {
|
||||||
let completion_state = if let Some(model) = &args.model {
|
let completion_state = if let Some(model) = &args.model {
|
||||||
Some(Arc::new(
|
Some(Arc::new(
|
||||||
|
|
@ -179,6 +184,7 @@ async fn api_router(
|
||||||
args.chat_model.as_deref(),
|
args.chat_model.as_deref(),
|
||||||
&args.device,
|
&args.device,
|
||||||
));
|
));
|
||||||
|
|
||||||
routers.push({
|
routers.push({
|
||||||
Router::new()
|
Router::new()
|
||||||
.route(
|
.route(
|
||||||
|
|
@ -193,6 +199,10 @@ async fn api_router(
|
||||||
"/v1/health",
|
"/v1/health",
|
||||||
routing::get(routes::health).with_state(health_state),
|
routing::get(routes::health).with_state(health_state),
|
||||||
)
|
)
|
||||||
|
.route(
|
||||||
|
"/v1/metrics",
|
||||||
|
routing::get(routes::metrics).with_state(metrics_handle),
|
||||||
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
if let Some(completion_state) = completion_state {
|
if let Some(completion_state) = completion_state {
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ use std::{
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use axum::{routing, Router};
|
use axum::{routing, Router};
|
||||||
|
use axum_prometheus::PrometheusMetricLayer;
|
||||||
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
use hyper::Server;
|
use hyper::Server;
|
||||||
|
|
@ -84,14 +85,22 @@ pub async fn main(kind: WorkerKind, args: &WorkerArgs) {
|
||||||
info!("Starting worker, this might takes a few minutes...");
|
info!("Starting worker, this might takes a few minutes...");
|
||||||
|
|
||||||
let context = WorkerContext::new(&args.url).await;
|
let context = WorkerContext::new(&args.url).await;
|
||||||
|
|
||||||
|
let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair();
|
||||||
|
|
||||||
let app = match kind {
|
let app = match kind {
|
||||||
WorkerKind::Completion => make_completion_route(context, args).await,
|
WorkerKind::Completion => make_completion_route(context, args).await,
|
||||||
WorkerKind::Chat => make_chat_route(context, args).await,
|
WorkerKind::Chat => make_chat_route(context, args).await,
|
||||||
};
|
};
|
||||||
|
|
||||||
let app = app
|
let app = app
|
||||||
|
.route(
|
||||||
|
"/v1/metrics",
|
||||||
|
routing::get(routes::metrics).with_state(Arc::new(prometheus_handle)),
|
||||||
|
)
|
||||||
.layer(CorsLayer::permissive())
|
.layer(CorsLayer::permissive())
|
||||||
.layer(opentelemetry_tracing_layer());
|
.layer(opentelemetry_tracing_layer())
|
||||||
|
.layer(prometheus_layer);
|
||||||
|
|
||||||
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
|
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
|
||||||
info!("Listening at {}", address);
|
info!("Listening at {}", address);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue