feat: add Prometheus support to Tabby. (#838)
* Added Prometheus support to Tabby.
  1) Added `axum-prometheus` to Cargo.toml
  2) Added `metrics.rs` as a simple route to produce a metrics endpoint with prometheus-formatted events
  3) Added `/v1/metrics` endpoint with API doc entry
  4) Added the `PrometheusLayer` to the root layers.
  This change effectively allows for external Prometheus to scrape metrics from Tabby during execution in order to monitor operations (resources, timings and overall usage).
* changed application/text to text/plain to adhere to RFC 1341
* Update Makefile
* Update Makefile
* Update Makefile
* Update Makefile
* Update Makefile
* Update Makefile
* - Reworked code to fit upstream changes - Added Prometheus layer and handle to worker.rs
* Update CHANGELOG.md
* [autofix.ci] apply automated fixes
* Simplified worker metrics route definition
* [autofix.ci] apply automated fixes
* [autofix.ci] apply automated fixes (attempt 2/3)
---------
Co-authored-by: Meng Zhang <meng@tabbyml.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
release-fix-intellij-update-support-version-range
parent
7dbbfc39c2
commit
6dabecc3ee
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
* Add distribution support (running completion / chat model on different process / machine).
|
||||
* Add conversation history in chat playground.
|
||||
* Add `/v1/metrics` endpoint for Prometheus metrics collection.
|
||||
|
||||
## Fixes and Improvements
|
||||
|
||||
|
|
|
|||
|
|
@ -486,6 +486,29 @@ dependencies = [
|
|||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-prometheus"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97def327c5481791abb57ac295bfc70f2e1a0727675b7dbf74bd1b27a72b6fd8"
|
||||
dependencies = [
|
||||
"axum",
|
||||
"axum-core",
|
||||
"bytes",
|
||||
"futures",
|
||||
"futures-core",
|
||||
"http",
|
||||
"http-body",
|
||||
"matchit",
|
||||
"metrics",
|
||||
"metrics-exporter-prometheus",
|
||||
"once_cell",
|
||||
"pin-project",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tower-http 0.4.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-streams"
|
||||
version = "0.9.1"
|
||||
|
|
@ -1789,6 +1812,15 @@ dependencies = [
|
|||
"ahash 0.7.7",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
|
|
@ -2433,6 +2465,15 @@ version = "0.11.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
|
||||
|
||||
[[package]]
|
||||
name = "mach2"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.0.1"
|
||||
|
|
@ -2534,6 +2575,61 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fde3af1a009ed76a778cb84fdef9e7dbbdf5775ae3e4cc1f434a6a307f6f76c5"
|
||||
dependencies = [
|
||||
"ahash 0.8.3",
|
||||
"metrics-macros",
|
||||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics-exporter-prometheus"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a4964177ddfdab1e3a2b37aec7cf320e14169abb0ed73999f558136409178d5"
|
||||
dependencies = [
|
||||
"base64 0.21.2",
|
||||
"hyper",
|
||||
"indexmap 1.9.3",
|
||||
"ipnet",
|
||||
"metrics",
|
||||
"metrics-util",
|
||||
"quanta",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics-macros"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metrics-util"
|
||||
version = "0.15.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4de2ed6e491ed114b40b732e4d1659a9d53992ebd87490c44a6ffe23739d973e"
|
||||
dependencies = [
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
"hashbrown 0.13.1",
|
||||
"metrics",
|
||||
"num_cpus",
|
||||
"quanta",
|
||||
"sketches-ddsketch",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
|
|
@ -3293,6 +3389,22 @@ dependencies = [
|
|||
"checked_int_cast",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quanta"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
"libc",
|
||||
"mach2",
|
||||
"once_cell",
|
||||
"raw-cpuid",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"web-sys",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "question"
|
||||
version = "0.2.2"
|
||||
|
|
@ -3398,6 +3510,15 @@ dependencies = [
|
|||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw-cpuid"
|
||||
version = "10.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.7.0"
|
||||
|
|
@ -4424,6 +4545,7 @@ dependencies = [
|
|||
"async-stream",
|
||||
"async-trait",
|
||||
"axum",
|
||||
"axum-prometheus",
|
||||
"axum-streams",
|
||||
"axum-tracing-opentelemetry",
|
||||
"chrono",
|
||||
|
|
|
|||
7
Makefile
7
Makefile
|
|
@ -24,8 +24,9 @@ bump-release-version:
|
|||
cargo ws version --allow-branch "r*" --no-individual-tags --force "*"
|
||||
|
||||
update-openapi-doc:
|
||||
curl http://localhost:8080/api-docs/openapi.json | jq ' \
|
||||
delpaths([ \
|
||||
curl http://localhost:8080/api-docs/openapi.json | jq ' \
|
||||
delpaths([ \
|
||||
["paths", "/v1/metrics"], \
|
||||
["paths", "/v1beta/chat/completions"], \
|
||||
["paths", "/v1beta/search"], \
|
||||
["components", "schemas", "CompletionRequest", "properties", "prompt"], \
|
||||
|
|
@ -37,4 +38,4 @@ update-openapi-doc:
|
|||
> website/static/openapi.json
|
||||
|
||||
update-graphql-schema:
|
||||
cargo run --package tabby-webserver --example update-schema
|
||||
cargo run --package tabby-webserver --example update-schema
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ async-trait.workspace = true
|
|||
tabby-webserver = { path = "../../ee/tabby-webserver", optional = true }
|
||||
thiserror.workspace = true
|
||||
chrono = "0.4.31"
|
||||
axum-prometheus = "0.4.0"
|
||||
|
||||
[dependencies.uuid]
|
||||
version = "1.3.3"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::State;
|
||||
use axum_prometheus::metrics_exporter_prometheus::PrometheusHandle;
|
||||
|
||||
// Handler for `GET /v1/metrics` (method and path come from the `utoipa::path` attribute below).
// It takes the shared `PrometheusHandle` out of axum `State` and returns whatever
// `PrometheusHandle::render` produces as the response body. The OpenAPI entry declares
// that body as a `text/plain` string returned with status 200.
// NOTE(review): written as `//` rather than `///` because utoipa is documented to copy a
// handler's doc comments into the generated OpenAPI summary/description — confirm before converting.
#[utoipa::path(
|
||||
get,
|
||||
path = "/v1/metrics",
|
||||
tag = "v1",
|
||||
responses(
|
||||
(status = 200, description = "Success", body = String, content_type = "text/plain"),
|
||||
)
|
||||
)]
|
||||
pub async fn metrics(State(state): State<Arc<PrometheusHandle>>) -> String {
|
||||
// `state` is the `Arc<PrometheusHandle>` attached to this route via `.with_state(...)`.
state.render()
|
||||
}
|
||||
|
|
@ -2,10 +2,12 @@ mod chat;
|
|||
mod completions;
|
||||
mod events;
|
||||
mod health;
|
||||
mod metrics;
|
||||
mod search;
|
||||
|
||||
pub use chat::*;
|
||||
pub use completions::*;
|
||||
pub use events::*;
|
||||
pub use health::*;
|
||||
pub use metrics::*;
|
||||
pub use search::*;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ use std::{
|
|||
};
|
||||
|
||||
use axum::{routing, Router, Server};
|
||||
use axum_prometheus::{metrics_exporter_prometheus::PrometheusHandle, PrometheusMetricLayer};
|
||||
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
||||
use clap::Args;
|
||||
use tabby_common::{
|
||||
|
|
@ -49,7 +50,7 @@ Install following IDE / Editor extensions to get started with [Tabby](https://gi
|
|||
servers(
|
||||
(url = "/", description = "Server"),
|
||||
),
|
||||
paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search),
|
||||
paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search, routes::metrics),
|
||||
components(schemas(
|
||||
api::event::LogEventRequest,
|
||||
completion::CompletionRequest,
|
||||
|
|
@ -108,9 +109,11 @@ pub async fn main(config: &Config, args: &ServeArgs) {
|
|||
|
||||
let logger = Arc::new(create_logger());
|
||||
let code = Arc::new(create_code_search());
|
||||
let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair();
|
||||
let metrics_handle = Arc::new(prometheus_handle);
|
||||
|
||||
let app = Router::new()
|
||||
.merge(api_router(args, config, logger.clone(), code.clone()).await)
|
||||
.merge(api_router(args, config, logger.clone(), code.clone(), metrics_handle).await)
|
||||
.merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi()));
|
||||
|
||||
#[cfg(feature = "ee")]
|
||||
|
|
@ -121,7 +124,8 @@ pub async fn main(config: &Config, args: &ServeArgs) {
|
|||
|
||||
let app = app
|
||||
.layer(CorsLayer::permissive())
|
||||
.layer(opentelemetry_tracing_layer());
|
||||
.layer(opentelemetry_tracing_layer())
|
||||
.layer(prometheus_layer);
|
||||
|
||||
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
|
||||
info!("Listening at {}", address);
|
||||
|
|
@ -148,6 +152,7 @@ async fn api_router(
|
|||
config: &Config,
|
||||
logger: Arc<dyn EventLogger>,
|
||||
code: Arc<dyn CodeSearch>,
|
||||
metrics_handle: Arc<PrometheusHandle>,
|
||||
) -> Router {
|
||||
let completion_state = if let Some(model) = &args.model {
|
||||
Some(Arc::new(
|
||||
|
|
@ -179,6 +184,7 @@ async fn api_router(
|
|||
args.chat_model.as_deref(),
|
||||
&args.device,
|
||||
));
|
||||
|
||||
routers.push({
|
||||
Router::new()
|
||||
.route(
|
||||
|
|
@ -193,6 +199,10 @@ async fn api_router(
|
|||
"/v1/health",
|
||||
routing::get(routes::health).with_state(health_state),
|
||||
)
|
||||
.route(
|
||||
"/v1/metrics",
|
||||
routing::get(routes::metrics).with_state(metrics_handle),
|
||||
)
|
||||
});
|
||||
|
||||
if let Some(completion_state) = completion_state {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use std::{
|
|||
|
||||
use anyhow::Result;
|
||||
use axum::{routing, Router};
|
||||
use axum_prometheus::PrometheusMetricLayer;
|
||||
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
||||
use clap::Args;
|
||||
use hyper::Server;
|
||||
|
|
@ -84,14 +85,22 @@ pub async fn main(kind: WorkerKind, args: &WorkerArgs) {
|
|||
info!("Starting worker, this might takes a few minutes...");
|
||||
|
||||
let context = WorkerContext::new(&args.url).await;
|
||||
|
||||
let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair();
|
||||
|
||||
let app = match kind {
|
||||
WorkerKind::Completion => make_completion_route(context, args).await,
|
||||
WorkerKind::Chat => make_chat_route(context, args).await,
|
||||
};
|
||||
|
||||
let app = app
|
||||
.route(
|
||||
"/v1/metrics",
|
||||
routing::get(routes::metrics).with_state(Arc::new(prometheus_handle)),
|
||||
)
|
||||
.layer(CorsLayer::permissive())
|
||||
.layer(opentelemetry_tracing_layer());
|
||||
.layer(opentelemetry_tracing_layer())
|
||||
.layer(prometheus_layer);
|
||||
|
||||
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
|
||||
info!("Listening at {}", address);
|
||||
|
|
|
|||
Loading…
Reference in New Issue