feat: add Prometheus support to Tabby. (#838)

* Added Prometheus support to Tabby.

1) Added `axum-prometheus` to Cargo.toml

2) Added `metrics.rs`, a simple route handler that serves the collected metrics in the Prometheus text format

3) Added `/v1/metrics` endpoint with API doc entry

4) Added the `PrometheusLayer` to the root layers.

This change allows an external Prometheus instance to scrape metrics from Tabby while it is running, in order to monitor operations (resources, timings and overall usage).

* Changed `application/text` to `text/plain` to adhere to RFC 1341

* Update Makefile

* Update Makefile

* Update Makefile

* Update Makefile

* Update Makefile

* Update Makefile

* Reworked code to fit upstream changes, and added the Prometheus layer and handle to worker.rs

* Update CHANGELOG.md

* [autofix.ci] apply automated fixes

* Simplified worker metrics route definition

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

---------

Co-authored-by: Meng Zhang <meng@tabbyml.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
release-fix-intellij-update-support-version-range
Jonathan Poisson 2023-11-19 18:40:57 -05:00 committed by GitHub
parent 7dbbfc39c2
commit 6dabecc3ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 169 additions and 7 deletions

View File

@ -4,6 +4,7 @@
* Add distribution support (running completion / chat model on different process / machine).
* Add conversation history in chat playground.
* Add `/v1/metrics` endpoint for prometheus metrics collection.
## Fixes and Improvements

122
Cargo.lock generated
View File

@ -486,6 +486,29 @@ dependencies = [
"tower-service",
]
[[package]]
name = "axum-prometheus"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97def327c5481791abb57ac295bfc70f2e1a0727675b7dbf74bd1b27a72b6fd8"
dependencies = [
"axum",
"axum-core",
"bytes",
"futures",
"futures-core",
"http",
"http-body",
"matchit",
"metrics",
"metrics-exporter-prometheus",
"once_cell",
"pin-project",
"tokio",
"tower",
"tower-http 0.4.0",
]
[[package]]
name = "axum-streams"
version = "0.9.1"
@ -1789,6 +1812,15 @@ dependencies = [
"ahash 0.7.7",
]
[[package]]
name = "hashbrown"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
dependencies = [
"ahash 0.8.3",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
@ -2433,6 +2465,15 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
[[package]]
name = "mach2"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8"
dependencies = [
"libc",
]
[[package]]
name = "matchers"
version = "0.0.1"
@ -2534,6 +2575,61 @@ dependencies = [
"autocfg",
]
[[package]]
name = "metrics"
version = "0.21.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fde3af1a009ed76a778cb84fdef9e7dbbdf5775ae3e4cc1f434a6a307f6f76c5"
dependencies = [
"ahash 0.8.3",
"metrics-macros",
"portable-atomic",
]
[[package]]
name = "metrics-exporter-prometheus"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a4964177ddfdab1e3a2b37aec7cf320e14169abb0ed73999f558136409178d5"
dependencies = [
"base64 0.21.2",
"hyper",
"indexmap 1.9.3",
"ipnet",
"metrics",
"metrics-util",
"quanta",
"thiserror",
"tokio",
"tracing",
]
[[package]]
name = "metrics-macros"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.28",
]
[[package]]
name = "metrics-util"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4de2ed6e491ed114b40b732e4d1659a9d53992ebd87490c44a6ffe23739d973e"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
"hashbrown 0.13.1",
"metrics",
"num_cpus",
"quanta",
"sketches-ddsketch",
]
[[package]]
name = "mime"
version = "0.3.17"
@ -3293,6 +3389,22 @@ dependencies = [
"checked_int_cast",
]
[[package]]
name = "quanta"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab"
dependencies = [
"crossbeam-utils",
"libc",
"mach2",
"once_cell",
"raw-cpuid",
"wasi 0.11.0+wasi-snapshot-preview1",
"web-sys",
"winapi",
]
[[package]]
name = "question"
version = "0.2.2"
@ -3398,6 +3510,15 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "raw-cpuid"
version = "10.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "rayon"
version = "1.7.0"
@ -4424,6 +4545,7 @@ dependencies = [
"async-stream",
"async-trait",
"axum",
"axum-prometheus",
"axum-streams",
"axum-tracing-opentelemetry",
"chrono",

View File

@ -24,8 +24,9 @@ bump-release-version:
cargo ws version --allow-branch "r*" --no-individual-tags --force "*"
update-openapi-doc:
curl http://localhost:8080/api-docs/openapi.json | jq ' \
delpaths([ \
curl http://localhost:8080/api-docs/openapi.json | jq ' \
delpaths([ \
["paths", "/v1/metrics"], \
["paths", "/v1beta/chat/completions"], \
["paths", "/v1beta/search"], \
["components", "schemas", "CompletionRequest", "properties", "prompt"], \
@ -37,4 +38,4 @@ update-openapi-doc:
> website/static/openapi.json
update-graphql-schema:
cargo run --package tabby-webserver --example update-schema
cargo run --package tabby-webserver --example update-schema

View File

@ -49,6 +49,7 @@ async-trait.workspace = true
tabby-webserver = { path = "../../ee/tabby-webserver", optional = true }
thiserror.workspace = true
chrono = "0.4.31"
axum-prometheus = "0.4.0"
[dependencies.uuid]
version = "1.3.3"

View File

@ -0,0 +1,16 @@
use std::sync::Arc;
use axum::extract::State;
use axum_prometheus::metrics_exporter_prometheus::PrometheusHandle;
#[utoipa::path(
get,
path = "/v1/metrics",
tag = "v1",
responses(
(status = 200, description = "Success", body = String, content_type = "text/plain"),
)
)]
pub async fn metrics(State(state): State<Arc<PrometheusHandle>>) -> String {
state.render()
}

View File

@ -2,10 +2,12 @@ mod chat;
mod completions;
mod events;
mod health;
mod metrics;
mod search;
pub use chat::*;
pub use completions::*;
pub use events::*;
pub use health::*;
pub use metrics::*;
pub use search::*;

View File

@ -5,6 +5,7 @@ use std::{
};
use axum::{routing, Router, Server};
use axum_prometheus::{metrics_exporter_prometheus::PrometheusHandle, PrometheusMetricLayer};
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
use clap::Args;
use tabby_common::{
@ -49,7 +50,7 @@ Install following IDE / Editor extensions to get started with [Tabby](https://gi
servers(
(url = "/", description = "Server"),
),
paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search),
paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search, routes::metrics),
components(schemas(
api::event::LogEventRequest,
completion::CompletionRequest,
@ -108,9 +109,11 @@ pub async fn main(config: &Config, args: &ServeArgs) {
let logger = Arc::new(create_logger());
let code = Arc::new(create_code_search());
let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair();
let metrics_handle = Arc::new(prometheus_handle);
let app = Router::new()
.merge(api_router(args, config, logger.clone(), code.clone()).await)
.merge(api_router(args, config, logger.clone(), code.clone(), metrics_handle).await)
.merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi()));
#[cfg(feature = "ee")]
@ -121,7 +124,8 @@ pub async fn main(config: &Config, args: &ServeArgs) {
let app = app
.layer(CorsLayer::permissive())
.layer(opentelemetry_tracing_layer());
.layer(opentelemetry_tracing_layer())
.layer(prometheus_layer);
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
info!("Listening at {}", address);
@ -148,6 +152,7 @@ async fn api_router(
config: &Config,
logger: Arc<dyn EventLogger>,
code: Arc<dyn CodeSearch>,
metrics_handle: Arc<PrometheusHandle>,
) -> Router {
let completion_state = if let Some(model) = &args.model {
Some(Arc::new(
@ -179,6 +184,7 @@ async fn api_router(
args.chat_model.as_deref(),
&args.device,
));
routers.push({
Router::new()
.route(
@ -193,6 +199,10 @@ async fn api_router(
"/v1/health",
routing::get(routes::health).with_state(health_state),
)
.route(
"/v1/metrics",
routing::get(routes::metrics).with_state(metrics_handle),
)
});
if let Some(completion_state) = completion_state {

View File

@ -6,6 +6,7 @@ use std::{
use anyhow::Result;
use axum::{routing, Router};
use axum_prometheus::PrometheusMetricLayer;
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
use clap::Args;
use hyper::Server;
@ -84,14 +85,22 @@ pub async fn main(kind: WorkerKind, args: &WorkerArgs) {
info!("Starting worker, this might takes a few minutes...");
let context = WorkerContext::new(&args.url).await;
let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair();
let app = match kind {
WorkerKind::Completion => make_completion_route(context, args).await,
WorkerKind::Chat => make_chat_route(context, args).await,
};
let app = app
.route(
"/v1/metrics",
routing::get(routes::metrics).with_state(Arc::new(prometheus_handle)),
)
.layer(CorsLayer::permissive())
.layer(opentelemetry_tracing_layer());
.layer(opentelemetry_tracing_layer())
.layer(prometheus_layer);
let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port));
info!("Listening at {}", address);