diff --git a/Cargo.lock b/Cargo.lock index 27fdfc5..9a8894a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -431,7 +431,7 @@ dependencies = [ "anstyle", "bitflags", "clap_lex", - "strsim", + "strsim 0.10.0", ] [[package]] @@ -671,14 +671,38 @@ dependencies = [ "syn 2.0.28", ] +[[package]] +name = "darling" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" +dependencies = [ + "darling_core 0.10.2", + "darling_macro 0.10.2", +] + [[package]] name = "darling" version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.14.4", + "darling_macro 0.14.4", +] + +[[package]] +name = "darling_core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.9.3", + "syn 1.0.109", ] [[package]] @@ -691,7 +715,18 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", + "strsim 0.10.0", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" +dependencies = [ + "darling_core 0.10.2", + "quote", "syn 1.0.109", ] @@ -701,7 +736,7 @@ version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ - "darling_core", + "darling_core 0.14.4", "quote", "syn 1.0.109", ] @@ -743,7 +778,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" dependencies = [ - "darling", + "darling 0.14.4", "proc-macro2", "quote", "syn 1.0.109", @@ -1487,6 +1522,16 @@ version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "link-cplusplus" version = "1.0.8" @@ -1808,6 +1853,29 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "nvml-wrapper" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd21b9f5a1cce3c3515c9ffa85f5c7443e07162dae0ccf4339bb7ca38ad3454" +dependencies = [ + "bitflags", + "libloading", + "nvml-wrapper-sys", + "static_assertions", + "thiserror", + "wrapcenum-derive", +] + +[[package]] +name = "nvml-wrapper-sys" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c961a2ea9e91c59a69b78e69090f6f5b867bb46c0c56de9482da232437c4987e" +dependencies = [ + "libloading", +] + [[package]] name = "object" version = "0.30.3" @@ -2733,12 +2801,24 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strfmt" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7a8348af2d9fc3258c8733b8d9d8db2e56f54b2363a4b5b81585c7875ed65e65" +[[package]] +name = "strsim" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" + [[package]] name = "strsim" version = "0.10.0" @@ -2828,6 +2908,7 @@ dependencies = [ "hyper", "lazy_static", "mime_guess", + "nvml-wrapper", "opentelemetry", "opentelemetry-otlp", "rust-embed", @@ -4143,6 +4224,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "wrapcenum-derive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bcc065c85ad2c3bd12aa4118bf164835712e25080c392557801a13292c60aec" +dependencies = [ + "darling 0.10.2", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "xattr" version = "0.2.3" diff --git a/Dockerfile b/Dockerfile index 02069c9..1132eaf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,11 @@ RUN apt-get update && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +# Make link to libnvidia-ml.so (NVML) library +# so that we could get GPU stats. 
+RUN ln -s /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 \ + /usr/lib/x86_64-linux-gnu/libnvidia-ml.so + COPY --from=builder /opt/tabby /opt/tabby ENV TABBY_ROOT=/data diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml index 51a6125..290c304 100644 --- a/crates/tabby/Cargo.toml +++ b/crates/tabby/Cargo.toml @@ -34,6 +34,7 @@ tracing-opentelemetry = "0.18.0" tantivy = { workspace = true } anyhow = { workspace = true } sysinfo = "0.29.8" +nvml-wrapper = "0.9.0" [dependencies.uuid] diff --git a/crates/tabby/src/serve/health.rs b/crates/tabby/src/serve/health.rs index e97eb87..2ef5e6c 100644 --- a/crates/tabby/src/serve/health.rs +++ b/crates/tabby/src/serve/health.rs @@ -1,6 +1,8 @@ use std::{env::consts::ARCH, sync::Arc}; +use anyhow::Result; use axum::{extract::State, Json}; +use nvml_wrapper::Nvml; use serde::{Deserialize, Serialize}; use sysinfo::{CpuExt, System, SystemExt}; use utoipa::ToSchema; @@ -13,19 +15,17 @@ pub struct HealthState { arch: String, cpu_info: String, cpu_count: usize, + cuda_devices: Vec<String>, version: Version, } impl HealthState { pub fn new(args: &super::ServeArgs) -> Self { - let mut sys = System::new_all(); - sys.refresh_cpu(); - let cpus = sys.cpus(); - let cpu_info = if !cpus.is_empty() { - let cpu = &cpus[0]; - cpu.brand().to_string() - } else { - "unknown".to_string() + let (cpu_info, cpu_count) = read_cpu_info(); + + let cuda_devices = match read_cuda_devices() { + Ok(s) => s, + Err(_) => vec![], }; Self { @@ -34,12 +34,43 @@ impl HealthState { compute_type: args.compute_type.to_string(), arch: ARCH.to_string(), cpu_info, - cpu_count: cpus.len(), + cpu_count, + cuda_devices, version: Version::new(), } } } +fn read_cpu_info() -> (String, usize) { + let mut system = System::new_all(); + system.refresh_cpu(); + let cpus = system.cpus(); + let count = cpus.len(); + let info = if count > 0 { + let cpu = &cpus[0]; + cpu.brand().to_string() + } else { + "unknown".to_string() + }; + + (info, count) +} + +fn read_cuda_devices() -> 
Result<Vec<String>> { + // On macOS, or in Docker containers started without the --gpus flag, + // Nvml::init() returns an error. In these scenarios the caller leaves + // cuda_devices empty, indicating that the current runtime + // environment does not support the CUDA interface. + let nvml = Nvml::init()?; + let mut cuda_devices = vec![]; + let device_count = nvml.device_count()?; + for i in 0..device_count { + let name = nvml.device_by_index(i)?.name()?; + cuda_devices.push(name); + } + Ok(cuda_devices) +} + #[derive(Serialize, Deserialize, ToSchema, Clone, Debug)] pub struct Version { build_date: String,