Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: add gpu info to health state [TAB-162] #364

Merged
merged 15 commits into from
Aug 21, 2023
105 changes: 99 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Make link to libnvidia-ml.so (NVML) library
# so that we could get GPU stats.
RUN ln -s /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 \
vodkaslime marked this conversation as resolved.
Show resolved Hide resolved
/usr/lib/x86_64-linux-gnu/libnvidia-ml.so

COPY --from=builder /opt/tabby /opt/tabby

ENV TABBY_ROOT=/data
Expand Down
1 change: 1 addition & 0 deletions crates/tabby/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ tracing-opentelemetry = "0.18.0"
tantivy = { workspace = true }
anyhow = { workspace = true }
sysinfo = "0.29.8"
nvml-wrapper = "0.9.0"


[dependencies.uuid]
Expand Down
89 changes: 89 additions & 0 deletions crates/tabby/src/serve/context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use anyhow::Result;
use nvml_wrapper::Nvml;
use sysinfo::{CpuExt, System, SystemExt};

// TabbyContext is the struct for global shared context
// to be used by router app.
pub struct TabbyContext {
    pub cpu_stats_manager: CPUStatsManager,
    pub gpu_stats_manager: GPUStatsManager,
}

impl TabbyContext {
    /// Builds a context with freshly initialized CPU and GPU stats managers.
    pub fn new() -> Self {
        Self {
            cpu_stats_manager: CPUStatsManager::init(),
            gpu_stats_manager: GPUStatsManager::init(),
        }
    }
}

// Clippy `new_without_default`: a no-argument `new()` should also be
// reachable through the standard `Default` trait.
impl Default for TabbyContext {
    fn default() -> Self {
        Self::new()
    }
}

// A point-in-time snapshot of host CPU information, surfaced via the
// health endpoint.
pub struct CPUStat {
    // Brand string of the first detected CPU; "unknown" when no CPU is
    // reported by the system probe.
    pub info: String,
    // Number of logical CPUs visible to the process.
    pub count: usize,
}
pub struct CPUStatsManager {
system: System,
}

impl CPUStatsManager {
pub fn init() -> Self {
Self {
system: System::new_all(),
}
}

pub fn get_stats(self: &mut Self) -> CPUStat {
vodkaslime marked this conversation as resolved.
Show resolved Hide resolved
self.system.refresh_cpu();
let cpus = self.system.cpus();
let count = cpus.len();
let info = if count > 0 {
let cpu = &cpus[0];
cpu.brand().to_string()
} else {
"unknown".to_string()
};

CPUStat { info, count }
}
}

pub struct GPUStatsManager {
    // `None` when the compilation/runtime environment has no usable
    // CUDA/NVML, in which case GPU stats are simply unavailable.
    nvml: Option<Nvml>,
}

impl GPUStatsManager {
    /// Tries to initialize NVML. On macOS, or in docker containers where
    /// `--gpus` was not specified, `Nvml::init()` returns an error; in
    /// these scenarios we store `None`, indicating that the current
    /// runtime environment does not support the CUDA interface.
    pub fn init() -> Self {
        Self {
            // `Result::ok()` collapses the Ok/Err match into the Option we want.
            nvml: Nvml::init().ok(),
        }
    }

    /// Returns the names of all NVML-visible GPU devices, in index order.
    ///
    /// Returns an empty list when NVML is unavailable in this environment;
    /// propagates NVML errors encountered while querying devices.
    pub fn get_stats(&self) -> Result<Vec<String>> {
        let nvml = match &self.nvml {
            Some(nvml) => nvml,
            // CUDA is not supported in the runtime environment: report no GPUs.
            None => return Ok(vec![]),
        };

        let device_count = nvml.device_count()?;
        let mut res = Vec::with_capacity(device_count as usize);
        for i in 0..device_count {
            res.push(nvml.device_by_index(i)?.name()?);
        }
        Ok(res)
    }
}
25 changes: 13 additions & 12 deletions crates/tabby/src/serve/health.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ use std::{env::consts::ARCH, sync::Arc};

use axum::{extract::State, Json};
use serde::{Deserialize, Serialize};
use sysinfo::{CpuExt, System, SystemExt};
use utoipa::ToSchema;

use super::context::TabbyContext;

#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
pub struct HealthState {
model: String,
Expand All @@ -13,28 +14,28 @@ pub struct HealthState {
arch: String,
cpu_info: String,
cpu_count: usize,
gpu_info: Vec<String>,
version: Version,
}

impl HealthState {
pub fn new(args: &super::ServeArgs) -> Self {
let mut sys = System::new_all();
sys.refresh_cpu();
let cpus = sys.cpus();
let cpu_info = if !cpus.is_empty() {
let cpu = &cpus[0];
cpu.brand().to_string()
} else {
"unknown".to_string()
pub fn new(args: &super::ServeArgs, context: &mut TabbyContext) -> Self {
let cpu_stats = context.cpu_stats_manager.get_stats();

let gpu_info_res = context.gpu_stats_manager.get_stats();
let gpu_info = match gpu_info_res {
Ok(s) => s,
Err(_) => vec![],
};

Self {
model: args.model.clone(),
device: args.device.to_string(),
compute_type: args.compute_type.to_string(),
arch: ARCH.to_string(),
cpu_info,
cpu_count: cpus.len(),
cpu_info: cpu_stats.info,
cpu_count: cpu_stats.count,
gpu_info,
wsxiaoys marked this conversation as resolved.
Show resolved Hide resolved
version: Version::new(),
}
}
Expand Down
Loading
Loading