Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: add gpu info to health state [TAB-162] #364

Merged
merged 15 commits into from
Aug 21, 2023
105 changes: 99 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ RUN apt-get update && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Make link to libnvidia-ml.so (NVML) library
# so that we could get GPU stats.
RUN ln -s /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 \
vodkaslime marked this conversation as resolved.
Show resolved Hide resolved
/usr/lib/x86_64-linux-gnu/libnvidia-ml.so

COPY --from=builder /opt/tabby /opt/tabby

ENV TABBY_ROOT=/data
Expand Down
1 change: 1 addition & 0 deletions crates/tabby/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ tracing-opentelemetry = "0.18.0"
tantivy = { workspace = true }
anyhow = { workspace = true }
sysinfo = "0.29.8"
nvml-wrapper = "0.9.0"


[dependencies.uuid]
Expand Down
89 changes: 89 additions & 0 deletions crates/tabby/src/serve/context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use anyhow::Result;
use nvml_wrapper::Nvml;
use sysinfo::{CpuExt, System, SystemExt};

// TabbyContext is the struct for global shared context
// to be used by router app.
pub struct TabbyContext {
    pub cpu_stats_manager: CPUStatsManager,
    pub gpu_stats_manager: GPUStatsManager,
}

impl TabbyContext {
    /// Builds a context with freshly initialized CPU and GPU stats managers.
    pub fn new() -> Self {
        Self {
            cpu_stats_manager: CPUStatsManager::init(),
            gpu_stats_manager: GPUStatsManager::init(),
        }
    }
}

// Clippy `new_without_default`: a no-argument `new()` should also be
// reachable through the standard `Default` trait.
impl Default for TabbyContext {
    fn default() -> Self {
        Self::new()
    }
}

// A point-in-time snapshot of host CPU information, surfaced via the
// health endpoint.
pub struct CPUStat {
    // Brand string of the first detected CPU; "unknown" when no CPU is
    // reported by the system probe.
    pub info: String,
    // Number of logical CPUs visible to the process.
    pub count: usize,
}
pub struct CPUStatsManager {
system: System,
}

impl CPUStatsManager {
pub fn init() -> Self {
Self {
system: System::new_all(),
}
}

pub fn get_stats(self: &mut Self) -> CPUStat {
vodkaslime marked this conversation as resolved.
Show resolved Hide resolved
self.system.refresh_cpu();
let cpus = self.system.cpus();
let count = cpus.len();
let info = if count > 0 {
let cpu = &cpus[0];
cpu.brand().to_string()
} else {
"unknown".to_string()
};

CPUStat { info, count }
}
}

pub struct GPUStatsManager {
    // `None` when the compilation/runtime environment has no usable
    // CUDA/NVML, in which case GPU stats are simply unavailable.
    nvml: Option<Nvml>,
}

impl GPUStatsManager {
    /// Tries to initialize NVML. On macOS, or in docker containers where
    /// `--gpus` was not specified, `Nvml::init()` returns an error; in
    /// these scenarios we store `None`, indicating that the current
    /// runtime environment does not support the CUDA interface.
    pub fn init() -> Self {
        Self {
            // `Result::ok()` collapses the Ok/Err match into the Option we want.
            nvml: Nvml::init().ok(),
        }
    }

    /// Returns the names of all NVML-visible GPU devices, in index order.
    ///
    /// Returns an empty list when NVML is unavailable in this environment;
    /// propagates NVML errors encountered while querying devices.
    pub fn get_stats(&self) -> Result<Vec<String>> {
        let nvml = match &self.nvml {
            Some(nvml) => nvml,
            // CUDA is not supported in the runtime environment: report no GPUs.
            None => return Ok(vec![]),
        };

        let device_count = nvml.device_count()?;
        let mut res = Vec::with_capacity(device_count as usize);
        for i in 0..device_count {
            res.push(nvml.device_by_index(i)?.name()?);
        }
        Ok(res)
    }
}
25 changes: 13 additions & 12 deletions crates/tabby/src/serve/health.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ use std::{env::consts::ARCH, sync::Arc};

use axum::{extract::State, Json};
use serde::{Deserialize, Serialize};
use sysinfo::{CpuExt, System, SystemExt};
use utoipa::ToSchema;

use super::context::TabbyContext;

#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
pub struct HealthState {
model: String,
Expand All @@ -13,28 +14,28 @@ pub struct HealthState {
arch: String,
cpu_info: String,
cpu_count: usize,
gpu_info: Vec<String>,
version: Version,
}

impl HealthState {
pub fn new(args: &super::ServeArgs) -> Self {
let mut sys = System::new_all();
sys.refresh_cpu();
let cpus = sys.cpus();
let cpu_info = if !cpus.is_empty() {
let cpu = &cpus[0];
cpu.brand().to_string()
} else {
"unknown".to_string()
pub fn new(args: &super::ServeArgs, context: &mut TabbyContext) -> Self {
let cpu_stats = context.cpu_stats_manager.get_stats();

let gpu_info_res = context.gpu_stats_manager.get_stats();
let gpu_info = match gpu_info_res {
Ok(s) => s,
Err(_) => vec![],
};

Self {
model: args.model.clone(),
device: args.device.to_string(),
compute_type: args.compute_type.to_string(),
arch: ARCH.to_string(),
cpu_info,
cpu_count: cpus.len(),
cpu_info: cpu_stats.info,
cpu_count: cpu_stats.count,
gpu_info,
wsxiaoys marked this conversation as resolved.
Show resolved Hide resolved
version: Version::new(),
}
}
Expand Down
Loading
Loading