Skip to content

Commit

Permalink
xcp-rrdd: detect a reason for IPMI readings being unavailable
Browse files Browse the repository at this point in the history
When the IPMI devices are missing, ipmitool reports it on its error output (stderr):

$ /usr/bin/ipmitool -I open dcmi discover
Could not open device at /dev/ipmi0 or /dev/ipmi/0 or /dev/ipmidev/0:
No such file or directory

Signed-off-by: Guillaume <[email protected]>
  • Loading branch information
gthvn1 committed Feb 24, 2025
1 parent 11b677d commit fd816cf
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 72 deletions.
2 changes: 1 addition & 1 deletion ocaml/xcp-rrdd/bin/rrdp-dcmi/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
(modes exe)
(name rrdp_dcmi)
(libraries

rrdd-plugin
rrdd-plugin.base
rrdd_plugins_libs
xapi-idl.rrd
xapi-log
Expand Down
61 changes: 42 additions & 19 deletions ocaml/xcp-rrdd/bin/rrdp-dcmi/rrdp_dcmi.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,29 +29,47 @@ let ipmitool args =
(* we connect to the local /dev/ipmi0 if available to read measurements from local BMC *)
ipmitool_bin :: args |> String.concat " "

(** Reasons that discovery can report for DCMI readings being unavailable. *)
type discovery_error = Devices_missing

(** [discovery_error_to_string err] is the human-readable description of
    [err], used when logging why the plugin stops. *)
let discovery_error_to_string err =
  match err with Devices_missing -> "IPMI devices are missing"

(** [discover ()] runs [ipmitool dcmi discover] through [Utils.exec_cmd] and
classifies its output line by line: a stdout line that equals
"Power management available" after trimming yields [()], and a stderr line
that starts with "Could not open device at" after trimming yields
[Devices_missing]. Every line seen is also logged at debug level. *)
let discover () =
(* Recognises the stdout line announcing that power management is available. *)
let read_out_line line =
(* this code runs once on startup, logging all the output here will be useful for debugging *)
D.debug "DCMI discover: %s" line ;
let line = String.trim line in
if String.equal line "Power management available" then
Some ()
else
None
in
(* Recognises the stderr line printed when no IPMI device could be opened. *)
let read_err_line line =
(* this code runs once on startup, logging all the output here will be useful for debugging *)
D.debug "DCMI discover: %s" line ;
let line = String.trim line in
if String.starts_with ~prefix:"Could not open device at" line then
Some Devices_missing
else
None
in
Utils.exec_cmd
(module Process.D)
~cmdstring:(ipmitool ["dcmi"; "discover"])
~f:(fun line ->
(* this code runs once on startup, logging all the output here will be useful for debugging *)
D.debug "DCMI discover: %s" line ;
if String.trim line = "Power management available" then
Some ()
else
None
)
~read_out_line ~read_err_line

(** [get_dcmi_power_reading ()] runs [ipmitool dcmi power reading] through
[Utils.exec_cmd] and extracts the float from every stdout line that matches
the "Instantaneous power reading" format. Stderr lines are ignored. *)
let get_dcmi_power_reading () =
let read_out_line line =
(* example line: ' Instantaneous power reading: 34 Watts' *)
(* Lines that do not match the format are skipped rather than treated as errors. *)
try Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some
with Scanf.Scan_failure _ | End_of_file -> None
in
(* Nothing is extracted from stderr for this command. *)
let read_err_line _ = None in
Utils.exec_cmd
(module Process.D)
~cmdstring:(ipmitool ["dcmi"; "power"; "reading"])
~f:(fun line ->
(* example line: ' Instantaneous power reading: 34 Watts' *)
try
Scanf.sscanf line " Instantaneous power reading : %f Watts" Option.some
with Scanf.Scan_failure _ | End_of_file -> None
)
~read_out_line ~read_err_line

let gen_dcmi_power_reading value =
( Rrd.Host
Expand All @@ -63,18 +81,23 @@ let gen_dcmi_power_reading value =

(** [generate_dss ()] takes the first power reading returned by
[get_dcmi_power_reading], if any, and wraps it with
[gen_dcmi_power_reading]. It returns the empty list when no reading was
parsed. *)
let generate_dss () =
match get_dcmi_power_reading () with
| watts :: _ ->
| watts :: _, _ ->
[gen_dcmi_power_reading watts]
| _ ->
[]

(* Plugin entry point: initialise, run DCMI discovery once, then either start
the reporting loop or log why power readings are unavailable.
NOTE(review): [let _ =] accepts a result of any type; [let () =] would make
the compiler check that every branch is [unit] - consider switching. *)
let _ =
initialise () ;
match discover () with
| [] ->
D.warn "IPMI DCMI power readings not available, stopping." ;
exit 0
| _ ->
(* At least one stdout line confirmed that power management is available. *)
| () :: _, _ ->
D.info "IPMI DCMI power reading is available" ;
main_loop ~neg_shift:0.5 ~target:(Reporter.Local 1)
~protocol:Rrd_interface.V2 ~dss_f:generate_dss
(* No confirmation on stdout: report the first error recognised on stderr,
or "unknown" when none was recognised. *)
| [], errs ->
let reason =
List.nth_opt errs 0
|> Option.map discovery_error_to_string
|> Option.value ~default:"unknown"
in
D.warn "IPMI DCMI power readings not available, stopping. Reason: %s"
reason
1 change: 1 addition & 0 deletions ocaml/xcp-rrdd/bin/rrdp-iostat/dune
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
mtime
mtime.clock.os
rrdd-plugin
rrdd-plugin.base
rrdd_plugin_xenctrl
rrdd_plugins_libs
str
Expand Down
16 changes: 11 additions & 5 deletions ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ module Iostat = struct

(* Keep track of how many results headers we've seen so far *)
let parsing_section = ref 0 in
let process_line str =
let read_out_line str =
let res = Utils.cut str in
(* Keep values from the second set of outputs *)
( if !parsing_section = 2 then
Expand All @@ -151,7 +151,10 @@ module Iostat = struct
(* 2 iterations; 1 second between them *)

(* Iterate through each line and populate dev_values_map *)
let _ = Utils.exec_cmd (module Process.D) ~cmdstring ~f:process_line in
let read_err_line _ = None in
let _ =
Utils.exec_cmd (module Process.D) ~cmdstring ~read_out_line ~read_err_line
in

(* Now read the values out of dev_values_map for devices for which we have data *)
List.filter_map
Expand Down Expand Up @@ -341,16 +344,19 @@ let exec_tap_ctl_list () : ((string * string) * int) list =
D.error "Could not find device with physical path %s" phypath ;
None
in
let process_line str =
let read_out_line str =
try Scanf.sscanf str "pid=%d minor=%d state=%s args=%s@:%s" extract_vdis
with Scanf.Scan_failure _ | Failure _ | End_of_file ->
D.warn {|"%s" returned a line that could not be parsed. Ignoring.|}
tap_ctl ;
D.warn "Offending line: %s" str ;
None
in
let pid_and_minor_to_sr_and_vdi =
Utils.exec_cmd (module Process.D) ~cmdstring:tap_ctl ~f:process_line
let read_err_line _ = None in
let pid_and_minor_to_sr_and_vdi, _ =
Utils.exec_cmd
(module Process.D)
~cmdstring:tap_ctl ~read_out_line ~read_err_line
in
let sr_and_vdi_to_minor =
List.map
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xcp-rrdd/bin/rrdp-xenpm/dune
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
(modes exe)
(name rrdp_xenpm)
(libraries

rrdd-plugin
rrdd-plugin.base
rrdd_plugins_libs
str
xapi-idl.rrd
Expand Down
12 changes: 8 additions & 4 deletions ocaml/xcp-rrdd/bin/rrdp-xenpm/rrdp_xenpm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -57,27 +57,30 @@ let gen_pm_cpu_averages cpu_id time =

(** [get_cpu_averages ()] runs [xenpm_bin] with the argument
"get-cpufreq-average" through [Utils.exec_cmd] and returns, as [int64]
values, the digits captured from each stdout line that matches
"average cpu frequency:" followed by a number. Stderr lines are ignored. *)
let get_cpu_averages () : int64 list =
let pattern = Str.regexp "average cpu frequency:[ \t]+\\([0-9]+\\)[ \t]*$" in
let match_fun s =
let read_out_line s =
(* Group 1 of [pattern] holds the digits that follow the label. *)
if Str.string_match pattern s 0 then
Some (Int64.of_string (Str.matched_group 1 s))
else
None
in
(* Nothing is extracted from stderr for this command. *)
let read_err_line _ = None in
Utils.exec_cmd
(module Process.D)
~cmdstring:(Printf.sprintf "%s %s" xenpm_bin "get-cpufreq-average")
~f:match_fun
~read_out_line ~read_err_line
(* Keep only the values parsed from stdout. *)
|> fst

let get_states cpu_state : int64 list =
let pattern =
Str.regexp "[ \t]*residency[ \t]+\\[[ \t]*\\([0-9]+\\) ms\\][ \t]*"
in
let match_fun s =
let read_out_line s =
if Str.string_match pattern s 0 then
Some (Int64.of_string (Str.matched_group 1 s))
else
None
in
let read_err_line _ = None in
Utils.exec_cmd
(module Process.D)
~cmdstring:
Expand All @@ -89,7 +92,8 @@ let get_states cpu_state : int64 list =
"get-cpufreq-states"
)
)
~f:match_fun
~read_out_line ~read_err_line
|> fst

(* list_package [1;2;3;4] 2 = [[1;2];[3;4]] *)
let list_package (l : 'a list) (n : int) : 'a list list =
Expand Down
25 changes: 0 additions & 25 deletions ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,6 @@

(** Library to simplify writing an rrdd plugin. *)

(** Utility functions useful for rrdd plugins. *)
module Utils : sig
val now : unit -> int64
(** Return the current unix epoch as an int64. *)

val cut : string -> string list
(** Split a string into a list of strings as separated by spaces and/or
tabs. *)

val list_directory_unsafe : string -> string list
(** List the contents of a directory, including . and .. *)

val list_directory_entries_unsafe : string -> string list
(** List the contents of a directory, not including . and .. *)

val exec_cmd :
(module Debug.DEBUG)
-> cmdstring:string
-> f:(string -> 'a option)
-> 'a list
(** [exec_cmd (module D) ~cmdstring ~f] executes [cmdstring], applies [f] to
each of the lines which the command outputs on stdout, and returns the list
of values [v] for which [f] returned [Some v], in the order the lines were
output. *)
end

(** Asynchronous interface to create, cancel and query the state of stats
reporting threads. *)
module Reporter : sig
Expand Down
35 changes: 22 additions & 13 deletions ocaml/xcp-rrdd/lib/plugin/utils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@ let list_directory_entries_unsafe dir =
let dirlist = list_directory_unsafe dir in
List.filter (fun x -> x <> "." && x <> "..") dirlist

let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
let exec_cmd (module D : Debug.DEBUG) ~cmdstring
~(read_out_line : string -> 'a option) ~(read_err_line : string -> 'b option)
=
D.debug "Forking command %s" cmdstring ;
(* create pipe for reading from the command's output *)
(* create pipes for reading from the command's output *)
let out_readme, out_writeme = Unix.pipe () in
let err_readme, err_writeme = Unix.pipe () in
let cmd, args =
match Astring.String.cuts ~empty:false ~sep:" " cmdstring with
| [] ->
Expand All @@ -45,19 +48,25 @@ let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
(h, t)
in
let pid =
Forkhelpers.safe_close_and_exec None (Some out_writeme) None [] cmd args
Forkhelpers.safe_close_and_exec None (Some out_writeme) (Some err_writeme)
[] cmd args
in
Unix.close out_writeme ;
let in_channel = Unix.in_channel_of_descr out_readme in
let vals = ref [] in
let rec loop () =
let line = input_line in_channel in
let ret = f line in
(match ret with None -> () | Some v -> vals := v :: !vals) ;
loop ()
Unix.close err_writeme ;
(* [read_and_close f fd] reads [fd] line by line until end of file, applies
[f] to every line, closes [fd], and returns the values [v] for which [f]
returned [Some v], in the order the lines were read. *)
let read_and_close f fd =
let in_channel = Unix.in_channel_of_descr fd in
let vals = ref [] in
let rec loop () =
let line = input_line in_channel in
let ret = f line in
(match ret with None -> () | Some v -> vals := v :: !vals) ;
loop ()
in
(* [input_line] raises [End_of_file] once the writer side is closed; that is
the only way out of [loop]. *)
(try loop () with End_of_file -> ()) ;
Unix.close fd ; List.rev !vals
in
(try loop () with End_of_file -> ()) ;
Unix.close out_readme ;
(* NOTE(review): stdout is drained to end of file before stderr is read at
all. If the command writes more to stderr than the pipe can buffer while its
stdout is still open, both processes could block - confirm whether this can
happen for the commands run through this helper. *)
let stdout = read_and_close read_out_line out_readme in
let stderr = read_and_close read_err_line err_readme in
let pid, status = Forkhelpers.waitpid pid in
( match status with
| Unix.WEXITED n ->
Expand All @@ -67,4 +76,4 @@ let exec_cmd (module D : Debug.DEBUG) ~cmdstring ~(f : string -> 'a option) =
| Unix.WSTOPPED s ->
D.debug "Process %d was stopped by signal %d" pid s
) ;
List.rev !vals
(stdout, stderr)
14 changes: 10 additions & 4 deletions ocaml/xcp-rrdd/lib/plugin/utils.mli
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ val list_directory_entries_unsafe : string -> string list
(** List the contents of a directory, not including . and .. *)

val exec_cmd :
(module Debug.DEBUG) -> cmdstring:string -> f:(string -> 'a option) -> 'a list
(** [exec_cmd cmd f] executes [cmd], applies [f] on each of the lines which
[cmd] outputs on stdout, and returns a list of resulting values for which
applying [f] returns [Some value]. *)
(module Debug.DEBUG)
-> cmdstring:string
-> read_out_line:(string -> 'a option)
-> read_err_line:(string -> 'b option)
-> 'a list * 'b list
(** [exec_cmd (module D) ~cmdstring ~read_out_line ~read_err_line] executes
[cmdstring], applies [read_out_line] to each line the command writes to
stdout and [read_err_line] to each line it writes to stderr, and returns a
pair of lists: the values for which [read_out_line] returned [Some value],
and the values for which [read_err_line] returned [Some value]. *)

0 comments on commit fd816cf

Please sign in to comment.