Skip to content

Commit

Permalink
Review feedback
Browse files Browse the repository at this point in the history
Signed-off-by: Douglas Wightman <[email protected]>
  • Loading branch information
glowkey committed Dec 19, 2023
1 parent 493d2fa commit f67ffa9
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 42 deletions.
8 changes: 4 additions & 4 deletions pkg/cmd/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ var (
CLIRemoteHEInfo = "remote-hostengine-info"
CLIGPUDevices = "devices"
CLISwitchDevices = "switch-devices"
CLICpuDevices = "cpu-devices"
CLICPUDevices = "cpu-devices"
CLINoHostname = "no-hostname"
CLIUseFakeGpus = "fake-gpus"
CLIConfigMapData = "configmap-data"
Expand Down Expand Up @@ -114,7 +114,7 @@ func NewApp(buildVersion ...string) *cli.App {
EnvVars: []string{"DCGM_EXPORTER_USE_OLD_NAMESPACE"},
},
&cli.StringFlag{
Name: CLICpuDevices,
Name: CLICPUDevices,
Aliases: []string{"p"},
Value: FlexKey,
Usage: DeviceUsageStr,
Expand Down Expand Up @@ -352,7 +352,7 @@ func contextToConfig(c *cli.Context) (*dcgmexporter.Config, error) {
return nil, err
}

cOpt, err := parseDeviceOptions(c.String(CLICpuDevices))
cOpt, err := parseDeviceOptions(c.String(CLICPUDevices))
if err != nil {
return nil, err
}
Expand All @@ -369,7 +369,7 @@ func contextToConfig(c *cli.Context) (*dcgmexporter.Config, error) {
RemoteHEInfo: c.String(CLIRemoteHEInfo),
GPUDevices: gOpt,
SwitchDevices: sOpt,
CpuDevices: cOpt,
CPUDevices: cOpt,
NoHostname: c.Bool(CLINoHostname),
UseFakeGpus: c.Bool(CLIUseFakeGpus),
ConfigMapData: c.String(CLIConfigMapData),
Expand Down
6 changes: 3 additions & 3 deletions pkg/dcgmexporter/gpu_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
)

func NewDCGMCollector(c []Counter, config *Config, entityType dcgm.Field_Entity_Group) (*DCGMCollector, func(), error) {
sysInfo, err := InitializeSystemInfo(config.GPUDevices, config.SwitchDevices, config.CpuDevices, config.UseFakeGpus, entityType)
sysInfo, err := InitializeSystemInfo(config.GPUDevices, config.SwitchDevices, config.CPUDevices, config.UseFakeGpus, entityType)
if err != nil {
return nil, func() {}, err
}
Expand Down Expand Up @@ -100,7 +100,7 @@ func (c *DCGMCollector) GetMetrics() ([][]Metric, error) {
if c.SysInfo.InfoType == dcgm.FE_SWITCH || c.SysInfo.InfoType == dcgm.FE_LINK {
metrics[i] = ToSwitchMetric(vals, c.Counters, mi, c.UseOldNamespace, c.Hostname)
} else if c.SysInfo.InfoType == dcgm.FE_CPU || c.SysInfo.InfoType == dcgm.FE_CPU_CORE {
metrics[i] = ToCpuMetric(vals, c.Counters, mi, c.UseOldNamespace, c.Hostname)
metrics[i] = ToCPUMetric(vals, c.Counters, mi, c.UseOldNamespace, c.Hostname)
} else {
metrics[i] = ToMetric(vals, c.Counters, mi.DeviceInfo, mi.InstanceInfo, c.UseOldNamespace, c.Hostname)
}
Expand Down Expand Up @@ -163,7 +163,7 @@ func ToSwitchMetric(values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo,
return metrics
}

func ToCpuMetric(values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string) []Metric {
func ToCPUMetric(values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string) []Metric {
var metrics []Metric
var labels = map[string]string{}

Expand Down
18 changes: 9 additions & 9 deletions pkg/dcgmexporter/gpu_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ var expectedMetrics = map[string]bool{
"DCGM_FI_DEV_VGPU_LICENSE_STATUS": true,
}

var expectedCpuMetrics = map[string]bool{
var expectedCPUMetrics = map[string]bool{
"DCGM_FI_DEV_CPU_UTIL_TOTAL": true,
}

Expand All @@ -57,7 +57,7 @@ func TestDCGMCollector(t *testing.T) {
_, cleanup = testDCGMGPUCollector(t, sampleCounters)
cleanup()

_, cleanup = testDCGMCpuCollector(t, sampleCounters)
_, cleanup = testDCGMCPUCollector(t, sampleCounters)
cleanup()
}

Expand Down Expand Up @@ -95,14 +95,14 @@ func testDCGMGPUCollector(t *testing.T, counters []Counter) (*DCGMCollector, fun
}

dcgmGetCpuHierarchy = func() (dcgm.CpuHierarchy_v1, error) {
Cpu := dcgm.CpuHierarchyCpu_v1{
CPU := dcgm.CpuHierarchyCpu_v1{
CpuId: 0,
OwnedCores: []uint64{0},
}
hierarchy := dcgm.CpuHierarchy_v1{
Version: 0,
NumCpus: 1,
Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{Cpu},
Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{CPU},
}

return hierarchy, nil
Expand Down Expand Up @@ -146,10 +146,10 @@ func testDCGMGPUCollector(t *testing.T, counters []Counter) (*DCGMCollector, fun
return g, cleanup
}

func testDCGMCpuCollector(t *testing.T, counters []Counter) (*DCGMCollector, func()) {
func testDCGMCPUCollector(t *testing.T, counters []Counter) (*DCGMCollector, func()) {
dOpt := DeviceOptions{true, []int{-1}, []int{-1}}
cfg := Config{
CpuDevices: dOpt,
CPUDevices: dOpt,
NoHostname: false,
UseOldNamespace: false,
UseFakeGpus: false,
Expand Down Expand Up @@ -181,14 +181,14 @@ func testDCGMCpuCollector(t *testing.T, counters []Counter) (*DCGMCollector, fun
}

dcgmGetCpuHierarchy = func() (dcgm.CpuHierarchy_v1, error) {
Cpu := dcgm.CpuHierarchyCpu_v1{
CPU := dcgm.CpuHierarchyCpu_v1{
CpuId: 0,
OwnedCores: []uint64{0, 1},
}
hierarchy := dcgm.CpuHierarchy_v1{
Version: 0,
NumCpus: 1,
Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{Cpu},
Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{CPU},
}

return hierarchy, nil
Expand Down Expand Up @@ -218,7 +218,7 @@ func testDCGMCpuCollector(t *testing.T, counters []Counter) (*DCGMCollector, fun
require.NotEmpty(t, metric.Value)
require.NotEqual(t, metric.Value, FailedToConvert)
}
require.Equal(t, seenMetrics, expectedCpuMetrics)
require.Equal(t, seenMetrics, expectedCPUMetrics)
}

return c, cleanup
Expand Down
7 changes: 6 additions & 1 deletion pkg/dcgmexporter/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ import (
"k8s.io/client-go/rest"
)

const (
CPU_FIELDS_START = 1100
DCP_FIELDS_START = 1000
)

func ExtractCounters(c *Config) ([]Counter, error) {
var err error
var records [][]string
Expand Down Expand Up @@ -140,7 +145,7 @@ func extractCounters(records [][]string, c *Config) ([]Counter, error) {
}

func fieldIsSupported(fieldID uint, c *Config) bool {
if fieldID < 1000 || fieldID > 1099 {
if fieldID < DCP_FIELDS_START || fieldID >= CPU_FIELDS_START {
return true
}

Expand Down
48 changes: 24 additions & 24 deletions pkg/dcgmexporter/system_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type SwitchInfo struct {
NvLinks []dcgm.NvLinkStatus
}

type CpuInfo struct {
type CPUInfo struct {
EntityId uint
Cores []uint
}
Expand All @@ -78,7 +78,7 @@ type SystemInfo struct {
cOpt DeviceOptions
InfoType dcgm.Field_Entity_Group
Switches []SwitchInfo
Cpus []CpuInfo
CPUs []CPUInfo
}

type MonitoringInfo struct {
Expand Down Expand Up @@ -155,8 +155,8 @@ func SwitchIdExists(sysInfo *SystemInfo, switchId int) bool {
return false
}

func CpuIdExists(sysInfo *SystemInfo, cpuId int) bool {
for _, cpu := range sysInfo.Cpus {
func CPUIdExists(sysInfo *SystemInfo, cpuId int) bool {
for _, cpu := range sysInfo.CPUs {
if cpu.EntityId == uint(cpuId) {
return true
}
Expand Down Expand Up @@ -186,8 +186,8 @@ func LinkIdExists(sysInfo *SystemInfo, linkId int) bool {
return false
}

func CpuCoreIdExists(sysInfo *SystemInfo, coreId int) bool {
for _, cpu := range sysInfo.Cpus {
func CPUCoreIdExists(sysInfo *SystemInfo, coreId int) bool {
for _, cpu := range sysInfo.CPUs {
for _, core := range cpu.Cores {
if core == uint(coreId) {
return true
Expand All @@ -197,7 +197,7 @@ func CpuCoreIdExists(sysInfo *SystemInfo, coreId int) bool {
return false
}

func VerifyCpuDevicePresence(sysInfo *SystemInfo, sOpt DeviceOptions) error {
func VerifyCPUDevicePresence(sysInfo *SystemInfo, sOpt DeviceOptions) error {
if sOpt.Flex {
return nil
}
Expand All @@ -213,7 +213,7 @@ func VerifyCpuDevicePresence(sysInfo *SystemInfo, sOpt DeviceOptions) error {

if len(sOpt.MinorRange) > 0 && sOpt.MinorRange[0] != -1 {
for _, coreId := range sOpt.MinorRange {
if !CpuCoreIdExists(sysInfo, coreId) {
if !CPUCoreIdExists(sysInfo, coreId) {
return fmt.Errorf("couldn't find requested cpu core %d", coreId)
}
}
Expand Down Expand Up @@ -308,16 +308,16 @@ func InitializeCPUInfo(sysInfo SystemInfo, sOpt DeviceOptions) (SystemInfo, erro
for i := 0; i < int(hierarchy.NumCpus); i++ {
cores := getCoreArray([]uint64(hierarchy.Cpus[i].OwnedCores))

cpu := CpuInfo{
cpu := CPUInfo{
hierarchy.Cpus[i].CpuId,
cores,
}

sysInfo.Cpus = append(sysInfo.Cpus, cpu)
sysInfo.CPUs = append(sysInfo.CPUs, cpu)
}

sysInfo.cOpt = sOpt
err = VerifyCpuDevicePresence(&sysInfo, sOpt)
err = VerifyCPUDevicePresence(&sysInfo, sOpt)

return sysInfo, nil
}
Expand Down Expand Up @@ -454,8 +454,8 @@ func CreateCoreGroupsFromSystemInfo(sysInfo SystemInfo) ([]dcgm.GroupHandle, []f
var cleanups []func()

/* Create per-switch link groups */
for _, cpu := range sysInfo.Cpus {
if !IsCpuWatched(cpu.EntityId, sysInfo) {
for _, cpu := range sysInfo.CPUs {
if !IsCPUWatched(cpu.EntityId, sysInfo) {
continue
}

Expand Down Expand Up @@ -660,7 +660,7 @@ func IsLinkWatched(linkId uint, switchId uint, sysInfo SystemInfo) bool {
return false
}

func IsCpuWatched(cpuId uint, sysInfo SystemInfo) bool {
func IsCPUWatched(cpuId uint, sysInfo SystemInfo) bool {
if sysInfo.cOpt.Flex {
return true
}
Expand All @@ -683,8 +683,8 @@ func IsCoreWatched(coreId uint, cpuId uint, sysInfo SystemInfo) bool {
return true
}

for _, cpu := range sysInfo.Cpus {
if !IsCpuWatched(cpu.EntityId, sysInfo) {
for _, cpu := range sysInfo.CPUs {
if !IsCPUWatched(cpu.EntityId, sysInfo) {
return false
}

Expand All @@ -703,11 +703,11 @@ func IsCoreWatched(coreId uint, cpuId uint, sysInfo SystemInfo) bool {
return false
}

func AddAllCpus(sysInfo SystemInfo) []MonitoringInfo {
func AddAllCPUs(sysInfo SystemInfo) []MonitoringInfo {
var monitoring []MonitoringInfo

for _, cpu := range sysInfo.Cpus {
if !IsCpuWatched(cpu.EntityId, sysInfo) {
for _, cpu := range sysInfo.CPUs {
if !IsCPUWatched(cpu.EntityId, sysInfo) {
continue
}

Expand All @@ -728,12 +728,12 @@ func AddAllCpus(sysInfo SystemInfo) []MonitoringInfo {
return monitoring
}

func AddAllCpuCores(sysInfo SystemInfo) []MonitoringInfo {
func AddAllCPUCores(sysInfo SystemInfo) []MonitoringInfo {
var monitoring []MonitoringInfo

for _, cpu := range sysInfo.Cpus {
for _, cpu := range sysInfo.CPUs {
for _, core := range cpu.Cores {
if !IsCpuWatched(cpu.EntityId, sysInfo) {
if !IsCPUWatched(cpu.EntityId, sysInfo) {
continue
}

Expand Down Expand Up @@ -827,9 +827,9 @@ func GetMonitoredEntities(sysInfo SystemInfo) []MonitoringInfo {
} else if sysInfo.InfoType == dcgm.FE_LINK {
monitoring = AddAllLinks(sysInfo)
} else if sysInfo.InfoType == dcgm.FE_CPU {
monitoring = AddAllCpus(sysInfo)
monitoring = AddAllCPUs(sysInfo)
} else if sysInfo.InfoType == dcgm.FE_CPU_CORE {
monitoring = AddAllCpuCores(sysInfo)
monitoring = AddAllCPUCores(sysInfo)
} else if sysInfo.gOpt.Flex == true {
monitoring = AddAllGpuInstances(sysInfo, true)
} else {
Expand Down
2 changes: 1 addition & 1 deletion pkg/dcgmexporter/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ type Config struct {
RemoteHEInfo string
GPUDevices DeviceOptions
SwitchDevices DeviceOptions
CpuDevices DeviceOptions
CPUDevices DeviceOptions
NoHostname bool
UseFakeGpus bool
ConfigMapData string
Expand Down

0 comments on commit f67ffa9

Please sign in to comment.