Skip to content

Commit

Permalink
Broken unit tests were fixed. (#217)
Browse files Browse the repository at this point in the history
Signed-off-by: Vadym Fedorov <[email protected]>
  • Loading branch information
nvvfedorov authored Dec 8, 2023
1 parent c1bd6ac commit 512b16e
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 7 deletions.
32 changes: 32 additions & 0 deletions pkg/dcgmexporter/gpu_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,38 @@ func testDCGMCollector(t *testing.T, counters []Counter) (*DCGMCollector, func()
UseOldNamespace: false,
UseFakeGpus: false,
}

dcgmGetAllDeviceCount = func() (uint, error) {
return 1, nil
}

dcgmGetDeviceInfo = func(gpuId uint) (dcgm.Device, error) {
dev := dcgm.Device{
GPU: 0,
UUID: fmt.Sprintf("fake%d", gpuId),
}

return dev, nil
}

dcgmGetGpuInstanceHierarchy = func() (dcgm.MigHierarchy_v2, error) {
hierarchy := dcgm.MigHierarchy_v2{
Count: 0,
}
return hierarchy, nil
}

dcgmAddEntityToGroup = func(groupId dcgm.GroupHandle, entityGroupId dcgm.Field_Entity_Group, entityId uint) (err error) {
return nil
}

defer func() {
dcgmGetAllDeviceCount = dcgm.GetAllDeviceCount
dcgmGetDeviceInfo = dcgm.GetDeviceInfo
dcgmGetGpuInstanceHierarchy = dcgm.GetGpuInstanceHierarchy
dcgmAddEntityToGroup = dcgm.AddEntityToGroup
}()

c, cleanup, err := NewDCGMCollector(counters, &cfg, dcgm.FE_GPU)
require.NoError(t, err)

Expand Down
22 changes: 15 additions & 7 deletions pkg/dcgmexporter/system_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ import (

const PARENT_ID_IGNORED = 0

// Package-level function variables that alias the dcgm calls used in this
// file. The code below invokes these variables rather than the dcgm functions
// directly, so a test can reassign them to stubs; a test that does so should
// restore the original dcgm function afterwards, since the variables are
// shared package state.
var (
// dcgmGetAllDeviceCount is called by InitializeGpuInfo to obtain the GPU count.
dcgmGetAllDeviceCount = dcgm.GetAllDeviceCount
// dcgmGetDeviceInfo is called by InitializeGpuInfo once per GPU index.
dcgmGetDeviceInfo = dcgm.GetDeviceInfo
// dcgmGetGpuInstanceHierarchy is called by InitializeGpuInfo after the
// per-GPU device info has been collected.
dcgmGetGpuInstanceHierarchy = dcgm.GetGpuInstanceHierarchy
// dcgmAddEntityToGroup is called by CreateGroupFromSystemInfo for each
// monitored entity.
dcgmAddEntityToGroup = dcgm.AddEntityToGroup
// dcgmCreateGroup is called by CreateLinkGroupsFromSystemInfo and
// CreateGroupFromSystemInfo to create a new group.
dcgmCreateGroup = dcgm.CreateGroup
)

type GroupInfo struct {
groupHandle dcgm.GroupHandle
groupType dcgm.Field_Entity_Group
Expand Down Expand Up @@ -248,7 +256,7 @@ func InitializeNvSwitchInfo(sysInfo SystemInfo, sOpt DeviceOptions) (SystemInfo,
}

func InitializeGpuInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGpus bool) (SystemInfo, error) {
gpuCount, err := dcgm.GetAllDeviceCount()
gpuCount, err := dcgmGetAllDeviceCount()
if err != nil {
return sysInfo, err
}
Expand All @@ -257,7 +265,7 @@ func InitializeGpuInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGpus bool)
for i := uint(0); i < sysInfo.GpuCount; i++ {
// Default mig enabled to false
sysInfo.Gpus[i].MigEnabled = false
sysInfo.Gpus[i].DeviceInfo, err = dcgm.GetDeviceInfo(i)
sysInfo.Gpus[i].DeviceInfo, err = dcgmGetDeviceInfo(i)
if err != nil {
if useFakeGpus {
sysInfo.Gpus[i].DeviceInfo.GPU = i
Expand All @@ -268,7 +276,7 @@ func InitializeGpuInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGpus bool)
}
}

hierarchy, err := dcgm.GetGpuInstanceHierarchy()
hierarchy, err := dcgmGetGpuInstanceHierarchy()
if err != nil {
return sysInfo, err
}
Expand Down Expand Up @@ -309,7 +317,7 @@ func InitializeGpuInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGpus bool)
sysInfo.gOpt = gOpt
err = VerifyDevicePresence(&sysInfo, gOpt)

return sysInfo, nil
return sysInfo, err
}

func InitializeSystemInfo(gOpt DeviceOptions, sOpt DeviceOptions, useFakeGpus bool, entityType dcgm.Field_Entity_Group) (SystemInfo, error) {
Expand Down Expand Up @@ -341,7 +349,7 @@ func CreateLinkGroupsFromSystemInfo(sysInfo SystemInfo) ([]dcgm.GroupHandle, []f
continue
}

groupId, err := dcgm.CreateGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
groupId, err := dcgmCreateGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
if err != nil {
return nil, cleanups, err
}
Expand Down Expand Up @@ -372,13 +380,13 @@ func CreateLinkGroupsFromSystemInfo(sysInfo SystemInfo) ([]dcgm.GroupHandle, []f

func CreateGroupFromSystemInfo(sysInfo SystemInfo) (dcgm.GroupHandle, func(), error) {
monitoringInfo := GetMonitoredEntities(sysInfo)
groupId, err := dcgm.CreateGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
groupId, err := dcgmCreateGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
if err != nil {
return dcgm.GroupHandle{}, func() {}, err
}

for _, mi := range monitoringInfo {
err := dcgm.AddEntityToGroup(groupId, mi.Entity.EntityGroupId, mi.Entity.EntityId)
err := dcgmAddEntityToGroup(groupId, mi.Entity.EntityGroupId, mi.Entity.EntityId)
if err != nil {
return groupId, func() { dcgm.DestroyGroup(groupId) }, err
}
Expand Down

0 comments on commit 512b16e

Please sign in to comment.