Skip to content

Commit

Permalink
Make error message styling consistent
Browse files Browse the repository at this point in the history
The purpose of this PR is to achieve consistent error message styling. Styling follows the following conventions:

1. Error strings should not be capitalized (unless beginning with an exported name, a proper noun or an acronym) and should not end with punctuation. 

2. Error message is separated from the main message string using `;` separator.

3. Use of `%w` in place of `%v` to make it available to `errors.Is` and `errors.As` when supporting custom errors in future.
  • Loading branch information
rohit-arora-dev authored Feb 16, 2024
1 parent 2293a03 commit e7dad0c
Show file tree
Hide file tree
Showing 17 changed files with 355 additions and 164 deletions.
36 changes: 20 additions & 16 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,35 @@ vendor/
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

## Directory-based project format:
.idea/
# if you decide to remove above rule then ignore the following:

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
#.idea/**/workspace.xml
#.idea/**/tasks.xml
#.idea/**/usage.statistics.xml
#.idea/**/dictionaries
#.idea/**/shelf

# AWS User-specific
.idea/**/aws.xml
#.idea/**/aws.xml

# Generated files
.idea/**/contentModel.xml
#.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
#.idea/**/dataSources/
#.idea/**/dataSources.ids
#.idea/**/dataSources.local.xml
#.idea/**/sqlDataSources.xml
#.idea/**/dynamic.xml
#.idea/**/uiDesigner.xml
#.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries
#.idea/**/gradle.xml
#.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
Expand Down
10 changes: 5 additions & 5 deletions internal/pkg/nvmlprovider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func GetMIGDeviceInfoByID(uuid string) (*MIGDeviceInfo, error) {
ret := nvml.Init()
if ret != nvml.SUCCESS {
err = errors.New(nvml.ErrorString(ret))
logrus.Error("Can not init NVML library")
logrus.Error("Can not init NVML library.")
}
})
if err != nil {
Expand Down Expand Up @@ -88,22 +88,22 @@ func GetMIGDeviceInfoByID(uuid string) (*MIGDeviceInfo, error) {

tokens := strings.SplitN(uuid, "-", 2)
if len(tokens) != 2 || tokens[0] != "MIG" {
return nil, fmt.Errorf("Unable to parse UUID as MIG device")
return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
}

tokens = strings.SplitN(tokens[1], "/", 3)
if len(tokens) != 3 || !strings.HasPrefix(tokens[0], "GPU-") {
return nil, fmt.Errorf("Unable to parse UUID as MIG device")
return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
}

gi, err := strconv.Atoi(tokens[1])
if err != nil {
return nil, fmt.Errorf("Unable to parse UUID as MIG device")
return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
}

ci, err := strconv.Atoi(tokens[2])
if err != nil {
return nil, fmt.Errorf("Unable to parse UUID as MIG device")
return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
}

return &MIGDeviceInfo{
Expand Down
11 changes: 6 additions & 5 deletions pkg/cmd/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ restart:
defer func() {
if r := recover(); r != nil {
logrus.WithField(dcgmexporter.LoggerStackTrace, string(debug.Stack())).Error("Encountered a failure.")
err = fmt.Errorf("Encountered a failure: %v", r)
err = fmt.Errorf("encountered a failure; err: %v", r)
}
}()

Expand Down Expand Up @@ -361,14 +361,15 @@ func parseDeviceOptions(devices string) (dcgmexporter.DeviceOptions, error) {
letterAndRange := strings.Split(devices, ":")
count := len(letterAndRange)
if count > 2 {
return dOpt, fmt.Errorf("Invalid ranged device option '%s': there can only be one specified range", devices)
return dOpt, fmt.Errorf("invalid ranged device option '%s'; err: there can only be one specified range",
devices)
}

letter := letterAndRange[0]
if letter == FlexKey {
dOpt.Flex = true
if count > 1 {
return dOpt, fmt.Errorf("No range can be specified with the flex option 'f'")
return dOpt, fmt.Errorf("no range can be specified with the flex option 'f'")
}
} else if letter == MajorKey || letter == MinorKey {
var indices []int
Expand All @@ -381,7 +382,7 @@ func parseDeviceOptions(devices string) (dcgmexporter.DeviceOptions, error) {
rangeTokens := strings.Split(numberOrRange, "-")
rangeTokenCount := len(rangeTokens)
if rangeTokenCount > 2 {
return dOpt, fmt.Errorf("A range can only be '<number>-<number>', but found '%s'", numberOrRange)
return dOpt, fmt.Errorf("range can only be '<number>-<number>', but found '%s'", numberOrRange)
} else if rangeTokenCount == 1 {
number, err := strconv.Atoi(rangeTokens[0])
if err != nil {
Expand Down Expand Up @@ -412,7 +413,7 @@ func parseDeviceOptions(devices string) (dcgmexporter.DeviceOptions, error) {
dOpt.MinorRange = indices
}
} else {
return dOpt, fmt.Errorf("The only valid options preceding ':<range>' are 'g' or 'i', but found '%s'", letter)
return dOpt, fmt.Errorf("valid options preceding ':<range>' are 'g' or 'i', but found '%s'", letter)
}

return dOpt, nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/dcgmexporter/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func (d DCGMExporterMetric) String() string {
func IdentifyMetricType(s string) (DCGMExporterMetric, error) {
mv, ok := DCGMFields[s]
if !ok {
return mv, fmt.Errorf("Unknown DCGMExporterMetric field '%s'", s)
return mv, fmt.Errorf("unknown DCGMExporterMetric field '%s'", s)
}
return mv, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/dcgmexporter/const_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func TestIdentifyMetricType(t *testing.T) {
assert.NoError(t, err, "Expected metrics to be found.")
assert.Equal(t, output, tt.output, "Invalid output")
} else {
assert.Errorf(t, err, "Expected metrics to be not found")
assert.Errorf(t, err, "Expected metrics to be not found.")
}
})
}
Expand Down
8 changes: 5 additions & 3 deletions pkg/dcgmexporter/dcgm.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func NewGroup() (dcgm.GroupHandle, func(), error) {
return group, func() {
err := dcgm.DestroyGroup(group)
if err != nil {
logrus.WithError(err).Warn("Cannot destroy field group")
logrus.WithError(err).Warn("Cannot destroy field group.")
}
}, nil
}
Expand Down Expand Up @@ -65,12 +65,14 @@ func NewFieldGroup(deviceFields []dcgm.Short) (dcgm.FieldHandle, func(), error)
return fieldGroup, func() {
err := dcgm.FieldGroupDestroy(fieldGroup)
if err != nil {
logrus.WithError(err).Warn("Cannot destroy field group")
logrus.WithError(err).Warn("Cannot destroy field group.")
}
}, nil
}

func WatchFieldGroup(group dcgm.GroupHandle, field dcgm.FieldHandle, updateFreq int64, maxKeepAge float64, maxKeepSamples int32) error {
func WatchFieldGroup(
group dcgm.GroupHandle, field dcgm.FieldHandle, updateFreq int64, maxKeepAge float64, maxKeepSamples int32,
) error {
err := dcgm.WatchFieldsWithGroupEx(field, group, updateFreq, maxKeepAge, maxKeepSamples)
if err != nil {
return err
Expand Down
22 changes: 14 additions & 8 deletions pkg/dcgmexporter/gpu_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ import (

type DCGMCollectorConstructor func([]Counter, *Config, string, dcgm.Field_Entity_Group) (*DCGMCollector, func(), error)

func NewDCGMCollector(c []Counter, config *Config, hostname string, entityType dcgm.Field_Entity_Group) (*DCGMCollector, func(), error) {
func NewDCGMCollector(c []Counter, config *Config, hostname string, entityType dcgm.Field_Entity_Group) (*DCGMCollector,
func(), error) {
var deviceFields = NewDeviceFields(c, entityType)

if !ShouldMonitorDeviceType(deviceFields, entityType) {
return nil, func() {}, fmt.Errorf("No fields to watch for device type: %d", entityType)
return nil, func() {}, fmt.Errorf("no fields to watch for device type '%d'", entityType)
}

sysInfo, err := GetSystemInfo(config, entityType)
Expand Down Expand Up @@ -59,7 +60,8 @@ func NewDCGMCollector(c []Counter, config *Config, hostname string, entityType d
}

func GetSystemInfo(config *Config, entityType dcgm.Field_Entity_Group) (*SystemInfo, error) {
sysInfo, err := InitializeSystemInfo(config.GPUDevices, config.SwitchDevices, config.CPUDevices, config.UseFakeGPUs, entityType)
sysInfo, err := InitializeSystemInfo(config.GPUDevices, config.SwitchDevices, config.CPUDevices, config.UseFakeGPUs,
entityType)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -150,11 +152,13 @@ func FindCounterField(c []Counter, fieldId uint) (Counter, error) {
}
}

return c[0], fmt.Errorf("Could not find corresponding counter")
return c[0], fmt.Errorf("could not find counter corresponding to field ID '%d'", fieldId)
}

func ToSwitchMetric(metrics MetricsByCounter,
values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string) {
func ToSwitchMetric(
metrics MetricsByCounter,
values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string,
) {
labels := map[string]string{}

for _, val := range values {
Expand Down Expand Up @@ -196,8 +200,10 @@ func ToSwitchMetric(metrics MetricsByCounter,
}
}

func ToCPUMetric(metrics MetricsByCounter,
values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string) {
func ToCPUMetric(
metrics MetricsByCounter,
values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string,
) {
var labels = map[string]string{}

for _, val := range values {
Expand Down
14 changes: 9 additions & 5 deletions pkg/dcgmexporter/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ import (
"strings"
"time"

"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
"github.com/sirupsen/logrus"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"

"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
)

var (
Expand Down Expand Up @@ -114,7 +115,7 @@ func connectToServer(socket string) (*grpc.ClientConn, func(), error) {
)

if err != nil {
return nil, func() {}, fmt.Errorf("failure connecting to %s: %v", socket, err)
return nil, func() {}, fmt.Errorf("failure connecting to '%s'; err: %w", socket, err)
}

return conn, func() { conn.Close() }, nil
Expand All @@ -128,13 +129,15 @@ func (p *PodMapper) listPods(conn *grpc.ClientConn) (*podresourcesapi.ListPodRes

resp, err := client.List(ctx, &podresourcesapi.ListPodResourcesRequest{})
if err != nil {
return nil, fmt.Errorf("failure getting pod resources %v", err)
return nil, fmt.Errorf("failure getting pod resources; err: %w", err)
}

return resp, nil
}

func (p *PodMapper) toDeviceToPod(devicePods *podresourcesapi.ListPodResourcesResponse, sysInfo SystemInfo) map[string]PodInfo {
func (p *PodMapper) toDeviceToPod(
devicePods *podresourcesapi.ListPodResourcesResponse, sysInfo SystemInfo,
) map[string]PodInfo {
deviceToPodMap := make(map[string]PodInfo)

for _, pod := range devicePods.GetPodResources() {
Expand All @@ -159,7 +162,8 @@ func (p *PodMapper) toDeviceToPod(devicePods *podresourcesapi.ListPodResourcesRe
if strings.HasPrefix(deviceID, MIG_UUID_PREFIX) {
migDevice, err := nvmlGetMIGDeviceInfoByIDHook(deviceID)
if err == nil {
giIdentifier := GetGPUInstanceIdentifier(sysInfo, migDevice.ParentUUID, uint(migDevice.GPUInstanceID))
giIdentifier := GetGPUInstanceIdentifier(sysInfo, migDevice.ParentUUID,
uint(migDevice.GPUInstanceID))
deviceToPodMap[giIdentifier] = podInfo
}
gpuUUID := deviceID[len(MIG_UUID_PREFIX):]
Expand Down
11 changes: 7 additions & 4 deletions pkg/dcgmexporter/kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ import (
"testing"
"time"

"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
"github.com/NVIDIA/go-dcgm/pkg/dcgm"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/util"

"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
)

var tmpDir string
Expand Down Expand Up @@ -111,7 +112,7 @@ func StartMockServer(t *testing.T, server *grpc.Server, socket string) func() {
case <-stopped:
return
case <-time.After(1 * time.Second):
t.Fatal("Failed waiting for gRPC server to stop")
t.Fatal("Failed waiting for gRPC server to stop.")
}
}
}
Expand All @@ -138,7 +139,9 @@ func NewPodResourcesMockServer(used []string) *PodResourcesMockServer {
}
}

func (s *PodResourcesMockServer) List(ctx context.Context, req *podresourcesapi.ListPodResourcesRequest) (*podresourcesapi.ListPodResourcesResponse, error) {
func (s *PodResourcesMockServer) List(
ctx context.Context, req *podresourcesapi.ListPodResourcesRequest,
) (*podresourcesapi.ListPodResourcesResponse, error) {
podResources := make([]*podresourcesapi.PodResources, len(s.gpus))

for i, gpu := range s.gpus {
Expand Down
Loading

0 comments on commit e7dad0c

Please sign in to comment.