diff --git a/.vscode/launch.json b/.vscode/launch.json
index 386c1169..c84ff638 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -10,7 +10,7 @@
       "request": "launch",
       "mode": "test",
       "program": "${workspaceFolder}/tests/e2e",
-      "args": ["-kubeconfig","~/.kube/config", "-chart","./../../deployment/","-image-repository","nvidia/dcgm-exporter","-arguments","{-f=/etc/dcgm-exporter/dcp-metrics-included.csv,--enable-dcgm-log=true,--dcgm-log-level=ERROR}"],
+      "args": ["-test.v","--ginkgo.v","-kubeconfig","~/.kube/config", "-chart","./../../deployment/","-image-repository","nvidia/dcgm-exporter","-arguments","{-f=/etc/dcgm-exporter/default-counters.csv,--enable-dcgm-log=true,--dcgm-log-level=ERROR}"],
       "env": {},
       "buildFlags": "-tags=e2e"
     },
diff --git a/cmd/dcgm-exporter/main.go b/cmd/dcgm-exporter/main.go
index de7401aa..2dedaae1 100644
--- a/cmd/dcgm-exporter/main.go
+++ b/cmd/dcgm-exporter/main.go
@@ -21,8 +21,9 @@ import (
 	"github.com/sirupsen/logrus"
 
-	"github.com/NVIDIA/dcgm-exporter/pkg/cmd"
 	_ "go.uber.org/automaxprocs"
+
+	"github.com/NVIDIA/dcgm-exporter/pkg/cmd"
 )
 
 var (
diff --git a/go.mod b/go.mod
index c4fd3a03..0bacee30 100644
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,8 @@ require (
 	github.com/google/uuid v1.5.0
 	github.com/gorilla/mux v1.8.1
 	github.com/mittwald/go-helm-client v0.12.8
+	github.com/onsi/ginkgo/v2 v2.15.0
+	github.com/onsi/gomega v1.32.0
 	github.com/prometheus/client_model v0.6.0
 	github.com/prometheus/common v0.47.0
 	github.com/prometheus/exporter-toolkit v0.11.0
@@ -69,6 +71,7 @@
 	github.com/go-openapi/jsonpointer v0.20.2 // indirect
 	github.com/go-openapi/jsonreference v0.20.4 // indirect
 	github.com/go-openapi/swag v0.22.7 // indirect
+	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/gobwas/glob v0.2.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
@@ -76,6 +79,7 @@
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
+	github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
 	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
 	github.com/gorilla/websocket v1.5.1 // indirect
 	github.com/gosuri/uitable v0.0.4 // indirect
@@ -144,6 +148,7 @@
 	golang.org/x/term v0.16.0 // indirect
 	golang.org/x/text v0.14.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
+	golang.org/x/tools v0.16.1 // indirect
 	google.golang.org/appengine v1.6.8 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect
 	google.golang.org/protobuf v1.33.0 // indirect
diff --git a/go.sum b/go.sum
index 1718b884..92d476fa 100644
--- a/go.sum
+++ b/go.sum
@@ -55,6 +55,9 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj
 github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk=
 github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
 github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
 github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw=
@@ -195,6 +198,7 @@ github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uG
 github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
 github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU=
 github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
+github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
@@ -281,10 +285,10 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
-github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4=
-github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o=
-github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg=
-github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ=
+github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY=
+github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM=
+github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk=
+github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI=
@@ -439,6 +443,7 @@ golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile
index 0d5349fd..0d252c6a 100644
--- a/tests/e2e/Makefile
+++ b/tests/e2e/Makefile
@@ -19,31 +19,31 @@
 IMAGE_REPOSITORY ?= "nvcr.io/nvidia/k8s/dcgm-exporter"
 IMAGE_TAG ?= "3.3.5-3.4.0-ubuntu22.04"
 KUBECONFIG ?= "~/.kube/config"
 
-.PHONY: e2e-test
-e2e-test:
+define TEST_CMD
 	@if [ -z ${KUBECONFIG} ]; then \
echo "[ERR] KUBECONFIG is missing, must be set"; \ exit 1; \ fi - $(GO_CMD) test -failfast --tags=e2e -v . -args \ + $(GO_CMD) test --tags=e2e -v . \ + -args \ + --ginkgo.v \ + --ginkgo.no-color \ -kubeconfig=$(KUBECONFIG) \ -chart="$(CHART)" \ -namespace=$(NAMESPACE) \ -image-repository=$(IMAGE_REPOSITORY) \ - -image-tag=$(IMAGE_TAG) + -image-tag=$(IMAGE_TAG) +endef + +.PHONY: e2e-test +e2e-test: + @$(TEST_CMD) .PHONY: e2e-test-no-profiling e2e-test-no-profiling: - @if [ -z ${KUBECONFIG} ]; then \ - echo "[ERR] KUBECONFIG is missing, must be set"; \ - exit 1; \ - fi - $(GO_CMD) test -failfast --tags=e2e -v . -args \ - -kubeconfig=$(KUBECONFIG) \ - -chart="$(CHART)" \ - -namespace=$(NAMESPACE) \ - -image-repository=$(IMAGE_REPOSITORY) \ - -image-tag=$(IMAGE_TAG) \ + @$(TEST_CMD) \ -arguments="{-f=/etc/dcgm-exporter/default-counters.csv}" +binary: + go test -c --tags="e2e" . \ No newline at end of file diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go index 9db91dc2..a27e1def 100644 --- a/tests/e2e/e2e_suite_test.go +++ b/tests/e2e/e2e_suite_test.go @@ -20,25 +20,20 @@ package e2e import ( "bytes" "context" - "flag" "fmt" "os" "slices" - "testing" - "time" - "github.com/prometheus/common/expfmt" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" corev1 "k8s.io/api/core/v1" - clientset "k8s.io/client-go/kubernetes" - restclient "k8s.io/client-go/rest" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" "k8s.io/utils/ptr" "github.com/NVIDIA/dcgm-exporter/tests/e2e/internal/framework" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/prometheus/common/expfmt" ) const ( @@ -58,224 +53,285 @@ const ( var expectedLabels = []string{podLabel, namespaceLabel, containerLabel} -type suiteConfig struct { - namespace string +type testContextType struct { kubeconfig string chart string imageRepository string imageTag string arguments string + namespace string } -type Suite struct { - suite.Suite - suiteConfig - ctx context.Context - k8SClient *framework.KubeClient - k8sConfig *restclient.Config - helmClient *framework.HelmClient - helmReleaseName string - setupDone bool - labels map[string]string - workloadPod *corev1.Pod - dcgmExpPod *corev1.Pod -} +var _ = Describe("dcgm-exporter-e2e-suite", func() { + When("DCGM exporter is deployed on kubernetes", Ordered, func() { + // Init global suite vars + var ( + kubeClient *framework.KubeClient + helmClient *framework.HelmClient + labels map[string]string + helmReleaseName string + dcgmExpPod *corev1.Pod + workloadPod *corev1.Pod + ) -func NewSuite() *Suite { - if suiteCfg.kubeconfig == "" { - log.Warning("kubeconfig parameter is empty. 
Defaulting to ~/.kube/config") - } - - if len(suiteCfg.chart) == 0 { - log.Error("chart parameter is empty") - flag.Usage() - os.Exit(1) - } - - var err error - - log.Info(suiteCfg.kubeconfig) - - suiteCfg.kubeconfig, err = framework.ResolvePath(suiteCfg.kubeconfig) - if err != nil { - log.Fatal(err) - } - - if _, err := os.Stat(suiteCfg.kubeconfig); os.IsNotExist(err) { - log.Fatalf("kubeconfig file does not exist: %s", suiteCfg.kubeconfig) - } - - // Init out-of-cluster K8S client - k8sConfig, err := clientcmd.BuildConfigFromFlags("", suiteCfg.kubeconfig) - if err != nil { - log.Fatalf("unable to load kubeconfig from %s; err: %s", suiteCfg.kubeconfig, err) - } - - k8sClientset, err := clientset.NewForConfig(k8sConfig) - if err != nil { - log.Fatalf("cannot create k8s client: %s", err) - } - - helmClient, err := framework.NewHelmClient( - framework.HelmWithNamespace(suiteCfg.namespace), - framework.HelmWithKubeConfig(k8sConfig), - framework.HelmWithChart(suiteCfg.chart), - ) - if err != nil { - log.Fatalf("cannot create helm client: %s", err) - } - - return &Suite{ - ctx: context.Background(), - k8SClient: framework.NewKubeClient(k8sClientset), - k8sConfig: k8sConfig, - helmClient: helmClient, - suiteConfig: suiteCfg, - } -} + BeforeAll(func(ctx context.Context) { + if testContext.kubeconfig == "" { + _, _ = fmt.Fprintln(GinkgoWriter, "kubeconfig parameter is empty. Defaulting to ~/.kube/config") + } -func (s *Suite) SetupSuite() { - defer func() { - // This is only run if SetupSuite fails or panics, otherwise TearDownSuite will call it - if !s.setupDone { - s.TearDownSuite() - } - }() - s.T().Log("Starting E2E test setup...") - - s.labels = map[string]string{ - "e2eRunID": runID.String(), - } - - s.T().Logf("Creating namespace: %s...", s.namespace) - - _, err := s.k8SClient.CreateNamespace(s.ctx, s.namespace, s.labels) - s.Require().NoError(err, "Failed to create namespace") - - s.T().Logf("Namespace: %q created", s.namespace) - - s.T().Logf("Installing the helm chart: %s", s.chart) - - values := []string{ - fmt.Sprintf("serviceMonitor.enabled=%v", false), - } - - if s.arguments != "" { - values = append(values, fmt.Sprintf("arguments=%s", s.arguments)) - } - - if s.imageRepository != "" { - values = append(values, fmt.Sprintf("image.repository=%s", s.imageRepository)) - } - if s.imageTag != "" { - values = append(values, fmt.Sprintf("image.tag=%s", s.imageTag)) - } - - s.helmReleaseName, err = s.helmClient.Install(s.ctx, values, framework.HelmChartOptions{ - CleanupOnFail: true, - GenerateName: true, - Timeout: 5 * time.Minute, - Wait: true, - DryRun: false, - }) + if len(testContext.chart) == 0 { + Fail("chart parameter is empty") + } - s.Require().NoError(err, "Cannot install helm chart: %s", s.chart) + var err error - s.T().Logf("Helm chart: %q installed", s.chart) + testContext.kubeconfig, err = framework.ResolvePath(testContext.kubeconfig) + Expect(err).ShouldNot(HaveOccurred(), + "cannot resolve path to kubeconfig: %s, err: %v", testContext.kubeconfig, err) - s.T().Log("Test setup ready.") + if _, err := os.Stat(testContext.kubeconfig); os.IsNotExist(err) { + Fail(fmt.Sprintf("kubeconfig file does not exist: %s", testContext.kubeconfig)) + } - s.setupDone = true -} + // Init out-of-cluster K8S client + k8sConfig, err := clientcmd.BuildConfigFromFlags("", testContext.kubeconfig) + Expect(err).ShouldNot(HaveOccurred(), "unable to load kubeconfig from %s; err: %s", testContext.kubeconfig, err) + + k8sClientset, err := kubernetes.NewForConfig(k8sConfig) + 
Expect(err).ShouldNot(HaveOccurred(), "cannot create k8s client: %s", err) + + kubeClient = framework.NewKubeClient(k8sClientset) + + labels = map[string]string{ + "e2eRunID": runID.String(), + } + + _, _ = fmt.Fprintf(GinkgoWriter, "Creating namespace: %q started.\n", testContext.namespace) + + _, err = kubeClient.CreateNamespace(ctx, testContext.namespace, labels) + Expect(err).ShouldNot(HaveOccurred(), "Creating namespace: failed") + + _, _ = fmt.Fprintf(GinkgoWriter, "Creating namespace: %q completed\n", testContext.namespace) + + helmClient, err = framework.NewHelmClient( + framework.HelmWithNamespace(testContext.namespace), + framework.HelmWithKubeConfig(k8sConfig), + framework.HelmWithChart(testContext.chart), + ) + Expect(err).ShouldNot(HaveOccurred(), "Creating namespace: %q failed\n", testContext.namespace) + }) + + AfterAll(func(ctx context.Context) { + _, _ = fmt.Fprintln(GinkgoWriter, "Clean up: starting") + + if helmClient != nil { + if helmReleaseName != "" { + _, _ = fmt.Fprintf(GinkgoWriter, "Helm chart uninstall: release %q of the helm chart: %q started.\n", + helmReleaseName, + testContext.chart) + + err := helmClient.Uninstall(helmReleaseName) + if err != nil { + Fail(fmt.Sprintf("Helm chart uninstall: release: %s uninstall failed with error: %v", helmReleaseName, err)) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Helm chart uninstall: release %q of the helm chart: %q completed.\n", + helmReleaseName, + testContext.chart) + } + } -func (s *Suite) TearDownSuite() { + err := helmClient.Cleanup() + if err != nil { + Fail(fmt.Sprintf("Helm Client: clean up failed: %v", err)) + } + } + + _, _ = fmt.Fprintf(GinkgoWriter, "Namespace deletion: %q namespace started.\n", testContext.namespace) + + if kubeClient != nil { + err := kubeClient.DeleteNamespace(ctx, testContext.namespace) + if err != nil { + Fail(fmt.Sprintf("Namespace deletion: Failed to delete namespace %q with error: %v", testContext.namespace, err)) + } else { + _, _ = fmt.Fprintf(GinkgoWriter, "Namespace deletion: %q namespace completed.\n", testContext.namespace) + } + } - var err error + _, _ = fmt.Fprintln(GinkgoWriter, "Clean up: completed") + }) - s.T().Log("Starting tear down E2E test setup...") + It("should install dcgm-exporter helm chart", func(ctx context.Context) { - if s.workloadPod != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Helm chart installation: %q chart started.\n", + testContext.chart) - err = s.k8SClient.DeletePod(s.ctx, s.namespace, s.workloadPod.Name) - s.Assert().NoErrorf(err, "Failed to delete pod: %s", s.workloadPod.Name) - if err == nil { - s.T().Logf("Workload pod: %s is deleted.", s.workloadPod.Name) - } - } + values := []string{ + fmt.Sprintf("serviceMonitor.enabled=%v", false), + } - s.T().Logf("Starting uninstall of the helm chart: %s...", s.chart) + if testContext.arguments != "" { + values = append(values, fmt.Sprintf("arguments=%s", testContext.arguments)) + } - err = s.helmClient.Uninstall(s.helmReleaseName) - s.Assert().NoErrorf(err, "Failed to uninstall release: %s with error: %v", s.helmReleaseName, err) - if err == nil { - s.T().Logf("The helm chart: %s is uninstalled", s.chart) - } + if testContext.imageRepository != "" { + values = append(values, fmt.Sprintf("image.repository=%s", testContext.imageRepository)) + } + if testContext.imageTag != "" { + values = append(values, fmt.Sprintf("image.tag=%s", testContext.imageTag)) + } - err = s.helmClient.Cleanup() - if err != nil { - s.T().Logf("Failed to clean up directories used by helm client: %v", err) - } + var err error - 
err = s.k8SClient.DeleteNamespace(s.ctx, s.namespace) - s.Assert().NoErrorf(err, "Failed to delete namespace %q with error: %v", s.namespace, err) - if err == nil { - s.T().Logf("Namespace: %q deleted", s.namespace) - } + helmReleaseName, err = helmClient.Install(ctx, values, framework.HelmChartOptions{ + CleanupOnFail: true, + GenerateName: true, + Timeout: 5 * time.Minute, + Wait: true, + DryRun: false, + }) + Expect(err).ShouldNot(HaveOccurred(), "Helm chart installation: %q chart failed with error err: %v", testContext.kubeconfig, err) - s.T().Log("Test setup teardown completed.") -} + _, _ = fmt.Fprintf(GinkgoWriter, "Helm chart installation: %q completed.\n", + testContext.chart) + _, _ = fmt.Fprintf(GinkgoWriter, "Helm chart installation: new %q release name.\n", + helmReleaseName) + }) -func (s *Suite) TestDCGMExporter() { - s.DCGMExporterPrechecks() + labelMap := map[string]string{dcgmExporterPodNameLabel: dcgmExporterPodNameLabelValue} - s.Run("Create workload pod", func() { + It("should create dcgm-exporter pod", func(ctx context.Context) { + _, _ = fmt.Fprintln(GinkgoWriter, "Pod creation verification: started") - s.T().Log("Creating a workload pod...") + Eventually(func(ctx context.Context) bool { + pods, err := kubeClient.GetPodsByLabel(ctx, testContext.namespace, labelMap) + if err != nil { + Fail(fmt.Sprintf("Pod creation: Failed with error: %v", err)) + return false + } - var err error + return len(pods) == 1 + }).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue()) - s.workloadPod, err = s.k8SClient.CreatePod(s.ctx, - s.namespace, - s.labels, - workloadPodName, - workloadContainerName, - workloadImage, - ) + _, _ = fmt.Fprintln(GinkgoWriter, "Pod creation verification: completed") + }) - s.Require().NoError(err, "Cannot create workload pod") + It("should read pod", func(ctx context.Context) { + _, _ = fmt.Fprintln(GinkgoWriter, "Read dcgm-exporter pod: started") + pods, err := kubeClient.GetPodsByLabel(ctx, testContext.namespace, labelMap) + if err != nil { + Fail(fmt.Sprintf("Pod creation: Failed with error: %v", err)) + } - s.Require().Eventuallyf(func() bool { - isReady, err := s.k8SClient.CheckPodCondition(s.ctx, s.namespace, s.workloadPod.Name, corev1.PodScheduled) - s.Require().NoErrorf(err, "Cannot get pod status: %v", err) - return isReady - }, 15*time.Minute, time.Second, "Failed to create pod: %s", s.workloadPod.Name) + Expect(pods).Should(HaveLen(1)) + + dcgmExpPod = &pods[0] + + _, _ = fmt.Fprintln(GinkgoWriter, "Read dcgm-exporter pod: completed") + }) + + It("should ensure that the dcgm-exporter pod is ready", func(ctx context.Context) { + _, _ = fmt.Fprintln(GinkgoWriter, "Checking pod status: started") + Eventually(func(ctx context.Context) bool { + isReady, err := kubeClient.CheckPodStatus(ctx, + testContext.namespace, + dcgmExpPod.Name, + func(namespace, podName string, status corev1.PodStatus) (bool, error) { + for _, c := range status.Conditions { + if c.Type != corev1.PodReady { + continue + } + if c.Status == corev1.ConditionTrue { + return true, nil + } + } - s.T().Log("The workload was created.") - }) + for _, c := range status.ContainerStatuses { + if c.State.Waiting != nil && c.State.Waiting.Reason == "CrashLoopBackOff" { + return false, fmt.Errorf("pod %s in namespace %s is in CrashLoopBackOff", podName, namespace) + } + } + + return false, nil + }) + if err != nil { + Fail(fmt.Sprintf("Checking pod status: Failed with error: %v", err)) + } + + return isReady + }).WithPolling(time.Second).Within(15 * 
time.Minute).WithContext(ctx).Should(BeTrue()) + _, _ = fmt.Fprintln(GinkgoWriter, "Checking pod status: completed") + }) + + It("should create a workload pod", func(ctx context.Context) { + _, _ = fmt.Fprintln(GinkgoWriter, "Workload pod creation: started") + + var err error + + workloadPod, err = kubeClient.CreatePod(ctx, + testContext.namespace, + labels, + workloadPodName, + workloadContainerName, + workloadImage, + ) + + Expect(err).ShouldNot(HaveOccurred(), + "Workload pod creation: Failed create workload pod with err: %v", err) + Eventually(func(ctx context.Context) bool { + isReady, err := kubeClient.CheckPodStatus(ctx, + testContext.namespace, + workloadPod.Name, func(namespace, podName string, status corev1.PodStatus) (bool, error) { + return status.Phase == corev1.PodSucceeded, nil + }) + if err != nil { + Fail(fmt.Sprintf("Workload pod creation: Checking pod status: Failed with error: %v", err)) + } + + return isReady + }).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue()) + + _, _ = fmt.Fprintln(GinkgoWriter, "Workload pod creation: completed") + }) + + It("should wait for 30 seconds, to read metrics", func() { + time.Sleep(30 * time.Second) + }) + + var metricsResponse []byte + + It("should read metrics", func(ctx context.Context) { + _, _ = fmt.Fprintln(GinkgoWriter, "Read metrics: started") + + Eventually(func(ctx context.Context) bool { + var err error + + metricsResponse, err = kubeClient.DoHttpRequest(ctx, + testContext.namespace, + dcgmExpPod.Name, + dcgmExporterPort, + "metrics") + if err != nil { + Fail(fmt.Sprintf("Read metrics: Failed with error: %v", err)) + } + + return len(metricsResponse) > 0 + }).WithPolling(time.Second).Within(time.Minute).WithContext(ctx).Should(BeTrue()) + _, _ = fmt.Fprintln(GinkgoWriter, "Read metrics: completed") + }) + + It("should verify metrics", func(ctx context.Context) { - s.T().Run("Verify metrics", func(t *testing.T) { - if s.T().Failed() { - s.T().Skip("Test skipped, because previous step failed") - } - require.EventuallyWithT(t, func(c *assert.CollectT) { - metrics, err := s.k8SClient.DoHttpRequest(s.ctx, - s.namespace, - s.dcgmExpPod.Name, - dcgmExporterPort, - "metrics") - assert.NoError(c, err, "Failed to make request to the /metrics endpoint") - assert.NotEmpty(c, metrics) + Expect(metricsResponse).ShouldNot(BeEmpty()) var parser expfmt.TextParser - metricFamilies, err := parser.TextToMetricFamilies(bytes.NewReader(metrics)) - require.NoError(c, err) - assert.Greater(c, len(metricFamilies), 0) + metricFamilies, err := parser.TextToMetricFamilies(bytes.NewReader(metricsResponse)) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(metricFamilies)).Should(BeNumerically(">", 0)) for _, metricFamily := range metricFamilies { - assert.NotNil(c, metricFamily) - + Expect(metricFamily).ShouldNot(BeNil()) metrics := metricFamily.GetMetric() - assert.NotNil(c, metrics) + Expect(metrics).ShouldNot(BeNil()) // Each metric must have namespace, pod and container labels for _, metric := range metrics { @@ -284,64 +340,18 @@ func (s *Suite) TestDCGMExporter() { labelName := ptr.Deref(label.Name, "") if slices.Contains(expectedLabels, labelName) { actualLabels = append(actualLabels, labelName) - assert.NotNil(c, label.Value) - assert.NotEmptyf(c, - ptr.Deref(label.Value, ""), - "The %s metric contains a label named %q label with empty value.", + Expect(label.Value).ShouldNot(BeNil()) + Expect(ptr.Deref(label.Value, "")).ShouldNot(BeEmpty(), "The %s metric contains a label named %q label with empty value.", 
ptr.Deref(metricFamily.Name, ""), labelName, ) } } - assert.Equalf(c, len(expectedLabels), len(actualLabels), + Expect(len(actualLabels)).Should(Equal(len(expectedLabels)), "Metric %s doesn't contains expected labels: %v, actual labels: %v", ptr.Deref(metricFamily.Name, ""), expectedLabels, metric.Label) } } - }, time.Minute, time.Second) + }) }) -} - -func (s *Suite) DCGMExporterPrechecks() { - s.Run("Checking pre-requisite: dcgm-exporter is up and running", - func() { - s.T().Log("Checking the dcgm-exporter pod....") - s.T().Log("It can take up to the 15 minutes.") - labelMap := map[string]string{dcgmExporterPodNameLabel: dcgmExporterPodNameLabelValue} - - var pod *corev1.Pod - - s.Require().Eventuallyf(func() bool { - pods, err := s.k8SClient.GetPodsByLabel(s.ctx, s.namespace, labelMap) - if err != nil { - log.Warnf("Error retrieving pods: %v", err) - return false - } - - s.Require().Lenf(pods, 1, "Expected a one pod only") - pod = &pods[0] - - return true - }, 15*time.Minute, time.Second, "The pod was not created") - - s.Require().NotNil(pod, "Nil value is not expected after pod created") - - var errs error - s.Require().Eventuallyf(func() bool { - isReady, err := s.k8SClient.CheckPodCondition(s.ctx, s.namespace, pod.Name, corev1.PodReady) - if err != nil { - errs = err - return true - } - - s.dcgmExpPod = pod - - return isReady - }, time.Minute, time.Second, "The %s pod is not running", pod.Name) - - s.Require().NoError(errs) - - s.T().Log("The dcgm-exporter pod is running") - }, - ) -} +}) diff --git a/tests/e2e/internal/framework/helm.go b/tests/e2e/internal/framework/helm.go index 8612606e..727771f1 100644 --- a/tests/e2e/internal/framework/helm.go +++ b/tests/e2e/internal/framework/helm.go @@ -64,6 +64,9 @@ func NewHelmClient(opts ...HelmClientOption) (*HelmClient, error) { Namespace: client.namespace, RepositoryConfig: client.repositoryConfig, RepositoryCache: client.repositoryCache, + DebugLog: func(format string, v ...interface{}) { + // suppress helm chart client debug log + }, }, RestConfig: client.k8sRestConfig, } diff --git a/tests/e2e/internal/framework/kube.go b/tests/e2e/internal/framework/kube.go index f1182cfb..17283bc4 100644 --- a/tests/e2e/internal/framework/kube.go +++ b/tests/e2e/internal/framework/kube.go @@ -75,27 +75,16 @@ func (c *KubeClient) GetPodsByLabel(ctx context.Context, namespace string, label return podList.Items, nil } -func (c *KubeClient) CheckPodCondition(ctx context.Context, +func (c *KubeClient) CheckPodStatus(ctx context.Context, namespace, podName string, - podConditionType corev1.PodConditionType) (bool, error) { + condition func(namespace, podName string, status corev1.PodStatus) (bool, error)) (bool, error) { pod, err := c.client.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) if err != nil { return false, fmt.Errorf("unexpected error getting pod %s; err: %w", podName, err) } - for _, c := range pod.Status.Conditions { - if c.Type != podConditionType { - continue - } - if c.Status == corev1.ConditionTrue { - return true, nil - } - } - - for _, c := range pod.Status.ContainerStatuses { - if c.State.Waiting != nil && c.State.Waiting.Reason == "CrashLoopBackOff" { - return false, fmt.Errorf("pod %s in namespace %s is in CrashLoopBackOff", pod.Name, pod.Namespace) - } + if condition != nil { + return condition(namespace, podName, pod.Status) } return false, nil @@ -118,6 +107,7 @@ func (c *KubeClient) CreatePod(ctx context.Context, Labels: labels, }, Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, Containers: 
[]corev1.Container{ { Name: containerName, diff --git a/tests/e2e/main_test.go b/tests/e2e/main_test.go index 289a0bf7..e0850d63 100644 --- a/tests/e2e/main_test.go +++ b/tests/e2e/main_test.go @@ -22,60 +22,64 @@ import ( "os" "testing" - "github.com/sirupsen/logrus" - "github.com/stretchr/testify/suite" - "github.com/google/uuid" + "github.com/onsi/ginkgo/v2" + "github.com/onsi/ginkgo/v2/types" + "github.com/onsi/gomega" ) var runID = uuid.New() -var log *logrus.Entry - -var suiteCfg = suiteConfig{} +var testContext = testContextType{} func TestMain(m *testing.M) { - // Create a new logger instance - logrus.SetOutput(os.Stdout) - logrus.SetFormatter(&logrus.TextFormatter{}) - logrus.SetLevel(logrus.InfoLevel) - log = logrus.WithField("e2eRunID", runID) - - flag.StringVar(&suiteCfg.kubeconfig, + flag.StringVar(&testContext.kubeconfig, "kubeconfig", "~/.kube/config", "path to the kubeconfig file.") - flag.StringVar(&suiteCfg.namespace, + flag.StringVar(&testContext.namespace, "namespace", "dcgm-exporter", "Namespace name to use for the DCGM-exporter deployment") - flag.StringVar(&suiteCfg.chart, + flag.StringVar(&testContext.chart, "chart", "", "Helm chart to use") - flag.StringVar(&suiteCfg.imageRepository, + flag.StringVar(&testContext.imageRepository, "image-repository", "", "DCGM-exporter image repository") - flag.StringVar(&suiteCfg.imageTag, + flag.StringVar(&testContext.imageTag, "image-tag", "", "DCGM-exporter image tag to use") - flag.StringVar(&suiteCfg.arguments, + flag.StringVar(&testContext.arguments, "arguments", "", - "DCGM-exporter command line") + `DCGM-exporter command line arguments. Example: -arguments="{-f=/etc/dcgm-exporter/default-counters.csv}"`) flag.Parse() + os.Exit(m.Run()) } -// TestRunSuite will be run by the 'go test' command -func TestRunSuite(t *testing.T) { - suite.Run(t, NewSuite()) +func createGinkgoConfig() (types.SuiteConfig, types.ReporterConfig) { + // fetch the current config + suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration() + // Randomize specs as well as suites + suiteConfig.RandomizeAllSpecs = true + return suiteConfig, reporterConfig +} + +func TestE2E(t *testing.T) { + gomega.RegisterFailHandler(ginkgo.Fail) + + // Run tests through the Ginkgo runner with output to console + JUnit for Jenkins + suiteConfig, reporterConfig := createGinkgoConfig() + ginkgo.RunSpecs(t, "DCGM-exporter e2e suite", suiteConfig, reporterConfig) }