From b14e7f8892bb89bb93116e7a26520eac3e0880b4 Mon Sep 17 00:00:00 2001
From: Douglas Wightman <dwightman@nvidia.com>
Date: Tue, 7 Jan 2025 09:41:18 -0700
Subject: [PATCH] DCGM-Exporter 4.0.0

- Update to DCGM 4.0.0
- Major refactor to enable clean mock testing
- Refactor metric collection to align with prometheus best practices
- Many more bug fixes and improvements
---
 .devcontainer/Dockerfile                      |   70 +-
 .github/workflows/go.yml                      |    2 +-
 .gitignore                                    |    6 +-
 .vscode/launch.json                           |   22 +-
 Jenkinsfile                                   |   64 -
 Makefile                                      |   76 +-
 README.md                                     |    5 +-
 cmd/dcgm-exporter/main.go                     |   10 +-
 dcgm-exporter.yaml                            |   12 +-
 deployment/Chart.yaml                         |    4 +-
 deployment/templates/_helpers.tpl             |   20 +
 deployment/templates/daemonset.yaml           |   37 +
 deployment/templates/tls-secret.yaml          |   43 +
 .../templates/web-config-configmap.yaml       |   40 +
 deployment/values.yaml                        |   42 +-
 docker/{Dockerfile.ubi9 => Dockerfile.ubi}    |   77 +-
 docker/Dockerfile.ubuntu                      |   98 +
 docker/Dockerfile.ubuntu22.04                 |   88 -
 etc/default-counters.csv                      |   27 +-
 go.mod                                        |  142 +-
 go.sum                                        |  353 +--
 hack/VERSION                                  |    8 +-
 .../mocks/pkg/collector/mock_collector.go     |   81 +
 .../mocks/pkg/dcgmprovider/mock_client.go     |  507 +++
 .../mocks/pkg/deviceinfo/mock_device_info.go  |  266 ++
 .../pkg/devicewatcher/mock_device_watcher.go  |   87 +
 .../mock_device_watchlist_manager.go          |   85 +
 internal/mocks/pkg/elf/mock_elf.go            |   69 +
 internal/mocks/pkg/exec/mock_cmd.go           |   68 +
 internal/mocks/pkg/exec/mock_exec.go          |   73 +
 .../mocks/pkg/nvmlprovider/mock_client.go     |   81 +
 .../os/{dir_entry.go => mock_dir_entry.go}    |    2 +-
 .../os/{file_info.go => mock_file_info.go}    |    2 +-
 internal/mocks/pkg/os/{os.go => mock_os.go}   |   14 +-
 .../pkg/transformation/mock_transformer.go    |   84 +
 .../pkg/transformations/mock_transformer.go   |   83 +
 internal/pkg/appconfig/const.go               |   26 +
 .../pkg/appconfig/types.go                    |   19 +-
 internal/pkg/collector/base_collector.go      |   98 +
 .../pkg/collector}/clock_events_collector.go  |   49 +-
 .../collector/clock_events_collector_test.go  |  798 +++++
 internal/pkg/collector/collector_factory.go   |  172 ++
 .../pkg/collector/collector_factory_test.go   |  580 ++++
 internal/pkg/collector/const.go               |   24 +
 internal/pkg/collector/expcollector.go        |  138 +
 .../pkg/collector}/gpu_collector.go           |  209 +-
 internal/pkg/collector/gpu_collector_test.go  |  168 +
 .../pkg/collector/gpu_health_collector.go     |  386 +++
 .../collector/gpu_health_collector_test.go    |  364 +++
 internal/pkg/collector/types.go               |   92 +
 .../pkg/collector/variables.go                |    2 +-
 internal/pkg/collector/xid_collector.go       |   81 +
 internal/pkg/collector/xid_collector_test.go  |  534 ++++
 .../pkg/collector}/xid_errors.go              |    2 +-
 internal/pkg/counters/const.go                |   28 +
 .../pkg/counters/counter_config.go            |   50 +-
 .../pkg/counters/counter_config_test.go       |   32 +-
 .../pkg/counters/exporter_counters.go         |   17 +-
 .../pkg/counters/exporter_counters_test.go    |    4 +-
 internal/pkg/counters/types.go                |   50 +
 internal/pkg/counters/variables.go            |   29 +
 internal/pkg/dcgmprovider/dcgm.go             |  256 ++
 internal/pkg/dcgmprovider/types.go            |   59 +
 internal/pkg/deviceinfo/device_info.go        |  597 ++++
 internal/pkg/deviceinfo/device_info_test.go   | 2749 +++++++++++++++++
 internal/pkg/deviceinfo/testutils.go          |  196 ++
 internal/pkg/deviceinfo/types.go              |   72 +
 internal/pkg/devicemonitoring/const.go        |   21 +
 .../pkg/devicemonitoring/device_monitoring.go |  251 ++
 .../device_monitoring_test.go                 | 1610 ++++++++++
 .../pkg/devicemonitoring/types.go             |   28 +-
 internal/pkg/devicewatcher/const.go           |   24 +
 internal/pkg/devicewatcher/device_watcher.go  |  295 ++
 .../pkg/devicewatcher/device_watcher_test.go  | 1951 ++++++++++++
 internal/pkg/devicewatcher/types.go           |   31 +
 internal/pkg/devicewatcher/variables.go       |   21 +
 .../device_watchlist_manager.go               |  151 +
 .../device_watchlist_manager_test.go          |  780 +++++
 internal/pkg/devicewatchlistmanager/types.go  |   30 +
 internal/pkg/elf/README.md                    |    3 +
 internal/pkg/elf/elf.go                       |   29 +
 internal/pkg/elf/types.go                     |   24 +
 internal/pkg/exec/README.md                   |    3 +
 internal/pkg/exec/exec.go                     |   48 +
 internal/pkg/hostname/hostname.go             |   57 +
 internal/pkg/hostname/hostname_test.go        |  130 +
 .../pkg/integration_test/collector_test.go    | 1117 +++++++
 .../integration_test/transformation_test.go   |  105 +
 internal/pkg/logging/const.go                 |   28 +
 internal/pkg/logging/logger_adapter.go        |   76 -
 internal/pkg/logging/logger_adapter_test.go   |  114 -
 internal/pkg/nvmlprovider/provider.go         |  165 +-
 internal/pkg/nvmlprovider/provider_test.go    |  101 +-
 internal/pkg/nvmlprovider/types.go            |   24 +
 internal/pkg/os/os.go                         |    9 +-
 internal/pkg/prerequisites/dcgmlib_rule.go    |   86 +
 .../pkg/prerequisites/dcgmlib_rule_test.go    |  210 ++
 internal/pkg/prerequisites/types.go           |   21 +
 internal/pkg/prerequisites/validation.go      |   32 +
 internal/pkg/prerequisites/validation_test.go |   99 +
 internal/pkg/prerequisites/variables.go       |   42 +
 internal/pkg/registry/registry.go             |  120 +
 internal/pkg/registry/registry_test.go        |  141 +
 internal/pkg/registry/types.go                |   26 +
 internal/pkg/rendermetrics/render_metrics.go  |  155 +
 .../pkg/rendermetrics/render_metrics_test.go  |  132 +
 internal/pkg/server/server.go                 |  195 ++
 internal/pkg/server/server_test.go            |  277 ++
 internal/pkg/server/types.go                  |   42 +
 {pkg => internal/pkg}/stdout/capture.go       |    9 +-
 {pkg => internal/pkg}/stdout/capture_test.go  |   32 +-
 .../pkg}/stdout/capture_test_wrapper.go       |   36 +-
 .../pkg}/stdout/stdoutprocessor.go            |    0
 internal/pkg/testutils/const.go               |   49 +
 internal/pkg/testutils/test_utils.go          |  313 ++
 internal/pkg/testutils/testutils.go           |   61 -
 internal/pkg/testutils/types.go               |   22 +
 internal/pkg/testutils/variables.go           |  145 +
 internal/pkg/transformation/const.go          |   30 +
 .../pkg/transformation}/hpc.go                |   43 +-
 .../pkg/transformation}/hpc_test.go           |   75 +-
 .../pkg/transformation}/kubernetes.go         |   53 +-
 .../pkg/transformation/kubernetes_test.go     |  217 ++
 internal/pkg/transformation/transformer.go    |   37 +
 .../pkg/transformation/transformer_test.go    |   67 +
 internal/pkg/transformation/types.go          |   40 +
 internal/pkg/transformation/variables.go      |   25 +
 .../pkg/utils}/utils.go                       |   26 +-
 .../pkg/utils}/utils_test.go                  |   75 +-
 .../systemd/nvidia-dcgm-exporter.service      |   33 +
 pkg/cmd/app.go                                |  243 +-
 pkg/cmd/app_test.go                           |   85 +-
 pkg/{dcgmexporter => cmd}/const.go            |   21 +-
 .../clock_events_collector_test.go            |  483 ---
 pkg/dcgmexporter/dcgm.go                      |  131 -
 pkg/dcgmexporter/expcollector.go              |  260 --
 .../field_entity_group_system_info.go         |   95 -
 pkg/dcgmexporter/gpu_collector_test.go        |  486 ---
 pkg/dcgmexporter/kubernetes_test.go           |  337 --
 pkg/dcgmexporter/pipeline.go                  |  377 ---
 pkg/dcgmexporter/pipeline_test.go             |  208 --
 pkg/dcgmexporter/registry.go                  |   92 -
 pkg/dcgmexporter/registry_test.go             |  108 -
 pkg/dcgmexporter/server.go                    |  166 -
 pkg/dcgmexporter/system_info.go               |  890 ------
 pkg/dcgmexporter/system_info_test.go          |  671 ----
 pkg/dcgmexporter/types.go                     |  160 -
 pkg/dcgmexporter/xid_collector.go             |   68 -
 pkg/dcgmexporter/xid_collector_test.go        |  312 --
 scripts/test_coverage.sh                      |   23 +-
 service-monitor.yaml                          |    4 +-
 tests/e2e/Makefile                            |   28 +-
 tests/e2e/e2e_actions_test.go                 |  108 +-
 tests/e2e/e2e_suite_test.go                   |  255 +-
 .../e2e_verify_default_configuration_test.go  |  178 ++
 tests/e2e/e2e_verify_http_basic_auth_test.go  |  134 +
 tests/e2e/e2e_verify_tls_test.go              |  118 +
 tests/e2e/internal/framework/helm.go          |   23 +-
 tests/e2e/internal/framework/kube.go          |  106 +-
 tests/e2e/main_test.go                        |   10 +
 tests/integration/start_read_test.go          |    5 +-
 tests/integration/start_with_tls_test.go      |    8 +-
 .../integration/testdata/default-counters.csv |   77 +
 163 files changed, 20574 insertions(+), 6517 deletions(-)
 delete mode 100644 Jenkinsfile
 create mode 100644 deployment/templates/tls-secret.yaml
 create mode 100644 deployment/templates/web-config-configmap.yaml
 rename docker/{Dockerfile.ubi9 => Dockerfile.ubi} (58%)
 create mode 100644 docker/Dockerfile.ubuntu
 delete mode 100644 docker/Dockerfile.ubuntu22.04
 create mode 100644 internal/mocks/pkg/collector/mock_collector.go
 create mode 100644 internal/mocks/pkg/dcgmprovider/mock_client.go
 create mode 100644 internal/mocks/pkg/deviceinfo/mock_device_info.go
 create mode 100644 internal/mocks/pkg/devicewatcher/mock_device_watcher.go
 create mode 100644 internal/mocks/pkg/devicewatchlistmanager/mock_device_watchlist_manager.go
 create mode 100644 internal/mocks/pkg/elf/mock_elf.go
 create mode 100644 internal/mocks/pkg/exec/mock_cmd.go
 create mode 100644 internal/mocks/pkg/exec/mock_exec.go
 create mode 100644 internal/mocks/pkg/nvmlprovider/mock_client.go
 rename internal/mocks/pkg/os/{dir_entry.go => mock_dir_entry.go} (96%)
 rename internal/mocks/pkg/os/{file_info.go => mock_file_info.go} (96%)
 rename internal/mocks/pkg/os/{os.go => mock_os.go} (93%)
 create mode 100644 internal/mocks/pkg/transformation/mock_transformer.go
 create mode 100644 internal/mocks/pkg/transformations/mock_transformer.go
 create mode 100644 internal/pkg/appconfig/const.go
 rename pkg/dcgmexporter/config.go => internal/pkg/appconfig/types.go (84%)
 create mode 100644 internal/pkg/collector/base_collector.go
 rename {pkg/dcgmexporter => internal/pkg/collector}/clock_events_collector.go (77%)
 create mode 100644 internal/pkg/collector/clock_events_collector_test.go
 create mode 100644 internal/pkg/collector/collector_factory.go
 create mode 100644 internal/pkg/collector/collector_factory_test.go
 create mode 100644 internal/pkg/collector/const.go
 create mode 100644 internal/pkg/collector/expcollector.go
 rename {pkg/dcgmexporter => internal/pkg/collector}/gpu_collector.go (58%)
 create mode 100644 internal/pkg/collector/gpu_collector_test.go
 create mode 100644 internal/pkg/collector/gpu_health_collector.go
 create mode 100644 internal/pkg/collector/gpu_health_collector_test.go
 create mode 100644 internal/pkg/collector/types.go
 rename pkg/dcgmexporter/os.go => internal/pkg/collector/variables.go (97%)
 create mode 100644 internal/pkg/collector/xid_collector.go
 create mode 100644 internal/pkg/collector/xid_collector_test.go
 rename {pkg/dcgmexporter => internal/pkg/collector}/xid_errors.go (99%)
 create mode 100644 internal/pkg/counters/const.go
 rename pkg/dcgmexporter/parser.go => internal/pkg/counters/counter_config.go (74%)
 rename pkg/dcgmexporter/parser_test.go => internal/pkg/counters/counter_config_test.go (81%)
 rename pkg/dcgmexporter/exporter_metrics.go => internal/pkg/counters/exporter_counters.go (82%)
 rename pkg/dcgmexporter/exporter_metrics_test.go => internal/pkg/counters/exporter_counters_test.go (95%)
 create mode 100644 internal/pkg/counters/types.go
 create mode 100644 internal/pkg/counters/variables.go
 create mode 100644 internal/pkg/dcgmprovider/dcgm.go
 create mode 100644 internal/pkg/dcgmprovider/types.go
 create mode 100644 internal/pkg/deviceinfo/device_info.go
 create mode 100644 internal/pkg/deviceinfo/device_info_test.go
 create mode 100644 internal/pkg/deviceinfo/testutils.go
 create mode 100644 internal/pkg/deviceinfo/types.go
 create mode 100644 internal/pkg/devicemonitoring/const.go
 create mode 100644 internal/pkg/devicemonitoring/device_monitoring.go
 create mode 100644 internal/pkg/devicemonitoring/device_monitoring_test.go
 rename pkg/dcgmexporter/test_utils.go => internal/pkg/devicemonitoring/types.go (60%)
 create mode 100644 internal/pkg/devicewatcher/const.go
 create mode 100644 internal/pkg/devicewatcher/device_watcher.go
 create mode 100644 internal/pkg/devicewatcher/device_watcher_test.go
 create mode 100644 internal/pkg/devicewatcher/types.go
 create mode 100644 internal/pkg/devicewatcher/variables.go
 create mode 100644 internal/pkg/devicewatchlistmanager/device_watchlist_manager.go
 create mode 100644 internal/pkg/devicewatchlistmanager/device_watchlist_manager_test.go
 create mode 100644 internal/pkg/devicewatchlistmanager/types.go
 create mode 100644 internal/pkg/elf/README.md
 create mode 100644 internal/pkg/elf/elf.go
 create mode 100644 internal/pkg/elf/types.go
 create mode 100644 internal/pkg/exec/README.md
 create mode 100644 internal/pkg/exec/exec.go
 create mode 100644 internal/pkg/hostname/hostname.go
 create mode 100644 internal/pkg/hostname/hostname_test.go
 create mode 100644 internal/pkg/integration_test/collector_test.go
 create mode 100644 internal/pkg/integration_test/transformation_test.go
 create mode 100644 internal/pkg/logging/const.go
 delete mode 100644 internal/pkg/logging/logger_adapter.go
 delete mode 100644 internal/pkg/logging/logger_adapter_test.go
 create mode 100644 internal/pkg/nvmlprovider/types.go
 create mode 100644 internal/pkg/prerequisites/dcgmlib_rule.go
 create mode 100644 internal/pkg/prerequisites/dcgmlib_rule_test.go
 create mode 100644 internal/pkg/prerequisites/types.go
 create mode 100644 internal/pkg/prerequisites/validation.go
 create mode 100644 internal/pkg/prerequisites/validation_test.go
 create mode 100644 internal/pkg/prerequisites/variables.go
 create mode 100644 internal/pkg/registry/registry.go
 create mode 100644 internal/pkg/registry/registry_test.go
 create mode 100644 internal/pkg/registry/types.go
 create mode 100644 internal/pkg/rendermetrics/render_metrics.go
 create mode 100644 internal/pkg/rendermetrics/render_metrics_test.go
 create mode 100644 internal/pkg/server/server.go
 create mode 100644 internal/pkg/server/server_test.go
 create mode 100644 internal/pkg/server/types.go
 rename {pkg => internal/pkg}/stdout/capture.go (84%)
 rename {pkg => internal/pkg}/stdout/capture_test.go (76%)
 rename {pkg => internal/pkg}/stdout/capture_test_wrapper.go (56%)
 rename {pkg => internal/pkg}/stdout/stdoutprocessor.go (100%)
 create mode 100644 internal/pkg/testutils/const.go
 create mode 100644 internal/pkg/testutils/test_utils.go
 delete mode 100644 internal/pkg/testutils/testutils.go
 create mode 100644 internal/pkg/testutils/types.go
 create mode 100644 internal/pkg/testutils/variables.go
 create mode 100644 internal/pkg/transformation/const.go
 rename {pkg/dcgmexporter => internal/pkg/transformation}/hpc.go (62%)
 rename {pkg/dcgmexporter => internal/pkg/transformation}/hpc_test.go (77%)
 rename {pkg/dcgmexporter => internal/pkg/transformation}/kubernetes.go (75%)
 create mode 100644 internal/pkg/transformation/kubernetes_test.go
 create mode 100644 internal/pkg/transformation/transformer.go
 create mode 100644 internal/pkg/transformation/transformer_test.go
 create mode 100644 internal/pkg/transformation/types.go
 create mode 100644 internal/pkg/transformation/variables.go
 rename {pkg/dcgmexporter => internal/pkg/utils}/utils.go (73%)
 rename {pkg/dcgmexporter => internal/pkg/utils}/utils_test.go (50%)
 create mode 100644 packaging/config-files/systemd/nvidia-dcgm-exporter.service
 rename pkg/{dcgmexporter => cmd}/const.go (73%)
 delete mode 100644 pkg/dcgmexporter/clock_events_collector_test.go
 delete mode 100644 pkg/dcgmexporter/dcgm.go
 delete mode 100644 pkg/dcgmexporter/expcollector.go
 delete mode 100644 pkg/dcgmexporter/field_entity_group_system_info.go
 delete mode 100644 pkg/dcgmexporter/gpu_collector_test.go
 delete mode 100644 pkg/dcgmexporter/kubernetes_test.go
 delete mode 100644 pkg/dcgmexporter/pipeline.go
 delete mode 100644 pkg/dcgmexporter/pipeline_test.go
 delete mode 100644 pkg/dcgmexporter/registry.go
 delete mode 100644 pkg/dcgmexporter/registry_test.go
 delete mode 100644 pkg/dcgmexporter/server.go
 delete mode 100644 pkg/dcgmexporter/system_info.go
 delete mode 100644 pkg/dcgmexporter/system_info_test.go
 delete mode 100644 pkg/dcgmexporter/types.go
 delete mode 100644 pkg/dcgmexporter/xid_collector.go
 delete mode 100644 pkg/dcgmexporter/xid_collector_test.go
 create mode 100644 tests/e2e/e2e_verify_default_configuration_test.go
 create mode 100644 tests/e2e/e2e_verify_http_basic_auth_test.go
 create mode 100644 tests/e2e/e2e_verify_tls_test.go
 create mode 100644 tests/integration/testdata/default-counters.csv

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 462bf5f6..e7882b15 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,13 +1,12 @@
-FROM nvcr.io/nvidia/cuda:12.3.1-base-ubuntu22.04
-ARG GOLANG_VERSION=1.21.5
+FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04
+ARG GOLANG_VERSION=1.22.5
 ARG USERNAME=developer
 ARG USER_UID=1000
 ARG USER_GID=1000
-ARG DCGM_VERSION=3.3.3
 # Create a user 'developer' with UID=1000, add to 'developer' group, and add to 'sudo' group
 RUN groupadd -g $USER_GID $USERNAME && \
-    useradd -m -u $USER_GID -g $USERNAME -s /bin/bash $USERNAME && \
-    usermod -aG sudo $USERNAME
+	useradd -m -u $USER_UID -g $USERNAME -s /bin/bash $USERNAME && \
+	usermod -aG sudo $USERNAME
 # Allow 'developer' to use sudo without a password
 RUN echo "$USERNAME ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
 
@@ -15,17 +14,26 @@ RUN --mount=type=cache,target=/var/cache/apt \
 	set -eux; \
 	apt-get update; \
 	apt-get install -y --no-install-recommends \
-        git \
-		ca-certificates \
-		g++ \
-		gcc \
-		libc6-dev \
-		make \
-		pkg-config \
-        wget \
-		datacenter-gpu-manager=1:${DCGM_VERSION} \
-		libcap2-bin \
-		&& apt-get autoremove -y \
+	git \
+	ca-certificates \
+	g++ \
+	gcc \
+	libc6-dev \
+	make \
+	pkg-config \
+	wget \
+	datacenter-gpu-manager-4-core \
+	libcap2-bin \
+	&& install -m 0755 -d /etc/apt/keyrings \
+	&& wget -O /etc/apt/keyrings/docker.asc https://download.docker.com/linux/ubuntu/gpg \
+	&& chmod a+r /etc/apt/keyrings/docker.asc \
+	&& echo \
+	"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+	$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+	tee /etc/apt/sources.list.d/docker.list > /dev/null \
+	&& apt-get update \
+	&& apt-get install -y --no-install-recommends docker-ce docker-ce-cli containerd.io docker-buildx-plugin \
+	&& apt-get autoremove -y \
 	&& rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite \
 	# DCGM exporter doesn't use libdcgm_cublas_proxy*.so.
 	&& rm -rf /usr/lib/x86_64-linux-gnu/libdcgm_cublas_proxy*.so \
@@ -36,25 +44,25 @@ RUN set -eux; \
 	url=; \
 	echo "$arch"; \
 	case "$arch" in \
-		'amd64') \
-			url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz"; \
-			;; \
-		'arm64') \
-			url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz"; \
-			;; \
-		*) echo >&2 "error: unsupported architecture '$arch' (likely packaging update needed)"; exit 1 ;; \
+	'amd64') \
+	url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz"; \
+	;; \
+	'arm64') \
+	url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz"; \
+	;; \
+	*) echo >&2 "error: unsupported architecture '$arch' (likely packaging update needed)"; exit 1 ;; \
 	esac; \
 	build=; \
 	if [ -z "$url" ]; then \
-# https://github.com/golang/go/issues/38536#issuecomment-616897960
-		build=1; \
-		url="https://dl.google.com/go/go${GOLANG_VERSION}.src.tar.gz"; \
-		echo >&2; \
-		echo >&2 "warning: current architecture ($arch) does not have a compatible Go binary release; will be building from source"; \
-		echo >&2; \
+	# https://github.com/golang/go/issues/38536#issuecomment-616897960
+	build=1; \
+	url="https://dl.google.com/go/go${GOLANG_VERSION}.src.tar.gz"; \
+	echo >&2; \
+	echo >&2 "warning: current architecture ($arch) does not have a compatible Go binary release; will be building from source"; \
+	echo >&2; \
 	fi; \
-    wget -O go.tgz "$url" --progress=dot:giga; \
-    tar -C /usr/local -xzf go.tgz; \
+	wget -O go.tgz "$url" --progress=dot:giga; \
+	tar -C /usr/local -xzf go.tgz; \
 	rm go.tgz
 ENV GOTOOLCHAIN=local
 ENV GOPATH /go
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index eb423060..39fb9c5f 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -16,7 +16,7 @@ jobs:
     - name: Set up Go
       uses: actions/setup-go@v2
       with:
-        go-version: 1.21
+        go-version: 1.22
 
     - name: Build
       run: make binary
diff --git a/.gitignore b/.gitignore
index 2b06b62b..6864811c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,7 +11,9 @@ tests.cov
 test_results.json
 .scannerwork
 dist/
-.run/
+.run
+dist/
+
 ###############################################################################
 # JetBrains
 # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
@@ -236,4 +238,4 @@ $RECYCLE.BIN/
 *.msp
 
 # Windows shortcuts
-*.lnk
+*.lnk
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 8d941056..0e7c9609 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -10,12 +10,18 @@
             "request": "launch",
             "mode": "test",
             "program": "${workspaceFolder}/tests/e2e",
-            "args": ["-test.v",
-            "--ginkgo.v",
-            "-kubeconfig","~/.kube/config",
-            "-chart","./../../deployment/",
-            "-image-repository","nvidia/dcgm-exporter",
-            "-arguments","{-f=/etc/dcgm-exporter/default-counters.csv,--enable-dcgm-log=true,--dcgm-log-level=ERROR}"],
+            "args": [
+                "-test.v",
+                "--ginkgo.v",
+                "-kubeconfig",
+                "~/.kube/config",
+                "-chart",
+                "./../../deployment/",
+                "-image-repository",
+                "nvidia/dcgm-exporter",
+                "-arguments",
+                "{-f=/etc/dcgm-exporter/default-counters.csv}"
+            ],
             "env": {},
             "buildFlags": "-tags=e2e"
         },
@@ -30,8 +36,8 @@
                 "-f",
                 "./etc/default-counters.csv",
                 "--debug",
-                "--enable-dcgm-log",
-                "--dcgm-log-level=INFO"
+                "-r",
+                "localhost:5555"
             ]
         }
     ]
diff --git a/Jenkinsfile b/Jenkinsfile
deleted file mode 100644
index c5233875..00000000
--- a/Jenkinsfile
+++ /dev/null
@@ -1,64 +0,0 @@
-@Library(['shared-libs']) _
- 
-pipeline {
-
-    agent {
-        dockerfile {
-            label 'docker'
-            filename 'Dockerfile'
-            args '-v /etc/passwd:/etc/passwd:ro -v /var/run/docker.sock:/var/run/docker.sock:rw'
-        }
-    }
- 
-    options {
-        ansiColor('xterm')
-        timestamps()
-        timeout(time: 1, unit: 'HOURS')
-        gitLabConnection('GitLab Master')
-        buildDiscarder(logRotator(numToKeepStr: '100', artifactNumToKeepStr: '10'))
-    }
- 
-    environment {
-        HOME="${WORKSPACE}"
-        PYTHONUNBUFFERED=1
-    }
- 
-    parameters {
-        string(name: 'REF', defaultValue: '\${gitlabBranch}', description: 'Commit to build')
-    }
- 
-    stages {
-        stage('Prep') {
-            steps {
-                script {
-                    updateGitlabCommitStatus(name: 'Jenkins CI', state: 'running')
-                }
-            }
-        }
-        stage('Compile') {
-            steps {
-                echo "building"
-                sh "make binary"
-            }
-        }
-	stage('Test') {
-            steps {
-                echo "Running tests"
-		// Tests require supported GPU
-                // make test-main
-                sh "make check-format"
-            }
-        }
-    }
-    post {
-        always {
-            script{
-                String status = (currentBuild.currentResult == "SUCCESS") ? "success" : "failed"
-                updateGitlabCommitStatus(name: 'Jenkins CI', state: status)
-            }
-        }
-        cleanup {
-            cleanWs()
-        }
-    }
-}
diff --git a/Makefile b/Makefile
index 98c95003..2d9f5103 100644
--- a/Makefile
+++ b/Makefile
@@ -18,30 +18,29 @@ REGISTRY             ?= nvidia
 GO                   ?= go
 MKDIR                ?= mkdir
 GOLANGCILINT_TIMEOUT ?= 10m
+IMAGE_TAG            ?=
 
 DCGM_VERSION   := $(NEW_DCGM_VERSION)
-GOLANG_VERSION := 1.22.5
+GOLANG_VERSION := 1.22.9
 VERSION        := $(NEW_EXPORTER_VERSION)
 FULL_VERSION   := $(DCGM_VERSION)-$(VERSION)
 OUTPUT         := type=oci,dest=/dev/null
 PLATFORMS      := linux/amd64,linux/arm64
-DOCKERCMD      := docker buildx build
+DOCKERCMD      := docker --debug buildx build
 MODULE         := github.com/NVIDIA/dcgm-exporter
 
-
 .PHONY: all binary install check-format local
 all: update-version ubuntu22.04 ubi9
 
-binary: generate update-version
+binary: update-version
 	cd cmd/dcgm-exporter; $(GO) build -ldflags "-X main.BuildVersion=${DCGM_VERSION}-${VERSION}"
 
-test-main:
+test-main: generate
 	$(GO) test ./... -short
 
 install: binary
 	install -m 755 cmd/dcgm-exporter/dcgm-exporter /usr/bin/dcgm-exporter
 	install -m 644 -D ./etc/default-counters.csv /etc/dcgm-exporter/default-counters.csv
-	install -m 644 -D ./etc/dcp-metrics-included.csv /etc/dcgm-exporter/dcp-metrics-included.csv
 
 check-format:
 	test $$(gofmt -l pkg | tee /dev/stderr | wc -l) -eq 0
@@ -58,23 +57,71 @@ else
 	$(MAKE) PLATFORMS=linux/amd64 OUTPUT=type=docker DOCKERCMD='docker build'
 endif
 
-TARGETS = ubuntu22.04 ubi9
+ubi%: DOCKERFILE = docker/Dockerfile.ubi
+ubi%: --docker-build-%
+	@
+ubi9: BASE_IMAGE = nvcr.io/nvidia/cuda:12.6.3-base-ubi9
+ubi9: IMAGE_TAG = ubi9
+
+ubuntu%: DOCKERFILE = docker/Dockerfile.ubuntu
+ubuntu%: --docker-build-%
+	@
+ubuntu22.04: BASE_IMAGE = nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04
+ubuntu22.04: IMAGE_TAG = ubuntu22.04
 
-DOCKERFILE.ubuntu22.04 = docker/Dockerfile.ubuntu22.04
-DOCKERFILE.ubi9 = docker/Dockerfile.ubi9
 
-$(TARGETS):
+--docker-build-%:
+	@echo "Building for $@"
+	DOCKER_BUILDKIT=1 \
 	$(DOCKERCMD) --pull \
 		--output $(OUTPUT) \
+		--progress=plain \
 		--platform $(PLATFORMS) \
+		--build-arg BASEIMAGE="$(BASE_IMAGE)" \
 		--build-arg "GOLANG_VERSION=$(GOLANG_VERSION)" \
 		--build-arg "DCGM_VERSION=$(DCGM_VERSION)" \
 		--build-arg "VERSION=$(VERSION)" \
-		--tag "$(REGISTRY)/dcgm-exporter:$(FULL_VERSION)-$@" \
-		--file $(DOCKERFILE.$@) .
+		--tag $(REGISTRY)/dcgm-exporter:$(FULL_VERSION)$(if $(IMAGE_TAG),-$(IMAGE_TAG)) \
+		--file $(DOCKERFILE) .
+
+.PHONY: packages package-arm64 package-amd64
+packages: package-amd64 package-arm64
+
+package-arm64:
+	$(MAKE) package-build PLATFORMS=linux/arm64
+
+package-amd64:
+	$(MAKE) package-build PLATFORMS=linux/amd64
+
+package-build: IMAGE_TAG = ubuntu22.04
+package-build: ## Build the ubuntu22.04 image for PLATFORMS and pack its binary, configs, LICENSE and systemd unit into dist/
+	ARCH=`echo $(PLATFORMS) | cut -d'/' -f2`; \
+	if [ "$$ARCH" = "amd64" ]; then \
+		ARCH="x86-64"; \
+	fi; \
+	if [ "$$ARCH" = "arm64" ]; then \
+		ARCH="sbsa"; \
+	fi; \
+	export DIST_NAME="dcgm_exporter-linux-$$ARCH-$(VERSION)"; \
+	export COMPONENT_NAME="dcgm_exporter"; \
+	$(MAKE) ubuntu22.04 OUTPUT=type=docker PLATFORMS=$(PLATFORMS) && \
+	$(MKDIR) -p /tmp/$$DIST_NAME/$$COMPONENT_NAME && \
+	$(MKDIR) -p /tmp/$$DIST_NAME/$$COMPONENT_NAME/usr/bin && \
+	$(MKDIR) -p /tmp/$$DIST_NAME/$$COMPONENT_NAME/etc/dcgm-exporter && \
+	I=`docker create $(REGISTRY)/dcgm-exporter:$(FULL_VERSION)-$(IMAGE_TAG)` && \
+	docker cp $$I:/usr/bin/dcgm-exporter /tmp/$$DIST_NAME/$$COMPONENT_NAME/usr/bin/ && \
+	docker cp $$I:/etc/dcgm-exporter /tmp/$$DIST_NAME/$$COMPONENT_NAME/etc/ && \
+	cp ./LICENSE /tmp/$$DIST_NAME/$$COMPONENT_NAME && \
+	$(MKDIR) -p /tmp/$$DIST_NAME/$$COMPONENT_NAME/lib/systemd/system/ && \
+	cp ./packaging/config-files/systemd/nvidia-dcgm-exporter.service \
+		/tmp/$$DIST_NAME/$$COMPONENT_NAME/lib/systemd/system/nvidia-dcgm-exporter.service && \
+	docker rm -f $$I && \
+	$(MKDIR) -p $(CURDIR)/dist && \
+	cd "/tmp/$$DIST_NAME" && tar -czf $(CURDIR)/dist/$$DIST_NAME.tar.gz `ls -A` && \
+	rm -rf "/tmp/$$DIST_NAME";
 
 .PHONY: integration
-test-integration:
+test-integration: generate
 	go test -race -count=1 -timeout 5m -v $(TEST_ARGS) ./tests/integration/
 
 test-coverage:
@@ -83,7 +130,7 @@ test-coverage:
 
 .PHONY: lint
 lint:
-	golangci-lint run ./... --timeout $(GOLANGCILINT_TIMEOUT)  --new-from-rev=HEAD~1 --fix
+	golangci-lint run ./... --timeout $(GOLANGCILINT_TIMEOUT)  --new-from-rev=HEAD~1
 
 .PHONY: validate-modules
 validate-modules:
@@ -99,6 +146,7 @@ tools: ## Install required tools and utilities
 	go install github.com/axw/gocov/gocov@latest
 	go install golang.org/x/tools/cmd/goimports@latest
 	go install mvdan.cc/gofumpt@latest
+	go install github.com/wadey/gocovmerge@latest
 
 fmt:
 	find . -name '*.go' | xargs gofumpt -l -w
diff --git a/README.md b/README.md
index fa13ec5e..7e4a1e27 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Official documentation for DCGM-Exporter can be found on [docs.nvidia.com](https
 To gather metrics on a GPU node, simply start the `dcgm-exporter` container:
 
 ```shell
-docker run -d --gpus all --cap-add SYS_ADMIN --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.9-3.6.1-ubuntu22.04
+docker run -d --gpus all --cap-add SYS_ADMIN --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:4.0.0-4.0.0-ubuntu22.04
 curl localhost:9400/metrics
 # HELP DCGM_FI_DEV_SM_CLOCK SM clock frequency (in MHz).
 # TYPE DCGM_FI_DEV_SM_CLOCK gauge
@@ -111,8 +111,9 @@ To enable GPU-to-job mapping on the DCGM-exporter side, users must run the DCGM-
 
 In order to build dcgm-exporter ensure you have the following:
 
-* [Golang >= 1.21 installed](https://golang.org/)
+* [Golang >= 1.22 installed](https://golang.org/)
 * [DCGM installed](https://developer.nvidia.com/dcgm)
+* A Linux machine with a DCGM-compatible GPU.
 
 ```shell
 git clone https://github.com/NVIDIA/dcgm-exporter.git
diff --git a/cmd/dcgm-exporter/main.go b/cmd/dcgm-exporter/main.go
index 2dedaae1..94505f48 100644
--- a/cmd/dcgm-exporter/main.go
+++ b/cmd/dcgm-exporter/main.go
@@ -17,22 +17,20 @@
 package main
 
 import (
+	"log/slog"
 	"os"
 
-	"github.com/sirupsen/logrus"
-
 	_ "go.uber.org/automaxprocs"
 
 	"github.com/NVIDIA/dcgm-exporter/pkg/cmd"
 )
 
-var (
-	BuildVersion = "Filled by the build system"
-)
+var BuildVersion = "Filled by the build system"
 
 func main() {
 	app := cmd.NewApp(BuildVersion)
 	if err := app.Run(os.Args); err != nil {
-		logrus.Fatal(err)
+		slog.Error(err.Error())
+		os.Exit(1)
 	}
 }
diff --git a/dcgm-exporter.yaml b/dcgm-exporter.yaml
index d919dc83..8a6c8d6b 100644
--- a/dcgm-exporter.yaml
+++ b/dcgm-exporter.yaml
@@ -18,23 +18,23 @@ metadata:
   name: "dcgm-exporter"
   labels:
     app.kubernetes.io/name: "dcgm-exporter"
-    app.kubernetes.io/version: "3.6.1"
+    app.kubernetes.io/version: "4.0.0"
 spec:
   updateStrategy:
     type: RollingUpdate
   selector:
     matchLabels:
       app.kubernetes.io/name: "dcgm-exporter"
-      app.kubernetes.io/version: "3.6.1"
+      app.kubernetes.io/version: "4.0.0"
   template:
     metadata:
       labels:
         app.kubernetes.io/name: "dcgm-exporter"
-        app.kubernetes.io/version: "3.6.1"
+        app.kubernetes.io/version: "4.0.0"
       name: "dcgm-exporter"
     spec:
       containers:
-      - image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.9-3.6.1-ubuntu22.04"
+      - image: "nvcr.io/nvidia/k8s/dcgm-exporter:4.0.0-4.0.0-ubuntu22.04"
         env:
         - name: "DCGM_EXPORTER_LISTEN"
           value: ":9400"
@@ -66,11 +66,11 @@ metadata:
   name: "dcgm-exporter"
   labels:
     app.kubernetes.io/name: "dcgm-exporter"
-    app.kubernetes.io/version: "3.6.1"
+    app.kubernetes.io/version: "4.0.0"
 spec:
   selector:
     app.kubernetes.io/name: "dcgm-exporter"
-    app.kubernetes.io/version: "3.6.1"
+    app.kubernetes.io/version: "4.0.0"
   ports:
   - name: "metrics"
     port: 9400
diff --git a/deployment/Chart.yaml b/deployment/Chart.yaml
index a297d602..f8face93 100644
--- a/deployment/Chart.yaml
+++ b/deployment/Chart.yaml
@@ -1,9 +1,9 @@
 apiVersion: v2
 name: dcgm-exporter
 description: A Helm chart for DCGM exporter
-version: "3.7.0"
+version: "4.0.0"
 kubeVersion: ">= 1.19.0-0"
-appVersion: "3.6.1"
+appVersion: "4.0.0"
 sources:
 - https://github.com/nvidia/dcgm-exporter
 home: https://github.com/nvidia/dcgm-exporter/
diff --git a/deployment/templates/_helpers.tpl b/deployment/templates/_helpers.tpl
index ff71dd04..c14ede3a 100644
--- a/deployment/templates/_helpers.tpl
+++ b/deployment/templates/_helpers.tpl
@@ -73,3 +73,23 @@ Create the name of the service account to use
     {{ default "default" .Values.serviceAccount.name }}
 {{- end -}}
 {{- end -}}
+
+
+{{/*
+Create the name of the tls secret to use
+*/}}
+{{- define "dcgm-exporter.tlsCertsSecretName" -}}
+{{- if .Values.tlsServerConfig.existingSecret -}}
+    {{- printf "%s" (tpl .Values.tlsServerConfig.existingSecret $) -}}
+{{- else -}}
+    {{ printf "%s-tls" (include "dcgm-exporter.fullname" .) }}
+{{- end -}}
+{{- end -}}
+
+
+{{/*
+Create the name of the web-config ConfigMap to use
+*/}}
+{{- define "dcgm-exporter.webConfigConfigMap" -}}
+  {{ printf "%s-web-config.yml" (include "dcgm-exporter.fullname" .) }}
+{{- end -}}
\ No newline at end of file
diff --git a/deployment/templates/daemonset.yaml b/deployment/templates/daemonset.yaml
index c662d3e3..103f09cb 100644
--- a/deployment/templates/daemonset.yaml
+++ b/deployment/templates/daemonset.yaml
@@ -78,6 +78,18 @@ spec:
       - name: "pod-gpu-resources"
         hostPath:
           path: {{ .Values.kubeletPath }}
+      {{- if and .Values.tlsServerConfig.enabled }}
+      - name: "tls"
+        secret:
+          secretName: {{ include "dcgm-exporter.tlsCertsSecretName" . }}
+          defaultMode: 0664
+      {{- end }}
+      {{- if or .Values.tlsServerConfig.enabled $.Values.basicAuth.users}}
+      - name: "web-config-yaml"
+        configMap:
+          name: {{ include "dcgm-exporter.webConfigConfigMap" . }}
+          defaultMode: 0664
+      {{- end }}
       {{- range .Values.extraHostVolumes }}
       - name: {{ .name | quote }}
         hostPath:
@@ -109,6 +121,10 @@ spec:
           valueFrom:
             fieldRef:
               fieldPath: spec.nodeName
+        {{- if or .Values.tlsServerConfig.enabled $.Values.basicAuth.users}}
+        - name: "DCGM_EXPORTER_WEB_CONFIG_FILE"
+          value: /etc/dcgm-exporter/web-config.yaml
+        {{- end }}
         {{- if .Values.extraEnv }}
         {{- toYaml .Values.extraEnv | nindent 8 }}
         {{- end }}
@@ -119,19 +135,40 @@ spec:
         - name: "pod-gpu-resources"
           readOnly: true
           mountPath: "/var/lib/kubelet/pod-resources"
+        {{- if and .Values.tlsServerConfig.enabled }}
+        - name: "tls"
+          mountPath: /etc/dcgm-exporter/tls
+        {{- end }}
+        {{- if or .Values.tlsServerConfig.enabled $.Values.basicAuth.users}}
+        - name: "web-config-yaml"
+          mountPath: /etc/dcgm-exporter/web-config.yaml
+          subPath: web-config.yaml
+        {{- end }}
         {{- if .Values.extraVolumeMounts }}
         {{- toYaml .Values.extraVolumeMounts | nindent 8 }}
         {{- end }}
         livenessProbe:
+          {{- if not $.Values.basicAuth.users }}
           httpGet:
             path: /health
             port: {{ .Values.service.port }}
+            scheme: {{ ternary "HTTPS" "HTTP" $.Values.tlsServerConfig.enabled }}
+          {{- else }}
+          tcpSocket:
+              port: {{ .Values.service.port }}
+          {{- end }}
           initialDelaySeconds: 45
           periodSeconds: 5
         readinessProbe:
+          {{- if not $.Values.basicAuth.users }}
           httpGet:
             path: /health
             port: {{ .Values.service.port }}
+            scheme: {{ ternary "HTTPS" "HTTP" $.Values.tlsServerConfig.enabled }}
+          {{- else }}
+          tcpSocket:
+              port: {{ .Values.service.port }}
+          {{- end }}
           initialDelaySeconds: 45
         {{- if .Values.resources }}
         resources:
diff --git a/deployment/templates/tls-secret.yaml b/deployment/templates/tls-secret.yaml
new file mode 100644
index 00000000..0762eb04
--- /dev/null
+++ b/deployment/templates/tls-secret.yaml
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+{{- if and .Values.tlsServerConfig.enabled (not .Values.tlsServerConfig.existingSecret) }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ (include "dcgm-exporter.tlsCertsSecretName" .) }}
+  namespace: {{ include "dcgm-exporter.namespace" . }}
+  labels:
+    app.kubernetes.io/component: "dcgm-exporter"
+    {{- include "dcgm-exporter.labels" . | nindent 4 }}
+type: Opaque
+data:
+  {{- if .Values.tlsServerConfig.autoGenerated }}
+  {{- $ca := genCA "dcgm-exporter-ca" 365 }}
+  {{- $hostname := printf "%s" (include "dcgm-exporter.fullname" .) }}
+  {{- $cert := genSignedCert $hostname nil (list $hostname) 365 $ca }}
+  {{ .Values.tlsServerConfig.certFilename }}: {{ $cert.Cert | b64enc | quote }}
+  {{ .Values.tlsServerConfig.keyFilename }}: {{ $cert.Key | b64enc | quote }}
+  {{- if .Values.tlsServerConfig.clientAuthType }}
+  {{ .Values.tlsServerConfig.caFilename }}: {{ $ca.Cert | b64enc | quote }}
+  {{- end }}
+  {{- else }}
+  {{ .Values.tlsServerConfig.certFilename }}: {{ required "'tlsServerConfig.cert' is required when 'tlsServerConfig.enabled=true'" .Values.tlsServerConfig.cert | b64enc | quote }}
+  {{ .Values.tlsServerConfig.keyFilename }}: {{ required "'tlsServerConfig.key' is required when 'tlsServerConfig.enabled=true'" .Values.tlsServerConfig.key | b64enc | quote }}
+  {{- if .Values.tlsServerConfig.clientAuthType }}
+  {{ .Values.tlsServerConfig.caFilename }}: {{ required "'tlsServerConfig.ca' is required when 'tlsServerConfig.clientAuthType' is provided" .Values.tlsServerConfig.ca | b64enc | quote }}
+  {{- end }}
+  {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deployment/templates/web-config-configmap.yaml b/deployment/templates/web-config-configmap.yaml
new file mode 100644
index 00000000..af21dfd1
--- /dev/null
+++ b/deployment/templates/web-config-configmap.yaml
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+{{- if or .Values.tlsServerConfig.enabled .Values.basicAuth.users }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "dcgm-exporter.webConfigConfigMap" . }}
+  namespace: {{ include "dcgm-exporter.namespace" . }}
+  labels:
+    app.kubernetes.io/component: "dcgm-exporter"
+    {{- include "dcgm-exporter.labels" . | nindent 4 }}
+data:
+  web-config.yaml: |
+{{- if .Values.tlsServerConfig.enabled }}
+    tls_server_config:
+       cert_file: {{ required "'tlsServerConfig.certFilename' is required when 'tlsServerConfig.enabled=true'" .Values.tlsServerConfig.certFilename | printf "/etc/dcgm-exporter/tls/%s" | quote }}
+       key_file: {{ required "'tlsServerConfig.keyFilename' is required when 'tlsServerConfig.enabled=true'" .Values.tlsServerConfig.keyFilename | printf "/etc/dcgm-exporter/tls/%s" | quote }}
+       {{- if .Values.tlsServerConfig.clientAuthType }}
+       client_auth_type: {{ .Values.tlsServerConfig.clientAuthType }}
+       client_ca_file: {{ required "'tlsServerConfig.caFilename' is required when 'tlsServerConfig.clientAuthType' is provided" .Values.tlsServerConfig.caFilename | printf "/etc/dcgm-exporter/tls/%s" | quote }}
+       {{- end }}
+{{- end }}
+{{- if .Values.basicAuth.users }}
+    basic_auth_users:
+      {{- range $user, $password := .Values.basicAuth.users }}
+      {{ $user }}: {{ (split ":" (htpasswd $user $password))._1 }}
+      {{- end }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deployment/values.yaml b/deployment/values.yaml
index bf2d5be7..ba1e1e66 100644
--- a/deployment/values.yaml
+++ b/deployment/values.yaml
@@ -17,11 +17,11 @@ image:
   pullPolicy: IfNotPresent
   # Image tag defaults to AppVersion, but you can use the tag key
   # for the image tag, e.g:
-  tag: 3.3.9-3.6.1-ubuntu22.04
+  tag: 4.0.0-4.0.0-ubuntu22.04
 
 # Change the following reference to "/etc/dcgm-exporter/default-counters.csv"
 # to stop profiling metrics from DCGM
-arguments: ["-f", "/etc/dcgm-exporter/dcp-metrics-included.csv"]
+arguments: ["-f", "/etc/dcgm-exporter/default-counters.csv"]
 # NOTE: in general, add any command line arguments to arguments above
 # and they will be passed through.
 # Use "-r", "<HOST>:<PORT>" to connect to an already running hostengine
@@ -146,12 +146,12 @@ extraConfigMapVolumes:
       name: exporter-metrics-config-map
       items:
       - key: metrics
-        path: dcp-metrics-included.csv
+        path: default-counters.csv
 
 extraVolumeMounts:
   - name: exporter-metrics-volume
-    mountPath: /etc/dcgm-exporter/dcp-metrics-included.csv
-    subPath: dcp-metrics-included.csv
+    mountPath: /etc/dcgm-exporter/default-counters.csv
+    subPath: default-counters.csv
 
 extraEnv: []
 #- name: EXTRA_VAR
@@ -160,6 +160,38 @@ extraEnv: []
 # Path to the kubelet socket for /pod-resources
 kubeletPath: "/var/lib/kubelet/pod-resources"
 
+# HTTPS configuration
+tlsServerConfig:
+  # Enable or disable HTTPS configuration
+  enabled: false
+  # Use autogenerated self-signed TLS certificates. Not recommended for production environments.
+  autoGenerated: true
+  # Existing secret containing your own server key and certificate
+  existingSecret: ""
+  # Certificate file name
+  certFilename: "tls.crt"
+  # Key file name
+  keyFilename: "tls.key"
+  # CA certificate file name
+  caFilename: "ca.crt"
+  # Server policy for client authentication. Maps to ClientAuth Policies.
+  # For more detail on clientAuth options:
+  # https://golang.org/pkg/crypto/tls/#ClientAuthType
+  #
+  # NOTE: If you want to enable client authentication, you need to use
+  # RequireAndVerifyClientCert. Other values are insecure.
+  clientAuthType: ""
+  # TLS Key for HTTPS - ignored if existingSecret is provided
+  key: ""
+  # TLS Certificate for HTTPS - ignored if existingSecret is provided
+  cert: ""
+  # CA Certificate for HTTPS - ignored if existingSecret is provided
+  ca: ""
+
+basicAuth:
+  # Object containing <user>: <password> key-value pairs, one for each user that will have access via basic authentication
+  users: {}
+
 # Customized list of metrics to emit. Expected to be in the same format (CSV) as the default list.
 # Must be the complete list and is not additive. If unset, the default list will take effect.
 # customMetrics: |
diff --git a/docker/Dockerfile.ubi9 b/docker/Dockerfile.ubi
similarity index 58%
rename from docker/Dockerfile.ubi9
rename to docker/Dockerfile.ubi
index e6154d21..a073d9fc 100644
--- a/docker/Dockerfile.ubi9
+++ b/docker/Dockerfile.ubi
@@ -1,15 +1,29 @@
-FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9 AS builder
-ARG GOLANG_VERSION=1.22.4
+ARG BASEIMAGE=nvcr.io/nvidia/cuda:12.6.3-base-ubi9
+
+FROM --platform=$BUILDPLATFORM ubuntu:18.04 AS builder
+
+ARG GOLANG_VERSION=1.22.5
+
 WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter
-RUN set -eux; \
-	dnf clean expire-cache; \
-	dnf install -y go-toolset make wget
-RUN dnf clean all && rm -rf /usr/bin/go
+RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
+	apt-get update \
+	&& apt-get install -y --no-install-recommends \
+	wget \
+	ca-certificates \
+	git \
+	build-essential \
+	gcc \
+	gcc-aarch64-linux-gnu \
+	qemu-user \
+	qemu-system-arm \
+	libc6-dev-arm64-cross \
+	&& apt-get autoremove -y \
+	&& rm -rf /var/lib/apt/lists/* \
+	&& ln -sf /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 /lib/ld-linux-aarch64.so.1
 
-# Install Go official release
 RUN set -eux; \
+	arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; \
 	url=; \
-	arch=$(uname -m) && if [ "${arch}" = "x86_64" ]; then arch=amd64; fi && if [ "${arch}" = "aarch64" ]; then arch=arm64; fi; \
 	case "$arch" in \
 	'amd64') \
 	url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz"; \
@@ -19,42 +33,37 @@ RUN set -eux; \
 	;; \
 	*) echo >&2 "error: unsupported architecture '$arch' (likely packaging update needed)"; exit 1 ;; \
 	esac; \
-	build=; \
-	if [ -z "$url" ]; then \
-	# https://github.com/golang/go/issues/38536#issuecomment-616897960
-	build=1; \
-	url="https://dl.google.com/go/go${GOLANG_VERSION}.src.tar.gz"; \
-	echo >&2; \
-	echo >&2 "warning: current architecture ($arch) does not have a compatible Go binary release; will be building from source"; \
-	echo >&2; \
-	fi; \
 	wget -O go.tgz "$url" --progress=dot:giga; \
 	tar -C /usr/local -xzf go.tgz; \
 	rm go.tgz;
-ENV GOTOOLCHAIN=local
-ENV GOPATH /go
-ENV PATH $GOPATH/bin:$PATH
+ENV GOTOOLCHAIN=local GOPATH=/go
+ENV PATH=$GOPATH/bin:$PATH
 RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 1777 "$GOPATH"
-ENV PATH $PATH:/usr/local/go/bin   
+ENV PATH=$PATH:/usr/local/go/bin
 COPY . .
+ARG TARGETOS
+ARG TARGETARCH
+RUN --mount=type=cache,target=/root/.cache/go-build \
+	--mount=type=cache,target=/go/pkg \
+	# when building for aarch64 we have to use the aarch64-linux-gnu-gcc cross-compiler
+	if [ "$TARGETARCH" = "arm64" ]; then \
+	export CC=aarch64-linux-gnu-gcc; \
+	export LD_LIBRARY_PATH=/usr/aarch64-linux-gnu/lib:$LD_LIBRARY_PATH; \
+	fi && \
+	GOOS=$TARGETOS GOARCH=$TARGETARCH CC=$CC CGO_ENABLED=1 make install
 
-RUN make binary check-format
 
-FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9
+FROM ${BASEIMAGE}
 ARG DCGM_VERSION
 ARG VERSION
 ARG DIST_DIR
 
 LABEL io.k8s.display-name="NVIDIA DCGM Exporter"
-LABEL name="NVIDIA DCGM Exporter"
-LABEL vendor="NVIDIA"
-LABEL version="${VERSION}"
-LABEL release="N/A"
-LABEL summary="Exports GPU Metrics to Prometheus"
-LABEL description="See summary"
+
+ARG DCGM_VERSION
 
 RUN dnf update --disablerepo=* --enablerepo=ubi-9-appstream-rpms --enablerepo=ubi-9-baseos-rpms -y \
-	&& dnf install --nodocs --setopt=install_weak_deps=False -y datacenter-gpu-manager-${DCGM_VERSION} libcap \
+	&& dnf install --nodocs --setopt=install_weak_deps=False -y datacenter-gpu-manager-4-core libcap \
 	&& dnf -y clean all\
 	&& rm -rf /var/cache/yum\
 	&& rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite \
@@ -64,11 +73,11 @@ RUN dnf update --disablerepo=* --enablerepo=ubi-9-appstream-rpms --enablerepo=ub
 	&& rm -f /usr/include/*.h /usr/bin/DcgmProfTesterKernels.ptx /usr/bin/dcgmproftester* \
 	&& rm -rf /var/lib/rpm/rpmdb.sqlite /var/cache/* /var/lib/dnf/history.* /var/log/* /tmp/* /var/tmp/* \
 	&& rm -rf /usr/share/doc && rm -rf /usr/share/man \
-	&& dnf remove openssl
+	&& ldconfig
 
 COPY ./LICENSE ./licenses/LICENSE
-COPY --from=builder /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/dcgm-exporter /usr/bin/
-COPY ./etc /etc/dcgm-exporter
+COPY --from=builder /usr/bin/dcgm-exporter /usr/bin/
+COPY etc /etc/dcgm-exporter
 
 ENV NVIDIA_VISIBLE_DEVICES=all
 # disable all constraints on the configurations required by NVIDIA container toolkit
@@ -76,7 +85,7 @@ ENV NVIDIA_DISABLE_REQUIRE="true"
 # Required for DCP metrics
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,compat32
 
-ENV NO_SETCAP=
+ENV NO_SETCAP=""
 COPY docker/dcgm-exporter-entrypoint.sh /usr/local/dcgm/dcgm-exporter-entrypoint.sh
 RUN chmod +x /usr/local/dcgm/dcgm-exporter-entrypoint.sh
 
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
new file mode 100644
index 00000000..a8b9c1a7
--- /dev/null
+++ b/docker/Dockerfile.ubuntu
@@ -0,0 +1,98 @@
+ARG BASEIMAGE=nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04
+
+FROM --platform=$BUILDPLATFORM ubuntu:18.04 AS builder
+
+ARG GOLANG_VERSION=1.22.5
+
+WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter
+RUN apt-get -qq update \
+	&& apt-get -qq install -y --no-install-recommends \
+	wget \
+	ca-certificates \
+	git \
+	build-essential \
+	gcc \
+	gcc-aarch64-linux-gnu \
+	qemu-user \
+	qemu-system-arm \
+	libc6-dev-arm64-cross \
+	&& ln -sf /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 /lib/ld-linux-aarch64.so.1
+
+RUN set -eux; \
+	arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; \
+	url=; \
+	case "$arch" in \
+	'amd64') \
+	url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz"; \
+	;; \
+	'arm64') \
+	url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz"; \
+	;; \
+	*) echo >&2 "error: unsupported architecture '$arch' (likely packaging update needed)"; exit 1 ;; \
+	esac; \
+	wget -q -O go.tgz "$url"; \
+	tar -C /usr/local -xzf go.tgz; \
+	rm go.tgz;
+ENV GOTOOLCHAIN=local GOPATH=/go
+ENV PATH=$GOPATH/bin:$PATH
+RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 1777 "$GOPATH"
+ENV PATH=$PATH:/usr/local/go/bin
+COPY go.mod go.sum ./
+COPY . .
+RUN go mod download
+# when building for aarch64 we have to use the aarch64-linux-gnu-gcc cross-compiler
+ARG TARGETOS
+ARG TARGETARCH
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
+	export CC=aarch64-linux-gnu-gcc; \
+	export LD_LIBRARY_PATH=/usr/aarch64-linux-gnu/lib:$LD_LIBRARY_PATH; \
+	fi && \
+	GOOS=$TARGETOS GOARCH=$TARGETARCH CGO_ENABLED=1 make install
+RUN apt-get update && apt-get install -y file && rm -rf /var/lib/apt/lists/*
+RUN file /usr/bin/dcgm-exporter
+
+FROM --platform=$TARGETARCH ${BASEIMAGE}
+
+ARG VERSION
+ARG DCGM_VERSION
+ARG DIST_DIR
+ARG TARGETARCH
+
+LABEL io.k8s.display-name="NVIDIA DCGM Exporter"
+LABEL name="NVIDIA DCGM Exporter"
+LABEL vendor="NVIDIA"
+LABEL version="${VERSION}"
+LABEL release="N/A"
+LABEL summary="Exports GPU Metrics to Prometheus"
+LABEL description="See summary"
+
+COPY ./LICENSE ./licenses/LICENSE
+COPY --from=builder /usr/bin/dcgm-exporter /usr/bin/
+COPY etc /etc/dcgm-exporter
+ENV DEBIAN_FRONTEND=noninteractive
+RUN echo "$TARGETARCH" && apt-get -qq update && apt-get -qq install -y --no-install-recommends \
+    datacenter-gpu-manager-4-core libcap2-bin \
+    && apt-get -qq purge --autoremove -y openssl \
+    && apt-get -qq -y clean \
+    && apt-get -qq -y autoclean \
+    && apt-get -qq autoremove -y \
+    && rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite \
+    # DCGM exporter doesn't use libdcgm_cublas_proxy*.so.
+    && rm -rf /usr/lib/x86_64-linux-gnu/libdcgm_cublas_proxy*.so \
+    && rm -rf /usr/local/dcgm/scripts \
+    && rm -f /usr/include/*.h /usr/bin/DcgmProfTesterKernels.ptx /usr/bin/dcgmproftester* \
+    && rm -rf /var/cache/debconf/* /var/lib/apt/lists/* /var/log/* /tmp/* /var/tmp/* \
+    && rm -rf /usr/share/doc && rm -rf /usr/share/man \
+    && ldconfig
+# Required for DCP metrics
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,compat32
+# disable all constraints on the configurations required by NVIDIA container toolkit
+ENV NVIDIA_DISABLE_REQUIRE="true"
+ENV NVIDIA_VISIBLE_DEVICES=all
+
+ENV NO_SETCAP=""
+COPY docker/dcgm-exporter-entrypoint.sh /usr/local/dcgm/dcgm-exporter-entrypoint.sh
+RUN uname -a
+RUN chmod +x /usr/local/dcgm/dcgm-exporter-entrypoint.sh
+
+ENTRYPOINT ["/usr/local/dcgm/dcgm-exporter-entrypoint.sh"]
diff --git a/docker/Dockerfile.ubuntu22.04 b/docker/Dockerfile.ubuntu22.04
deleted file mode 100644
index 1cd1c3cc..00000000
--- a/docker/Dockerfile.ubuntu22.04
+++ /dev/null
@@ -1,88 +0,0 @@
-FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 AS builder
-ARG GOLANG_VERSION=1.22.4
-WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter
-RUN set -eux; \
-	apt-get update; \
-	apt-get install -y --no-install-recommends \
-	g++ \
-	gcc \
-	libc6-dev \
-	make \
-	pkg-config \
-	wget \
-	; \
-	rm -rf /var/lib/apt/lists/*
-RUN set -eux; \
-	arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; \
-	url=; \
-	case "$arch" in \
-	'amd64') \
-	url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-amd64.tar.gz"; \
-	;; \
-	'arm64') \
-	url="https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz"; \
-	;; \
-	*) echo >&2 "error: unsupported architecture '$arch' (likely packaging update needed)"; exit 1 ;; \
-	esac; \
-	build=; \
-	if [ -z "$url" ]; then \
-	# https://github.com/golang/go/issues/38536#issuecomment-616897960
-	build=1; \
-	url="https://dl.google.com/go/go${GOLANG_VERSION}.src.tar.gz"; \
-	echo >&2; \
-	echo >&2 "warning: current architecture ($arch) does not have a compatible Go binary release; will be building from source"; \
-	echo >&2; \
-	fi; \
-	wget -O go.tgz "$url" --progress=dot:giga; \
-	tar -C /usr/local -xzf go.tgz; \
-	rm go.tgz;
-ENV GOTOOLCHAIN=local
-ENV GOPATH /go
-ENV PATH $GOPATH/bin:$PATH
-RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 1777 "$GOPATH"
-ENV PATH $PATH:/usr/local/go/bin
-COPY . .
-
-RUN make binary check-format
-
-FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04
-
-ARG VERSION
-ARG DCGM_VERSION
-ARG DIST_DIR
-
-LABEL io.k8s.display-name="NVIDIA DCGM Exporter"
-LABEL name="NVIDIA DCGM Exporter"
-LABEL vendor="NVIDIA"
-LABEL version="${VERSION}"
-LABEL release="N/A"
-LABEL summary="Exports GPU Metrics to Prometheus"
-LABEL description="See summary"
-
-COPY ./LICENSE ./licenses/LICENSE
-COPY --from=builder /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/dcgm-exporter /usr/bin/
-COPY etc /etc/dcgm-exporter
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-	datacenter-gpu-manager=1:${DCGM_VERSION} libcap2-bin && apt-get purge --autoremove -y openssl \
-	&& apt-get -y clean \
-	&& apt-get -y autoclean \
-	&& apt-get autoremove -y \
-	&& rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite \
-	# DCGM exporter doesn't use libdcgm_cublas_proxy*.so.
-	&& rm -rf /usr/lib/x86_64-linux-gnu/libdcgm_cublas_proxy*.so \
-	&& rm -rf /usr/local/dcgm/scripts \
-	&& rm -f /usr/include/*.h /usr/bin/DcgmProfTesterKernels.ptx /usr/bin/dcgmproftester* \
-	&& rm -rf /var/cache/debconf/* /var/lib/apt/lists/* /var/log/* /tmp/* /var/tmp/* \
-	&& rm -rf /usr/share/doc && rm -rf /usr/share/man
-# Required for DCP metrics
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,compat32
-# disable all constraints on the configurations required by NVIDIA container toolkit
-ENV NVIDIA_DISABLE_REQUIRE="true"
-ENV NVIDIA_VISIBLE_DEVICES=all
-
-ENV NO_SETCAP=
-COPY docker/dcgm-exporter-entrypoint.sh /usr/local/dcgm/dcgm-exporter-entrypoint.sh
-RUN chmod +x /usr/local/dcgm/dcgm-exporter-entrypoint.sh
-
-ENTRYPOINT ["/usr/local/dcgm/dcgm-exporter-entrypoint.sh"]
diff --git a/etc/default-counters.csv b/etc/default-counters.csv
index ad949dd2..aa77a4e4 100644
--- a/etc/default-counters.csv
+++ b/etc/default-counters.csv
@@ -5,7 +5,6 @@
 # Clocks
 DCGM_FI_DEV_SM_CLOCK,  gauge, SM clock frequency (in MHz).
 DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).
-# DCGM_EXP_CLOCK_EVENTS_COUNT, gauge, Count of clock events within the user-specified time window (see clock-events-count-window-size param).
 
 # Temperature
 DCGM_FI_DEV_MEMORY_TEMP, gauge, Memory temperature (in C).
@@ -16,8 +15,8 @@ DCGM_FI_DEV_POWER_USAGE,              gauge, Power draw (in W).
 DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION, counter, Total energy consumption since boot (in mJ).
 
 # PCIE
-DCGM_FI_PROF_PCIE_TX_BYTES,  counter, Total number of bytes transmitted through PCIe TX via NVML.
-DCGM_FI_PROF_PCIE_RX_BYTES,  counter, Total number of bytes received through PCIe RX via NVML.
+# DCGM_FI_PROF_PCIE_TX_BYTES,  counter, Total number of bytes transmitted through PCIe TX via NVML.
+# DCGM_FI_PROF_PCIE_RX_BYTES,  counter, Total number of bytes received through PCIe RX via NVML.
 DCGM_FI_DEV_PCIE_REPLAY_COUNTER, counter, Total number of PCIe retries.
 
 # Utilization (the sample period varies depending on the product)
@@ -34,10 +33,10 @@ DCGM_FI_DEV_XID_ERRORS,              gauge,   Value of the last XID error encoun
 # DCGM_FI_DEV_BOARD_LIMIT_VIOLATION, counter, Throttling duration due to board limit constraints (in us).
 # DCGM_FI_DEV_LOW_UTIL_VIOLATION,    counter, Throttling duration due to low utilization (in us).
 # DCGM_FI_DEV_RELIABILITY_VIOLATION, counter, Throttling duration due to reliability constraints (in us).
-# DCGM_EXP_XID_ERRORS_COUNT,         gauge,   Count of XID Errors within user-specified time window (see xid-count-window-size param).
+
 # Memory usage
-DCGM_FI_DEV_FB_FREE, gauge, Frame buffer memory free (in MB).
-DCGM_FI_DEV_FB_USED, gauge, Frame buffer memory used (in MB).
+DCGM_FI_DEV_FB_FREE, gauge, Framebuffer memory free (in MiB).
+DCGM_FI_DEV_FB_USED, gauge, Framebuffer memory used (in MiB).
 
 # ECC
 # DCGM_FI_DEV_ECC_SBE_VOL_TOTAL, counter, Total number of single-bit volatile ECC errors.
@@ -55,7 +54,8 @@ DCGM_FI_DEV_FB_USED, gauge, Frame buffer memory used (in MB).
 # DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL, counter, Total number of NVLink data CRC errors.
 # DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL,   counter, Total number of NVLink retries.
 # DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL, counter, Total number of NVLink recovery errors.
-DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL,            counter, Total number of NVLink bandwidth counters for all lanes
+DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL,            counter, Total number of NVLink bandwidth counters for all lanes.
+# DCGM_FI_DEV_NVLINK_BANDWIDTH_L0,               counter, The number of bytes of active NVLink rx or tx data including both header and payload.
 
 # VGPU License status
 DCGM_FI_DEV_VGPU_LICENSE_STATUS, gauge, vGPU License status
@@ -75,3 +75,16 @@ DCGM_FI_DRIVER_VERSION,        label, Driver Version
 # DCGM_FI_DEV_POWER_INFOROM_VER, label, Power management object inforom version
 # DCGM_FI_DEV_INFOROM_IMAGE_VER, label, Inforom image version
 # DCGM_FI_DEV_VBIOS_VERSION,     label, VBIOS version of the device
+
+# Datacenter Profiling (DCP) metrics
+# NOTE: supported on NVIDIA datacenter Volta GPUs and newer.
+DCGM_FI_PROF_GR_ENGINE_ACTIVE,   gauge, Ratio of time the graphics engine is active.
+# DCGM_FI_PROF_SM_ACTIVE,          gauge, The ratio of cycles an SM has at least 1 warp assigned.
+# DCGM_FI_PROF_SM_OCCUPANCY,       gauge, The ratio of number of warps resident on an SM.
+DCGM_FI_PROF_PIPE_TENSOR_ACTIVE, gauge, Ratio of cycles the tensor (HMMA) pipe is active.
+DCGM_FI_PROF_DRAM_ACTIVE,        gauge, Ratio of cycles the device memory interface is active sending or receiving data.
+# DCGM_FI_PROF_PIPE_FP64_ACTIVE,   gauge, Ratio of cycles the fp64 pipes are active.
+# DCGM_FI_PROF_PIPE_FP32_ACTIVE,   gauge, Ratio of cycles the fp32 pipes are active.
+# DCGM_FI_PROF_PIPE_FP16_ACTIVE,   gauge, Ratio of cycles the fp16 pipes are active.
+DCGM_FI_PROF_PCIE_TX_BYTES,      gauge, The rate of data transmitted over the PCIe bus - including both protocol headers and data payloads - in bytes per second.
+DCGM_FI_PROF_PCIE_RX_BYTES,      gauge, The rate of data received over the PCIe bus - including both protocol headers and data payloads - in bytes per second.
diff --git a/go.mod b/go.mod
index 9436d873..fa1efc33 100644
--- a/go.mod
+++ b/go.mod
@@ -2,57 +2,59 @@ module github.com/NVIDIA/dcgm-exporter
 
 go 1.22.0
 
-toolchain go1.22.4
+toolchain go1.22.9
 
 require (
-	github.com/NVIDIA/go-dcgm v0.0.0-20240118201113-3385e277e49f
-	github.com/NVIDIA/go-nvml v0.12.0-2
-	github.com/avast/retry-go/v4 v4.5.1
-	github.com/bits-and-blooms/bitset v1.13.0
-	github.com/go-kit/log v0.2.1
+	github.com/NVIDIA/go-dcgm v0.0.0-20250106155650-850266c9c8a5
+	github.com/NVIDIA/go-nvml v0.12.4-0
+	github.com/avast/retry-go/v4 v4.6.0
+	github.com/bits-and-blooms/bitset v1.17.0
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/mux v1.8.1
-	github.com/mittwald/go-helm-client v0.12.9
-	github.com/onsi/ginkgo/v2 v2.15.0
-	github.com/onsi/gomega v1.32.0
-	github.com/prometheus/client_model v0.6.0
-	github.com/prometheus/common v0.47.0
-	github.com/prometheus/exporter-toolkit v0.11.0
+	github.com/mittwald/go-helm-client v0.12.14
+	github.com/onsi/ginkgo/v2 v2.22.0
+	github.com/onsi/gomega v1.36.0
+	github.com/pkg/errors v0.9.1
+	github.com/prometheus/client_model v0.6.1
+	github.com/prometheus/common v0.60.1
+	github.com/prometheus/exporter-toolkit v0.13.1
 	github.com/sirupsen/logrus v1.9.3
-	github.com/stretchr/testify v1.8.4
+	github.com/stretchr/testify v1.10.0
 	github.com/urfave/cli/v2 v2.27.1
 	go.uber.org/automaxprocs v1.5.3
 	go.uber.org/mock v0.4.0
-	golang.org/x/sync v0.7.0
-	google.golang.org/grpc v1.64.1
-	k8s.io/api v0.30.2
-	k8s.io/apimachinery v0.30.2
-	k8s.io/client-go v0.30.2
+	golang.org/x/sync v0.8.0
+	google.golang.org/grpc v1.65.0
+	k8s.io/api v0.31.1
+	k8s.io/apimachinery v0.31.1
+	k8s.io/client-go v0.31.1
 	k8s.io/kubelet v0.30.2
-	k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0
+	k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
 )
 
 require (
+	dario.cat/mergo v1.0.1 // indirect
 	github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
 	github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
 	github.com/BurntSushi/toml v1.3.2 // indirect
 	github.com/MakeNowJust/heredoc v1.0.0 // indirect
 	github.com/Masterminds/goutils v1.1.1 // indirect
 	github.com/Masterminds/semver v1.5.0 // indirect
-	github.com/Masterminds/semver/v3 v3.2.1 // indirect
-	github.com/Masterminds/sprig/v3 v3.2.3 // indirect
+	github.com/Masterminds/semver/v3 v3.3.0 // indirect
+	github.com/Masterminds/sprig/v3 v3.3.0 // indirect
 	github.com/Masterminds/squirrel v1.5.4 // indirect
 	github.com/Microsoft/hcsshim v0.11.4 // indirect
 	github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
-	github.com/cespare/xxhash/v2 v2.2.0 // indirect
+	github.com/blang/semver/v4 v4.0.0 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/chai2010/gettext-go v1.0.2 // indirect
 	github.com/containerd/containerd v1.7.12 // indirect
 	github.com/containerd/log v0.1.0 // indirect
 	github.com/coreos/go-systemd/v22 v22.5.0 // indirect
-	github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect
-	github.com/cyphar/filepath-securejoin v0.2.4 // indirect
-	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
+	github.com/cyphar/filepath-securejoin v0.3.1 // indirect
+	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/distribution/reference v0.5.0 // indirect
 	github.com/docker/cli v26.1.4+incompatible // indirect
 	github.com/docker/distribution v2.8.3+incompatible // indirect
@@ -60,20 +62,20 @@ require (
 	github.com/docker/docker-credential-helpers v0.8.0 // indirect
 	github.com/docker/go-connections v0.5.0 // indirect
 	github.com/docker/go-metrics v0.0.1 // indirect
-	github.com/emicklei/go-restful/v3 v3.11.1 // indirect
-	github.com/evanphx/json-patch v5.7.0+incompatible // indirect
+	github.com/emicklei/go-restful/v3 v3.12.1 // indirect
+	github.com/evanphx/json-patch v5.9.0+incompatible // indirect
 	github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
 	github.com/fatih/color v1.16.0 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
 	github.com/go-errors/errors v1.5.1 // indirect
 	github.com/go-gorp/gorp/v3 v3.1.0 // indirect
-	github.com/go-logfmt/logfmt v0.6.0 // indirect
-	github.com/go-logr/logr v1.4.1 // indirect
+	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-openapi/jsonpointer v0.20.2 // indirect
 	github.com/go-openapi/jsonreference v0.20.4 // indirect
 	github.com/go-openapi/swag v0.22.7 // indirect
-	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
+	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/gobwas/glob v0.2.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.4 // indirect
@@ -81,21 +83,21 @@ require (
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
-	github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
+	github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
 	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
 	github.com/gorilla/websocket v1.5.1 // indirect
 	github.com/gosuri/uitable v0.0.4 // indirect
 	github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
 	github.com/hashicorp/errwrap v1.1.0 // indirect
 	github.com/hashicorp/go-multierror v1.1.1 // indirect
-	github.com/huandu/xstrings v1.4.0 // indirect
+	github.com/huandu/xstrings v1.5.0 // indirect
 	github.com/imdario/mergo v0.3.16 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
-	github.com/jmoiron/sqlx v1.3.5 // indirect
+	github.com/jmoiron/sqlx v1.4.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/jpillora/backoff v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/klauspost/compress v1.17.4 // indirect
+	github.com/klauspost/compress v1.17.9 // indirect
 	github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
 	github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
 	github.com/lib/pq v1.10.9 // indirect
@@ -104,11 +106,13 @@ require (
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/mattn/go-runewidth v0.0.15 // indirect
+	github.com/mdlayher/socket v0.4.1 // indirect
+	github.com/mdlayher/vsock v1.2.1 // indirect
 	github.com/mitchellh/copystructure v1.2.0 // indirect
 	github.com/mitchellh/go-wordwrap v1.0.1 // indirect
 	github.com/mitchellh/reflectwalk v1.0.2 // indirect
 	github.com/moby/locker v1.0.1 // indirect
-	github.com/moby/spdystream v0.2.0 // indirect
+	github.com/moby/spdystream v0.4.0 // indirect
 	github.com/moby/term v0.5.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
@@ -117,58 +121,56 @@ require (
 	github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
 	github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
-	github.com/opencontainers/image-spec v1.1.0-rc6 // indirect
+	github.com/opencontainers/image-spec v1.1.0 // indirect
 	github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
-	github.com/pkg/errors v0.9.1 // indirect
-	github.com/pmezard/go-difflib v1.0.0 // indirect
-	github.com/prometheus/client_golang v1.18.0 // indirect
-	github.com/prometheus/procfs v0.12.0 // indirect
+	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
+	github.com/prometheus/client_golang v1.20.4 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/rivo/uniseg v0.4.4 // indirect
-	github.com/rubenv/sql-migrate v1.6.0 // indirect
+	github.com/rubenv/sql-migrate v1.7.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
-	github.com/shopspring/decimal v1.3.1 // indirect
-	github.com/spf13/cast v1.6.0 // indirect
-	github.com/spf13/cobra v1.8.0 // indirect
+	github.com/shopspring/decimal v1.4.0 // indirect
+	github.com/spf13/cast v1.7.0 // indirect
+	github.com/spf13/cobra v1.8.1 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
-	github.com/stretchr/objx v0.5.0 // indirect
+	github.com/stretchr/objx v0.5.2 // indirect
+	github.com/x448/float16 v0.8.4 // indirect
 	github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
 	github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
 	github.com/xeipuuv/gojsonschema v1.2.0 // indirect
 	github.com/xlab/treeprint v1.2.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
-	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect
-	go.opentelemetry.io/otel v1.21.0 // indirect
-	go.opentelemetry.io/otel/metric v1.21.0 // indirect
-	go.opentelemetry.io/otel/trace v1.21.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
+	go.opentelemetry.io/otel v1.28.0 // indirect
+	go.opentelemetry.io/otel/metric v1.28.0 // indirect
+	go.opentelemetry.io/otel/trace v1.28.0 // indirect
 	go.starlark.net v0.0.0-20231121155337-90ade8b19d09 // indirect
-	golang.org/x/crypto v0.24.0 // indirect
-	golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc // indirect
-	golang.org/x/net v0.26.0 // indirect
-	golang.org/x/oauth2 v0.18.0 // indirect
-	golang.org/x/sys v0.21.0 // indirect
-	golang.org/x/term v0.21.0 // indirect
-	golang.org/x/text v0.16.0 // indirect
+	golang.org/x/crypto v0.28.0 // indirect
+	golang.org/x/net v0.30.0 // indirect
+	golang.org/x/oauth2 v0.23.0 // indirect
+	golang.org/x/sys v0.26.0 // indirect
+	golang.org/x/term v0.25.0 // indirect
+	golang.org/x/text v0.19.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
-	golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
-	google.golang.org/appengine v1.6.8 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect
-	google.golang.org/protobuf v1.33.0 // indirect
-	gopkg.in/evanphx/json-patch.v5 v5.7.0 // indirect
+	golang.org/x/tools v0.26.0 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
+	google.golang.org/protobuf v1.35.1 // indirect
+	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	helm.sh/helm/v3 v3.15.2 // indirect
-	k8s.io/apiextensions-apiserver v0.30.0 // indirect
-	k8s.io/apiserver v0.30.2 // indirect
-	k8s.io/cli-runtime v0.30.0 // indirect
-	k8s.io/component-base v0.30.2 // indirect
-	k8s.io/klog/v2 v2.120.1 // indirect
+	helm.sh/helm/v3 v3.16.1 // indirect
+	k8s.io/apiextensions-apiserver v0.31.1 // indirect
+	k8s.io/apiserver v0.31.1 // indirect
+	k8s.io/cli-runtime v0.31.1 // indirect
+	k8s.io/component-base v0.31.1 // indirect
+	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
-	k8s.io/kubectl v0.30.0 // indirect
+	k8s.io/kubectl v0.31.0 // indirect
 	oras.land/oras-go v1.2.5 // indirect
 	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
-	sigs.k8s.io/kustomize/api v0.16.0 // indirect
-	sigs.k8s.io/kustomize/kyaml v0.16.0 // indirect
+	sigs.k8s.io/kustomize/api v0.17.2 // indirect
+	sigs.k8s.io/kustomize/kyaml v0.17.1 // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
 	sigs.k8s.io/yaml v1.4.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 459f9df0..0f30e6bb 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,7 @@
+dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
+dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
+filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
+filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
 github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=
 github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
@@ -12,21 +16,20 @@ github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJ
 github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
 github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
 github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y=
-github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
-github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0=
-github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
-github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
-github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
+github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
+github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
+github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
+github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
 github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
 github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
 github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
 github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
 github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8=
 github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w=
-github.com/NVIDIA/go-dcgm v0.0.0-20240118201113-3385e277e49f h1:HEY1H1By8XI2P6KHA0wk+nXsBE+l/iYRCAwR6nZAoU8=
-github.com/NVIDIA/go-dcgm v0.0.0-20240118201113-3385e277e49f/go.mod h1:kaRlwPjisNMY7xH8QWJ+6q76YJ/1eu6pWV45B5Ew6C4=
-github.com/NVIDIA/go-nvml v0.12.0-2 h1:Sg239yy7jmopu/cuvYauoMj9fOpcGMngxVxxS1EBXeY=
-github.com/NVIDIA/go-nvml v0.12.0-2/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
+github.com/NVIDIA/go-dcgm v0.0.0-20250106155650-850266c9c8a5 h1:+HrFl/XGrOqfX8tgvJTCHfuDzbZbpdEQmbOdcDR53Ew=
+github.com/NVIDIA/go-dcgm v0.0.0-20250106155650-850266c9c8a5/go.mod h1:kaRlwPjisNMY7xH8QWJ+6q76YJ/1eu6pWV45B5Ew6C4=
+github.com/NVIDIA/go-nvml v0.12.4-0 h1:4tkbB3pT1O77JGr0gQ6uD8FrsUPqP1A/EOEm2wI1TUg=
+github.com/NVIDIA/go-nvml v0.12.4-0/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
 github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d h1:UrqY+r/OJnIp5u0s1SbQ8dVfLCZJsnvazdBP5hS4iRs=
 github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ=
 github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
@@ -35,14 +38,16 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
 github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
-github.com/avast/retry-go/v4 v4.5.1 h1:AxIx0HGi4VZ3I02jr78j5lZ3M6x1E0Ivxa6b0pUUh7o=
-github.com/avast/retry-go/v4 v4.5.1/go.mod h1:/sipNsvNB3RRuT5iNcb6h73nw3IBmXJ/H3XrCQYSOpc=
+github.com/avast/retry-go/v4 v4.6.0 h1:K9xNA+KeB8HHc2aWFuLb25Offp+0iVRXEvFx8IinRJA=
+github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRSHKIr2PyOE=
 github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
-github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJRUA0wFAVE=
-github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/bits-and-blooms/bitset v1.17.0 h1:1X2TS7aHz1ELcC0yU1y2stUs/0ig5oMU6STFZGrhvHI=
+github.com/bits-and-blooms/bitset v1.17.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
+github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
 github.com/bshuster-repo/logrus-logstash-hook v1.0.0 h1:e+C0SB5R1pu//O4MQ3f9cFuPGoOVeF2fE4Og9otCc70=
 github.com/bshuster-repo/logrus-logstash-hook v1.0.0/go.mod h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk=
 github.com/bugsnag/bugsnag-go v0.0.0-20141110184014-b1d153021fcd h1:rFt+Y/IK1aEZkEHchZRSq9OQbsSzIT/OrI8YFFmRIng=
@@ -51,13 +56,10 @@ github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b h1:otBG+dV+YK+Soembj
 github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50=
 github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0 h1:nvj0OLI3YqYXer/kZD8Ri1aaunCxIEsOst1BVJswV0o=
 github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE=
-github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
-github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk=
 github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA=
-github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
-github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
-github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
 github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
 github.com/containerd/containerd v1.7.12 h1:+KQsnv4VnzyxWcfO9mlxxELaoztsDEjOuCMPAuPqgU0=
@@ -68,15 +70,16 @@ github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
 github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
 github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
-github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM=
-github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
 github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
-github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
-github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
+github.com/cyphar/filepath-securejoin v0.3.1 h1:1V7cHiaW+C+39wEfpH6XlLBQo3j/PciWFrgfCLS8XrE=
+github.com/cyphar/filepath-securejoin v0.3.1/go.mod h1:F7i41x/9cBF7lzCrVsYs9fuzwRZm4NQsGTBdpp6mETc=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2 h1:aBfCb7iqHmDEIp6fBvC/hQUddQfg+3qdYjwzaiP9Hnc=
 github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2/go.mod h1:WHNsWjnIn2V1LYOrME7e8KxSeKunYHsxEm4am0BUtcI=
 github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0=
@@ -97,34 +100,32 @@ github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQ
 github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw=
 github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1 h1:ZClxb8laGDf5arXfYcAtECDFgAgHklGI8CxgjHnXKJ4=
 github.com/docker/libtrust v0.0.0-20150114040149-fa567046d9b1/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE=
-github.com/emicklei/go-restful/v3 v3.11.1 h1:S+9bSbua1z3FgCnV0KKOSSZ3mDthb5NyEPL5gEpCvyk=
-github.com/emicklei/go-restful/v3 v3.11.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
-github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ2tG6yudJd8LBksgI=
-github.com/evanphx/json-patch v5.7.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
+github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU=
+github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls=
+github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
 github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4=
 github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc=
 github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
 github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
-github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI=
-github.com/foxcpp/go-mockdns v1.0.0/go.mod h1:lgRN6+KxQBawyIghpnl5CezHFGS9VLzvtVlwxvzXTQ4=
+github.com/foxcpp/go-mockdns v1.1.0 h1:jI0rD8M0wuYAxL7r/ynTrCQQq0BVqfB99Vgk7DlmewI=
+github.com/foxcpp/go-mockdns v1.1.0/go.mod h1:IhLeSFGed3mJIAXPH2aiRQB+kqz7oqu8ld2qVbOu7Wk=
 github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
 github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
+github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
+github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
 github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk=
 github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
 github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs=
 github.com/go-gorp/gorp/v3 v3.1.0/go.mod h1:dLEjIyyRNiXvNZ8PSmzpt1GsWAUK8kjVhEpjH8TixEw=
 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
-github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU=
-github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0=
 github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
-github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4=
-github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
-github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
-github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
 github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-openapi/jsonpointer v0.20.2 h1:mQc3nmndL8ZBzStEo3JYF8wzmeWffDH4VbXz58sAx6Q=
@@ -133,11 +134,11 @@ github.com/go-openapi/jsonreference v0.20.4 h1:bKlDxQxQJgwpUSgOENiMPzCTBVuc7vTdX
 github.com/go-openapi/jsonreference v0.20.4/go.mod h1:5pZJyJP2MnYCpoeoMAql78cCHauHj0V9Lhc506VOpw4=
 github.com/go-openapi/swag v0.22.7 h1:JWrc1uc/P9cSomxfnsFSVWoE1FW6bNbrVPmpQYpCcR8=
 github.com/go-openapi/swag v0.22.7/go.mod h1:Gl91UqO+btAM0plGGxHqJcQZ1ZTy6jbmridBTsDy8A0=
-github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE=
-github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
+github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
+github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
 github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
 github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -149,8 +150,6 @@ github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4er
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
-github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
 github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/gomodule/redigo v1.8.2 h1:H5XSIre1MB5NbPYFp+i1NBbb5qN1W8Y8YAQoAYbkm8k=
@@ -160,25 +159,22 @@ github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl76
 github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
 github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
-github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
-github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
 github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
 github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
 github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
-github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY=
 github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY=
 github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY=
@@ -192,17 +188,14 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l
 github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
 github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=
 github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
-github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
-github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU=
-github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
-github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
-github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
+github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
+github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
-github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g=
-github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ=
+github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
+github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
 github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
 github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
@@ -214,19 +207,20 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm
 github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
-github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
+github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
 github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
 github.com/lann/builder v0.0.0-20180802200727-47ae307949d0/go.mod h1:dXGbAdH5GtBTC4WfIxhKZfyBF/HBFgRZSWwZ9g/He9o=
 github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 h1:P6pPBnrTSX3DEVR4fDembhRWSsG5rVo6hYhAB/ADZrk=
 github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6FmdpVm2joNMFikkuWg0EoCKLGUMNw=
-github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
 github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
 github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0=
@@ -240,26 +234,27 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
 github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
-github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
-github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
-github.com/miekg/dns v1.1.25 h1:dFwPR6SfLtrSwgDcIq2bcU/gVutB4sNApq2HBdqcakg=
-github.com/miekg/dns v1.1.25/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
-github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
+github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U=
+github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
+github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ=
+github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE=
+github.com/miekg/dns v1.1.57 h1:Jzi7ApEIzwEPLHWRcafCN9LZSBbqQpxjt/wpgvg7wcM=
+github.com/miekg/dns v1.1.57/go.mod h1:uqRjCRUuEAA6qsOiJvDd+CFo/vW+y5WR6SNmHE55hZk=
 github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
 github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
 github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0=
 github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0=
-github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
 github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
 github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
-github.com/mittwald/go-helm-client v0.12.9 h1:tfI5ECgrbfAolA9TnlCeA5F2TEIvdsOxVmoSyW80lCI=
-github.com/mittwald/go-helm-client v0.12.9/go.mod h1:ukR3Et5zbfBij7bFL1ZnLvPytsbBXCrI2qQYr2yVi9I=
+github.com/mittwald/go-helm-client v0.12.14 h1:az3GJ4kRmFK609Ic3iHXveNtg92n9jWG0YpKKTIK4oo=
+github.com/mittwald/go-helm-client v0.12.14/go.mod h1:2VogAupgnV7FiuoPqtpCYKS/RrMh9fFA3/pD/OmTaLc=
 github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
 github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc=
-github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8=
-github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c=
+github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8=
+github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
 github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78=
 github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
 github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
@@ -280,14 +275,14 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
-github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY=
-github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM=
-github.com/onsi/gomega v1.32.0 h1:JRYU78fJ1LPxlckP6Txi/EYqJvjtMrDC04/MM5XRHPk=
-github.com/onsi/gomega v1.32.0/go.mod h1:a4x4gW6Pz2yK1MAmvluYme5lvYTn61afQ2ETw/8n4Lg=
+github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
+github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
+github.com/onsi/gomega v1.36.0 h1:Pb12RlruUtj4XUuPUqeEWc6j5DkVVVA49Uf6YLfC95Y=
+github.com/onsi/gomega v1.36.0/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
-github.com/opencontainers/image-spec v1.1.0-rc6 h1:XDqvyKsJEbRtATzkgItUqBA7QHk58yxX1Ov9HERHNqU=
-github.com/opencontainers/image-spec v1.1.0-rc6/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
+github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
+github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
 github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI=
 github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI=
@@ -295,8 +290,9 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rK
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/poy/onpar v1.1.2 h1:QaNrNiZx0+Nar5dLgTVp5mXkyoVFIbepjyEoGSnhbAY=
 github.com/poy/onpar v1.1.2/go.mod h1:6X8FLNoxyr9kkmnlqpK6LSoiOtrO6MICtWwEuWkLjzg=
 github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
@@ -304,63 +300,59 @@ github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P
 github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
 github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
 github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g=
-github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
-github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
+github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI=
+github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
 github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
 github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos=
-github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
 github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
 github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc=
-github.com/prometheus/common v0.47.0 h1:p5Cz0FNHo7SnWOmWmoRozVcjEp0bIVU8cV7OShpjL1k=
-github.com/prometheus/common v0.47.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
-github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g=
-github.com/prometheus/exporter-toolkit v0.11.0/go.mod h1:BVnENhnNecpwoTLiABx7mrPB/OLRIgN74qlQbV+FK1Q=
+github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc=
+github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw=
+github.com/prometheus/exporter-toolkit v0.13.1 h1:Evsh0gWQo2bdOHlnz9+0Nm7/OFfIwhE2Ws4A2jIlR04=
+github.com/prometheus/exporter-toolkit v0.13.1/go.mod h1:ujdv2YIOxtdFxxqtloLpbqmxd5J0Le6IITUvIRSWjj0=
 github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
 github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
 github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
-github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
-github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
 github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
-github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
-github.com/rubenv/sql-migrate v1.6.0 h1:IZpcTlAx/VKXphWEpwWJ7BaMq05tYtE80zYz+8a5Il8=
-github.com/rubenv/sql-migrate v1.6.0/go.mod h1:m3ilnKP7sNb4eYkLsp6cGdPOl4OBcXM6rcbzU+Oqc5k=
+github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
+github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/rubenv/sql-migrate v1.7.0 h1:HtQq1xyTN2ISmQDggnh0c9U3JlP8apWh8YO2jzlXpTI=
+github.com/rubenv/sql-migrate v1.7.0/go.mod h1:S4wtDEG1CKn+0ShpTtzWhFpHHI5PvCUtiGI+C+Z2THE=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
-github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
-github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
-github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
-github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
+github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
+github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
+github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
+github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
 github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
-github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0=
-github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
-github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
-github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
+github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
+github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
+github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
 github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
-github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
-github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
 github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
 github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
 github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -374,7 +366,6 @@ github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRT
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43 h1:+lm10QQTNSBd8DVTNGHx7o/IKu9HYDvLMffDhbyLccI=
 github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs=
 github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50 h1:hlE8//ciYMztlGpl/VA+Zm1AcTPHYkHJPbHqE6WJUXE=
@@ -383,14 +374,14 @@ github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f h1
 github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg=
 go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
 go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 h1:aFJWCqJMNjENlcleuuOkGAPH82y0yULBScfXcIEdS24=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1/go.mod h1:sEGXWArGqc3tVa+ekntsN65DmVbVeW+7lTKTjZF3/Fo=
-go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc=
-go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo=
-go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4=
-go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM=
-go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc=
-go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg=
+go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo=
+go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4=
+go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q=
+go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s=
+go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g=
+go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI=
 go.starlark.net v0.0.0-20231121155337-90ade8b19d09 h1:hzy3LFnSN8kuQK8h9tHl4ndF6UruMj47OqwqsS+/Ai4=
 go.starlark.net v0.0.0-20231121155337-90ade8b19d09/go.mod h1:LcLNIzVOMp4oV+uusnpk+VU+SzXaJakUuBjoCSWH5dM=
 go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8=
@@ -403,103 +394,75 @@ golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnf
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
-golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
-golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
-golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc h1:ao2WRsKSzW6KuUY9IWPwWahcHCgR0s52IfwutMfEbdM=
-golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI=
+golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw=
+golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U=
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
-golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
+golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
 golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
-golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
-golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
-golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI=
-golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8=
+golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
+golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
+golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
+golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
-golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
+golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
-golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
-golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
-golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
+golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
+golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24=
+golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
-golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
-golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
-golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
+golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
+golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
 golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
 golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
-golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
-golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
+golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
-google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 h1:NnYq6UN9ReLM9/Y01KWNOWyI5xQ9kbIms5GGJVwS/Yc=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY=
-google.golang.org/grpc v1.64.1 h1:LKtvyfbX3UGVPFcGqJ9ItpVWW6oN/2XqTxfAnwRRXiA=
-google.golang.org/grpc v1.64.1/go.mod h1:hiQF4LFZelK2WKaP6W0L92zGHtiQdZxk8CrSdvyjeP0=
-google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
-google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY=
+google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
+google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
+google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA=
+google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
 gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/evanphx/json-patch.v5 v5.7.0 h1:dGKGylPlZ/jus2g1YqhhyzfH0gPy2R8/MYUpW/OslTY=
-gopkg.in/evanphx/json-patch.v5 v5.7.0/go.mod h1:/kvTRh1TVm5wuM6OkHxqXtE/1nUZZpihg29RtuIyfvk=
+gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
+gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
 gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
 gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -507,40 +470,40 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o=
 gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g=
-helm.sh/helm/v3 v3.15.2 h1:/3XINUFinJOBjQplGnjw92eLGpgXXp1L8chWPkCkDuw=
-helm.sh/helm/v3 v3.15.2/go.mod h1:FzSIP8jDQaa6WAVg9F+OkKz7J0ZmAga4MABtTbsb9WQ=
-k8s.io/api v0.30.2 h1:+ZhRj+28QT4UOH+BKznu4CBgPWgkXO7XAvMcMl0qKvI=
-k8s.io/api v0.30.2/go.mod h1:ULg5g9JvOev2dG0u2hig4Z7tQ2hHIuS+m8MNZ+X6EmI=
-k8s.io/apiextensions-apiserver v0.30.0 h1:jcZFKMqnICJfRxTgnC4E+Hpcq8UEhT8B2lhBcQ+6uAs=
-k8s.io/apiextensions-apiserver v0.30.0/go.mod h1:N9ogQFGcrbWqAY9p2mUAL5mGxsLqwgtUce127VtRX5Y=
-k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg=
-k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc=
-k8s.io/apiserver v0.30.2 h1:ACouHiYl1yFI2VFI3YGM+lvxgy6ir4yK2oLOsLI1/tw=
-k8s.io/apiserver v0.30.2/go.mod h1:BOTdFBIch9Sv0ypSEcUR6ew/NUFGocRFNl72Ra7wTm8=
-k8s.io/cli-runtime v0.30.0 h1:0vn6/XhOvn1RJ2KJOC6IRR2CGqrpT6QQF4+8pYpWQ48=
-k8s.io/cli-runtime v0.30.0/go.mod h1:vATpDMATVTMA79sZ0YUCzlMelf6rUjoBzlp+RnoM+cg=
-k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50=
-k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs=
-k8s.io/component-base v0.30.2 h1:pqGBczYoW1sno8q9ObExUqrYSKhtE5rW3y6gX88GZII=
-k8s.io/component-base v0.30.2/go.mod h1:yQLkQDrkK8J6NtP+MGJOws+/PPeEXNpwFixsUI7h/OE=
-k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw=
-k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+helm.sh/helm/v3 v3.16.1 h1:cER6tI/8PgUAsaJaQCVBUg3VI9KN4oVaZJgY60RIc0c=
+helm.sh/helm/v3 v3.16.1/go.mod h1:r+xBHHP20qJeEqtvBXMf7W35QDJnzY/eiEBzt+TfHps=
+k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU=
+k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI=
+k8s.io/apiextensions-apiserver v0.31.1 h1:L+hwULvXx+nvTYX/MKM3kKMZyei+UiSXQWciX/N6E40=
+k8s.io/apiextensions-apiserver v0.31.1/go.mod h1:tWMPR3sgW+jsl2xm9v7lAyRF1rYEK71i9G5dRtkknoQ=
+k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U=
+k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
+k8s.io/apiserver v0.31.1 h1:Sars5ejQDCRBY5f7R3QFHdqN3s61nhkpaX8/k1iEw1c=
+k8s.io/apiserver v0.31.1/go.mod h1:lzDhpeToamVZJmmFlaLwdYZwd7zB+WYRYIboqA1kGxM=
+k8s.io/cli-runtime v0.31.1 h1:/ZmKhmZ6hNqDM+yf9s3Y4KEYakNXUn5sod2LWGGwCuk=
+k8s.io/cli-runtime v0.31.1/go.mod h1:pKv1cDIaq7ehWGuXQ+A//1OIF+7DI+xudXtExMCbe9U=
+k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0=
+k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg=
+k8s.io/component-base v0.31.1 h1:UpOepcrX3rQ3ab5NB6g5iP0tvsgJWzxTyAo20sgYSy8=
+k8s.io/component-base v0.31.1/go.mod h1:WGeaw7t/kTsqpVTaCoVEtillbqAhF2/JgvO0LDOMa0w=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
 k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
 k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
-k8s.io/kubectl v0.30.0 h1:xbPvzagbJ6RNYVMVuiHArC1grrV5vSmmIcSZuCdzRyk=
-k8s.io/kubectl v0.30.0/go.mod h1:zgolRw2MQXLPwmic2l/+iHs239L49fhSeICuMhQQXTI=
+k8s.io/kubectl v0.31.0 h1:kANwAAPVY02r4U4jARP/C+Q1sssCcN/1p9Nk+7BQKVg=
+k8s.io/kubectl v0.31.0/go.mod h1:pB47hhFypGsaHAPjlwrNbvhXgmuAr01ZBvAIIUaI8d4=
 k8s.io/kubelet v0.30.2 h1:Ck4E/pHndI20IzDXxS57dElhDGASPO5pzXF7BcKfmCY=
 k8s.io/kubelet v0.30.2/go.mod h1:DSwwTbLQmdNkebAU7ypIALR4P9aXZNFwgRmedojUE94=
-k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak=
-k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 oras.land/oras-go v1.2.5 h1:XpYuAwAb0DfQsunIyMfeET92emK8km3W4yEzZvUbsTo=
 oras.land/oras-go v1.2.5/go.mod h1:PuAwRShRZCsZb7g8Ar3jKKQR/2A/qN+pkYxIOd/FAoo=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
 sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
-sigs.k8s.io/kustomize/api v0.16.0 h1:/zAR4FOQDCkgSDmVzV2uiFbuy9bhu3jEzthrHCuvm1g=
-sigs.k8s.io/kustomize/api v0.16.0/go.mod h1:MnFZ7IP2YqVyVwMWoRxPtgl/5hpA+eCCrQR/866cm5c=
-sigs.k8s.io/kustomize/kyaml v0.16.0 h1:6J33uKSoATlKZH16unr2XOhDI+otoe2sR3M8PDzW3K0=
-sigs.k8s.io/kustomize/kyaml v0.16.0/go.mod h1:xOK/7i+vmE14N2FdFyugIshB8eF6ALpy7jI87Q2nRh4=
+sigs.k8s.io/kustomize/api v0.17.2 h1:E7/Fjk7V5fboiuijoZHgs4aHuexi5Y2loXlVOAVAG5g=
+sigs.k8s.io/kustomize/api v0.17.2/go.mod h1:UWTz9Ct+MvoeQsHcJ5e+vziRRkwimm3HytpZgIYqye0=
+sigs.k8s.io/kustomize/kyaml v0.17.1 h1:TnxYQxFXzbmNG6gOINgGWQt09GghzgTP6mIurOgrLCQ=
+sigs.k8s.io/kustomize/kyaml v0.17.1/go.mod h1:9V0mCjIEYjlXuCdYsSXvyoy2BTsLESH7TlGV81S282U=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
 sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
diff --git a/hack/VERSION b/hack/VERSION
index 17b0cffa..4dfd2130 100644
--- a/hack/VERSION
+++ b/hack/VERSION
@@ -1,4 +1,4 @@
-OLD_DCGM_VERSION=3.3.8
-OLD_EXPORTER_VERSION=3.6.0
-NEW_DCGM_VERSION=3.3.9
-NEW_EXPORTER_VERSION=3.6.1
+OLD_DCGM_VERSION=3.3.9
+OLD_EXPORTER_VERSION=3.6.1
+NEW_DCGM_VERSION=4.0.0
+NEW_EXPORTER_VERSION=4.0.0
diff --git a/internal/mocks/pkg/collector/mock_collector.go b/internal/mocks/pkg/collector/mock_collector.go
new file mode 100644
index 00000000..395ebb8b
--- /dev/null
+++ b/internal/mocks/pkg/collector/mock_collector.go
@@ -0,0 +1,81 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/collector (interfaces: Collector)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/collector/mock_collector.go -package=collector -copyright_file=../../../hack/header.txt . Collector
+//
+
+// Package collector is a generated GoMock package.
+package collector
+
+import (
+	reflect "reflect"
+
+	collector "github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockCollector is a gomock mock of the collector.Collector interface; it holds the controller that receives calls and the recorder returned by EXPECT.
+type MockCollector struct {
+	ctrl     *gomock.Controller
+	recorder *MockCollectorMockRecorder
+}
+
+// MockCollectorMockRecorder is the mock recorder for MockCollector; its methods record expected calls on the wrapped mock.
+type MockCollectorMockRecorder struct {
+	mock *MockCollector
+}
+
+// NewMockCollector creates a MockCollector bound to ctrl and attaches a recorder that points back at the new mock.
+func NewMockCollector(ctrl *gomock.Controller) *MockCollector {
+	mock := &MockCollector{ctrl: ctrl}
+	mock.recorder = &MockCollectorMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller indicates expected use.
+func (m *MockCollector) EXPECT() *MockCollectorMockRecorder {
+	return m.recorder
+}
+
+// Cleanup mocks Collector.Cleanup: it reports the call to the gomock controller and ignores whatever the controller returns.
+func (m *MockCollector) Cleanup() {
+	m.ctrl.T.Helper()
+	m.ctrl.Call(m, "Cleanup")
+}
+
+// Cleanup records an expected call of MockCollector.Cleanup and returns the resulting *gomock.Call.
+func (mr *MockCollectorMockRecorder) Cleanup() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Cleanup", reflect.TypeOf((*MockCollector)(nil).Cleanup))
+}
+
+// GetMetrics mocks Collector.GetMetrics: it reports the call to the gomock controller and returns the two results asserted to collector.MetricsByCounter and error (zero values when an assertion fails).
+func (m *MockCollector) GetMetrics() (collector.MetricsByCounter, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetMetrics")
+	ret0, _ := ret[0].(collector.MetricsByCounter)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetMetrics records an expected call of MockCollector.GetMetrics and returns the resulting *gomock.Call.
+func (mr *MockCollectorMockRecorder) GetMetrics() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetMetrics", reflect.TypeOf((*MockCollector)(nil).GetMetrics))
+}
diff --git a/internal/mocks/pkg/dcgmprovider/mock_client.go b/internal/mocks/pkg/dcgmprovider/mock_client.go
new file mode 100644
index 00000000..8229c26d
--- /dev/null
+++ b/internal/mocks/pkg/dcgmprovider/mock_client.go
@@ -0,0 +1,507 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider (interfaces: DCGM)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/dcgmprovider/mock_client.go -package=dcgmprovider -copyright_file=../../../hack/header.txt . DCGM
+//
+
+// Package dcgmprovider is a generated GoMock package.
+package dcgmprovider
+
+import (
+	reflect "reflect"
+	time "time"
+
+	dcgm "github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockDCGM is a gomock mock of the dcgmprovider DCGM interface; it holds the controller that receives calls and the recorder returned by EXPECT.
+type MockDCGM struct {
+	ctrl     *gomock.Controller
+	recorder *MockDCGMMockRecorder
+}
+
+// MockDCGMMockRecorder is the mock recorder for MockDCGM; its methods record expected calls on the wrapped mock.
+type MockDCGMMockRecorder struct {
+	mock *MockDCGM
+}
+
+// NewMockDCGM creates a MockDCGM bound to ctrl and attaches a recorder that points back at the new mock.
+func NewMockDCGM(ctrl *gomock.Controller) *MockDCGM {
+	mock := &MockDCGM{ctrl: ctrl}
+	mock.recorder = &MockDCGMMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller indicates expected use.
+func (m *MockDCGM) EXPECT() *MockDCGMMockRecorder {
+	return m.recorder
+}
+
+// AddEntityToGroup mocks DCGM.AddEntityToGroup: it passes its arguments to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) AddEntityToGroup(arg0 dcgm.GroupHandle, arg1 dcgm.Field_Entity_Group, arg2 uint) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "AddEntityToGroup", arg0, arg1, arg2)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// AddEntityToGroup records an expected call of MockDCGM.AddEntityToGroup with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) AddEntityToGroup(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddEntityToGroup", reflect.TypeOf((*MockDCGM)(nil).AddEntityToGroup), arg0, arg1, arg2)
+}
+
+// AddLinkEntityToGroup mocks DCGM.AddLinkEntityToGroup: it passes its arguments to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) AddLinkEntityToGroup(arg0 dcgm.GroupHandle, arg1, arg2 uint) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "AddLinkEntityToGroup", arg0, arg1, arg2)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// AddLinkEntityToGroup records an expected call of MockDCGM.AddLinkEntityToGroup with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) AddLinkEntityToGroup(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddLinkEntityToGroup", reflect.TypeOf((*MockDCGM)(nil).AddLinkEntityToGroup), arg0, arg1, arg2)
+}
+
+// Cleanup mocks DCGM.Cleanup: it reports the call to the gomock controller and ignores whatever the controller returns.
+func (m *MockDCGM) Cleanup() {
+	m.ctrl.T.Helper()
+	m.ctrl.Call(m, "Cleanup")
+}
+
+// Cleanup records an expected call of MockDCGM.Cleanup and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) Cleanup() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Cleanup", reflect.TypeOf((*MockDCGM)(nil).Cleanup))
+}
+
+// CreateFakeEntities mocks DCGM.CreateFakeEntities: it passes its argument to the gomock controller and returns the two results asserted to []uint and error (zero values when an assertion fails).
+func (m *MockDCGM) CreateFakeEntities(arg0 []dcgm.MigHierarchyInfo) ([]uint, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "CreateFakeEntities", arg0)
+	ret0, _ := ret[0].([]uint)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// CreateFakeEntities records an expected call of MockDCGM.CreateFakeEntities with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) CreateFakeEntities(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateFakeEntities", reflect.TypeOf((*MockDCGM)(nil).CreateFakeEntities), arg0)
+}
+
+// CreateGroup mocks DCGM.CreateGroup: it passes its argument to the gomock controller and returns the two results asserted to dcgm.GroupHandle and error (zero values when an assertion fails).
+func (m *MockDCGM) CreateGroup(arg0 string) (dcgm.GroupHandle, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "CreateGroup", arg0)
+	ret0, _ := ret[0].(dcgm.GroupHandle)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// CreateGroup records an expected call of MockDCGM.CreateGroup with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) CreateGroup(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateGroup", reflect.TypeOf((*MockDCGM)(nil).CreateGroup), arg0)
+}
+
+// DestroyGroup mocks DCGM.DestroyGroup: it passes its argument to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) DestroyGroup(arg0 dcgm.GroupHandle) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "DestroyGroup", arg0)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// DestroyGroup records an expected call of MockDCGM.DestroyGroup with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) DestroyGroup(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DestroyGroup", reflect.TypeOf((*MockDCGM)(nil).DestroyGroup), arg0)
+}
+
+// EntitiesGetLatestValues mocks DCGM.EntitiesGetLatestValues: it passes its arguments to the gomock controller and returns the two results asserted to []dcgm.FieldValue_v2 and error (zero values when an assertion fails).
+func (m *MockDCGM) EntitiesGetLatestValues(arg0 []dcgm.GroupEntityPair, arg1 []dcgm.Short, arg2 uint) ([]dcgm.FieldValue_v2, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "EntitiesGetLatestValues", arg0, arg1, arg2)
+	ret0, _ := ret[0].([]dcgm.FieldValue_v2)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// EntitiesGetLatestValues records an expected call of MockDCGM.EntitiesGetLatestValues with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) EntitiesGetLatestValues(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EntitiesGetLatestValues", reflect.TypeOf((*MockDCGM)(nil).EntitiesGetLatestValues), arg0, arg1, arg2)
+}
+
+// EntityGetLatestValues mocks DCGM.EntityGetLatestValues: it passes its arguments to the gomock controller and returns the two results asserted to []dcgm.FieldValue_v1 and error (zero values when an assertion fails).
+func (m *MockDCGM) EntityGetLatestValues(arg0 dcgm.Field_Entity_Group, arg1 uint, arg2 []dcgm.Short) ([]dcgm.FieldValue_v1, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "EntityGetLatestValues", arg0, arg1, arg2)
+	ret0, _ := ret[0].([]dcgm.FieldValue_v1)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// EntityGetLatestValues records an expected call of MockDCGM.EntityGetLatestValues with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) EntityGetLatestValues(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EntityGetLatestValues", reflect.TypeOf((*MockDCGM)(nil).EntityGetLatestValues), arg0, arg1, arg2)
+}
+
+// FieldGetById mocks DCGM.FieldGetById: it passes its argument to the gomock controller and returns the first result asserted to dcgm.FieldMeta (the zero value when the assertion fails).
+func (m *MockDCGM) FieldGetById(arg0 dcgm.Short) dcgm.FieldMeta {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "FieldGetById", arg0)
+	ret0, _ := ret[0].(dcgm.FieldMeta)
+	return ret0
+}
+
+// FieldGetById records an expected call of MockDCGM.FieldGetById with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) FieldGetById(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FieldGetById", reflect.TypeOf((*MockDCGM)(nil).FieldGetById), arg0)
+}
+
+// FieldGroupCreate mocks DCGM.FieldGroupCreate: it passes its arguments to the gomock controller and returns the two results asserted to dcgm.FieldHandle and error (zero values when an assertion fails).
+func (m *MockDCGM) FieldGroupCreate(arg0 string, arg1 []dcgm.Short) (dcgm.FieldHandle, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "FieldGroupCreate", arg0, arg1)
+	ret0, _ := ret[0].(dcgm.FieldHandle)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// FieldGroupCreate records an expected call of MockDCGM.FieldGroupCreate with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) FieldGroupCreate(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FieldGroupCreate", reflect.TypeOf((*MockDCGM)(nil).FieldGroupCreate), arg0, arg1)
+}
+
+// FieldGroupDestroy mocks DCGM.FieldGroupDestroy: it passes its argument to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) FieldGroupDestroy(arg0 dcgm.FieldHandle) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "FieldGroupDestroy", arg0)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// FieldGroupDestroy records an expected call of MockDCGM.FieldGroupDestroy with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) FieldGroupDestroy(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FieldGroupDestroy", reflect.TypeOf((*MockDCGM)(nil).FieldGroupDestroy), arg0)
+}
+
+// Fv2_String mocks DCGM.Fv2_String: it passes its argument to the gomock controller and returns the first result asserted to string (empty when the assertion fails).
+func (m *MockDCGM) Fv2_String(arg0 dcgm.FieldValue_v2) string {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Fv2_String", arg0)
+	ret0, _ := ret[0].(string)
+	return ret0
+}
+
+// Fv2_String records an expected call of MockDCGM.Fv2_String with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) Fv2_String(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Fv2_String", reflect.TypeOf((*MockDCGM)(nil).Fv2_String), arg0)
+}
+
+// GetAllDeviceCount mocks DCGM.GetAllDeviceCount: it reports the call to the gomock controller and returns the two results asserted to uint and error (zero values when an assertion fails).
+func (m *MockDCGM) GetAllDeviceCount() (uint, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetAllDeviceCount")
+	ret0, _ := ret[0].(uint)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetAllDeviceCount records an expected call of MockDCGM.GetAllDeviceCount and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetAllDeviceCount() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAllDeviceCount", reflect.TypeOf((*MockDCGM)(nil).GetAllDeviceCount))
+}
+
+// GetCpuHierarchy mocks DCGM.GetCpuHierarchy: it reports the call to the gomock controller and returns the two results asserted to dcgm.CpuHierarchy_v1 and error (zero values when an assertion fails).
+func (m *MockDCGM) GetCpuHierarchy() (dcgm.CpuHierarchy_v1, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetCpuHierarchy")
+	ret0, _ := ret[0].(dcgm.CpuHierarchy_v1)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetCpuHierarchy records an expected call of MockDCGM.GetCpuHierarchy and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetCpuHierarchy() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCpuHierarchy", reflect.TypeOf((*MockDCGM)(nil).GetCpuHierarchy))
+}
+
+// GetDeviceInfo mocks DCGM.GetDeviceInfo: it passes its argument to the gomock controller and returns the two results asserted to dcgm.Device and error (zero values when an assertion fails).
+func (m *MockDCGM) GetDeviceInfo(arg0 uint) (dcgm.Device, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetDeviceInfo", arg0)
+	ret0, _ := ret[0].(dcgm.Device)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetDeviceInfo records an expected call of MockDCGM.GetDeviceInfo with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetDeviceInfo(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetDeviceInfo", reflect.TypeOf((*MockDCGM)(nil).GetDeviceInfo), arg0)
+}
+
+// GetEntityGroupEntities mocks DCGM.GetEntityGroupEntities: it passes its argument to the gomock controller and returns the two results asserted to []uint and error (zero values when an assertion fails).
+func (m *MockDCGM) GetEntityGroupEntities(arg0 dcgm.Field_Entity_Group) ([]uint, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetEntityGroupEntities", arg0)
+	ret0, _ := ret[0].([]uint)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetEntityGroupEntities records an expected call of MockDCGM.GetEntityGroupEntities with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetEntityGroupEntities(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetEntityGroupEntities", reflect.TypeOf((*MockDCGM)(nil).GetEntityGroupEntities), arg0)
+}
+
+// GetGpuInstanceHierarchy mocks DCGM.GetGpuInstanceHierarchy: it reports the call to the gomock controller and returns the two results asserted to dcgm.MigHierarchy_v2 and error (zero values when an assertion fails).
+func (m *MockDCGM) GetGpuInstanceHierarchy() (dcgm.MigHierarchy_v2, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetGpuInstanceHierarchy")
+	ret0, _ := ret[0].(dcgm.MigHierarchy_v2)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetGpuInstanceHierarchy records an expected call of MockDCGM.GetGpuInstanceHierarchy and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetGpuInstanceHierarchy() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetGpuInstanceHierarchy", reflect.TypeOf((*MockDCGM)(nil).GetGpuInstanceHierarchy))
+}
+
+// GetGroupInfo mocks DCGM.GetGroupInfo: it passes its argument to the gomock controller and returns the two results asserted to *dcgm.GroupInfo and error (nil values when an assertion fails).
+func (m *MockDCGM) GetGroupInfo(arg0 dcgm.GroupHandle) (*dcgm.GroupInfo, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetGroupInfo", arg0)
+	ret0, _ := ret[0].(*dcgm.GroupInfo)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetGroupInfo records an expected call of MockDCGM.GetGroupInfo with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetGroupInfo(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetGroupInfo", reflect.TypeOf((*MockDCGM)(nil).GetGroupInfo), arg0)
+}
+
+// GetNvLinkLinkStatus mocks DCGM.GetNvLinkLinkStatus: it reports the call to the gomock controller and returns the two results asserted to []dcgm.NvLinkStatus and error (zero values when an assertion fails).
+func (m *MockDCGM) GetNvLinkLinkStatus() ([]dcgm.NvLinkStatus, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetNvLinkLinkStatus")
+	ret0, _ := ret[0].([]dcgm.NvLinkStatus)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetNvLinkLinkStatus records an expected call of MockDCGM.GetNvLinkLinkStatus and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetNvLinkLinkStatus() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetNvLinkLinkStatus", reflect.TypeOf((*MockDCGM)(nil).GetNvLinkLinkStatus))
+}
+
+// GetSupportedDevices mocks DCGM.GetSupportedDevices: it reports the call to the gomock controller and returns the two results asserted to []uint and error (zero values when an assertion fails).
+func (m *MockDCGM) GetSupportedDevices() ([]uint, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetSupportedDevices")
+	ret0, _ := ret[0].([]uint)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetSupportedDevices records an expected call of MockDCGM.GetSupportedDevices and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetSupportedDevices() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSupportedDevices", reflect.TypeOf((*MockDCGM)(nil).GetSupportedDevices))
+}
+
+// GetSupportedMetricGroups mocks DCGM.GetSupportedMetricGroups: it passes its argument to the gomock controller and returns the two results asserted to []dcgm.MetricGroup and error (zero values when an assertion fails).
+func (m *MockDCGM) GetSupportedMetricGroups(arg0 uint) ([]dcgm.MetricGroup, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetSupportedMetricGroups", arg0)
+	ret0, _ := ret[0].([]dcgm.MetricGroup)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetSupportedMetricGroups records an expected call of MockDCGM.GetSupportedMetricGroups with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetSupportedMetricGroups(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSupportedMetricGroups", reflect.TypeOf((*MockDCGM)(nil).GetSupportedMetricGroups), arg0)
+}
+
+// GetValuesSince mocks DCGM.GetValuesSince: it passes its arguments to the gomock controller and returns the three results asserted to []dcgm.FieldValue_v2, time.Time and error (zero values when an assertion fails).
+func (m *MockDCGM) GetValuesSince(arg0 dcgm.GroupHandle, arg1 dcgm.FieldHandle, arg2 time.Time) ([]dcgm.FieldValue_v2, time.Time, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetValuesSince", arg0, arg1, arg2)
+	ret0, _ := ret[0].([]dcgm.FieldValue_v2)
+	ret1, _ := ret[1].(time.Time)
+	ret2, _ := ret[2].(error)
+	return ret0, ret1, ret2
+}
+
+// GetValuesSince records an expected call of MockDCGM.GetValuesSince with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GetValuesSince(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetValuesSince", reflect.TypeOf((*MockDCGM)(nil).GetValuesSince), arg0, arg1, arg2)
+}
+
+// GroupAllGPUs mocks DCGM.GroupAllGPUs: it reports the call to the gomock controller and returns the first result asserted to dcgm.GroupHandle (the zero value when the assertion fails).
+func (m *MockDCGM) GroupAllGPUs() dcgm.GroupHandle {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GroupAllGPUs")
+	ret0, _ := ret[0].(dcgm.GroupHandle)
+	return ret0
+}
+
+// GroupAllGPUs records an expected call of MockDCGM.GroupAllGPUs and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) GroupAllGPUs() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GroupAllGPUs", reflect.TypeOf((*MockDCGM)(nil).GroupAllGPUs))
+}
+
+// HealthCheck mocks DCGM.HealthCheck: it passes its argument to the gomock controller and returns the two results asserted to dcgm.HealthResponse and error (zero values when an assertion fails).
+func (m *MockDCGM) HealthCheck(arg0 dcgm.GroupHandle) (dcgm.HealthResponse, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "HealthCheck", arg0)
+	ret0, _ := ret[0].(dcgm.HealthResponse)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// HealthCheck records an expected call of MockDCGM.HealthCheck with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) HealthCheck(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HealthCheck", reflect.TypeOf((*MockDCGM)(nil).HealthCheck), arg0)
+}
+
+// HealthGet mocks DCGM.HealthGet: it passes its argument to the gomock controller and returns the two results asserted to dcgm.HealthSystem and error (zero values when an assertion fails).
+func (m *MockDCGM) HealthGet(arg0 dcgm.GroupHandle) (dcgm.HealthSystem, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "HealthGet", arg0)
+	ret0, _ := ret[0].(dcgm.HealthSystem)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// HealthGet records an expected call of MockDCGM.HealthGet with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) HealthGet(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HealthGet", reflect.TypeOf((*MockDCGM)(nil).HealthGet), arg0)
+}
+
+// HealthSet mocks DCGM.HealthSet: it passes its arguments to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) HealthSet(arg0 dcgm.GroupHandle, arg1 dcgm.HealthSystem) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "HealthSet", arg0, arg1)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// HealthSet records an expected call of MockDCGM.HealthSet with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) HealthSet(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "HealthSet", reflect.TypeOf((*MockDCGM)(nil).HealthSet), arg0, arg1)
+}
+
+// InjectFieldValue mocks DCGM.InjectFieldValue: it passes its six arguments to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) InjectFieldValue(arg0, arg1, arg2 uint, arg3 int, arg4 int64, arg5 any) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "InjectFieldValue", arg0, arg1, arg2, arg3, arg4, arg5)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// InjectFieldValue records an expected call of MockDCGM.InjectFieldValue with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) InjectFieldValue(arg0, arg1, arg2, arg3, arg4, arg5 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InjectFieldValue", reflect.TypeOf((*MockDCGM)(nil).InjectFieldValue), arg0, arg1, arg2, arg3, arg4, arg5)
+}
+
+// LinkGetLatestValues mocks DCGM.LinkGetLatestValues: it passes its arguments to the gomock controller and returns the two results asserted to []dcgm.FieldValue_v1 and error (zero values when an assertion fails).
+func (m *MockDCGM) LinkGetLatestValues(arg0, arg1 uint, arg2 []dcgm.Short) ([]dcgm.FieldValue_v1, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "LinkGetLatestValues", arg0, arg1, arg2)
+	ret0, _ := ret[0].([]dcgm.FieldValue_v1)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// LinkGetLatestValues records an expected call of MockDCGM.LinkGetLatestValues with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) LinkGetLatestValues(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LinkGetLatestValues", reflect.TypeOf((*MockDCGM)(nil).LinkGetLatestValues), arg0, arg1, arg2)
+}
+
+// NewDefaultGroup mocks DCGM.NewDefaultGroup: it passes its argument to the gomock controller and returns the two results asserted to dcgm.GroupHandle and error (zero values when an assertion fails).
+func (m *MockDCGM) NewDefaultGroup(arg0 string) (dcgm.GroupHandle, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "NewDefaultGroup", arg0)
+	ret0, _ := ret[0].(dcgm.GroupHandle)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// NewDefaultGroup records an expected call of MockDCGM.NewDefaultGroup with the supplied argument and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) NewDefaultGroup(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NewDefaultGroup", reflect.TypeOf((*MockDCGM)(nil).NewDefaultGroup), arg0)
+}
+
+// UpdateAllFields mocks DCGM.UpdateAllFields: it reports the call to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) UpdateAllFields() error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "UpdateAllFields")
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// UpdateAllFields records an expected call of MockDCGM.UpdateAllFields and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) UpdateAllFields() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateAllFields", reflect.TypeOf((*MockDCGM)(nil).UpdateAllFields))
+}
+
+// WatchFieldsWithGroupEx mocks DCGM.WatchFieldsWithGroupEx: it passes its five arguments to the gomock controller and returns the first result asserted to error (nil when the assertion fails).
+func (m *MockDCGM) WatchFieldsWithGroupEx(arg0 dcgm.FieldHandle, arg1 dcgm.GroupHandle, arg2 int64, arg3 float64, arg4 int32) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "WatchFieldsWithGroupEx", arg0, arg1, arg2, arg3, arg4)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// WatchFieldsWithGroupEx records an expected call of MockDCGM.WatchFieldsWithGroupEx with the supplied arguments and returns the resulting *gomock.Call.
+func (mr *MockDCGMMockRecorder) WatchFieldsWithGroupEx(arg0, arg1, arg2, arg3, arg4 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WatchFieldsWithGroupEx", reflect.TypeOf((*MockDCGM)(nil).WatchFieldsWithGroupEx), arg0, arg1, arg2, arg3, arg4)
+}
diff --git a/internal/mocks/pkg/deviceinfo/mock_device_info.go b/internal/mocks/pkg/deviceinfo/mock_device_info.go
new file mode 100644
index 00000000..0d87396b
--- /dev/null
+++ b/internal/mocks/pkg/deviceinfo/mock_device_info.go
@@ -0,0 +1,266 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo (interfaces: Provider)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/deviceinfo/mock_device_info.go -package=deviceinfo -copyright_file=../../../hack/header.txt . Provider
+//
+
+// Package deviceinfo is a generated GoMock package.
+package deviceinfo
+
+import (
+	reflect "reflect"
+
+	appconfig "github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	deviceinfo "github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	dcgm "github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockProvider is a gomock-based mock of the deviceinfo Provider interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockProvider struct {
+	ctrl     *gomock.Controller
+	recorder *MockProviderMockRecorder
+}
+
+// MockProviderMockRecorder is the mock recorder for MockProvider; its methods register expected calls on the mock it points to.
+type MockProviderMockRecorder struct {
+	mock *MockProvider
+}
+
+// NewMockProvider creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockProvider(ctrl *gomock.Controller) *MockProvider {
+	mock := &MockProvider{ctrl: ctrl}
+	mock.recorder = &MockProviderMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockProvider) EXPECT() *MockProviderMockRecorder {
+	return m.recorder
+}
+
+// COpts mocks base method: it forwards the call to the controller and returns its result as appconfig.DeviceOptions (zero value when the result has a different type).
+func (m *MockProvider) COpts() appconfig.DeviceOptions {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "COpts")
+	ret0, _ := ret[0].(appconfig.DeviceOptions)
+	return ret0
+}
+
+// COpts indicates an expected call of COpts; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) COpts() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "COpts", reflect.TypeOf((*MockProvider)(nil).COpts))
+}
+
+// CPU mocks base method: it forwards arg0 to the controller and returns its result as deviceinfo.CPUInfo (zero value when the result has a different type).
+func (m *MockProvider) CPU(arg0 uint) deviceinfo.CPUInfo {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "CPU", arg0)
+	ret0, _ := ret[0].(deviceinfo.CPUInfo)
+	return ret0
+}
+
+// CPU indicates an expected call of CPU; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) CPU(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CPU", reflect.TypeOf((*MockProvider)(nil).CPU), arg0)
+}
+
+// CPUs mocks base method: it forwards the call to the controller and returns its result as []deviceinfo.CPUInfo (nil when the result has a different type).
+func (m *MockProvider) CPUs() []deviceinfo.CPUInfo {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "CPUs")
+	ret0, _ := ret[0].([]deviceinfo.CPUInfo)
+	return ret0
+}
+
+// CPUs indicates an expected call of CPUs; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) CPUs() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CPUs", reflect.TypeOf((*MockProvider)(nil).CPUs))
+}
+
+// GOpts mocks base method: it forwards the call to the controller and returns its result as appconfig.DeviceOptions (zero value when the result has a different type).
+func (m *MockProvider) GOpts() appconfig.DeviceOptions {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GOpts")
+	ret0, _ := ret[0].(appconfig.DeviceOptions)
+	return ret0
+}
+
+// GOpts indicates an expected call of GOpts; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) GOpts() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GOpts", reflect.TypeOf((*MockProvider)(nil).GOpts))
+}
+
+// GPU mocks base method: it forwards arg0 to the controller and returns its result as deviceinfo.GPUInfo (zero value when the result has a different type).
+func (m *MockProvider) GPU(arg0 uint) deviceinfo.GPUInfo {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GPU", arg0)
+	ret0, _ := ret[0].(deviceinfo.GPUInfo)
+	return ret0
+}
+
+// GPU indicates an expected call of GPU; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) GPU(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GPU", reflect.TypeOf((*MockProvider)(nil).GPU), arg0)
+}
+
+// GPUCount mocks base method: it forwards the call to the controller and returns its result as a uint (0 when the result has a different type).
+func (m *MockProvider) GPUCount() uint {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GPUCount")
+	ret0, _ := ret[0].(uint)
+	return ret0
+}
+
+// GPUCount indicates an expected call of GPUCount; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) GPUCount() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GPUCount", reflect.TypeOf((*MockProvider)(nil).GPUCount))
+}
+
+// GPUs mocks base method: it forwards the call to the controller and returns its result as []deviceinfo.GPUInfo (nil when the result has a different type).
+func (m *MockProvider) GPUs() []deviceinfo.GPUInfo {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GPUs")
+	ret0, _ := ret[0].([]deviceinfo.GPUInfo)
+	return ret0
+}
+
+// GPUs indicates an expected call of GPUs; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) GPUs() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GPUs", reflect.TypeOf((*MockProvider)(nil).GPUs))
+}
+
+// InfoType mocks base method: it forwards the call to the controller and returns its result as dcgm.Field_Entity_Group (zero value when the result has a different type).
+func (m *MockProvider) InfoType() dcgm.Field_Entity_Group {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "InfoType")
+	ret0, _ := ret[0].(dcgm.Field_Entity_Group)
+	return ret0
+}
+
+// InfoType indicates an expected call of InfoType; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) InfoType() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InfoType", reflect.TypeOf((*MockProvider)(nil).InfoType))
+}
+
+// IsCPUWatched mocks base method: it forwards arg0 to the controller and returns its result as a bool (false when the result has a different type).
+func (m *MockProvider) IsCPUWatched(arg0 uint) bool {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "IsCPUWatched", arg0)
+	ret0, _ := ret[0].(bool)
+	return ret0
+}
+
+// IsCPUWatched indicates an expected call of IsCPUWatched; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) IsCPUWatched(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsCPUWatched", reflect.TypeOf((*MockProvider)(nil).IsCPUWatched), arg0)
+}
+
+// IsCoreWatched mocks base method: it forwards arg0 and arg1 to the controller and returns its result as a bool (false when the result has a different type).
+func (m *MockProvider) IsCoreWatched(arg0, arg1 uint) bool {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "IsCoreWatched", arg0, arg1)
+	ret0, _ := ret[0].(bool)
+	return ret0
+}
+
+// IsCoreWatched indicates an expected call of IsCoreWatched; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) IsCoreWatched(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsCoreWatched", reflect.TypeOf((*MockProvider)(nil).IsCoreWatched), arg0, arg1)
+}
+
+// IsLinkWatched mocks base method: it forwards arg0 and arg1 to the controller and returns its result as a bool (false when the result has a different type).
+func (m *MockProvider) IsLinkWatched(arg0, arg1 uint) bool {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "IsLinkWatched", arg0, arg1)
+	ret0, _ := ret[0].(bool)
+	return ret0
+}
+
+// IsLinkWatched indicates an expected call of IsLinkWatched; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) IsLinkWatched(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsLinkWatched", reflect.TypeOf((*MockProvider)(nil).IsLinkWatched), arg0, arg1)
+}
+
+// IsSwitchWatched mocks base method: it forwards arg0 to the controller and returns its result as a bool (false when the result has a different type).
+func (m *MockProvider) IsSwitchWatched(arg0 uint) bool {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "IsSwitchWatched", arg0)
+	ret0, _ := ret[0].(bool)
+	return ret0
+}
+
+// IsSwitchWatched indicates an expected call of IsSwitchWatched; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) IsSwitchWatched(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsSwitchWatched", reflect.TypeOf((*MockProvider)(nil).IsSwitchWatched), arg0)
+}
+
+// SOpts mocks base method: it forwards the call to the controller and returns its result as appconfig.DeviceOptions (zero value when the result has a different type).
+func (m *MockProvider) SOpts() appconfig.DeviceOptions {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "SOpts")
+	ret0, _ := ret[0].(appconfig.DeviceOptions)
+	return ret0
+}
+
+// SOpts indicates an expected call of SOpts; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) SOpts() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SOpts", reflect.TypeOf((*MockProvider)(nil).SOpts))
+}
+
+// Switch mocks base method: it forwards arg0 to the controller and returns its result as deviceinfo.SwitchInfo (zero value when the result has a different type).
+func (m *MockProvider) Switch(arg0 uint) deviceinfo.SwitchInfo {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Switch", arg0)
+	ret0, _ := ret[0].(deviceinfo.SwitchInfo)
+	return ret0
+}
+
+// Switch indicates an expected call of Switch; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) Switch(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Switch", reflect.TypeOf((*MockProvider)(nil).Switch), arg0)
+}
+
+// Switches mocks base method: it forwards the call to the controller and returns its result as []deviceinfo.SwitchInfo (nil when the result has a different type).
+func (m *MockProvider) Switches() []deviceinfo.SwitchInfo {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Switches")
+	ret0, _ := ret[0].([]deviceinfo.SwitchInfo)
+	return ret0
+}
+
+// Switches indicates an expected call of Switches; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockProviderMockRecorder) Switches() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Switches", reflect.TypeOf((*MockProvider)(nil).Switches))
+}
diff --git a/internal/mocks/pkg/devicewatcher/mock_device_watcher.go b/internal/mocks/pkg/devicewatcher/mock_device_watcher.go
new file mode 100644
index 00000000..563decf8
--- /dev/null
+++ b/internal/mocks/pkg/devicewatcher/mock_device_watcher.go
@@ -0,0 +1,87 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher (interfaces: Watcher)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/devicewatcher/mock_device_watcher.go -package=devicewatcher -copyright_file=../../../hack/header.txt . Watcher
+//
+
+// Package devicewatcher is a generated GoMock package.
+package devicewatcher
+
+import (
+	reflect "reflect"
+
+	counters "github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	deviceinfo "github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	dcgm "github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockWatcher is a gomock-based mock of the devicewatcher Watcher interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockWatcher struct {
+	ctrl     *gomock.Controller
+	recorder *MockWatcherMockRecorder
+}
+
+// MockWatcherMockRecorder is the mock recorder for MockWatcher; its methods register expected calls on the mock it points to.
+type MockWatcherMockRecorder struct {
+	mock *MockWatcher
+}
+
+// NewMockWatcher creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockWatcher(ctrl *gomock.Controller) *MockWatcher {
+	mock := &MockWatcher{ctrl: ctrl}
+	mock.recorder = &MockWatcherMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockWatcher) EXPECT() *MockWatcherMockRecorder {
+	return m.recorder
+}
+
+// GetDeviceFields mocks base method: it forwards arg0 and arg1 to the controller and returns its result as []dcgm.Short (nil when the result has a different type).
+func (m *MockWatcher) GetDeviceFields(arg0 []counters.Counter, arg1 dcgm.Field_Entity_Group) []dcgm.Short {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetDeviceFields", arg0, arg1)
+	ret0, _ := ret[0].([]dcgm.Short)
+	return ret0
+}
+
+// GetDeviceFields indicates an expected call of GetDeviceFields; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockWatcherMockRecorder) GetDeviceFields(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetDeviceFields", reflect.TypeOf((*MockWatcher)(nil).GetDeviceFields), arg0, arg1)
+}
+
+// WatchDeviceFields mocks base method: it forwards its three arguments to the controller and returns the four results as ([]dcgm.GroupHandle, dcgm.FieldHandle, []func(), error), falling back to zero values when a result has a different type.
+func (m *MockWatcher) WatchDeviceFields(arg0 []dcgm.Short, arg1 deviceinfo.Provider, arg2 int64) ([]dcgm.GroupHandle, dcgm.FieldHandle, []func(), error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "WatchDeviceFields", arg0, arg1, arg2)
+	ret0, _ := ret[0].([]dcgm.GroupHandle)
+	ret1, _ := ret[1].(dcgm.FieldHandle)
+	ret2, _ := ret[2].([]func())
+	ret3, _ := ret[3].(error)
+	return ret0, ret1, ret2, ret3
+}
+
+// WatchDeviceFields indicates an expected call of WatchDeviceFields; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockWatcherMockRecorder) WatchDeviceFields(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "WatchDeviceFields", reflect.TypeOf((*MockWatcher)(nil).WatchDeviceFields), arg0, arg1, arg2)
+}
diff --git a/internal/mocks/pkg/devicewatchlistmanager/mock_device_watchlist_manager.go b/internal/mocks/pkg/devicewatchlistmanager/mock_device_watchlist_manager.go
new file mode 100644
index 00000000..9d98c66e
--- /dev/null
+++ b/internal/mocks/pkg/devicewatchlistmanager/mock_device_watchlist_manager.go
@@ -0,0 +1,85 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager (interfaces: Manager)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/devicewatchlistmanager/mock_device_watchlist_manager.go -package=devicewatchlistmanager -copyright_file=../../../hack/header.txt . Manager
+//
+
+// Package devicewatchlistmanager is a generated GoMock package.
+package devicewatchlistmanager
+
+import (
+	reflect "reflect"
+
+	devicewatcher "github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher"
+	devicewatchlistmanager "github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	dcgm "github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockManager is a gomock-based mock of the devicewatchlistmanager Manager interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockManager struct {
+	ctrl     *gomock.Controller
+	recorder *MockManagerMockRecorder
+}
+
+// MockManagerMockRecorder is the mock recorder for MockManager; its methods register expected calls on the mock it points to.
+type MockManagerMockRecorder struct {
+	mock *MockManager
+}
+
+// NewMockManager creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockManager(ctrl *gomock.Controller) *MockManager {
+	mock := &MockManager{ctrl: ctrl}
+	mock.recorder = &MockManagerMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockManager) EXPECT() *MockManagerMockRecorder {
+	return m.recorder
+}
+
+// CreateEntityWatchList mocks base method: it forwards its three arguments to the controller and returns its result as an error (nil when the result is not an error).
+func (m *MockManager) CreateEntityWatchList(arg0 dcgm.Field_Entity_Group, arg1 devicewatcher.Watcher, arg2 int64) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "CreateEntityWatchList", arg0, arg1, arg2)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// CreateEntityWatchList indicates an expected call of CreateEntityWatchList; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockManagerMockRecorder) CreateEntityWatchList(arg0, arg1, arg2 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateEntityWatchList", reflect.TypeOf((*MockManager)(nil).CreateEntityWatchList), arg0, arg1, arg2)
+}
+
+// EntityWatchList mocks base method: it forwards arg0 to the controller and returns its results as (devicewatchlistmanager.WatchList, bool), falling back to zero values when a result has a different type.
+func (m *MockManager) EntityWatchList(arg0 dcgm.Field_Entity_Group) (devicewatchlistmanager.WatchList, bool) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "EntityWatchList", arg0)
+	ret0, _ := ret[0].(devicewatchlistmanager.WatchList)
+	ret1, _ := ret[1].(bool)
+	return ret0, ret1
+}
+
+// EntityWatchList indicates an expected call of EntityWatchList; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockManagerMockRecorder) EntityWatchList(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EntityWatchList", reflect.TypeOf((*MockManager)(nil).EntityWatchList), arg0)
+}
diff --git a/internal/mocks/pkg/elf/mock_elf.go b/internal/mocks/pkg/elf/mock_elf.go
new file mode 100644
index 00000000..a39eda43
--- /dev/null
+++ b/internal/mocks/pkg/elf/mock_elf.go
@@ -0,0 +1,69 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/elf (interfaces: ELF)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/elf/mock_elf.go -package=elf -copyright_file=../../../hack/header.txt . ELF
+//
+
+// Package elf is a generated GoMock package.
+package elf
+
+import (
+	elf "debug/elf"
+	reflect "reflect"
+
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockELF is a gomock-based mock of the ELF interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockELF struct {
+	ctrl     *gomock.Controller
+	recorder *MockELFMockRecorder
+}
+
+// MockELFMockRecorder is the mock recorder for MockELF; its methods register expected calls on the mock it points to.
+type MockELFMockRecorder struct {
+	mock *MockELF
+}
+
+// NewMockELF creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockELF(ctrl *gomock.Controller) *MockELF {
+	mock := &MockELF{ctrl: ctrl}
+	mock.recorder = &MockELFMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockELF) EXPECT() *MockELFMockRecorder {
+	return m.recorder
+}
+
+// Open mocks base method: it forwards arg0 to the controller and returns its results as (*elf.File, error), falling back to nil when a result has a different type; no file is opened here.
+func (m *MockELF) Open(arg0 string) (*elf.File, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Open", arg0)
+	ret0, _ := ret[0].(*elf.File)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// Open indicates an expected call of Open; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockELFMockRecorder) Open(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Open", reflect.TypeOf((*MockELF)(nil).Open), arg0)
+}
diff --git a/internal/mocks/pkg/exec/mock_cmd.go b/internal/mocks/pkg/exec/mock_cmd.go
new file mode 100644
index 00000000..85f81647
--- /dev/null
+++ b/internal/mocks/pkg/exec/mock_cmd.go
@@ -0,0 +1,68 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/exec (interfaces: Cmd)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/exec/mock_cmd.go -package=exec -copyright_file=../../../hack/header.txt . Cmd
+//
+
+// Package exec is a generated GoMock package.
+package exec
+
+import (
+	reflect "reflect"
+
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockCmd is a gomock-based mock of the Cmd interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockCmd struct {
+	ctrl     *gomock.Controller
+	recorder *MockCmdMockRecorder
+}
+
+// MockCmdMockRecorder is the mock recorder for MockCmd; its methods register expected calls on the mock it points to.
+type MockCmdMockRecorder struct {
+	mock *MockCmd
+}
+
+// NewMockCmd creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockCmd(ctrl *gomock.Controller) *MockCmd {
+	mock := &MockCmd{ctrl: ctrl}
+	mock.recorder = &MockCmdMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockCmd) EXPECT() *MockCmdMockRecorder {
+	return m.recorder
+}
+
+// Output mocks base method: it forwards the call to the controller and returns its results as ([]byte, error), falling back to nil when a result has a different type.
+func (m *MockCmd) Output() ([]byte, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Output")
+	ret0, _ := ret[0].([]byte)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// Output indicates an expected call of Output; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockCmdMockRecorder) Output() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Output", reflect.TypeOf((*MockCmd)(nil).Output))
+}
diff --git a/internal/mocks/pkg/exec/mock_exec.go b/internal/mocks/pkg/exec/mock_exec.go
new file mode 100644
index 00000000..18423b6e
--- /dev/null
+++ b/internal/mocks/pkg/exec/mock_exec.go
@@ -0,0 +1,73 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/exec (interfaces: Exec)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/exec/mock_exec.go -package=exec -copyright_file=../../../hack/header.txt . Exec
+//
+
+// Package exec is a generated GoMock package.
+package exec
+
+import (
+	reflect "reflect"
+
+	exec "github.com/NVIDIA/dcgm-exporter/internal/pkg/exec"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockExec is a gomock-based mock of the Exec interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockExec struct {
+	ctrl     *gomock.Controller
+	recorder *MockExecMockRecorder
+}
+
+// MockExecMockRecorder is the mock recorder for MockExec; its methods register expected calls on the mock it points to.
+type MockExecMockRecorder struct {
+	mock *MockExec
+}
+
+// NewMockExec creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockExec(ctrl *gomock.Controller) *MockExec {
+	mock := &MockExec{ctrl: ctrl}
+	mock.recorder = &MockExecMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockExec) EXPECT() *MockExecMockRecorder {
+	return m.recorder
+}
+
+// Command mocks base method: it flattens arg0 and each variadic arg1 value into one []any, forwards that list to the controller, and returns its result as exec.Cmd (zero value when the result has a different type).
+func (m *MockExec) Command(arg0 string, arg1 ...string) exec.Cmd {
+	m.ctrl.T.Helper()
+	varargs := []any{arg0}
+	for _, a := range arg1 {
+		varargs = append(varargs, a)
+	}
+	ret := m.ctrl.Call(m, "Command", varargs...)
+	ret0, _ := ret[0].(exec.Cmd)
+	return ret0
+}
+
+// Command indicates an expected call of Command; it prepends arg0 to the variadic arg1 values, records the expectation on the controller, and returns the resulting *gomock.Call.
+func (mr *MockExecMockRecorder) Command(arg0 any, arg1 ...any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	varargs := append([]any{arg0}, arg1...)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Command", reflect.TypeOf((*MockExec)(nil).Command), varargs...)
+}
diff --git a/internal/mocks/pkg/nvmlprovider/mock_client.go b/internal/mocks/pkg/nvmlprovider/mock_client.go
new file mode 100644
index 00000000..da770340
--- /dev/null
+++ b/internal/mocks/pkg/nvmlprovider/mock_client.go
@@ -0,0 +1,81 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider (interfaces: NVML)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/nvmlprovider/mock_client.go -package=nvmlprovider -copyright_file=../../../hack/header.txt . NVML
+//
+
+// Package nvmlprovider is a generated GoMock package.
+package nvmlprovider
+
+import (
+	reflect "reflect"
+
+	nvmlprovider "github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockNVML is a gomock-based mock of the nvmlprovider NVML interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockNVML struct {
+	ctrl     *gomock.Controller
+	recorder *MockNVMLMockRecorder
+}
+
+// MockNVMLMockRecorder is the mock recorder for MockNVML; its methods register expected calls on the mock it points to.
+type MockNVMLMockRecorder struct {
+	mock *MockNVML
+}
+
+// NewMockNVML creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockNVML(ctrl *gomock.Controller) *MockNVML {
+	mock := &MockNVML{ctrl: ctrl}
+	mock.recorder = &MockNVMLMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockNVML) EXPECT() *MockNVMLMockRecorder {
+	return m.recorder
+}
+
+// Cleanup mocks base method: it forwards the call to the controller and returns nothing.
+func (m *MockNVML) Cleanup() {
+	m.ctrl.T.Helper()
+	m.ctrl.Call(m, "Cleanup")
+}
+
+// Cleanup indicates an expected call of Cleanup; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockNVMLMockRecorder) Cleanup() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Cleanup", reflect.TypeOf((*MockNVML)(nil).Cleanup))
+}
+
+// GetMIGDeviceInfoByID mocks base method: it forwards arg0 to the controller and returns its results as (*nvmlprovider.MIGDeviceInfo, error), falling back to nil when a result has a different type.
+func (m *MockNVML) GetMIGDeviceInfoByID(arg0 string) (*nvmlprovider.MIGDeviceInfo, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetMIGDeviceInfoByID", arg0)
+	ret0, _ := ret[0].(*nvmlprovider.MIGDeviceInfo)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetMIGDeviceInfoByID indicates an expected call of GetMIGDeviceInfoByID; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockNVMLMockRecorder) GetMIGDeviceInfoByID(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetMIGDeviceInfoByID", reflect.TypeOf((*MockNVML)(nil).GetMIGDeviceInfoByID), arg0)
+}
diff --git a/internal/mocks/pkg/os/dir_entry.go b/internal/mocks/pkg/os/mock_dir_entry.go
similarity index 96%
rename from internal/mocks/pkg/os/dir_entry.go
rename to internal/mocks/pkg/os/mock_dir_entry.go
index d6271885..29113576 100644
--- a/internal/mocks/pkg/os/dir_entry.go
+++ b/internal/mocks/pkg/os/mock_dir_entry.go
@@ -17,7 +17,7 @@
 //
 // Generated by this command:
 //
-//	mockgen -destination=../../mocks/pkg/os/dir_entry.go -package=os -copyright_file=../../../hack/header.txt os DirEntry
+//	mockgen -destination=../../mocks/pkg/os/mock_dir_entry.go -package=os -copyright_file=../../../hack/header.txt os DirEntry
 //
 
 // Package os is a generated GoMock package.
diff --git a/internal/mocks/pkg/os/file_info.go b/internal/mocks/pkg/os/mock_file_info.go
similarity index 96%
rename from internal/mocks/pkg/os/file_info.go
rename to internal/mocks/pkg/os/mock_file_info.go
index b2ef5282..d4e35f94 100644
--- a/internal/mocks/pkg/os/file_info.go
+++ b/internal/mocks/pkg/os/mock_file_info.go
@@ -17,7 +17,7 @@
 //
 // Generated by this command:
 //
-//	mockgen -destination=../../mocks/pkg/os/file_info.go -package=os -copyright_file=../../../hack/header.txt io/fs FileInfo
+//	mockgen -destination=../../mocks/pkg/os/mock_file_info.go -package=os -copyright_file=../../../hack/header.txt io/fs FileInfo
 //
 
 // Package os is a generated GoMock package.
diff --git a/internal/mocks/pkg/os/os.go b/internal/mocks/pkg/os/mock_os.go
similarity index 93%
rename from internal/mocks/pkg/os/os.go
rename to internal/mocks/pkg/os/mock_os.go
index f3b77f42..70c9a186 100644
--- a/internal/mocks/pkg/os/os.go
+++ b/internal/mocks/pkg/os/mock_os.go
@@ -17,7 +17,7 @@
 //
 // Generated by this command:
 //
-//	mockgen -destination=../../mocks/pkg/os/os.go -package=os -copyright_file=../../../hack/header.txt . OS
+//	mockgen -destination=../../mocks/pkg/os/mock_os.go -package=os -copyright_file=../../../hack/header.txt . OS
 //
 
 // Package os is a generated GoMock package.
@@ -69,6 +69,18 @@ func (mr *MockOSMockRecorder) CreateTemp(arg0, arg1 any) *gomock.Call {
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateTemp", reflect.TypeOf((*MockOS)(nil).CreateTemp), arg0, arg1)
 }
 
+// Exit mocks base method: it forwards arg0 to the controller and returns nothing; this method itself does not terminate the process.
+func (m *MockOS) Exit(arg0 int) {
+	m.ctrl.T.Helper()
+	m.ctrl.Call(m, "Exit", arg0)
+}
+
+// Exit indicates an expected call of Exit; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockOSMockRecorder) Exit(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Exit", reflect.TypeOf((*MockOS)(nil).Exit), arg0)
+}
+
 // Getenv mocks base method.
 func (m *MockOS) Getenv(arg0 string) string {
 	m.ctrl.T.Helper()
diff --git a/internal/mocks/pkg/transformation/mock_transformer.go b/internal/mocks/pkg/transformation/mock_transformer.go
new file mode 100644
index 00000000..1ef2e87d
--- /dev/null
+++ b/internal/mocks/pkg/transformation/mock_transformer.go
@@ -0,0 +1,84 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/transformations (interfaces: Transform)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/transformations/mock_transformer.go -package=transformation -copyright_file=../../../hack/header.txt . Transform
+//
+
+// Package transformation is a generated GoMock package.
+package transformation
+
+import (
+	reflect "reflect"
+
+	gomock "go.uber.org/mock/gomock"
+
+	collector "github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	deviceinfo "github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
+
+// MockTransform is a gomock-based mock of the Transform interface; it holds the controller that handles calls and the recorder returned by EXPECT.
+type MockTransform struct {
+	ctrl     *gomock.Controller
+	recorder *MockTransformMockRecorder
+}
+
+// MockTransformMockRecorder is the mock recorder for MockTransform; its methods register expected calls on the mock it points to.
+type MockTransformMockRecorder struct {
+	mock *MockTransform
+}
+
+// NewMockTransform creates a new mock instance bound to ctrl and attaches a recorder that points back at it.
+func NewMockTransform(ctrl *gomock.Controller) *MockTransform {
+	mock := &MockTransform{ctrl: ctrl}
+	mock.recorder = &MockTransformMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns the mock's recorder, through which the caller declares expected calls.
+func (m *MockTransform) EXPECT() *MockTransformMockRecorder {
+	return m.recorder
+}
+
+// Name mocks base method: it forwards the call to the controller and returns its result as a string ("" when the result has a different type).
+func (m *MockTransform) Name() string {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Name")
+	ret0, _ := ret[0].(string)
+	return ret0
+}
+
+// Name indicates an expected call of Name; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockTransformMockRecorder) Name() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Name", reflect.TypeOf((*MockTransform)(nil).Name))
+}
+
+// Process mocks base method: it forwards arg0 and arg1 to the controller and returns its result as an error (nil when the result is not an error).
+func (m *MockTransform) Process(arg0 collector.MetricsByCounter, arg1 deviceinfo.Provider) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Process", arg0, arg1)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// Process indicates an expected call of Process; it records the expectation on the controller and returns the resulting *gomock.Call.
+func (mr *MockTransformMockRecorder) Process(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Process", reflect.TypeOf((*MockTransform)(nil).Process), arg0, arg1)
+}
diff --git a/internal/mocks/pkg/transformations/mock_transformer.go b/internal/mocks/pkg/transformations/mock_transformer.go
new file mode 100644
index 00000000..bfd858f9
--- /dev/null
+++ b/internal/mocks/pkg/transformations/mock_transformer.go
@@ -0,0 +1,83 @@
+// Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/NVIDIA/dcgm-exporter/internal/pkg/transformation (interfaces: Transform)
+//
+// Generated by this command:
+//
+//	mockgen -destination=../../mocks/pkg/transformations/mock_transformer.go -package=transformation -copyright_file=../../../hack/header.txt . Transform
+//
+
+// Package transformation is a generated GoMock package.
+package transformation
+
+import (
+	reflect "reflect"
+
+	collector "github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	deviceinfo "github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	gomock "go.uber.org/mock/gomock"
+)
+
+// MockTransform is a mock of Transform interface.
+type MockTransform struct {
+	ctrl     *gomock.Controller
+	recorder *MockTransformMockRecorder
+}
+
+// MockTransformMockRecorder is the mock recorder for MockTransform.
+type MockTransformMockRecorder struct {
+	mock *MockTransform
+}
+
+// NewMockTransform creates a new mock instance.
+func NewMockTransform(ctrl *gomock.Controller) *MockTransform {
+	mock := &MockTransform{ctrl: ctrl}
+	mock.recorder = &MockTransformMockRecorder{mock}
+	return mock
+}
+
+// EXPECT returns an object that allows the caller to indicate expected use.
+func (m *MockTransform) EXPECT() *MockTransformMockRecorder {
+	return m.recorder
+}
+
+// Name mocks base method.
+func (m *MockTransform) Name() string {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Name")
+	ret0, _ := ret[0].(string)
+	return ret0
+}
+
+// Name indicates an expected call of Name.
+func (mr *MockTransformMockRecorder) Name() *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Name", reflect.TypeOf((*MockTransform)(nil).Name))
+}
+
+// Process mocks base method.
+func (m *MockTransform) Process(arg0 collector.MetricsByCounter, arg1 deviceinfo.Provider) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "Process", arg0, arg1)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// Process indicates an expected call of Process.
+func (mr *MockTransformMockRecorder) Process(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Process", reflect.TypeOf((*MockTransform)(nil).Process), arg0, arg1)
+}
diff --git a/internal/pkg/appconfig/const.go b/internal/pkg/appconfig/const.go
new file mode 100644
index 00000000..53fa246e
--- /dev/null
+++ b/internal/pkg/appconfig/const.go
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package appconfig
+
+const (
+	GPUUID     KubernetesGPUIDType = "uid"         // ID type value "uid"; presumably selects GPU UUIDs — confirm against usage
+	DeviceName KubernetesGPUIDType = "device-name" // ID type value "device-name"; presumably selects device names — confirm
+
+	NvidiaResourceName      = "nvidia.com/gpu"  // presumably the Kubernetes resource name for whole GPUs — confirm
+	NvidiaMigResourcePrefix = "nvidia.com/mig-" // presumably the common prefix of MIG resource names — confirm
+	MIG_UUID_PREFIX         = "MIG-"            // presumably the prefix of MIG device UUIDs — confirm
+)
diff --git a/pkg/dcgmexporter/config.go b/internal/pkg/appconfig/types.go
similarity index 84%
rename from pkg/dcgmexporter/config.go
rename to internal/pkg/appconfig/types.go
index f13c91db..6d369845 100644
--- a/pkg/dcgmexporter/config.go
+++ b/internal/pkg/appconfig/types.go
@@ -5,7 +5,7 @@
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,17 +14,14 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package appconfig
 
-import "github.com/NVIDIA/go-dcgm/pkg/dcgm"
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+)
 
 type KubernetesGPUIDType string
 
-const (
-	GPUUID     KubernetesGPUIDType = "uid"
-	DeviceName KubernetesGPUIDType = "device-name"
-)
-
 type DeviceOptions struct {
 	Flex       bool  // If true, then monitor all GPUs if MIG mode is disabled or all GPU instances if MIG is enabled.
 	MajorRange []int // The indices of each GPU/NvSwitch to monitor, or -1 to monitor all
@@ -41,9 +38,9 @@ type Config struct {
 	UseOldNamespace            bool
 	UseRemoteHE                bool
 	RemoteHEInfo               string
-	GPUDevices                 DeviceOptions
-	SwitchDevices              DeviceOptions
-	CPUDevices                 DeviceOptions
+	GPUDeviceOptions           DeviceOptions
+	SwitchDeviceOptions        DeviceOptions
+	CPUDeviceOptions           DeviceOptions
 	NoHostname                 bool
 	UseFakeGPUs                bool
 	ConfigMapData              string
diff --git a/internal/pkg/collector/base_collector.go b/internal/pkg/collector/base_collector.go
new file mode 100644
index 00000000..e8b2c43c
--- /dev/null
+++ b/internal/pkg/collector/base_collector.go
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicemonitoring"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+)
+
+type baseExpCollector struct {
+	deviceWatchList devicewatchlistmanager.WatchList // Device info and fields used for counters and labels
+	counter         counters.Counter                 // Counter assigned to every metric built by createMetric
+	labelsCounters  []counters.Counter               // Counters searched when turning field values into labels
+	hostname        string                           // Hostname copied into every metric built by createMetric
+	config          *appconfig.Config                // Configuration settings
+	cleanups        []func()                         // Cleanup functions invoked by Cleanup
+}
+
+// createMetric builds a Metric for this collector's counter from one monitored
+// entity; MIG profile and GPU instance ID are set only when instance info exists.
+func (c *baseExpCollector) createMetric(
+	labels map[string]string, mi devicemonitoring.Info, uuid string, val int,
+) Metric {
+	device := mi.DeviceInfo
+	modelName := getGPUModel(device, c.config.ReplaceBlanksInModelName)
+	metric := Metric{
+		Counter:      c.counter,
+		Value:        fmt.Sprint(val),
+		UUID:         uuid,
+		GPU:          fmt.Sprintf("%d", device.GPU),
+		GPUUUID:      device.UUID,
+		GPUDevice:    fmt.Sprintf("nvidia%d", device.GPU),
+		GPUModelName: modelName,
+		GPUPCIBusID:  device.PCI.BusID,
+		Hostname:     c.hostname,
+
+		Labels:     labels,
+		Attributes: map[string]string{},
+	}
+	// Without instance info MigProfile and GPUInstanceID keep their zero value "".
+	if instance := mi.InstanceInfo; instance != nil {
+		metric.MigProfile = instance.ProfileName
+		metric.GPUInstanceID = fmt.Sprintf("%d", instance.Info.NvmlInstanceId)
+	}
+	return metric
+}
+
+// getLabelsFromCounters reads the latest values of the watch list's label
+// fields for the entity and stores each usable one in labels by field name.
+// An error from the DCGM client is returned unchanged.
+func (c *baseExpCollector) getLabelsFromCounters(mi devicemonitoring.Info, labels map[string]string) error {
+	fieldValues, err := dcgmprovider.Client().EntityGetLatestValues(
+		mi.Entity.EntityGroupId, mi.Entity.EntityId, c.deviceWatchList.LabelDeviceFields())
+	if err != nil {
+		return err
+	}
+
+	for _, fieldValue := range fieldValues {
+		text := toString(fieldValue)
+		// Skip counters with no value and fields ignored for this entity.
+		if text == skipDCGMValue {
+			continue
+		}
+
+		// Fields without a matching label counter are skipped, not reported.
+		labelCounter, lookupErr := findCounterField(c.labelsCounters, fieldValue.FieldId)
+		if lookupErr != nil || !labelCounter.IsLabel() {
+			continue
+		}
+		labels[labelCounter.FieldName] = text
+	}
+	return nil
+}
+
+func (c *baseExpCollector) Cleanup() {
+	for _, cleanup := range c.cleanups { // call each stored cleanup function in slice order
+		cleanup()
+	}
+}
diff --git a/pkg/dcgmexporter/clock_events_collector.go b/internal/pkg/collector/clock_events_collector.go
similarity index 77%
rename from pkg/dcgmexporter/clock_events_collector.go
rename to internal/pkg/collector/clock_events_collector.go
index 31eb0ff6..827c63c5 100644
--- a/pkg/dcgmexporter/clock_events_collector.go
+++ b/internal/pkg/collector/clock_events_collector.go
@@ -14,21 +14,25 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package collector
 
 import (
 	"fmt"
+	"log/slog"
 	"slices"
 
 	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
 )
 
 // IsDCGMExpClockEventsCountEnabled checks if the DCGM_EXP_CLOCK_EVENTS_COUNT counter exists
-func IsDCGMExpClockEventsCountEnabled(counters []Counter) bool {
-	return slices.ContainsFunc(counters,
-		func(c Counter) bool {
-			return c.FieldName == dcgmExpClockEventsCount
+func IsDCGMExpClockEventsCountEnabled(counterList counters.CounterList) bool {
+	return slices.ContainsFunc(counterList,
+		func(c counters.Counter) bool {
+			return c.FieldName == counters.DCGMExpClockEventsCount
 		})
 }
 
@@ -50,7 +54,7 @@ const (
 	DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN clockEventBitmask = 0x0000000000000008
 	// DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST Sync Boost
 	DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST clockEventBitmask = 0x0000000000000010
-	//SW Thermal Slowdown
+	// SW Thermal Slowdown
 	DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL clockEventBitmask = 0x0000000000000020
 	// DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged
 	DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL clockEventBitmask = 0x0000000000000040
@@ -82,26 +86,33 @@ func (c *clockEventsCollector) GetMetrics() (MetricsByCounter, error) {
 	return c.expCollector.getMetrics()
 }
 
-func NewClockEventsCollector(counters []Counter,
+func NewClockEventsCollector(
+	counterList counters.CounterList,
 	hostname string,
-	config *Config,
-	fieldEntityGroupTypeSystemInfo FieldEntityGroupTypeSystemInfoItem) (Collector, error) {
-	if !IsDCGMExpClockEventsCountEnabled(counters) {
-		logrus.Error(dcgmExpClockEventsCount + " collector is disabled")
-		return nil, fmt.Errorf(dcgmExpClockEventsCount + " collector is disabled")
+	config *appconfig.Config,
+	deviceWatchList devicewatchlistmanager.WatchList,
+) (Collector, error) {
+	if !IsDCGMExpClockEventsCountEnabled(counterList) {
+		slog.Error(counters.DCGMExpClockEventsCount + " collector is disabled")
+		return nil, fmt.Errorf(counters.DCGMExpClockEventsCount + " collector is disabled")
 	}
 
 	collector := clockEventsCollector{}
-	collector.expCollector = newExpCollector(
-		counters,
+	var err error
+	deviceWatchList.SetDeviceFields([]dcgm.Short{dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS})
+
+	collector.expCollector, err = newExpCollector(
+		counterList.LabelCounters(),
 		hostname,
-		[]dcgm.Short{dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS},
 		config,
-		fieldEntityGroupTypeSystemInfo,
+		deviceWatchList,
 	)
+	if err != nil {
+		return nil, err
+	}
 
-	collector.counter = counters[slices.IndexFunc(counters, func(c Counter) bool {
-		return c.FieldName == dcgmExpClockEventsCount
+	collector.counter = counterList[slices.IndexFunc(counterList, func(c counters.Counter) bool {
+		return c.FieldName == counters.DCGMExpClockEventsCount
 	})]
 
 	collector.labelFiller = func(metricValueLabels map[string]string, entityValue int64) {
diff --git a/internal/pkg/collector/clock_events_collector_test.go b/internal/pkg/collector/clock_events_collector_test.go
new file mode 100644
index 00000000..a5eaa417
--- /dev/null
+++ b/internal/pkg/collector/clock_events_collector_test.go
@@ -0,0 +1,798 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+	"slices"
+	"testing"
+	"time"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/mock/gomock"
+
+	mockdcgm "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	mockdevicewatcher "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/devicewatcher"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+)
+
+const invalidClockEventValue = 10000 // sentinel: clockEventMetricsCreator adds no clock_event label for it
+
+// TestIsDCGMExpClockEventsCountEnabled checks that the predicate is true only when
+// the list holds a counter named counters.DCGMExpClockEventsCount.
+func TestIsDCGMExpClockEventsCountEnabled(t *testing.T) {
+	unrelated := counters.Counter{FieldID: 2, FieldName: "random2"}
+
+	cases := []struct {
+		name string
+		arg  counters.CounterList
+		want bool
+	}{
+		{
+			name: "empty",
+			arg:  counters.CounterList{},
+			want: false,
+		},
+		{
+			name: "counter event count disabled",
+			arg: counters.CounterList{
+				counters.Counter{
+					FieldID:   1,
+					FieldName: "random1",
+				},
+				unrelated,
+			},
+			want: false,
+		},
+		{
+			name: "counter event count enabled",
+			arg: counters.CounterList{
+				counters.Counter{
+					FieldID:   1,
+					FieldName: counters.DCGMExpClockEventsCount,
+				},
+				unrelated,
+			},
+			want: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := IsDCGMExpClockEventsCountEnabled(tc.arg)
+			assert.Equalf(t, tc.want, got, "unexpected response")
+		})
+	}
+}
+
+func TestNewClockEventsCollector(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDeviceWatcher := mockdevicewatcher.NewMockWatcher(ctrl)
+
+	sampleDeviceInfo := &deviceinfo.Info{}
+	sampleDeviceFields := []dcgm.Short{42}
+	sampleCollectorInterval := int64(1)
+	sampleConfig := appconfig.Config{}
+	sampleHostname := "localhost"
+	var sampleCleanups []func() // nil slice handed back by the mocked WatchDeviceFields calls
+
+	sampleDCGMExpClockEventsCounter := counters.Counter{
+		FieldID:   1,
+		FieldName: counters.DCGMExpClockEventsCount,
+	}
+
+	sampleOtherCounter := counters.Counter{
+		FieldID:   2,
+		FieldName: "random2",
+	}
+
+	sampleLabelCounter := counters.Counter{
+		FieldID:   3,
+		FieldName: "random2",
+		PromType:  "label",
+	}
+
+	type args struct {
+		counterList     counters.CounterList
+		hostname        string
+		config          *appconfig.Config
+		deviceWatchList *devicewatchlistmanager.WatchList
+	}
+	tests := []struct {
+		name       string
+		args       args
+		conditions func(watcher *mockdevicewatcher.MockWatcher)
+		want       func(string, *appconfig.Config, devicewatchlistmanager.WatchList) Collector
+		wantErr    bool
+	}{
+		{
+			name: "counter is disabled ",
+			args: args{
+				counterList:     counters.CounterList{},
+				hostname:        sampleHostname,
+				config:          nil,
+				deviceWatchList: &devicewatchlistmanager.WatchList{},
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {},
+			want: func(
+				_ string, _ *appconfig.Config,
+				_ devicewatchlistmanager.WatchList,
+			) Collector {
+				return nil
+			},
+			wantErr: true,
+		},
+		{
+			name: "new clock events collector watcher fails",
+			args: args{
+				counterList: counters.CounterList{
+					sampleDCGMExpClockEventsCounter,
+					sampleOtherCounter,
+					sampleLabelCounter,
+				},
+				hostname: sampleHostname,
+				config:   &sampleConfig,
+				deviceWatchList: devicewatchlistmanager.NewWatchList(sampleDeviceInfo, sampleDeviceFields, nil,
+					mockDeviceWatcher, sampleCollectorInterval),
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return(nil,
+					dcgm.FieldHandle{},
+					sampleCleanups, fmt.Errorf("some error"))
+			},
+			want: func(
+				_ string, _ *appconfig.Config,
+				_ devicewatchlistmanager.WatchList,
+			) Collector {
+				return nil
+			},
+			wantErr: true,
+		},
+		{
+			name: "new clock events collector ",
+			args: args{
+				counterList: counters.CounterList{
+					sampleDCGMExpClockEventsCounter,
+					sampleOtherCounter,
+					sampleLabelCounter,
+				},
+				hostname: sampleHostname,
+				config:   &sampleConfig,
+				deviceWatchList: devicewatchlistmanager.NewWatchList(sampleDeviceInfo, sampleDeviceFields, nil,
+					mockDeviceWatcher, sampleCollectorInterval),
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return(nil,
+					dcgm.FieldHandle{},
+					sampleCleanups, nil)
+			},
+			want: func(
+				hostname string, config *appconfig.Config,
+				deviceWatchList devicewatchlistmanager.WatchList,
+			) Collector {
+				deviceWatchList.SetDeviceFields([]dcgm.Short{dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS})
+				return &clockEventsCollector{
+					expCollector{
+						baseExpCollector: baseExpCollector{
+							deviceWatchList: deviceWatchList,
+							counter:         sampleDCGMExpClockEventsCounter,
+							labelsCounters:  []counters.Counter{sampleLabelCounter},
+							hostname:        hostname,
+							config:          config,
+							cleanups:        sampleCleanups,
+						},
+						windowSize: config.ClockEventsCountWindowSize,
+					},
+				}
+			},
+			wantErr: false,
+		},
+		{
+			name: "new clock events collector with no label counters",
+			args: args{
+				counterList: counters.CounterList{
+					sampleDCGMExpClockEventsCounter,
+					sampleOtherCounter,
+				},
+				hostname: sampleHostname,
+				config:   &sampleConfig,
+				deviceWatchList: devicewatchlistmanager.NewWatchList(sampleDeviceInfo, sampleDeviceFields, nil,
+					mockDeviceWatcher, sampleCollectorInterval),
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return(nil,
+					dcgm.FieldHandle{},
+					sampleCleanups, nil)
+			},
+			want: func(
+				hostname string, config *appconfig.Config,
+				deviceWatchList devicewatchlistmanager.WatchList,
+			) Collector {
+				deviceWatchList.SetDeviceFields([]dcgm.Short{dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS})
+				return &clockEventsCollector{
+					expCollector{
+						baseExpCollector: baseExpCollector{
+							deviceWatchList: deviceWatchList,
+							counter:         sampleDCGMExpClockEventsCounter,
+							labelsCounters:  nil,
+							hostname:        hostname,
+							config:          config,
+							cleanups:        sampleCleanups,
+						},
+						windowSize: config.ClockEventsCountWindowSize,
+					},
+				}
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.conditions(mockDeviceWatcher) // register this case's mock expectations before constructing
+
+			got, err := NewClockEventsCollector(tt.args.counterList, tt.args.hostname, tt.args.config,
+				*tt.args.deviceWatchList)
+			want := tt.want(tt.args.hostname, tt.args.config, *tt.args.deviceWatchList)
+
+			if !tt.wantErr {
+				assert.NoError(t, err, "unexpected error")
+
+				wantAttrs := testutils.GetFields(&want.(*clockEventsCollector).expCollector, testutils.Fields)
+				gotAttrs := testutils.GetFields(&got.(*clockEventsCollector).expCollector, testutils.Fields)
+				assert.Equal(t, wantAttrs, gotAttrs, "unexpected result")
+
+				gotFuncAttrs := testutils.GetFields(&got.(*clockEventsCollector).expCollector, testutils.Functions)
+				for functionName, value := range gotFuncAttrs { // only checked for being non-nil, not compared
+					assert.NotNilf(t, value, "unexpected %s to be not nil", functionName)
+				}
+			} else {
+				assert.Error(t, err, "expected error")
+				assert.Equal(t, want, got, "unexpected result")
+			}
+		})
+	}
+}
+
+// clockEventMetricsCreator builds the Metric a test case expects for one GPU.
+// The clock_event label is set unless mockClockEvent is invalidClockEventValue,
+// and the UUID field is "uuid" when useOldNamespace is true, otherwise "UUID".
+func clockEventMetricsCreator(
+	counter counters.Counter, gpuID uint, value, hostname, mockFieldName,
+	mockFieldLabelValue string, mockClockEvent uint64, useOldNamespace bool,
+) Metric {
+	expectedLabels := map[string]string{
+		windowSizeInMSLabel: "0",
+		mockFieldName:       mockFieldLabelValue,
+	}
+	if mockClockEvent != invalidClockEventValue {
+		expectedLabels["clock_event"] = clockEventBitmask(mockClockEvent).String()
+	}
+
+	// GPUUUID, GPUModelName, MigProfile and GPUInstanceID are left at their zero value.
+	expected := Metric{
+		Counter:    counter,
+		Value:      value,
+		GPU:        fmt.Sprintf("%d", gpuID),
+		GPUDevice:  fmt.Sprintf("nvidia%d", gpuID),
+		UUID:       "UUID",
+		Hostname:   hostname,
+		Labels:     expectedLabels,
+		Attributes: map[string]string{},
+	}
+
+	if useOldNamespace {
+		expected.UUID = "uuid"
+	}
+
+	return expected
+}
+
+// sortClockEventMetrics sorts by GPU, then clock_event label; ties compare equal as slices.SortFunc requires.
+func sortClockEventMetrics(metrics []Metric) {
+	slices.SortFunc(metrics, func(a, b Metric) int {
+		switch ea, eb := a.Labels["clock_event"], b.Labels["clock_event"]; {
+		case a.GPU < b.GPU, a.GPU == b.GPU && ea < eb:
+			return -1
+		case a.GPU > b.GPU, a.GPU == b.GPU && ea > eb:
+			return 1
+		}
+		return 0
+	})
+}
+
+func Test_clockEventsCollector_GetMetrics(t *testing.T) {
+	/******* Mock DCGM *************/
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+	mockDeviceWatcher := mockdevicewatcher.NewMockWatcher(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM)
+	}()
+	dcgmprovider.SetClient(mockDCGM)
+
+	/******** Mock Counters ************/
+	mockDCGMExpClockEventsCounter := counters.Counter{
+		FieldID:   1,
+		FieldName: counters.DCGMExpClockEventsCount,
+	}
+
+	mockOtherCounter := counters.Counter{
+		FieldID:   2,
+		FieldName: "random2",
+	}
+
+	mockLabelDeviceField := dcgm.Short(3)
+	mockFieldName := "random3"
+	mockLabelValue := "this is mock label"
+	mockLabelCounter := counters.Counter{
+		FieldID:   mockLabelDeviceField,
+		FieldName: mockFieldName,
+		PromType:  "label",
+	}
+
+	/******** Mock Device Info *********/
+	gOpts := appconfig.DeviceOptions{
+		Flex: true,
+	}
+
+	mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+	mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+	/******** Other Mock Inputs ************/
+	gpuID1 := uint(0)
+	gpuID2 := uint(1)
+
+	mockDeviceFields := []dcgm.Short{42}
+	mockCollectorInterval := int64(1)
+	mockConfig := appconfig.Config{}
+	mockHostname := "localhost"
+	cleanupCalled := 0
+	mockCleanups := []func(){
+		func() {
+			cleanupCalled++
+		},
+	}
+
+	mockGroupHandle1 := dcgm.GroupHandle{}
+	mockGroupHandle1.SetHandle(uintptr(1))
+
+	mockGroupHandle2 := dcgm.GroupHandle{}
+	mockGroupHandle2.SetHandle(uintptr(2))
+
+	mockFieldGroupHandle := dcgm.FieldHandle{}
+	mockFieldGroupHandle.SetHandle(uintptr(1))
+
+	mockLatestValues := []dcgm.FieldValue_v1{
+		{
+			FieldId:   150,
+			FieldType: dcgm.DCGM_FT_INT64,
+			Value:     [4096]byte{42},
+		},
+		{
+			FieldId:   uint(mockLabelDeviceField),
+			FieldType: dcgm.DCGM_FT_STRING,
+			Value:     testutils.StrToByteArray(mockLabelValue),
+		},
+		{
+			FieldId:   uint(mockLabelDeviceField),
+			FieldType: dcgm.DCGM_FT_STRING,
+			Value:     testutils.StrToByteArray(dcgm.DCGM_FT_STR_NOT_FOUND),
+		},
+	}
+
+	tests := []struct {
+		name       string
+		collector  func() Collector
+		conditions func(*mockdevicewatcher.MockWatcher, byte, byte)
+		want       func() (MetricsByCounter, byte, byte)
+		wantErr    bool
+	}{
+		{
+			name: "clock events collector with single clock events",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMExpClockEventsCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				sampleConfig := appconfig.Config{UseOldNamespace: true}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewClockEventsCollector(counterList, mockHostname, &sampleConfig, *deviceWatchList)
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, gpu1Value, gpu2Value byte) {
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: gpuID1, Value: [4096]byte{gpu1Value}},
+					{EntityId: gpuID2, Value: [4096]byte{gpu2Value}},
+				}
+
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return(mockEntitiesResult, time.Time{}, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID1,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID2,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				mockClockOutput11 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP)
+				mockClockOutput12 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL)
+
+				mockClockOutput21 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE)
+				mockClockOutput22 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL)
+
+				return MetricsByCounter{
+					mockDCGMExpClockEventsCounter: []Metric{
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput11, true),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput12, true),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput21, true),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput22, true),
+					},
+				}, byte(mockClockOutput11 + mockClockOutput12), byte(mockClockOutput21 + mockClockOutput22)
+			},
+			wantErr: false,
+		},
+		{
+			name: "extra values from GPUs that are not monitored",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMExpClockEventsCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewClockEventsCollector(counterList, mockHostname, &mockConfig, *deviceWatchList)
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, gpu1Value, gpu2Value byte) {
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: gpuID1, Value: [4096]byte{gpu1Value}},
+					{EntityId: gpuID2, Value: [4096]byte{gpu2Value}},
+					{EntityId: uint(2), Value: [4096]byte{gpu2Value}},
+				}
+
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return(mockEntitiesResult, time.Time{}, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID1,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID2,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				mockClockOutput11 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP)
+				mockClockOutput12 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL)
+
+				mockClockOutput21 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE)
+				mockClockOutput22 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL)
+
+				return MetricsByCounter{
+					mockDCGMExpClockEventsCounter: []Metric{
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput11, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput12, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput21, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput22, false),
+					},
+				}, byte(mockClockOutput11 + mockClockOutput12), byte(mockClockOutput21 + mockClockOutput22)
+			},
+			wantErr: false,
+		},
+		{
+			name: "missing values for a GPU that is monitored",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMExpClockEventsCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[3] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				mockGPUDeviceInfoTemp := testutils.MockGPUDeviceInfo(ctrl, 4, gpuInstanceInfos)
+				mockGPUDeviceInfoTemp.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfoTemp, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewClockEventsCollector(counterList, mockHostname, &mockConfig, *deviceWatchList)
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, gpu1Value, gpu2Value byte) {
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: gpuID1, Value: [4096]byte{gpu1Value}},
+					{EntityId: gpuID2, Value: [4096]byte{gpu2Value}},
+				}
+
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return(mockEntitiesResult, time.Time{}, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID1,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID2,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, uint(2),
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU_I, uint(14),
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				mockClockOutput11 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP)
+				mockClockOutput12 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL)
+
+				mockClockOutput21 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE)
+				mockClockOutput22 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL)
+
+				migClockEvent := clockEventMetricsCreator(mockDCGMExpClockEventsCounter, uint(3), "0", mockHostname,
+					mockFieldName,
+					mockLabelValue, invalidClockEventValue, false)
+				migClockEvent.MigProfile = testutils.MockGPUInstanceInfo2.ProfileName
+				migClockEvent.GPUInstanceID = fmt.Sprintf("%d", testutils.MockGPUInstanceInfo2.Info.NvmlInstanceId)
+
+				return MetricsByCounter{
+					mockDCGMExpClockEventsCounter: []Metric{
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput11, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput12, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput21, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput22, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, uint(2), "0", mockHostname,
+							mockFieldName,
+							mockLabelValue, invalidClockEventValue, false),
+						migClockEvent,
+					},
+				}, byte(mockClockOutput11 + mockClockOutput12), byte(mockClockOutput21 + mockClockOutput22)
+			},
+			wantErr: false,
+		},
+		{
+			name: "clock events collector with multiple clock events",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMExpClockEventsCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewClockEventsCollector(counterList, mockHostname, &mockConfig, *deviceWatchList)
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, gpu1Value, gpu2Value byte) {
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: gpuID1, Value: [4096]byte{gpu1Value}},
+					{EntityId: gpuID1, Value: [4096]byte{gpu1Value}},
+					{EntityId: gpuID1, Value: [4096]byte{gpu1Value}},
+					{EntityId: gpuID2, Value: [4096]byte{gpu2Value}},
+					{EntityId: gpuID2, Value: [4096]byte{gpu2Value}},
+				}
+
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return(mockEntitiesResult, time.Time{}, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID1,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID2,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				mockClockOutput11 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP)
+				mockClockOutput12 := uint64(DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL)
+
+				mockClockOutput21 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE)
+				mockClockOutput22 := uint64(DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL)
+
+				return MetricsByCounter{
+					mockDCGMExpClockEventsCounter: []Metric{
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "3", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput11, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID1, "3", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput12, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "2", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput21, false),
+						clockEventMetricsCreator(mockDCGMExpClockEventsCounter, gpuID2, "2", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockClockOutput22, false),
+					},
+				}, byte(mockClockOutput11 + mockClockOutput12), byte(mockClockOutput21 + mockClockOutput22)
+			},
+			wantErr: false,
+		},
+		{
+			name: "clock events collector with UpdateAllFields() error",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMExpClockEventsCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewClockEventsCollector(counterList, mockHostname, &mockConfig, *deviceWatchList)
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, _, _ byte) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(fmt.Errorf("some error"))
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				return nil, 0, 0
+			},
+			wantErr: true,
+		},
+		{
+			name: "clock events collector with GetValuesSince() error",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMExpClockEventsCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewClockEventsCollector(counterList, mockHostname, &mockConfig, *deviceWatchList)
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, _, _ byte) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return([]dcgm.FieldValue_v2{}, time.Time{},
+					fmt.Errorf("some error"))
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				return nil, 0, 0
+			},
+			wantErr: true,
+		},
+		{
+			name: "clock events collector with EntityGetLatestValues() error",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMExpClockEventsCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewClockEventsCollector(counterList, mockHostname, &mockConfig, *deviceWatchList)
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, _, _ byte) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return([]dcgm.FieldValue_v2{}, time.Time{}, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID1,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, fmt.Errorf("some error"))
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				return nil, 0, 0
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			want, gpu1Value, gpu2Value := tt.want()
+			tt.conditions(mockDeviceWatcher, gpu1Value, gpu2Value)
+			c := tt.collector()
+			defer func() {
+				c.Cleanup()
+				assert.Equal(t, 1, cleanupCalled, "clean up function was not called")
+				cleanupCalled = 0 // reset to zero
+			}()
+
+			got, err := c.GetMetrics()
+
+			if !tt.wantErr {
+				assert.NoError(t, err, "GetMetrics() failed")
+				assert.NotEmpty(t, got, "GetMetrics() returned no metrics")
+
+				wantMetrics := want[mockDCGMExpClockEventsCounter]
+				gotMetrics := got[mockDCGMExpClockEventsCounter]
+
+				assert.Len(t, gotMetrics, len(wantMetrics), "GetMetrics() returned wrong number of metrics")
+
+				sortClockEventMetrics(wantMetrics)
+				sortClockEventMetrics(gotMetrics)
+
+				assert.Equalf(t, wantMetrics, gotMetrics, "GetMetrics()")
+			} else {
+				assert.Errorf(t, err, "GetMetrics() did not return expected error")
+				assert.Empty(t, got, "GetMetrics() returned unexpected metrics")
+			}
+		})
+	}
+}
diff --git a/internal/pkg/collector/collector_factory.go b/internal/pkg/collector/collector_factory.go
new file mode 100644
index 00000000..e26027d2
--- /dev/null
+++ b/internal/pkg/collector/collector_factory.go
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+	"log/slog"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/logging"
+)
+
+type Factory interface {
+	NewCollectors() []EntityCollectorTuple
+}
+
+type collectorFactory struct { // Factory implementation returned by InitCollectorFactory
+	counterSet             *counters.CounterSet           // DCGMCounters and ExporterCounters decide which collectors are created
+	deviceWatchListManager devicewatchlistmanager.Manager // looked up per DCGM entity group to obtain its watch list
+	hostname               string                         // forwarded unchanged to every collector constructor
+	config                 *appconfig.Config              // forwarded unchanged to every collector constructor
+}
+
+// InitCollectorFactory returns a Factory that creates collectors for the
+// counters in counterSet, using the watch lists obtained from
+// deviceWatchListManager. hostname and config are stored as given and handed
+// to every collector the factory constructs; no argument is validated here.
+func InitCollectorFactory(
+	counterSet *counters.CounterSet, deviceWatchListManager devicewatchlistmanager.Manager,
+	hostname string, config *appconfig.Config,
+) Factory {
+	factory := new(collectorFactory)
+	factory.counterSet, factory.deviceWatchListManager = counterSet, deviceWatchListManager
+	factory.hostname, factory.config = hostname, config
+	return factory
+}
+
+func (cf *collectorFactory) NewCollectors() []EntityCollectorTuple {
+	slog.Debug("Counters are being initialized.",
+		slog.String(logging.DumpKey, fmt.Sprintf("%+v", cf.counterSet.DCGMCounters)))
+
+	entityCollectorTuples := make([]EntityCollectorTuple, 0)
+	entityTypes := []dcgm.Field_Entity_Group{
+		dcgm.FE_GPU,
+		dcgm.FE_SWITCH,
+		dcgm.FE_LINK,
+		dcgm.FE_CPU,
+		dcgm.FE_CPU_CORE,
+	}
+
+	for _, entityType := range entityTypes {
+		if len(cf.counterSet.DCGMCounters) > 0 {
+			entityWatchList, exists := cf.deviceWatchListManager.EntityWatchList(entityType)
+			if !exists || len(entityWatchList.DeviceFields()) == 0 {
+				continue
+			}
+
+			if dcgmCollector, err := cf.enableDCGMCollector(entityWatchList); err != nil {
+				slog.Error(fmt.Sprintf("DCGM collector for entity type '%s' cannot be initialized; err: %v",
+					entityType.String(), err))
+				os.Exit(1)
+			} else {
+				entityCollectorTuples = append(entityCollectorTuples, EntityCollectorTuple{
+					entity:    entityType,
+					collector: dcgmCollector,
+				})
+			}
+		}
+	}
+
+	if IsDCGMExpClockEventsCountEnabled(cf.counterSet.ExporterCounters) {
+		if newCollector, err := cf.enableExpCollector(counters.DCGMExpClockEventsCount); err != nil {
+			slog.Error(fmt.Sprintf("collector '%s' cannot be initialized; err: %v", counters.DCGMExpClockEventsCount, err))
+			os.Exit(1)
+		} else {
+			entityCollectorTuples = append(entityCollectorTuples, EntityCollectorTuple{
+				entity:    dcgm.FE_GPU,
+				collector: newCollector,
+			})
+		}
+	}
+
+	if IsDCGMExpXIDErrorsCountEnabled(cf.counterSet.ExporterCounters) {
+		if newCollector, err := cf.enableExpCollector(counters.DCGMExpXIDErrorsCount); err != nil {
+			slog.Error(fmt.Sprintf("collector '%s' cannot be initialized; err: %v", counters.DCGMExpXIDErrorsCount, err))
+			os.Exit(1)
+		} else {
+			entityCollectorTuples = append(entityCollectorTuples, EntityCollectorTuple{
+				entity:    dcgm.FE_GPU,
+				collector: newCollector,
+			})
+		}
+	}
+
+	if IsDCGMExpGPUHealthStatusEnabled(cf.counterSet.ExporterCounters) {
+		if newCollector, err := cf.enableExpCollector(counters.DCGMExpGPUHealthStatus); err != nil {
+			slog.Error(fmt.Sprintf("collector '%s' cannot be initialized; err: %v", counters.DCGMExpGPUHealthStatus, err))
+			os.Exit(1)
+		} else {
+			entityCollectorTuples = append(entityCollectorTuples, EntityCollectorTuple{
+				entity:    dcgm.FE_GPU,
+				collector: newCollector,
+			})
+		}
+	}
+
+	return entityCollectorTuples
+}
+
+// enableDCGMCollector creates a DCGM collector for entityWatchList from the
+// factory's DCGM counters, hostname and config. On failure it returns a nil
+// Collector together with the error reported by NewDCGMCollector.
+func (cf *collectorFactory) enableDCGMCollector(entityWatchList devicewatchlistmanager.WatchList) (Collector, error) {
+	dcgmCollector, err := NewDCGMCollector(cf.counterSet.DCGMCounters, cf.hostname, cf.config, entityWatchList)
+	if err == nil {
+		return dcgmCollector, nil
+	}
+	return nil, err // explicit nil so the Collector result never wraps a failed constructor's value
+}
+
+// enableExpCollector creates the exporter collector identified by
+// expCollectorName on top of the dcgm.FE_GPU watch list. It returns an error
+// when that watch list does not exist, when the name matches none of the
+// clock-events, XID-errors or GPU-health-status counters, or when the chosen
+// constructor fails. A successful creation is logged at info level.
+func (cf *collectorFactory) enableExpCollector(expCollectorName string) (Collector, error) {
+	gpuGroup := dcgm.FE_GPU
+	watchList, found := cf.deviceWatchListManager.EntityWatchList(gpuGroup)
+	if !found {
+		return nil, fmt.Errorf("entity type '%s' does not exist", gpuGroup.String())
+	}
+
+	var (
+		created Collector
+		err     error
+	)
+
+	switch expCollectorName {
+	case counters.DCGMExpClockEventsCount:
+		created, err = NewClockEventsCollector(cf.counterSet.ExporterCounters, cf.hostname, cf.config, watchList)
+	case counters.DCGMExpXIDErrorsCount:
+		created, err = NewXIDCollector(cf.counterSet.ExporterCounters, cf.hostname, cf.config, watchList)
+	case counters.DCGMExpGPUHealthStatus:
+		created, err = NewGPUHealthStatusCollector(cf.counterSet.ExporterCounters, cf.hostname, cf.config, watchList)
+	default:
+		err = fmt.Errorf("invalid collector '%s'", expCollectorName)
+	}
+
+	if err == nil {
+		slog.Info(fmt.Sprintf("collector '%s' initialized", expCollectorName))
+		return created, nil
+	}
+	return nil, err
+}
diff --git a/internal/pkg/collector/collector_factory_test.go b/internal/pkg/collector/collector_factory_test.go
new file mode 100644
index 00000000..1c533d87
--- /dev/null
+++ b/internal/pkg/collector/collector_factory_test.go
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/mock/gomock"
+
+	osmock "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/os"
+	osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
+
+	mockdcgm "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	mockdevicewatchlistmanager "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+)
+
+var deviceWatcher = devicewatcher.NewDeviceWatcher() // non-mock watcher used to build the tests' default watch list
+
+// mockGPU is the GPU (ID 0, no GPU instances) that the mocked device-info
+// provider returns in the collector factory tests.
+var mockGPU = deviceinfo.GPUInfo{
+	DeviceInfo:   dcgm.Device{GPU: uint(0)},
+	GPUInstances: []deviceinfo.GPUInstanceInfo{},
+}
+
+func Test_collectorFactory_Register(t *testing.T) {
+	dcgmCounter := counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_GPU_TEMP,
+		FieldName: "DCGM_FI_DEV_GPU_TEMP",
+		PromType:  "gauge",
+		Help:      "",
+	}
+
+	ctrl := gomock.NewController(t)
+
+	mockDeviceInfo := mockdeviceinfo.NewMockProvider(ctrl)
+	mockDeviceInfo.EXPECT().InfoType().Return(dcgm.FE_NONE).AnyTimes()
+	mockDeviceInfo.EXPECT().GOpts().Return(appconfig.DeviceOptions{Flex: true}).AnyTimes()
+	mockDeviceInfo.EXPECT().GPUCount().Return(uint(1)).AnyTimes()
+	mockDeviceInfo.EXPECT().GPU(uint(0)).Return(mockGPU).AnyTimes()
+
+	defaultDeviceWatchList := *devicewatchlistmanager.NewWatchList(mockDeviceInfo, []dcgm.Short{42}, nil,
+		deviceWatcher, int64(1))
+
+	tests := []struct {
+		name                      string
+		cs                        *counters.CounterSet
+		getDeviceWatchListManager func() devicewatchlistmanager.Manager
+		hostname                  string
+		config                    *appconfig.Config
+		setupDCGMMock             func(*mockdcgm.MockDCGM)
+		assert                    func(*testing.T, []EntityCollectorTuple)
+		wantsPanic                bool
+	}{
+		{
+			name: fmt.Sprintf("Collector enabled for the %s", dcgm.FE_GPU.String()),
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{dcgmCounter},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.WatchList{},
+					false).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(42))
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil).AnyTimes()
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU,
+					mockGPU.DeviceInfo.GPU).Return(nil).AnyTimes()
+
+				mockFieldHandle := dcgm.FieldHandle{}
+				mockFieldHandle.SetHandle(uintptr(43))
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Eq([]dcgm.Short{42})).Return(
+					mockFieldHandle, nil).AnyTimes()
+
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(gomock.Eq(mockFieldHandle),
+					gomock.Eq(mockGroupHandle),
+					gomock.Any(),
+					gomock.Any(),
+					gomock.Any(),
+				).Return(nil).AnyTimes()
+			},
+			assert: func(t *testing.T, entityCollectorTuples []EntityCollectorTuple) {
+				require.Len(t, entityCollectorTuples, 1)
+				require.Equal(t, entityCollectorTuples[0].Entity(), dcgm.FE_GPU)
+				require.IsType(t, &DCGMCollector{}, entityCollectorTuples[0].Collector())
+			},
+		},
+		{
+			name: fmt.Sprintf("Collector enabled for the %s but DCGM returns error", dcgm.FE_GPU.String()),
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{dcgmCounter},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.WatchList{},
+					false).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, errors.New("boom")).AnyTimes()
+			},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_CLOCK_EVENTS_COUNT collector is enabled",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_CLOCK_EVENTS_COUNT",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			hostname:      "testhost",
+			config:        &appconfig.Config{},
+			setupDCGMMock: setupDCGMMockForDCGMExpMetrics([]dcgm.Short{112}),
+			assert: func(t *testing.T, entityCollectorTuples []EntityCollectorTuple) {
+				require.Len(t, entityCollectorTuples, 1)
+				require.Equal(t, entityCollectorTuples[0].Entity(), dcgm.FE_GPU)
+				require.IsType(t, &clockEventsCollector{}, entityCollectorTuples[0].Collector())
+			},
+		},
+		{
+			name: "DCGM_EXP_CLOCK_EVENTS_COUNT collector can not be initialized",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_CLOCK_EVENTS_COUNT",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.
+					WatchList{}, false).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			hostname:   "testhost",
+			config:     &appconfig.Config{},
+			wantsPanic: true,
+			assert: func(t *testing.T, entityCollectorTuples []EntityCollectorTuple) {
+				require.Len(t, entityCollectorTuples, 0)
+			},
+		},
+		{
+			name: "DCGM_EXP_CLOCK_EVENTS_COUNT collector can not be created by DCGM",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_CLOCK_EVENTS_COUNT",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.WatchList{},
+					false).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, errors.New("boom")).AnyTimes()
+			},
+			hostname:   "testhost",
+			config:     &appconfig.Config{},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_XID_ERRORS_COUNT collector is enabled",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_XID_ERRORS_COUNT",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.WatchList{},
+					false).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			hostname:      "testhost",
+			config:        &appconfig.Config{},
+			setupDCGMMock: setupDCGMMockForDCGMExpMetrics([]dcgm.Short{230}),
+			assert: func(t *testing.T, entityCollectorTuples []EntityCollectorTuple) {
+				require.Len(t, entityCollectorTuples, 1)
+				require.Equal(t, entityCollectorTuples[0].Entity(), dcgm.FE_GPU)
+				require.IsType(t, &xidCollector{}, entityCollectorTuples[0].Collector())
+			},
+		},
+		{
+			name: "DCGM_EXP_XID_ERRORS_COUNT collector can not be initialized",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_XID_ERRORS_COUNT",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.
+					WatchList{}, false).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			hostname:   "testhost",
+			config:     &appconfig.Config{},
+			wantsPanic: true,
+			assert: func(t *testing.T, entityCollectorTuples []EntityCollectorTuple) {
+				require.Len(t, entityCollectorTuples, 0)
+			},
+		},
+		{
+			name: "DCGM_EXP_XID_ERRORS_COUNT collector can not be created by DCGM",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_XID_ERRORS_COUNT",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.WatchList{},
+					false).AnyTimes()
+				return mockDeviceWatchListManager
+			},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, errors.New("boom")).AnyTimes()
+			},
+			hostname:   "testhost",
+			config:     &appconfig.Config{},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector is enabled",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{0}, nil).AnyTimes()
+				mockDCGM.EXPECT().HealthSet(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
+				mockDCGM.EXPECT().GetAllDeviceCount().Return(uint(1), nil).AnyTimes()
+				mockDCGM.EXPECT().GetDeviceInfo(gomock.Eq(uint(0))).Return(dcgm.Device{}, nil).AnyTimes()
+				mockDCGM.EXPECT().GetGpuInstanceHierarchy().Return(dcgm.MigHierarchy_v2{}, nil).AnyTimes()
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(dcgm.FieldHandle{}, nil)
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
+					Return(nil).AnyTimes()
+				setupDCGMMockForDCGMExpMetrics([]dcgm.Short{230})(mockDCGM)
+			},
+			assert: func(t *testing.T, entityCollectorTuples []EntityCollectorTuple) {
+				require.Len(t, entityCollectorTuples, 1)
+				require.Equal(t, entityCollectorTuples[0].Entity(), dcgm.FE_GPU)
+				require.IsType(t, &gpuHealthStatusCollector{}, entityCollectorTuples[0].Collector())
+			},
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector can not be initialized",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{}, errors.New("boom!")).AnyTimes()
+			},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector can not be initialized when zero supported devices",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{}, nil)
+			},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector can not be initialized when entity group can not be created",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{0}, nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+					return strings.HasPrefix(x.(string), "gpu_health_monitor_")
+				})).Return(dcgm.GroupHandle{}, errors.New("boom!"))
+			},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector can not be initialized when entity can not be added to the group",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{0}, nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+					return strings.HasPrefix(x.(string), "gpu_health_monitor_")
+				})).Return(dcgm.GroupHandle{}, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(gomock.Any(), gomock.Any(), gomock.Eq(uint(0))).Return(errors.New("boom!"))
+			},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector can not be initialized when enable healthcheck returns an error",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{0}, nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+					return strings.HasPrefix(x.(string), "gpu_health_monitor_")
+				})).Return(dcgm.GroupHandle{}, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(gomock.Any(), gomock.Any(), gomock.Eq(uint(0))).Return(nil)
+				mockDCGM.EXPECT().HealthSet(gomock.Any(), gomock.Eq(dcgm.DCGM_HEALTH_WATCH_ALL)).Return(errors.New("boom!"))
+			},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector can not be initialized when deviceinfo.Initialize returns an error",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{0}, nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+					return strings.HasPrefix(x.(string), "gpu_health_monitor_")
+				})).Return(dcgm.GroupHandle{}, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(gomock.Any(), gomock.Any(), gomock.Eq(uint(0))).Return(nil)
+				mockDCGM.EXPECT().HealthSet(gomock.Any(), gomock.Eq(dcgm.DCGM_HEALTH_WATCH_ALL)).Return(nil)
+				mockDCGM.EXPECT().GetAllDeviceCount().Return(uint(0), errors.New("boom!"))
+			},
+			wantsPanic: true,
+		},
+		{
+			name: "DCGM_EXP_GPU_HEALTH_STATUS collector can not be initialized when device watch returns an error",
+			cs: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{},
+				ExporterCounters: []counters.Counter{
+					{
+						FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+					},
+				},
+			},
+			getDeviceWatchListManager: func() devicewatchlistmanager.Manager {
+				mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+				mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_GPU).Return(defaultDeviceWatchList,
+					true)
+				return mockDeviceWatchListManager
+			},
+			hostname: "testhost",
+			config:   &appconfig.Config{},
+			setupDCGMMock: func(mockDCGM *mockdcgm.MockDCGM) {
+				mockDCGM.EXPECT().GetSupportedDevices().Return([]uint{0}, nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+					return strings.HasPrefix(x.(string), "gpu_health_monitor_")
+				})).Return(dcgm.GroupHandle{}, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(gomock.Any(), gomock.Any(), gomock.Eq(uint(0))).Return(nil)
+				mockDCGM.EXPECT().HealthSet(gomock.Any(), gomock.Eq(dcgm.DCGM_HEALTH_WATCH_ALL)).Return(nil)
+				mockDCGM.EXPECT().GetAllDeviceCount().Return(uint(1), nil)
+				mockDCGM.EXPECT().GetDeviceInfo(gomock.Eq(uint(0))).Return(dcgm.Device{}, nil)
+				mockDCGM.EXPECT().GetGpuInstanceHierarchy().Return(dcgm.MigHierarchy_v2{}, nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+					return strings.HasPrefix(x.(string), "gpu-collector-group")
+				})).Return(dcgm.GroupHandle{}, errors.New("boom!"))
+			},
+			wantsPanic: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctrl := gomock.NewController(t)
+			defer ctrl.Finish()
+
+			mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+
+			realDCGM := dcgmprovider.Client()
+			defer func() {
+				dcgmprovider.SetClient(realDCGM)
+			}()
+
+			mOS := osmock.NewMockOS(ctrl)
+			mOS.EXPECT().Exit(gomock.Eq(1)).Do(func(code int) {
+				panic("os.Exit")
+			}).AnyTimes()
+			os = mOS
+			defer func() {
+				os = osinterface.RealOS{}
+			}()
+
+			dcgmprovider.SetClient(mockDCGMProvider)
+			if tt.setupDCGMMock != nil {
+				tt.setupDCGMMock(mockDCGMProvider)
+			}
+
+			if tt.wantsPanic {
+				require.PanicsWithValue(t, "os.Exit", func() {
+					InitCollectorFactory(tt.cs, tt.getDeviceWatchListManager(), tt.hostname,
+						tt.config).NewCollectors()
+				})
+				return
+			}
+			entityCollectors := InitCollectorFactory(tt.cs, tt.getDeviceWatchListManager(), tt.hostname,
+				tt.config).NewCollectors()
+			if tt.assert != nil {
+				tt.assert(t, entityCollectors)
+			}
+		})
+	}
+}
+
+// setupDCGMMockForDCGMExpMetrics returns a callback that stubs, on the given DCGM mock, group
+// creation, adding the mock GPU to that group, field-group creation for fields and the watch call.
+func setupDCGMMockForDCGMExpMetrics(fields []dcgm.Short) func(mockDCGM *mockdcgm.MockDCGM) {
+	return func(m *mockdcgm.MockDCGM) {
+		// Distinct fixed handles let the expectations below match on the exact group/field group.
+		var groupHandle dcgm.GroupHandle
+		groupHandle.SetHandle(uintptr(42))
+		var fieldGroupHandle dcgm.FieldHandle
+		fieldGroupHandle.SetHandle(uintptr(43))
+
+		// All expectations use AnyTimes, so none of these calls is required to happen.
+		m.EXPECT().CreateGroup(gomock.Any()).Return(groupHandle, nil).AnyTimes()
+		m.EXPECT().AddEntityToGroup(groupHandle, dcgm.FE_GPU, mockGPU.DeviceInfo.GPU).Return(nil).AnyTimes()
+		m.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Eq(fields)).Return(fieldGroupHandle, nil).AnyTimes()
+		m.EXPECT().WatchFieldsWithGroupEx(
+			gomock.Eq(fieldGroupHandle),
+			gomock.Eq(groupHandle),
+			gomock.Any(), gomock.Any(), gomock.Any(),
+		).Return(nil).AnyTimes()
+	}
+}
diff --git a/internal/pkg/collector/const.go b/internal/pkg/collector/const.go
new file mode 100644
index 00000000..b239510d
--- /dev/null
+++ b/internal/pkg/collector/const.go
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+const (
+	windowSizeInMSLabel = "window_size_in_ms" // label key under which expCollector.getMetrics reports its window size
+
+	skipDCGMValue   = "SKIPPING DCGM VALUE"                 // sentinel returned by toString for values that must be skipped
+	FailedToConvert = "ERROR - FAILED TO CONVERT TO STRING" // NOTE(review): usage not visible here; presumably marks a failed conversion — confirm
+)
diff --git a/internal/pkg/collector/expcollector.go b/internal/pkg/collector/expcollector.go
new file mode 100644
index 00000000..ebc2572c
--- /dev/null
+++ b/internal/pkg/collector/expcollector.go
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+	"log/slog"
+	"maps"
+	"time"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicemonitoring"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+)
+
+type expCollector struct {
+	baseExpCollector
+	fieldValueParser func(val int64) []int64        // Expands one sampled field value into the values to be counted
+	labelFiller      func(map[string]string, int64) // Adds labels for a counted value to that metric's cloned label map
+	windowSize       int                            // Look-back window for sampled values, in milliseconds
+}
+
+// getMetrics tallies, per entity ID, how often each parsed field value was sampled during the
+// last windowSize milliseconds. It emits one metric per distinct value (the count is the metric
+// value) and a single zero-valued metric for a monitored entity without any tallied sample.
+func (c *expCollector) getMetrics() (MetricsByCounter, error) {
+	if err := dcgmprovider.Client().UpdateAllFields(); err != nil {
+		return nil, err
+	}
+
+	mapEntityIDToValues := map[uint]map[int64]int{}
+
+	window := time.Now().Add(-time.Duration(c.windowSize) * time.Millisecond)
+
+	for _, group := range c.deviceWatchList.DeviceGroups() {
+		values, _, err := dcgmprovider.Client().GetValuesSince(group, c.deviceWatchList.DeviceFieldGroup(), window)
+		if err != nil {
+			return nil, err
+		}
+
+		for _, val := range values {
+			if val.Status == 0 {
+				if _, exists := mapEntityIDToValues[val.EntityId]; !exists {
+					mapEntityIDToValues[val.EntityId] = map[int64]int{}
+				}
+
+				for _, v := range c.fieldValueParser(val.Int64()) {
+					mapEntityIDToValues[val.EntityId][v] += 1
+				}
+			}
+		}
+	}
+
+	labels := map[string]string{windowSizeInMSLabel: fmt.Sprint(c.windowSize)}
+
+	monitoringInfo := devicemonitoring.GetMonitoredEntities(c.deviceWatchList.DeviceInfo())
+	metrics := make(MetricsByCounter)
+	uuid := "UUID"
+	if c.config.UseOldNamespace {
+		uuid = "uuid"
+	}
+	for _, mi := range monitoringInfo {
+		if len(c.labelsCounters) > 0 && len(c.deviceWatchList.LabelDeviceFields()) > 0 {
+			err := c.getLabelsFromCounters(mi, labels)
+			if err != nil {
+				return nil, err
+			}
+		}
+		entityValues, exists := mapEntityIDToValues[mi.DeviceInfo.GPU]
+		if exists {
+			for entityValue, val := range entityValues {
+				// Each metric gets its own label map so labelFiller cannot affect other metrics.
+				metricValueLabels := maps.Clone(labels)
+				c.labelFiller(metricValueLabels, entityValue)
+
+				m := c.createMetric(metricValueLabels, mi, uuid, val)
+
+				metrics[c.counter] = append(metrics[c.counter], m)
+			}
+		} else {
+			// No tally for this entity: emit a zero metric on its own copy, as labels is reused for later entities.
+			m := c.createMetric(maps.Clone(labels), mi, uuid, 0)
+			metrics[c.counter] = append(metrics[c.counter], m)
+		}
+	}
+
+	return metrics, nil
+}
+
+// newExpCollector builds an expCollector and starts its watch; a watch failure is logged and returned as the error.
+func newExpCollector(
+	labelsCounters []counters.Counter,
+	hostname string,
+	config *appconfig.Config,
+	deviceWatchList devicewatchlistmanager.WatchList,
+) (expCollector, error) {
+	collector := expCollector{
+		baseExpCollector: baseExpCollector{
+			deviceWatchList: deviceWatchList,
+			hostname:        hostname,
+			config:          config,
+			labelsCounters:  labelsCounters,
+		},
+		// Defaults: every sampled value is counted as-is and no per-value labels are added.
+		fieldValueParser: func(val int64) []int64 {
+			return []int64{val}
+		},
+		labelFiller: func(metricValueLabels map[string]string, entityValue int64) {
+			// This function is intentionally left blank
+		},
+	}
+
+	var err error
+	// Watch runs on the collector's deviceWatchList field (not the parameter); its cleanups are stored on the collector.
+	collector.cleanups, err = collector.deviceWatchList.Watch()
+	if err != nil {
+		slog.Warn(fmt.Sprintf("Failed to watch metrics: %s", err))
+		return expCollector{}, err
+	}
+
+	return collector, nil
+}
diff --git a/pkg/dcgmexporter/gpu_collector.go b/internal/pkg/collector/gpu_collector.go
similarity index 58%
rename from pkg/dcgmexporter/gpu_collector.go
rename to internal/pkg/collector/gpu_collector.go
index e4cac498..8bed9226 100644
--- a/pkg/dcgmexporter/gpu_collector.go
+++ b/internal/pkg/collector/gpu_collector.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,95 +14,78 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package collector
 
 import (
 	"errors"
 	"fmt"
+	"log/slog"
 	"strconv"
 	"strings"
 
 	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicemonitoring"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
 )
 
 const unknownErr = "Unknown Error"
 
-type DCGMCollectorConstructor func([]Counter, string, *Config, FieldEntityGroupTypeSystemInfoItem) (*DCGMCollector,
-	func(), error)
+type DCGMCollector struct {
+	counters                 []counters.Counter
+	cleanups                 []func()
+	useOldNamespace          bool
+	deviceWatchList          devicewatchlistmanager.WatchList
+	hostname                 string
+	replaceBlanksInModelName bool
+}
 
 func NewDCGMCollector(
-	c []Counter,
+	c []counters.Counter,
 	hostname string,
-	config *Config,
-	fieldEntityGroupTypeSystemInfo FieldEntityGroupTypeSystemInfoItem,
-) (*DCGMCollector, func(), error) {
-	if fieldEntityGroupTypeSystemInfo.isEmpty() {
-		return nil, func() {}, errors.New("fieldEntityGroupTypeSystemInfo is empty")
+	config *appconfig.Config,
+	deviceWatchList devicewatchlistmanager.WatchList,
+) (*DCGMCollector, error) {
+	if deviceWatchList.IsEmpty() {
+		return nil, errors.New("deviceWatchList is empty")
 	}
 
 	collector := &DCGMCollector{
-		Counters:     c,
-		DeviceFields: fieldEntityGroupTypeSystemInfo.DeviceFields,
-		SysInfo:      fieldEntityGroupTypeSystemInfo.SystemInfo,
-		Hostname:     hostname,
+		counters:        c,
+		deviceWatchList: deviceWatchList,
+		hostname:        hostname,
 	}
 
 	if config == nil {
-		logrus.Warn("Config is empty")
-		return collector, func() { collector.Cleanup() }, nil
+		slog.Warn("Config is empty")
+		return collector, nil
 	}
 
-	collector.UseOldNamespace = config.UseOldNamespace
-	collector.ReplaceBlanksInModelName = config.ReplaceBlanksInModelName
+	collector.useOldNamespace = config.UseOldNamespace
+	collector.replaceBlanksInModelName = config.ReplaceBlanksInModelName
 
-	_, _, cleanups, err := SetupDcgmFieldsWatch(collector.DeviceFields,
-		fieldEntityGroupTypeSystemInfo.SystemInfo,
-		int64(config.CollectInterval)*1000)
-	if err != nil {
-		logrus.Fatal("Failed to watch metrics: ", err)
-	}
-
-	collector.Cleanups = cleanups
-
-	return collector, func() { collector.Cleanup() }, nil
-}
-
-func GetSystemInfo(config *Config, entityType dcgm.Field_Entity_Group) (*SystemInfo, error) {
-	sysInfo, err := InitializeSystemInfo(config.GPUDevices,
-		config.SwitchDevices,
-		config.CPUDevices,
-		config.UseFakeGPUs, entityType)
+	cleanups, err := deviceWatchList.Watch()
 	if err != nil {
 		return nil, err
 	}
-	return &sysInfo, err
-}
 
-func GetHostname(config *Config) (string, error) {
-	hostname := ""
-	var err error
-	if !config.NoHostname {
-		if nodeName := os.Getenv("NODE_NAME"); nodeName != "" {
-			hostname = nodeName
-		} else {
-			hostname, err = os.Hostname()
-			if err != nil {
-				return "", err
-			}
-		}
-	}
-	return hostname, nil
+	collector.cleanups = cleanups
+
+	return collector, nil
 }
 
 func (c *DCGMCollector) Cleanup() {
-	for _, c := range c.Cleanups {
+	for _, c := range c.cleanups {
 		c()
 	}
 }
 
 func (c *DCGMCollector) GetMetrics() (MetricsByCounter, error) {
-	monitoringInfo := GetMonitoredEntities(c.SysInfo)
+	monitoringInfo := devicemonitoring.GetMonitoredEntities(c.deviceWatchList.DeviceInfo())
 
 	metrics := make(MetricsByCounter)
 
@@ -110,78 +93,70 @@ func (c *DCGMCollector) GetMetrics() (MetricsByCounter, error) {
 		var vals []dcgm.FieldValue_v1
 		var err error
 		if mi.Entity.EntityGroupId == dcgm.FE_LINK {
-			vals, err = dcgm.LinkGetLatestValues(mi.Entity.EntityId, mi.ParentId, c.DeviceFields)
+			vals, err = dcgmprovider.Client().LinkGetLatestValues(mi.Entity.EntityId, mi.ParentId,
+				c.deviceWatchList.DeviceFields())
 		} else {
-			vals, err = dcgm.EntityGetLatestValues(mi.Entity.EntityGroupId, mi.Entity.EntityId, c.DeviceFields)
+			vals, err = dcgmprovider.Client().EntityGetLatestValues(mi.Entity.EntityGroupId, mi.Entity.EntityId,
+				c.deviceWatchList.DeviceFields())
 		}
 
 		if err != nil {
 			if derr, ok := err.(*dcgm.DcgmError); ok {
 				if derr.Code == dcgm.DCGM_ST_CONNECTION_NOT_VALID {
-					logrus.Fatal("Could not retrieve metrics: ", err)
+					slog.Error("Could not retrieve metrics: " + err.Error())
+					os.Exit(1)
 				}
 			}
 			return nil, err
 		}
 
 		// InstanceInfo will be nil for GPUs
-		if c.SysInfo.InfoType == dcgm.FE_SWITCH || c.SysInfo.InfoType == dcgm.FE_LINK {
-			ToSwitchMetric(metrics, vals, c.Counters, mi, c.UseOldNamespace, c.Hostname)
-		} else if c.SysInfo.InfoType == dcgm.FE_CPU || c.SysInfo.InfoType == dcgm.FE_CPU_CORE {
-			ToCPUMetric(metrics, vals, c.Counters, mi, c.UseOldNamespace, c.Hostname)
-		} else {
-			ToMetric(metrics,
+		switch c.deviceWatchList.DeviceInfo().InfoType() {
+		case dcgm.FE_SWITCH, dcgm.FE_LINK:
+			toSwitchMetric(metrics, vals, c.counters, mi, c.useOldNamespace, c.hostname)
+		case dcgm.FE_CPU, dcgm.FE_CPU_CORE:
+			toCPUMetric(metrics, vals, c.counters, mi, c.useOldNamespace, c.hostname)
+		default:
+			toMetric(metrics,
 				vals,
-				c.Counters,
+				c.counters,
 				mi.DeviceInfo,
 				mi.InstanceInfo,
-				c.UseOldNamespace,
-				c.Hostname,
-				c.ReplaceBlanksInModelName)
+				c.useOldNamespace,
+				c.hostname,
+				c.replaceBlanksInModelName)
 		}
 	}
 
 	return metrics, nil
 }
 
-func ShouldMonitorDeviceType(fields []dcgm.Short, entityType dcgm.Field_Entity_Group) bool {
-	if len(fields) == 0 {
-		return false
-	}
-
-	if len(fields) == 1 && fields[0] == dcgm.DCGM_FI_DRIVER_VERSION {
-		return false
-	}
-
-	return true
-}
-
-func FindCounterField(c []Counter, fieldID uint) (Counter, error) {
+func findCounterField(c []counters.Counter, fieldID uint) (counters.Counter, error) {
 	for i := 0; i < len(c); i++ {
 		if uint(c[i].FieldID) == fieldID {
 			return c[i], nil
 		}
 	}
 
-	return Counter{}, fmt.Errorf("could not find counter corresponding to field ID '%d'", fieldID)
+	return counters.Counter{}, fmt.Errorf("could not find counter corresponding to field ID '%d'", fieldID)
 }
 
-func ToSwitchMetric(
+func toSwitchMetric(
 	metrics MetricsByCounter,
-	values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string,
+	values []dcgm.FieldValue_v1, c []counters.Counter, mi devicemonitoring.Info, useOld bool, hostname string,
 ) {
 	labels := map[string]string{}
 
 	for _, val := range values {
-		v := ToString(val)
+		v := toString(val)
 		// Filter out counters with no value and ignored fields for this entity
 
-		counter, err := FindCounterField(c, val.FieldId)
+		counter, err := findCounterField(c, val.FieldId)
 		if err != nil {
 			continue
 		}
 
-		if counter.PromType == "label" {
+		if counter.IsLabel() {
 			labels[counter.FieldName] = v
 			continue
 		}
@@ -190,7 +165,7 @@ func ToSwitchMetric(
 			uuid = "uuid"
 		}
 		var m Metric
-		if v == SkipDCGMValue {
+		if v == skipDCGMValue {
 			continue
 		} else {
 			m = Metric{
@@ -212,22 +187,22 @@ func ToSwitchMetric(
 	}
 }
 
-func ToCPUMetric(
+func toCPUMetric(
 	metrics MetricsByCounter,
-	values []dcgm.FieldValue_v1, c []Counter, mi MonitoringInfo, useOld bool, hostname string,
+	values []dcgm.FieldValue_v1, c []counters.Counter, mi devicemonitoring.Info, useOld bool, hostname string,
 ) {
 	labels := map[string]string{}
 
 	for _, val := range values {
-		v := ToString(val)
+		v := toString(val)
 		// Filter out counters with no value and ignored fields for this entity
 
-		counter, err := FindCounterField(c, val.FieldId)
+		counter, err := findCounterField(c, val.FieldId)
 		if err != nil {
 			continue
 		}
 
-		if counter.PromType == "label" {
+		if counter.IsLabel() {
 			labels[counter.FieldName] = v
 			continue
 		}
@@ -236,7 +211,7 @@ func ToCPUMetric(
 			uuid = "uuid"
 		}
 		var m Metric
-		if v == SkipDCGMValue {
+		if v == skipDCGMValue {
 			continue
 		} else {
 			m = Metric{
@@ -258,12 +233,12 @@ func ToCPUMetric(
 	}
 }
 
-func ToMetric(
+func toMetric(
 	metrics MetricsByCounter,
 	values []dcgm.FieldValue_v1,
-	c []Counter,
+	c []counters.Counter,
 	d dcgm.Device,
-	instanceInfo *GPUInstanceInfo,
+	instanceInfo *deviceinfo.GPUInstanceInfo,
 	useOld bool,
 	hostname string,
 	replaceBlanksInModelName bool,
@@ -271,18 +246,18 @@ func ToMetric(
 	labels := map[string]string{}
 
 	for _, val := range values {
-		v := ToString(val)
+		v := toString(val)
 		// Filter out counters with no value and ignored fields for this entity
-		if v == SkipDCGMValue {
+		if v == skipDCGMValue {
 			continue
 		}
 
-		counter, err := FindCounterField(c, val.FieldId)
+		counter, err := findCounterField(c, val.FieldId)
 		if err != nil {
 			continue
 		}
 
-		if counter.PromType == "label" {
+		if counter.IsLabel() {
 			labels[counter.FieldName] = v
 			continue
 		}
@@ -342,52 +317,52 @@ func getGPUModel(d dcgm.Device, replaceBlanksInModelName bool) string {
 	return gpuModel
 }
 
-func ToString(value dcgm.FieldValue_v1) string {
+func toString(value dcgm.FieldValue_v1) string {
 	switch value.FieldType {
 	case dcgm.DCGM_FT_INT64:
 		switch v := value.Int64(); v {
 		case dcgm.DCGM_FT_INT32_BLANK:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_INT32_NOT_FOUND:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_INT32_NOT_SUPPORTED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_INT32_NOT_PERMISSIONED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_INT64_BLANK:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_INT64_NOT_FOUND:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_INT64_NOT_SUPPORTED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_INT64_NOT_PERMISSIONED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		default:
 			return fmt.Sprintf("%d", value.Int64())
 		}
 	case dcgm.DCGM_FT_DOUBLE:
 		switch v := value.Float64(); v {
 		case dcgm.DCGM_FT_FP64_BLANK:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_FP64_NOT_FOUND:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_FP64_NOT_SUPPORTED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_FP64_NOT_PERMISSIONED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		default:
 			return fmt.Sprintf("%f", value.Float64())
 		}
 	case dcgm.DCGM_FT_STRING:
 		switch v := value.String(); v {
 		case dcgm.DCGM_FT_STR_BLANK:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_STR_NOT_FOUND:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_STR_NOT_SUPPORTED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		case dcgm.DCGM_FT_STR_NOT_PERMISSIONED:
-			return SkipDCGMValue
+			return skipDCGMValue
 		default:
 			return v
 		}
diff --git a/internal/pkg/collector/gpu_collector_test.go b/internal/pkg/collector/gpu_collector_test.go
new file mode 100644
index 00000000..cdb2ce39
--- /dev/null
+++ b/internal/pkg/collector/gpu_collector_test.go
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+	"reflect"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
+
+// TestToMetric verifies that toMetric converts an INT64 field value into a metric carrying the
+// device UUID, PCI bus ID and model name, with blanks in the model replaced only when requested.
+func TestToMetric(t *testing.T) {
+	rawValue := [4096]byte{42}
+	values := []dcgm.FieldValue_v1{
+		{
+			FieldId:   150,
+			FieldType: dcgm.DCGM_FT_INT64,
+			Value:     rawValue,
+		},
+	}
+
+	c := []counters.Counter{
+		{
+			FieldID:   150,
+			FieldName: "DCGM_FI_DEV_GPU_TEMP",
+			PromType:  "gauge",
+			Help:      "Temperature Help info",
+		},
+	}
+
+	device := dcgm.Device{
+		UUID: "fake0",
+		Identifiers: dcgm.DeviceIdentifiers{
+			Model: "NVIDIA T400 4GB",
+		},
+		PCI: dcgm.PCIInfo{
+			BusID: "00000000:0000:0000.0",
+		},
+	}
+
+	var instanceInfo *deviceinfo.GPUInstanceInfo
+
+	testCases := []struct {
+		replaceBlanksInModelName bool
+		expectedGPUModelName     string
+	}{
+		{
+			replaceBlanksInModelName: true,
+			expectedGPUModelName:     "NVIDIA-T400-4GB",
+		},
+		{
+			replaceBlanksInModelName: false,
+			expectedGPUModelName:     "NVIDIA T400 4GB",
+		},
+	}
+
+	for _, tc := range testCases {
+		name := fmt.Sprintf("When replaceBlanksInModelName is %t", tc.replaceBlanksInModelName)
+		t.Run(name, func(t *testing.T) {
+			metrics := make(map[counters.Counter][]Metric)
+			toMetric(metrics, values, c, device, instanceInfo, false, "", tc.replaceBlanksInModelName)
+			assert.Len(t, metrics, 1)
+			// The map holds exactly one counter; inspect the first metric recorded under it.
+			firstKey := reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)
+			got := metrics[firstKey][0]
+			assert.Equal(t, "42", got.Value)
+			assert.Equal(t, tc.expectedGPUModelName, got.GPUModelName)
+			assert.Equal(t, device.UUID, got.GPUUUID)
+			assert.Equal(t, device.PCI.BusID, got.GPUPCIBusID)
+		})
+	}
+}
+
+// TestToMetricWhenDCGM_FI_DEV_XID_ERRORSField checks that toMetric adds err_code and err_msg
+// attributes to a DCGM_FI_DEV_XID_ERRORS metric for no-error, known and unknown XID codes.
+func TestToMetricWhenDCGM_FI_DEV_XID_ERRORSField(t *testing.T) {
+	c := []counters.Counter{
+		{
+			FieldID:   dcgm.DCGM_FI_DEV_XID_ERRORS,
+			FieldName: "DCGM_FI_DEV_XID_ERRORS",
+			PromType:  "gauge",
+			Help:      "Value of the last XID error encountered.",
+		},
+	}
+
+	d := dcgm.Device{
+		UUID: "fake0",
+		Identifiers: dcgm.DeviceIdentifiers{
+			Model: "NVIDIA T400 4GB",
+		},
+		PCI: dcgm.PCIInfo{
+			BusID: "00000000:0000:0000.0",
+		},
+	}
+
+	var instanceInfo *deviceinfo.GPUInstanceInfo
+
+	type testCase struct {
+		name        string
+		fieldValue  byte
+		expectedErr string
+	}
+
+	testCases := []testCase{
+		{
+			name:        "when DCGM_FI_DEV_XID_ERRORS has no error",
+			fieldValue:  0,
+			expectedErr: xidErrCodeToText[0],
+		},
+		{
+			name:        "when DCGM_FI_DEV_XID_ERRORS has known value",
+			fieldValue:  42,
+			expectedErr: xidErrCodeToText[42],
+		},
+		{
+			name:        "when DCGM_FI_DEV_XID_ERRORS has unknown value",
+			fieldValue:  255,
+			expectedErr: unknownErr,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			fieldValue := [4096]byte{tc.fieldValue}
+			values := []dcgm.FieldValue_v1{
+				{
+					FieldId:   dcgm.DCGM_FI_DEV_XID_ERRORS,
+					FieldType: dcgm.DCGM_FT_INT64,
+					Value:     fieldValue,
+				},
+			}
+
+			metrics := make(map[counters.Counter][]Metric)
+			toMetric(metrics, values, c, d, instanceInfo, false, "", false)
+			assert.Len(t, metrics, 1)
+			// We get metric value with 0 index
+			metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)]
+			assert.Equal(t, fmt.Sprint(tc.fieldValue), metricValues[0].Value)
+			assert.Contains(t, metricValues[0].Attributes, "err_code")
+			assert.Equal(t, fmt.Sprint(tc.fieldValue), metricValues[0].Attributes["err_code"])
+			assert.Contains(t, metricValues[0].Attributes, "err_msg")
+			assert.Equal(t, tc.expectedErr, metricValues[0].Attributes["err_msg"])
+			assert.Equal(t, d.UUID, metricValues[0].GPUUUID)
+			assert.Equal(t, d.PCI.BusID, metricValues[0].GPUPCIBusID)
+		})
+	}
+}
diff --git a/internal/pkg/collector/gpu_health_collector.go b/internal/pkg/collector/gpu_health_collector.go
new file mode 100644
index 00000000..62ec8065
--- /dev/null
+++ b/internal/pkg/collector/gpu_health_collector.go
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"errors"
+	"fmt"
+	"maps"
+	"slices"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/sirupsen/logrus"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicemonitoring"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/logging"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/utils"
+)
+
+var gpuHealthChecks = []dcgm.HealthSystem{ // health systems GetMetrics reports for every monitored entity
+	dcgm.DCGM_HEALTH_WATCH_PCIE,
+	dcgm.DCGM_HEALTH_WATCH_NVLINK,
+	dcgm.DCGM_HEALTH_WATCH_PMU,
+	dcgm.DCGM_HEALTH_WATCH_MCU,
+	dcgm.DCGM_HEALTH_WATCH_MEM,
+	dcgm.DCGM_HEALTH_WATCH_SM,
+	dcgm.DCGM_HEALTH_WATCH_INFOROM,
+	dcgm.DCGM_HEALTH_WATCH_THERMAL,
+	dcgm.DCGM_HEALTH_WATCH_POWER,
+	dcgm.DCGM_HEALTH_WATCH_DRIVER,
+}
+
+type gpuHealthStatusCollector struct {
+	baseExpCollector
+	groupID            dcgm.GroupHandle    // DCGM group passed to HealthCheck and GetGroupInfo
+	deviceInfoProvider deviceinfo.Provider // input to devicemonitoring.GetMonitoredEntities
+}
+
+// GetMetrics runs the DCGM health check for the collector's group and returns one
+// metric per monitored entity in that group and per system listed in gpuHealthChecks.
+// Systems without a reported incident are emitted as DCGM_HEALTH_RESULT_PASS.
+func (c *gpuHealthStatusCollector) GetMetrics() (MetricsByCounter, error) {
+	// Read the GPU health status.
+	gpuHealthStatus, err := dcgmprovider.Client().HealthCheck(c.groupID)
+	if err != nil {
+		return MetricsByCounter{}, err
+	}
+
+	monitoringInfo := devicemonitoring.GetMonitoredEntities(c.deviceInfoProvider)
+
+	// Get the GPU in the group
+	groupInfo, err := dcgmprovider.Client().GetGroupInfo(c.groupID)
+	if err != nil {
+		return MetricsByCounter{}, err
+	}
+
+	groupEntityPairSet := make(map[dcgm.GroupEntityPair]struct{})
+	for _, entityPair := range groupInfo.EntityList {
+		groupEntityPairSet[entityPair] = struct{}{}
+	}
+
+	// Find monitoring info for GPU in the group
+	monitoringInfoInGroup := make([]devicemonitoring.Info, 0)
+	for _, info := range monitoringInfo {
+		if _, exists := groupEntityPairSet[info.Entity]; exists {
+			monitoringInfoInGroup = append(monitoringInfoInGroup, info)
+		}
+	}
+
+	metrics := make(MetricsByCounter)
+	metrics[c.counter] = make([]Metric, 0)
+	uuid := "UUID"
+	if c.config.UseOldNamespace {
+		uuid = "uuid"
+	}
+
+	entityHealthSystemToIncident := map[dcgm.GroupEntityPair]map[dcgm.HealthSystem]dcgm.Incident{}
+	for _, mi := range monitoringInfoInGroup {
+		// Populate the table with default values
+		incidents := make(map[dcgm.HealthSystem]dcgm.Incident, len(gpuHealthChecks))
+		for _, healthSystem := range gpuHealthChecks {
+			incidents[healthSystem] = dcgm.Incident{
+				System: healthSystem,
+				Health: dcgm.DCGM_HEALTH_RESULT_PASS,
+			}
+		}
+		entityHealthSystemToIncident[mi.Entity] = incidents
+	}
+
+	// We assume that each health check may produce only one incident per system.
+	for _, incident := range gpuHealthStatus.Incidents {
+		// Entities outside the monitored set have no table; writing to it would panic.
+		if incidents, ok := entityHealthSystemToIncident[incident.EntityInfo]; ok {
+			incidents[incident.System] = incident
+		}
+	}
+
+	for _, mi := range monitoringInfoInGroup {
+		// Fresh label set per entity, so values read for one entity cannot leak into the next.
+		labels := map[string]string{}
+		if len(c.labelsCounters) > 0 && len(c.deviceWatchList.LabelDeviceFields()) > 0 {
+			if err := c.getLabelsFromCounters(mi, labels); err != nil {
+				return nil, err
+			}
+		}
+		for _, healthSystem := range gpuHealthChecks {
+			incident := entityHealthSystemToIncident[mi.Entity][healthSystem]
+			metricValueLabels := maps.Clone(labels)
+			metricValueLabels["health_watch"] = healthSystemWatchToString(incident.System)
+			metricValueLabels["health_error_code"] = healthCheckErrorToString(incident.Error.Code)
+			m := c.createMetric(metricValueLabels, mi, uuid, int(incident.Health))
+			metrics[c.counter] = append(metrics[c.counter], m)
+		}
+	}
+
+	return metrics, nil
+}
+
+func (c *gpuHealthStatusCollector) Cleanup() {
+	for i := range c.cleanups {
+		c.cleanups[i]() // callbacks run in slice order, i.e. the order they were appended
+	}
+}
+
+// NewGPUHealthStatusCollector builds the collector for the DCGM_EXP_GPU_HEALTH_STATUS
+// counter: it creates a DCGM group with every supported GPU and enables all health
+// watches on it. On failure, resources acquired so far are released before returning.
+func NewGPUHealthStatusCollector(
+	counterList counters.CounterList,
+	hostname string,
+	config *appconfig.Config,
+	deviceWatchList devicewatchlistmanager.WatchList,
+) (_ Collector, err error) {
+	if !IsDCGMExpGPUHealthStatusEnabled(counterList) {
+		logrus.Error(counters.DCGMExpGPUHealthStatus + " collector is disabled")
+		return nil, errors.New(counters.DCGMExpGPUHealthStatus + " collector is disabled")
+	}
+
+	supportedGPUs, err := dcgmprovider.Client().GetSupportedDevices()
+	if err != nil {
+		logrus.WithError(err).Error("Failed to get supported GPU devices")
+		return nil, err
+	}
+
+	if len(supportedGPUs) == 0 {
+		logrus.Error("No supported GPU devices found")
+		return nil, errors.New("no supported GPU devices found")
+	}
+
+	// Create Group
+	newGroupNumber, err := utils.RandUint64()
+	if err != nil {
+		logrus.WithError(err).Error("Failed to generate new group number")
+		return nil, err
+	}
+	groupID, err := dcgmprovider.Client().CreateGroup(fmt.Sprintf("gpu_health_monitor_%d", newGroupNumber))
+	if err != nil {
+		logrus.WithError(err).Error("Failed to create group")
+		return nil, err
+	}
+
+	cleanups := []func(){func() {
+		if destroyErr := dcgmprovider.Client().DestroyGroup(groupID); destroyErr != nil {
+			logrus.WithFields(logrus.Fields{
+				logging.GroupIDKey: groupID,
+				logrus.ErrorKey:    destroyErr,
+			}).Warn("cannot destroy group")
+		}
+	}}
+	// From here on a failed construction must not leak the group (or watches) created above.
+	defer func() {
+		if err != nil {
+			for _, cleanup := range cleanups {
+				cleanup()
+			}
+		}
+	}()
+
+	for _, gpu := range supportedGPUs {
+		if err = dcgmprovider.Client().AddEntityToGroup(groupID, dcgm.FE_GPU, gpu); err != nil {
+			logrus.WithError(err).WithField("gpu", gpu).Error("Failed to add GPU device to group")
+			return nil, err
+		}
+	}
+
+	if err = dcgmprovider.Client().HealthSet(groupID, dcgm.DCGM_HEALTH_WATCH_ALL); err != nil {
+		logrus.WithError(err).Error("Failed to set health watch")
+		return nil, err
+	}
+
+	deviceInfoProvider, err := deviceinfo.Initialize(
+		appconfig.DeviceOptions{MinorRange: []int{-1}, MajorRange: []int{-1}},
+		appconfig.DeviceOptions{}, appconfig.DeviceOptions{}, config.UseFakeGPUs, dcgm.FE_GPU)
+	if err != nil {
+		return nil, err
+	}
+
+	if !deviceWatchList.IsEmpty() {
+		watchListCleanups, watchErr := deviceWatchList.Watch()
+		if watchErr != nil {
+			logrus.WithError(watchErr).Error("Failed to watch metrics")
+			return nil, watchErr
+		}
+		cleanups = append(cleanups, watchListCleanups...)
+	}
+
+	return &gpuHealthStatusCollector{
+		baseExpCollector: baseExpCollector{
+			counter: counterList[slices.IndexFunc(counterList, func(c counters.Counter) bool {
+				return c.FieldName == counters.DCGMExpGPUHealthStatus
+			})],
+			labelsCounters:  counterList.LabelCounters(),
+			hostname:        hostname,
+			config:          config,
+			cleanups:        cleanups,
+			deviceWatchList: deviceWatchList,
+		},
+		groupID:            groupID,
+		deviceInfoProvider: deviceInfoProvider,
+	}, nil
+}
+
+func IsDCGMExpGPUHealthStatusEnabled(counterList counters.CounterList) bool {
+	// Enabled means at least one configured counter carries the health-status field name.
+	isHealthStatus := func(c counters.Counter) bool { return c.FieldName == counters.DCGMExpGPUHealthStatus }
+	return slices.ContainsFunc(counterList, isHealthStatus)
+}
+
+var healthSystemWatchToStringMap = map[dcgm.HealthSystem]string{ // text used for the "health_watch" metric label
+	dcgm.DCGM_HEALTH_WATCH_PCIE:              "PCIE",
+	dcgm.DCGM_HEALTH_WATCH_NVLINK:            "NVLINK",
+	dcgm.DCGM_HEALTH_WATCH_PMU:               "PMU",
+	dcgm.DCGM_HEALTH_WATCH_MCU:               "MCU",
+	dcgm.DCGM_HEALTH_WATCH_MEM:               "MEM",
+	dcgm.DCGM_HEALTH_WATCH_SM:                "SM",
+	dcgm.DCGM_HEALTH_WATCH_INFOROM:           "INFOROM",
+	dcgm.DCGM_HEALTH_WATCH_THERMAL:           "THERMAL",
+	dcgm.DCGM_HEALTH_WATCH_POWER:             "POWER",
+	dcgm.DCGM_HEALTH_WATCH_DRIVER:            "DRIVER",
+	dcgm.DCGM_HEALTH_WATCH_NVSWITCH_NONFATAL: "NVSWITCH_NONFATAL",
+	dcgm.DCGM_HEALTH_WATCH_NVSWITCH_FATAL:    "NVSWITCH_FATAL",
+}
+
+// healthSystemWatchToString returns the label text for heathSystem as listed in
+// healthSystemWatchToStringMap. Systems missing from the map yield "" because
+// that is the zero value a Go map lookup produces for an absent key.
+func healthSystemWatchToString(heathSystem dcgm.HealthSystem) string {
+	label := healthSystemWatchToStringMap[heathSystem]
+	return label
+}
+
+var healthCheckErrorToStringMap = map[dcgm.HealthCheckErrorCode]string{ // text used for the "health_error_code" metric label
+	dcgm.DCGM_FR_OK:                              "DCGM_FR_OK",
+	dcgm.DCGM_FR_UNKNOWN:                         "DCGM_FR_UNKNOWN",
+	dcgm.DCGM_FR_UNRECOGNIZED:                    "DCGM_FR_UNRECOGNIZED",
+	dcgm.DCGM_FR_PCI_REPLAY_RATE:                 "DCGM_FR_PCI_REPLAY_RATE",
+	dcgm.DCGM_FR_VOLATILE_DBE_DETECTED:           "DCGM_FR_VOLATILE_DBE_DETECTED",
+	dcgm.DCGM_FR_VOLATILE_SBE_DETECTED:           "DCGM_FR_VOLATILE_SBE_DETECTED",
+	dcgm.DCGM_FR_PENDING_PAGE_RETIREMENTS:        "DCGM_FR_PENDING_PAGE_RETIREMENTS",
+	dcgm.DCGM_FR_RETIRED_PAGES_LIMIT:             "DCGM_FR_RETIRED_PAGES_LIMIT",
+	dcgm.DCGM_FR_RETIRED_PAGES_DBE_LIMIT:         "DCGM_FR_RETIRED_PAGES_DBE_LIMIT",
+	dcgm.DCGM_FR_CORRUPT_INFOROM:                 "DCGM_FR_CORRUPT_INFOROM",
+	dcgm.DCGM_FR_CLOCK_THROTTLE_THERMAL:          "DCGM_FR_CLOCK_THROTTLE_THERMAL",
+	dcgm.DCGM_FR_POWER_UNREADABLE:                "DCGM_FR_POWER_UNREADABLE",
+	dcgm.DCGM_FR_CLOCK_THROTTLE_POWER:            "DCGM_FR_CLOCK_THROTTLE_POWER",
+	dcgm.DCGM_FR_NVLINK_ERROR_THRESHOLD:          "DCGM_FR_NVLINK_ERROR_THRESHOLD",
+	dcgm.DCGM_FR_NVLINK_DOWN:                     "DCGM_FR_NVLINK_DOWN",
+	dcgm.DCGM_FR_NVSWITCH_FATAL_ERROR:            "DCGM_FR_NVSWITCH_FATAL_ERROR",
+	dcgm.DCGM_FR_NVSWITCH_NON_FATAL_ERROR:        "DCGM_FR_NVSWITCH_NON_FATAL_ERROR",
+	dcgm.DCGM_FR_NVSWITCH_DOWN:                   "DCGM_FR_NVSWITCH_DOWN",
+	dcgm.DCGM_FR_NO_ACCESS_TO_FILE:               "DCGM_FR_NO_ACCESS_TO_FILE",
+	dcgm.DCGM_FR_NVML_API:                        "DCGM_FR_NVML_API",
+	dcgm.DCGM_FR_DEVICE_COUNT_MISMATCH:           "DCGM_FR_DEVICE_COUNT_MISMATCH",
+	dcgm.DCGM_FR_BAD_PARAMETER:                   "DCGM_FR_BAD_PARAMETER",
+	dcgm.DCGM_FR_CANNOT_OPEN_LIB:                 "DCGM_FR_CANNOT_OPEN_LIB",
+	dcgm.DCGM_FR_DENYLISTED_DRIVER:               "DCGM_FR_DENYLISTED_DRIVER",
+	dcgm.DCGM_FR_NVML_LIB_BAD:                    "DCGM_FR_NVML_LIB_BAD",
+	dcgm.DCGM_FR_GRAPHICS_PROCESSES:              "DCGM_FR_GRAPHICS_PROCESSES",
+	dcgm.DCGM_FR_HOSTENGINE_CONN:                 "DCGM_FR_HOSTENGINE_CONN",
+	dcgm.DCGM_FR_FIELD_QUERY:                     "DCGM_FR_FIELD_QUERY",
+	dcgm.DCGM_FR_BAD_CUDA_ENV:                    "DCGM_FR_BAD_CUDA_ENV",
+	dcgm.DCGM_FR_PERSISTENCE_MODE:                "DCGM_FR_PERSISTENCE_MODE",
+	dcgm.DCGM_FR_LOW_BANDWIDTH:                   "DCGM_FR_LOW_BANDWIDTH",
+	dcgm.DCGM_FR_HIGH_LATENCY:                    "DCGM_FR_HIGH_LATENCY",
+	dcgm.DCGM_FR_CANNOT_GET_FIELD_TAG:            "DCGM_FR_CANNOT_GET_FIELD_TAG",
+	dcgm.DCGM_FR_FIELD_VIOLATION:                 "DCGM_FR_FIELD_VIOLATION",
+	dcgm.DCGM_FR_FIELD_THRESHOLD:                 "DCGM_FR_FIELD_THRESHOLD",
+	dcgm.DCGM_FR_FIELD_VIOLATION_DBL:             "DCGM_FR_FIELD_VIOLATION_DBL",
+	dcgm.DCGM_FR_FIELD_THRESHOLD_DBL:             "DCGM_FR_FIELD_THRESHOLD_DBL",
+	dcgm.DCGM_FR_UNSUPPORTED_FIELD_TYPE:          "DCGM_FR_UNSUPPORTED_FIELD_TYPE",
+	dcgm.DCGM_FR_FIELD_THRESHOLD_TS:              "DCGM_FR_FIELD_THRESHOLD_TS",
+	dcgm.DCGM_FR_FIELD_THRESHOLD_TS_DBL:          "DCGM_FR_FIELD_THRESHOLD_TS_DBL",
+	dcgm.DCGM_FR_THERMAL_VIOLATIONS:              "DCGM_FR_THERMAL_VIOLATIONS",
+	dcgm.DCGM_FR_THERMAL_VIOLATIONS_TS:           "DCGM_FR_THERMAL_VIOLATIONS_TS",
+	dcgm.DCGM_FR_TEMP_VIOLATION:                  "DCGM_FR_TEMP_VIOLATION",
+	dcgm.DCGM_FR_THROTTLING_VIOLATION:            "DCGM_FR_THROTTLING_VIOLATION",
+	dcgm.DCGM_FR_INTERNAL:                        "DCGM_FR_INTERNAL",
+	dcgm.DCGM_FR_PCIE_GENERATION:                 "DCGM_FR_PCIE_GENERATION",
+	dcgm.DCGM_FR_PCIE_WIDTH:                      "DCGM_FR_PCIE_WIDTH",
+	dcgm.DCGM_FR_ABORTED:                         "DCGM_FR_ABORTED",
+	dcgm.DCGM_FR_TEST_DISABLED:                   "DCGM_FR_TEST_DISABLED",
+	dcgm.DCGM_FR_CANNOT_GET_STAT:                 "DCGM_FR_CANNOT_GET_STAT",
+	dcgm.DCGM_FR_STRESS_LEVEL:                    "DCGM_FR_STRESS_LEVEL",
+	dcgm.DCGM_FR_CUDA_API:                        "DCGM_FR_CUDA_API",
+	dcgm.DCGM_FR_FAULTY_MEMORY:                   "DCGM_FR_FAULTY_MEMORY",
+	dcgm.DCGM_FR_CANNOT_SET_WATCHES:              "DCGM_FR_CANNOT_SET_WATCHES",
+	dcgm.DCGM_FR_CUDA_UNBOUND:                    "DCGM_FR_CUDA_UNBOUND",
+	dcgm.DCGM_FR_ECC_DISABLED:                    "DCGM_FR_ECC_DISABLED",
+	dcgm.DCGM_FR_MEMORY_ALLOC:                    "DCGM_FR_MEMORY_ALLOC",
+	dcgm.DCGM_FR_CUDA_DBE:                        "DCGM_FR_CUDA_DBE",
+	dcgm.DCGM_FR_MEMORY_MISMATCH:                 "DCGM_FR_MEMORY_MISMATCH",
+	dcgm.DCGM_FR_CUDA_DEVICE:                     "DCGM_FR_CUDA_DEVICE",
+	dcgm.DCGM_FR_ECC_UNSUPPORTED:                 "DCGM_FR_ECC_UNSUPPORTED",
+	dcgm.DCGM_FR_ECC_PENDING:                     "DCGM_FR_ECC_PENDING",
+	dcgm.DCGM_FR_MEMORY_BANDWIDTH:                "DCGM_FR_MEMORY_BANDWIDTH",
+	dcgm.DCGM_FR_TARGET_POWER:                    "DCGM_FR_TARGET_POWER",
+	dcgm.DCGM_FR_API_FAIL:                        "DCGM_FR_API_FAIL",
+	dcgm.DCGM_FR_API_FAIL_GPU:                    "DCGM_FR_API_FAIL_GPU",
+	dcgm.DCGM_FR_CUDA_CONTEXT:                    "DCGM_FR_CUDA_CONTEXT",
+	dcgm.DCGM_FR_DCGM_API:                        "DCGM_FR_DCGM_API",
+	dcgm.DCGM_FR_CONCURRENT_GPUS:                 "DCGM_FR_CONCURRENT_GPUS",
+	dcgm.DCGM_FR_TOO_MANY_ERRORS:                 "DCGM_FR_TOO_MANY_ERRORS",
+	dcgm.DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD:      "DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD",
+	dcgm.DCGM_FR_NVLINK_ERROR_CRITICAL:           "DCGM_FR_NVLINK_ERROR_CRITICAL",
+	dcgm.DCGM_FR_ENFORCED_POWER_LIMIT:            "DCGM_FR_ENFORCED_POWER_LIMIT",
+	dcgm.DCGM_FR_MEMORY_ALLOC_HOST:               "DCGM_FR_MEMORY_ALLOC_HOST",
+	dcgm.DCGM_FR_GPU_OP_MODE:                     "DCGM_FR_GPU_OP_MODE",
+	dcgm.DCGM_FR_NO_MEMORY_CLOCKS:                "DCGM_FR_NO_MEMORY_CLOCKS",
+	dcgm.DCGM_FR_NO_GRAPHICS_CLOCKS:              "DCGM_FR_NO_GRAPHICS_CLOCKS",
+	dcgm.DCGM_FR_HAD_TO_RESTORE_STATE:            "DCGM_FR_HAD_TO_RESTORE_STATE",
+	dcgm.DCGM_FR_L1TAG_UNSUPPORTED:               "DCGM_FR_L1TAG_UNSUPPORTED",
+	dcgm.DCGM_FR_L1TAG_MISCOMPARE:                "DCGM_FR_L1TAG_MISCOMPARE",
+	dcgm.DCGM_FR_ROW_REMAP_FAILURE:               "DCGM_FR_ROW_REMAP_FAILURE",
+	dcgm.DCGM_FR_UNCONTAINED_ERROR:               "DCGM_FR_UNCONTAINED_ERROR",
+	dcgm.DCGM_FR_EMPTY_GPU_LIST:                  "DCGM_FR_EMPTY_GPU_LIST",
+	dcgm.DCGM_FR_DBE_PENDING_PAGE_RETIREMENTS:    "DCGM_FR_DBE_PENDING_PAGE_RETIREMENTS",
+	dcgm.DCGM_FR_UNCORRECTABLE_ROW_REMAP:         "DCGM_FR_UNCORRECTABLE_ROW_REMAP",
+	dcgm.DCGM_FR_PENDING_ROW_REMAP:               "DCGM_FR_PENDING_ROW_REMAP",
+	dcgm.DCGM_FR_BROKEN_P2P_MEMORY_DEVICE:        "DCGM_FR_BROKEN_P2P_MEMORY_DEVICE",
+	dcgm.DCGM_FR_BROKEN_P2P_WRITER_DEVICE:        "DCGM_FR_BROKEN_P2P_WRITER_DEVICE",
+	dcgm.DCGM_FR_NVSWITCH_NVLINK_DOWN:            "DCGM_FR_NVSWITCH_NVLINK_DOWN",
+	dcgm.DCGM_FR_EUD_BINARY_PERMISSIONS:          "DCGM_FR_EUD_BINARY_PERMISSIONS",
+	dcgm.DCGM_FR_EUD_NON_ROOT_USER:               "DCGM_FR_EUD_NON_ROOT_USER",
+	dcgm.DCGM_FR_EUD_SPAWN_FAILURE:               "DCGM_FR_EUD_SPAWN_FAILURE",
+	dcgm.DCGM_FR_EUD_TIMEOUT:                     "DCGM_FR_EUD_TIMEOUT",
+	dcgm.DCGM_FR_EUD_ZOMBIE:                      "DCGM_FR_EUD_ZOMBIE",
+	dcgm.DCGM_FR_EUD_NON_ZERO_EXIT_CODE:          "DCGM_FR_EUD_NON_ZERO_EXIT_CODE",
+	dcgm.DCGM_FR_EUD_TEST_FAILED:                 "DCGM_FR_EUD_TEST_FAILED",
+	dcgm.DCGM_FR_FILE_CREATE_PERMISSIONS:         "DCGM_FR_FILE_CREATE_PERMISSIONS",
+	dcgm.DCGM_FR_PAUSE_RESUME_FAILED:             "DCGM_FR_PAUSE_RESUME_FAILED",
+	dcgm.DCGM_FR_PCIE_H_REPLAY_VIOLATION:         "DCGM_FR_PCIE_H_REPLAY_VIOLATION",
+	dcgm.DCGM_FR_GPU_EXPECTED_NVLINKS_UP:         "DCGM_FR_GPU_EXPECTED_NVLINKS_UP",
+	dcgm.DCGM_FR_NVSWITCH_EXPECTED_NVLINKS_UP:    "DCGM_FR_NVSWITCH_EXPECTED_NVLINKS_UP",
+	dcgm.DCGM_FR_XID_ERROR:                       "DCGM_FR_XID_ERROR",
+	dcgm.DCGM_FR_SBE_VIOLATION:                   "DCGM_FR_SBE_VIOLATION",
+	dcgm.DCGM_FR_DBE_VIOLATION:                   "DCGM_FR_DBE_VIOLATION",
+	dcgm.DCGM_FR_PCIE_REPLAY_VIOLATION:           "DCGM_FR_PCIE_REPLAY_VIOLATION",
+	dcgm.DCGM_FR_SBE_THRESHOLD_VIOLATION:         "DCGM_FR_SBE_THRESHOLD_VIOLATION",
+	dcgm.DCGM_FR_DBE_THRESHOLD_VIOLATION:         "DCGM_FR_DBE_THRESHOLD_VIOLATION",
+	dcgm.DCGM_FR_PCIE_REPLAY_THRESHOLD_VIOLATION: "DCGM_FR_PCIE_REPLAY_THRESHOLD_VIOLATION",
+	dcgm.DCGM_FR_CUDA_FM_NOT_INITIALIZED:         "DCGM_FR_CUDA_FM_NOT_INITIALIZED",
+	dcgm.DCGM_FR_SXID_ERROR:                      "DCGM_FR_SXID_ERROR",
+	dcgm.DCGM_FR_ERROR_SENTINEL:                  "DCGM_FR_ERROR_SENTINEL",
+}
+
+func healthCheckErrorToString(err dcgm.HealthCheckErrorCode) string {
+	return healthCheckErrorToStringMap[err] // codes missing from the map yield ""
+}
diff --git a/internal/pkg/collector/gpu_health_collector_test.go b/internal/pkg/collector/gpu_health_collector_test.go
new file mode 100644
index 00000000..0d44e12c
--- /dev/null
+++ b/internal/pkg/collector/gpu_health_collector_test.go
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"errors"
+	"reflect"
+	"strings"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/mock/gomock"
+
+	mockdcgm "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+)
+
+// TestNewGPUHealthStatusCollector checks the constructor for a disabled and an enabled
+// counter list. Assertions receive the subtest's t so failures are attributed to the case.
+func TestNewGPUHealthStatusCollector(t *testing.T) {
+	type testCase struct {
+		name                 string
+		counterList          counters.CounterList
+		setDCGMproviderState func(*mockdcgm.MockDCGM)
+		assertResult         func(*testing.T, Collector, error)
+	}
+
+	testCases := []testCase{
+		{
+			name:        "returns error when collector is disabled",
+			counterList: []counters.Counter{},
+			assertResult: func(t *testing.T, c Collector, err error) {
+				assert.Nil(t, c)
+				assert.Error(t, err)
+			},
+		},
+		{
+			name: "returns no errors, whe collector is enabled",
+			counterList: []counters.Counter{
+				{
+					FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+				},
+			},
+			setDCGMproviderState: func(mockDCGMProvider *mockdcgm.MockDCGM) {
+				mockDCGMProvider.EXPECT().DestroyGroup(gomock.Any()).Return(errors.New("boom!")).Times(2)
+				mockDCGMProvider.EXPECT().FieldGroupDestroy(gomock.Any()).Return(errors.New("boom!"))
+			},
+			assertResult: func(t *testing.T, c Collector, err error) {
+				assert.NotNil(t, c)
+				assert.NoError(t, err)
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Initialize the mock controller
+			ctrl := gomock.NewController(t)
+
+			mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+
+			realDCGM := dcgmprovider.Client()
+			defer dcgmprovider.SetClient(realDCGM)
+
+			dcgmprovider.SetClient(mockDCGMProvider)
+			if tc.setDCGMproviderState != nil {
+				tc.setDCGMproviderState(mockDCGMProvider)
+			}
+			setDefaultExpectationsForGPUHealthStatusCollectorMockDCGMProvider(t, mockDCGMProvider)
+
+			// Create a new collector
+			collector, err := NewGPUHealthStatusCollector(tc.counterList,
+				"",
+				&appconfig.Config{},
+				getDefaultDeviceWatchListForGPUHealthStatusCollectorMockDCGMProvider(ctrl),
+			)
+
+			tc.assertResult(t, collector, err)
+			if collector != nil {
+				// Cleanup the collector
+				assert.NotPanics(t, func() {
+					collector.Cleanup()
+				})
+			}
+		})
+	}
+}
+
+func setDefaultExpectationsForGPUHealthStatusCollectorMockDCGMProvider(t *testing.T, mockDCGMProvider *mockdcgm.MockDCGM) {
+	t.Helper()
+	mockDCGMProvider.EXPECT().GetSupportedDevices().Return([]uint{0}, nil).AnyTimes() // a single supported GPU with ID 0
+	mockDCGMProvider.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+		return strings.HasPrefix(x.(string), "gpu_health_monitor_")
+	})).Return(dcgm.GroupHandle{}, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().AddEntityToGroup(gomock.Any(), gomock.Any(), gomock.Eq(uint(0))).Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().HealthSet(gomock.Any(), gomock.Eq(dcgm.DCGM_HEALTH_WATCH_ALL)).Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).AnyTimes()
+	mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Eq(uint(0))).Return(dcgm.Device{}, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(dcgm.MigHierarchy_v2{}, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().CreateGroup(gomock.Cond(func(x any) bool {
+		return strings.HasPrefix(x.(string), "gpu-collector-group")
+	})).Return(dcgm.GroupHandle{}, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().AddEntityToGroup(gomock.Any(), gomock.Any(), gomock.Eq(uint(0))).Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().FieldGroupCreate(gomock.Cond(func(x any) bool {
+		return strings.HasPrefix(x.(string), "gpu-collector-fieldgroup")
+	}), gomock.Any()).Return(dcgm.FieldHandle{}, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().WatchFieldsWithGroupEx(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
+		Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().EntityGetLatestValues(gomock.Any(), gomock.Any(), gomock.Any()).
+		Return([]dcgm.FieldValue_v1{}, nil).AnyTimes()
+
+	healthCheckResponse := dcgm.HealthResponse{ // one failing THERMAL incident reported for GPU 0
+		OverallHealth: dcgm.DCGM_HEALTH_RESULT_FAIL,
+		Incidents: []dcgm.Incident{
+			{
+				System: dcgm.DCGM_HEALTH_WATCH_THERMAL,
+				Health: dcgm.DCGM_HEALTH_RESULT_FAIL,
+				Error: dcgm.DiagErrorDetail{
+					Message: "boom!",
+					Code:    dcgm.DCGM_FR_THERMAL_VIOLATIONS,
+				},
+				EntityInfo: dcgm.GroupEntityPair{
+					EntityGroupId: dcgm.FE_GPU,
+					EntityId:      uint(0),
+				},
+			},
+		},
+	}
+
+	mockDCGMProvider.EXPECT().HealthCheck(gomock.Any()).Return(healthCheckResponse, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().GetGroupInfo(gomock.Any()).Return(&dcgm.GroupInfo{
+		EntityList: []dcgm.GroupEntityPair{
+			{EntityId: uint(0), EntityGroupId: dcgm.FE_GPU},
+		},
+	}, nil).AnyTimes()
+}
+
+func getDefaultDeviceWatchListForGPUHealthStatusCollectorMockDCGMProvider(ctrl *gomock.Controller) devicewatchlistmanager.WatchList {
+	mockDeviceInfo := mockdeviceinfo.NewMockProvider(ctrl)
+	mockDeviceInfo.EXPECT().InfoType().Return(dcgm.FE_NONE).AnyTimes()
+	mockDeviceInfo.EXPECT().GOpts().Return(appconfig.DeviceOptions{Flex: true}).AnyTimes()
+	mockDeviceInfo.EXPECT().GPUCount().Return(uint(1)).AnyTimes() // the mock exposes exactly one GPU, at index 0
+	mockDeviceInfo.EXPECT().GPU(uint(0)).Return(mockGPU).AnyTimes()
+
+	return *devicewatchlistmanager.NewWatchList(mockDeviceInfo,
+		[]dcgm.Short{42},
+		[]dcgm.Short{524},
+		deviceWatcher,
+		int64(1))
+}
+
+// TestGPUHealthStatusCollector_GetMetrics_ErrorHandling exercises GetMetrics on the happy
+// path and when HealthCheck, GetGroupInfo or EntityGetLatestValues return an error.
+func TestGPUHealthStatusCollector_GetMetrics_ErrorHandling(t *testing.T) {
+	var counterList counters.CounterList = []counters.Counter{
+		{
+			FieldName: "DCGM_EXP_GPU_HEALTH_STATUS",
+		},
+		{
+			FieldName: "DCGM_FI_DRIVER_VERSION",
+			PromType:  "label",
+			FieldID:   dcgm.DCGM_FI_DEV_VGPU_DRIVER_VERSION,
+		},
+	}
+
+	type testCase struct {
+		name                 string
+		setDCGMproviderState func(*mockdcgm.MockDCGM)
+		assertResult         func(*testing.T, MetricsByCounter, error)
+	}
+
+	testCases := []testCase{
+		{
+			name: "returns Metrics without errors",
+			assertResult: func(t *testing.T, metrics MetricsByCounter, err error) {
+				require.NoError(t, err)
+				// We expect 1 metric: DCGM_EXP_GPU_HEALTH_STATUS
+				require.Len(t, metrics, 1)
+				// We get metric value with 0 index
+				metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)]
+				assert.Len(t, metricValues, len(gpuHealthChecks), "number of metric values doesn't match to number of healthchecks")
+
+				var thermalViolationsFound bool
+
+				for _, value := range metricValues {
+					healthWatch := value.Labels["health_watch"]
+					healthErrorCode := value.Labels["health_error_code"]
+					if healthWatch == "THERMAL" && healthErrorCode == "DCGM_FR_THERMAL_VIOLATIONS" {
+						assert.Equal(t, "20", value.Value)
+						thermalViolationsFound = true
+					} else {
+						assert.Equal(t, "0", value.Value)
+					}
+				}
+				assert.True(t, thermalViolationsFound, "expected DCGM_FR_THERMAL_VIOLATIONS error not found")
+			},
+		},
+
+		{
+			name: "When HealthCheck returns error",
+			setDCGMproviderState: func(mockDCGMProvider *mockdcgm.MockDCGM) {
+				// HealthCheck fails; this expectation is registered ahead of the default one.
+				mockDCGMProvider.EXPECT().HealthCheck(gomock.Any()).Return(dcgm.HealthResponse{},
+					errors.New("boom!"))
+			},
+			assertResult: func(t *testing.T, metrics MetricsByCounter, err error) {
+				assert.Error(t, err)
+				assert.Empty(t, metrics)
+			},
+		},
+		{
+			name: "When GetGroupInfo returns error",
+			setDCGMproviderState: func(mockDCGMProvider *mockdcgm.MockDCGM) {
+				mockDCGMProvider.EXPECT().GetGroupInfo(gomock.Any()).Return(nil, errors.New("boom!")).AnyTimes()
+			},
+			assertResult: func(t *testing.T, metrics MetricsByCounter, err error) {
+				assert.Error(t, err)
+				assert.Empty(t, metrics)
+			},
+		},
+		{
+			name: "When EntityGetLatestValues returns error",
+			setDCGMproviderState: func(mockDCGMProvider *mockdcgm.MockDCGM) {
+				mockDCGMProvider.EXPECT().EntityGetLatestValues(gomock.Any(), gomock.Any(), gomock.Any()).
+					Return([]dcgm.FieldValue_v1{}, errors.New("boom!")).AnyTimes()
+			},
+			assertResult: func(t *testing.T, metrics MetricsByCounter, err error) {
+				assert.Error(t, err)
+				assert.Empty(t, metrics)
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Initialize the mock controller
+			ctrl := gomock.NewController(t)
+
+			mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+
+			realDCGM := dcgmprovider.Client()
+			defer dcgmprovider.SetClient(realDCGM)
+
+			dcgmprovider.SetClient(mockDCGMProvider)
+
+			// We need to set new expectations, and then set the default ones.
+			if tc.setDCGMproviderState != nil {
+				tc.setDCGMproviderState(mockDCGMProvider)
+			}
+
+			setDefaultExpectationsForGPUHealthStatusCollectorMockDCGMProvider(t, mockDCGMProvider)
+
+			// Create a new collector
+			collector, err := NewGPUHealthStatusCollector(counterList,
+				"",
+				&appconfig.Config{
+					UseOldNamespace: true,
+				},
+				getDefaultDeviceWatchListForGPUHealthStatusCollectorMockDCGMProvider(ctrl),
+			)
+
+			require.NoError(t, err)
+
+			metrics, err := collector.GetMetrics()
+
+			tc.assertResult(t, metrics, err)
+
+			ctrl.Finish() // This will finish the current controller
+		})
+	}
+}
+
+func TestIsDCGMExpGPUHealthStatusEnabled(t *testing.T) {
+	tests := []struct {
+		name string
+		arg  counters.CounterList
+		want bool
+	}{
+		{
+			name: "empty",
+			arg:  counters.CounterList{},
+			want: false,
+		},
+		{
+			name: "counter event count disabled",
+			arg: counters.CounterList{
+				counters.Counter{
+					FieldID:   1,
+					FieldName: "random1",
+				},
+				counters.Counter{
+					FieldID:   2,
+					FieldName: "random2",
+				},
+			},
+			want: false,
+		},
+		{
+			name: "counter event count enabled",
+			arg: counters.CounterList{
+				counters.Counter{
+					FieldID:   1,
+					FieldName: counters.DCGMExpGPUHealthStatus,
+				},
+				counters.Counter{
+					FieldID:   2,
+					FieldName: "random2",
+				},
+			},
+			want: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equalf(t, tt.want, IsDCGMExpGPUHealthStatusEnabled(tt.arg), "unexpected response")
+		})
+	}
+}
+
+func TestHealthSystemWatchToString(t *testing.T) {
+	type testCase struct {
+		name        string
+		heathSystem dcgm.HealthSystem
+		expected    string
+	}
+
+	testCases := []testCase{
+		{
+			name:        "returns POWER when dcgm.DCGM_HEALTH_WATCH_POWER",
+			heathSystem: dcgm.DCGM_HEALTH_WATCH_POWER,
+			expected:    "POWER",
+		},
+		{
+			name:        "returns empty string when dcgm.HealthSystem is unknown",
+			heathSystem: dcgm.HealthSystem(100500),
+			expected:    "",
+		},
+	}
+
+	for _, tc := range testCases {
+		actual := healthSystemWatchToString(tc.heathSystem)
+		assert.Equal(t, tc.expected, actual)
+	}
+}
diff --git a/internal/pkg/collector/types.go b/internal/pkg/collector/types.go
new file mode 100644
index 00000000..499b0863
--- /dev/null
+++ b/internal/pkg/collector/types.go
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+)
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/collector/mock_collector.go -package=collector -copyright_file=../../../hack/header.txt . Collector
+
+// Collector produces metrics grouped by counter; Cleanup lets an implementation release what it acquired.
+type Collector interface {
+	GetMetrics() (MetricsByCounter, error)
+	Cleanup()
+}
+
+type EntityCollectorTuple struct { // pairs a DCGM entity group with a Collector
+	entity    dcgm.Field_Entity_Group
+	collector Collector
+}
+
+func (e *EntityCollectorTuple) SetEntity(entity dcgm.Field_Entity_Group) {
+	e.entity = entity // plain setter, no validation
+}
+
+func (e *EntityCollectorTuple) Entity() dcgm.Field_Entity_Group {
+	return e.entity
+}
+
+func (e *EntityCollectorTuple) SetCollector(collector Collector) {
+	e.collector = collector // plain setter; nil is stored as-is
+}
+
+func (e *EntityCollectorTuple) Collector() Collector {
+	return e.collector
+}
+
+type Metric struct {
+	Counter counters.Counter
+	Value   string
+
+	GPU          string
+	GPUUUID      string // returned by GetIDOfType for appconfig.GPUUID
+	GPUDevice    string // returned by GetIDOfType for appconfig.DeviceName
+	GPUModelName string
+	GPUPCIBusID  string
+
+	UUID string
+
+	MigProfile    string // non-empty makes GetIDOfType return "<GPU>-<GPUInstanceID>"
+	GPUInstanceID string
+	Hostname      string
+
+	Labels     map[string]string
+	Attributes map[string]string
+}
+
+func (m Metric) GetIDOfType(idType appconfig.KubernetesGPUIDType) (string, error) {
+	// For MIG devices (MigProfile set) the ID is "<GPU>-<GPUInstanceID>", regardless of idType.
+	if m.MigProfile != "" {
+		return fmt.Sprintf("%s-%s", m.GPU, m.GPUInstanceID), nil
+	}
+	switch idType {
+	case appconfig.GPUUID:
+		return m.GPUUUID, nil
+	case appconfig.DeviceName:
+		return m.GPUDevice, nil
+	}
+	return "", fmt.Errorf("unsupported KubernetesGPUIDType for MetricID '%s'", idType)
+}
+
+// MetricsByCounter maps each Counter to the slice of Metric values collected for it.
+type MetricsByCounter map[counters.Counter][]Metric
diff --git a/pkg/dcgmexporter/os.go b/internal/pkg/collector/variables.go
similarity index 97%
rename from pkg/dcgmexporter/os.go
rename to internal/pkg/collector/variables.go
index da351ca4..b88dd531 100644
--- a/pkg/dcgmexporter/os.go
+++ b/internal/pkg/collector/variables.go
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package collector
 
 import osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
 
diff --git a/internal/pkg/collector/xid_collector.go b/internal/pkg/collector/xid_collector.go
new file mode 100644
index 00000000..e5271471
--- /dev/null
+++ b/internal/pkg/collector/xid_collector.go
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+	"log/slog"
+	"slices"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+)
+
+type xidCollector struct { // Collector for the XID errors count metric; built by NewXIDCollector
+	expCollector // embedded; provides getMetrics and the fields NewXIDCollector fills in
+}
+
+func (c *xidCollector) GetMetrics() (MetricsByCounter, error) {
+	return c.expCollector.getMetrics() // pure delegation to the embedded collector
+}
+
+// NewXIDCollector creates the Collector behind the DCGM_EXP_XID_ERRORS_COUNT exporter counter.
+// It returns an error when counterList does not contain that counter or when newExpCollector
+// fails. deviceWatchList is passed by value and gets DCGM_FI_DEV_XID_ERRORS as its device fields.
+func NewXIDCollector(
+	counterList counters.CounterList,
+	hostname string,
+	config *appconfig.Config,
+	deviceWatchList devicewatchlistmanager.WatchList,
+) (Collector, error) {
+	const disabledMsg = counters.DCGMExpXIDErrorsCount + " collector is disabled"
+	if !IsDCGMExpXIDErrorsCountEnabled(counterList) {
+		slog.Error(disabledMsg)
+		return nil, fmt.Errorf(disabledMsg)
+	}
+
+	// Restrict the watch list's device fields to DCGM_FI_DEV_XID_ERRORS before building the collector.
+	deviceWatchList.SetDeviceFields([]dcgm.Short{dcgm.DCGM_FI_DEV_XID_ERRORS})
+
+	var err error
+	xc := &xidCollector{}
+	xc.expCollector, err = newExpCollector(counterList.LabelCounters(), hostname, config, deviceWatchList)
+	if err != nil {
+		return nil, err
+	}
+
+	// The guard at the top guarantees a match, so the index is never -1 here.
+	xidIdx := slices.IndexFunc(counterList, func(c counters.Counter) bool {
+		return c.FieldName == counters.DCGMExpXIDErrorsCount
+	})
+	xc.counter = counterList[xidIdx]
+	xc.windowSize = config.XIDCountWindowSize
+	xc.labelFiller = func(metricValueLabels map[string]string, entityValue int64) {
+		metricValueLabels["xid"] = fmt.Sprint(entityValue)
+	}
+
+	return xc, nil
+}
+
+// IsDCGMExpXIDErrorsCountEnabled reports whether counterList contains the XID errors count counter.
+func IsDCGMExpXIDErrorsCountEnabled(counterList counters.CounterList) bool {
+	isXID := func(c counters.Counter) bool { return c.FieldName == counters.DCGMExpXIDErrorsCount }
+	return slices.ContainsFunc(counterList, isXID)
+}
diff --git a/internal/pkg/collector/xid_collector_test.go b/internal/pkg/collector/xid_collector_test.go
new file mode 100644
index 00000000..9bd436bd
--- /dev/null
+++ b/internal/pkg/collector/xid_collector_test.go
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package collector
+
+import (
+	"fmt"
+	"slices"
+	"testing"
+	"time"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/mock/gomock"
+
+	mockdcgm "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	mockdevicewatcher "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/devicewatcher"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+)
+
+func TestIsDCGMExpXIDErrorsCountEnabled(t *testing.T) { // table-driven check of the lookup by FieldName
+	tests := []struct {
+		name string
+		arg  counters.CounterList
+		want bool
+	}{
+		{
+			name: "empty",
+			arg:  counters.CounterList{},
+			want: false,
+		},
+		{
+			name: "counter disabled", // counters present, none named DCGMExpXIDErrorsCount
+			arg: counters.CounterList{
+				counters.Counter{
+					FieldID:   1,
+					FieldName: "random1",
+				},
+				counters.Counter{
+					FieldID:   2,
+					FieldName: "random2",
+				},
+			},
+			want: false,
+		},
+		{
+			name: "counter enabled", // one counter carries the DCGMExpXIDErrorsCount name
+			arg: counters.CounterList{
+				counters.Counter{
+					FieldID:   1,
+					FieldName: counters.DCGMExpXIDErrorsCount,
+				},
+				counters.Counter{
+					FieldID:   2,
+					FieldName: "random2",
+				},
+			},
+			want: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equalf(t, tt.want, IsDCGMExpXIDErrorsCountEnabled(tt.arg), "unexpected response")
+		})
+	}
+}
+
+func TestNewXIDCollector(t *testing.T) { // covers the disabled counter, a failing watcher and two success cases
+	ctrl := gomock.NewController(t)
+	mockDeviceWatcher := mockdevicewatcher.NewMockWatcher(ctrl) // shared by all sub-tests; expectations are added per case
+
+	sampleDeviceInfo := &deviceinfo.Info{}
+	sampleDeviceFields := []dcgm.Short{42}
+	sampleCollectorInterval := int64(1)
+	sampleConfig := appconfig.Config{}
+	sampleHostname := "localhost"
+	var sampleCleanups []func() // nil slice; returned by the mocked WatchDeviceFields
+
+	sampleDCGMExpXIDCounter := counters.Counter{
+		FieldID:   1,
+		FieldName: counters.DCGMExpXIDErrorsCount,
+	}
+
+	sampleOtherCounter := counters.Counter{
+		FieldID:   2,
+		FieldName: "random2",
+	}
+
+	sampleLabelCounter := counters.Counter{
+		FieldID:   3,
+		FieldName: "random2",
+		PromType:  "label",
+	}
+
+	type args struct {
+		counterList     counters.CounterList
+		hostname        string
+		config          *appconfig.Config
+		deviceWatchList *devicewatchlistmanager.WatchList
+	}
+	tests := []struct {
+		name       string
+		args       args
+		conditions func(watcher *mockdevicewatcher.MockWatcher) // registers the mock expectations of the case
+		want       func(string, *appconfig.Config, devicewatchlistmanager.WatchList) Collector
+		wantErr    bool
+	}{
+		{
+			name: "counter is disabled ",
+			args: args{
+				counterList:     counters.CounterList{},
+				hostname:        sampleHostname,
+				config:          nil,
+				deviceWatchList: &devicewatchlistmanager.WatchList{},
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {},
+			want: func(
+				_ string, _ *appconfig.Config,
+				_ devicewatchlistmanager.WatchList,
+			) Collector {
+				return nil
+			},
+			wantErr: true,
+		},
+		{
+			name: "new XID collector watcher fails",
+			args: args{
+				counterList: counters.CounterList{
+					sampleDCGMExpXIDCounter,
+					sampleOtherCounter,
+					sampleLabelCounter,
+				},
+				hostname: sampleHostname,
+				config:   &sampleConfig,
+				deviceWatchList: devicewatchlistmanager.NewWatchList(sampleDeviceInfo, sampleDeviceFields, nil,
+					mockDeviceWatcher, sampleCollectorInterval),
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return(nil,
+					dcgm.FieldHandle{},
+					sampleCleanups, fmt.Errorf("some error"))
+			},
+			want: func(
+				_ string, _ *appconfig.Config,
+				_ devicewatchlistmanager.WatchList,
+			) Collector {
+				return nil
+			},
+			wantErr: true,
+		},
+		{
+			name: "new XID collector ",
+			args: args{
+				counterList: counters.CounterList{
+					sampleDCGMExpXIDCounter,
+					sampleOtherCounter,
+					sampleLabelCounter,
+				},
+				hostname: sampleHostname,
+				config:   &sampleConfig,
+				deviceWatchList: devicewatchlistmanager.NewWatchList(sampleDeviceInfo, sampleDeviceFields, nil,
+					mockDeviceWatcher, sampleCollectorInterval),
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return(nil,
+					dcgm.FieldHandle{},
+					sampleCleanups, nil)
+			},
+			want: func(
+				hostname string, config *appconfig.Config,
+				deviceWatchList devicewatchlistmanager.WatchList,
+			) Collector {
+				deviceWatchList.SetDeviceFields([]dcgm.Short{dcgm.DCGM_FI_DEV_XID_ERRORS})
+				return &xidCollector{
+					expCollector{
+						baseExpCollector: baseExpCollector{
+							deviceWatchList: deviceWatchList,
+							counter:         sampleDCGMExpXIDCounter,
+							labelsCounters:  []counters.Counter{sampleLabelCounter},
+							hostname:        hostname,
+							config:          config,
+							cleanups:        sampleCleanups,
+						},
+						windowSize: config.XIDCountWindowSize,
+					},
+				}
+			},
+			wantErr: false,
+		},
+		{
+			name: "new XID collector with no label counters",
+			args: args{
+				counterList: counters.CounterList{
+					sampleDCGMExpXIDCounter,
+					sampleOtherCounter,
+				},
+				hostname: sampleHostname,
+				config:   &sampleConfig,
+				deviceWatchList: devicewatchlistmanager.NewWatchList(sampleDeviceInfo, sampleDeviceFields, nil,
+					mockDeviceWatcher, sampleCollectorInterval),
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher) {
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return(nil,
+					dcgm.FieldHandle{},
+					sampleCleanups, nil)
+			},
+			want: func(
+				hostname string, config *appconfig.Config,
+				deviceWatchList devicewatchlistmanager.WatchList,
+			) Collector {
+				deviceWatchList.SetDeviceFields([]dcgm.Short{dcgm.DCGM_FI_DEV_XID_ERRORS})
+				return &xidCollector{
+					expCollector{
+						baseExpCollector: baseExpCollector{
+							deviceWatchList: deviceWatchList,
+							counter:         sampleDCGMExpXIDCounter,
+							labelsCounters:  nil,
+							hostname:        hostname,
+							config:          config,
+							cleanups:        sampleCleanups,
+						},
+						windowSize: config.XIDCountWindowSize,
+					},
+				}
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.conditions(mockDeviceWatcher) // expectations must be registered before the constructor runs
+
+			got, err := NewXIDCollector(tt.args.counterList, tt.args.hostname, tt.args.config,
+				*tt.args.deviceWatchList)
+			want := tt.want(tt.args.hostname, tt.args.config, *tt.args.deviceWatchList) // expected collector from the same inputs
+
+			if !tt.wantErr {
+				assert.NoError(t, err, "unexpected error")
+
+				wantAttrs := testutils.GetFields(&want.(*xidCollector).expCollector, testutils.Fields)
+				gotAttrs := testutils.GetFields(&got.(*xidCollector).expCollector, testutils.Fields)
+				assert.Equal(t, wantAttrs, gotAttrs, "unexpected result")
+
+				gotFuncAttrs := testutils.GetFields(&got.(*xidCollector).expCollector, testutils.Functions)
+				for functionName, value := range gotFuncAttrs {
+					assert.NotNilf(t, value, "unexpected %s to be not nil", functionName)
+				}
+			} else {
+				assert.Error(t, err, "expected error")
+				assert.Equal(t, want, got, "unexpected result")
+			}
+		})
+	}
+}
+
+// sortXIDMetrics sorts metrics in place by GPU and then by the "xid" label, both compared as
+// strings, so that expected and actual slices can be compared element by element.
+func sortXIDMetrics(metrics []Metric) {
+	slices.SortFunc(metrics, func(a, b Metric) int {
+		// Lexicographic three-way comparison of the (GPU, xid) keys. Equal keys yield 0, which
+		// SortFunc requires: its cmp must be a strict weak ordering.
+		return slices.Compare(
+			[]string{a.GPU, a.Labels["xid"]},
+			[]string{b.GPU, b.Labels["xid"]},
+		)
+	})
+}
+
+// xidMetricsCreator builds the Metric a test expects from the XID collector for one GPU/XID pair.
+// Only fields the expectation depends on are set; the rest keep their zero values. The label set
+// holds a "0" window size, the mocked label field and the XID rendered in decimal.
+func xidMetricsCreator(
+	counter counters.Counter, gpuID uint, value, hostname, mockFieldName,
+	mockFieldLabelValue string, mockXID uint64,
+) Metric {
+	labels := map[string]string{
+		windowSizeInMSLabel: "0",
+		mockFieldName:       mockFieldLabelValue,
+		"xid":               fmt.Sprint(mockXID),
+	}
+	return Metric{
+		Counter:    counter,
+		Value:      value,
+		GPU:        fmt.Sprint(gpuID),
+		GPUDevice:  fmt.Sprintf("nvidia%d", gpuID),
+		UUID:       "UUID",
+		Hostname:   hostname,
+		Labels:     labels,
+		Attributes: map[string]string{},
+	}
+}
+
+func Test_xidCollector_GetMetrics(t *testing.T) {
+	// Mock DCGM: install a mock client globally and restore the real one when the test returns.
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+	mockDeviceWatcher := mockdevicewatcher.NewMockWatcher(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM)
+	}()
+	dcgmprovider.SetClient(mockDCGM)
+
+	// Mock counters: the XID exporter counter, an unrelated counter and a label counter.
+	mockDCGMXIDCounter := counters.Counter{
+		FieldID:   1,
+		FieldName: counters.DCGMExpXIDErrorsCount,
+	}
+
+	mockOtherCounter := counters.Counter{
+		FieldID:   2,
+		FieldName: "random2",
+	}
+
+	mockLabelDeviceField := dcgm.Short(3)
+	mockFieldName := "random3"
+	mockLabelValue := "this is mock label"
+	mockLabelCounter := counters.Counter{
+		FieldID:   mockLabelDeviceField,
+		FieldName: mockFieldName,
+		PromType:  "label",
+	}
+
+	// Mock device info: created by testutils.MockGPUDeviceInfo(ctrl, 2, nil); GOpts returns Flex=true.
+	gOpts := appconfig.DeviceOptions{
+		Flex: true,
+	}
+
+	mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+	mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+	// Other mock inputs: GPU IDs, handles and the values returned for the label field.
+	gpuID1 := uint(0)
+	gpuID2 := uint(1)
+
+	mockDeviceFields := []dcgm.Short{42}
+	mockCollectorInterval := int64(1)
+	mockConfig := appconfig.Config{}
+	mockHostname := "localhost"
+	var mockCleanups []func()
+
+	mockGroupHandle1 := dcgm.GroupHandle{}
+	mockGroupHandle1.SetHandle(uintptr(1))
+
+	mockGroupHandle2 := dcgm.GroupHandle{}
+	mockGroupHandle2.SetHandle(uintptr(2))
+
+	mockFieldGroupHandle := dcgm.FieldHandle{}
+	mockFieldGroupHandle.SetHandle(uintptr(1))
+
+	mockLatestValues := []dcgm.FieldValue_v1{
+		{
+			FieldId:   150,
+			FieldType: dcgm.DCGM_FT_INT64,
+			Value:     [4096]byte{42},
+		},
+		{
+			FieldId:   uint(mockLabelDeviceField),
+			FieldType: dcgm.DCGM_FT_STRING,
+			Value:     testutils.StrToByteArray(mockLabelValue),
+		},
+		{
+			FieldId:   uint(mockLabelDeviceField),
+			FieldType: dcgm.DCGM_FT_STRING,
+			Value:     testutils.StrToByteArray(dcgm.DCGM_FT_STR_NOT_FOUND),
+		},
+	}
+
+	tests := []struct {
+		name       string
+		collector  func() Collector
+		conditions func(*mockdevicewatcher.MockWatcher, byte, byte)
+		want       func() (MetricsByCounter, byte, byte)
+		wantErr    bool
+	}{
+		{
+			name: "XID collector with single XID event",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMXIDCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewXIDCollector(counterList, mockHostname, &mockConfig, *deviceWatchList) // NOTE(review): error is discarded
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, gpu1Value, gpu2Value byte) {
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: gpuID1, Value: [4096]byte{gpu1Value}},
+					{EntityId: gpuID2, Value: [4096]byte{gpu2Value}},
+				}
+
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return(mockEntitiesResult, time.Time{}, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID1,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID2,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				mockXIDErr1 := uint64(42)
+				mockXIDErr2 := uint64(46)
+
+				return MetricsByCounter{
+					mockDCGMXIDCounter: []Metric{
+						xidMetricsCreator(mockDCGMXIDCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockXIDErr1),
+						xidMetricsCreator(mockDCGMXIDCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockXIDErr2),
+					},
+				}, byte(mockXIDErr1), byte(mockXIDErr2)
+			},
+			wantErr: false,
+		},
+		{
+			name: "xid collector with multiple events",
+			collector: func() Collector {
+				counterList := counters.CounterList{
+					mockDCGMXIDCounter,
+					mockOtherCounter,
+					mockLabelCounter,
+				}
+				deviceWatchList := devicewatchlistmanager.NewWatchList(mockGPUDeviceInfo, mockDeviceFields,
+					[]dcgm.Short{mockLabelDeviceField}, mockDeviceWatcher, mockCollectorInterval)
+
+				collector, _ := NewXIDCollector(counterList, mockHostname, &mockConfig, *deviceWatchList) // NOTE(review): error is discarded
+				return collector
+			},
+			conditions: func(watcher *mockdevicewatcher.MockWatcher, xidErr1, xidErr2 byte) {
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: gpuID1, Value: [4096]byte{xidErr1}},
+					{EntityId: gpuID1, Value: [4096]byte{xidErr1}},
+					{EntityId: gpuID1, Value: [4096]byte{xidErr2}},
+					{EntityId: gpuID2, Value: [4096]byte{xidErr1}},
+					{EntityId: gpuID2, Value: [4096]byte{xidErr2}},
+					{EntityId: gpuID2, Value: [4096]byte{xidErr2}},
+					{EntityId: gpuID2, Value: [4096]byte{xidErr2}},
+				}
+
+				watcher.EXPECT().WatchDeviceFields(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.GroupHandle{mockGroupHandle1},
+					mockFieldGroupHandle,
+					mockCleanups, nil)
+
+				mockDCGM.EXPECT().UpdateAllFields().Return(nil)
+				mockDCGM.EXPECT().GetValuesSince(mockGroupHandle1, mockFieldGroupHandle,
+					gomock.AssignableToTypeOf(time.Time{})).Return(mockEntitiesResult, time.Time{}, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID1,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+				mockDCGM.EXPECT().EntityGetLatestValues(dcgm.FE_GPU, gpuID2,
+					[]dcgm.Short{mockLabelDeviceField}).Return(mockLatestValues, nil)
+			},
+			want: func() (MetricsByCounter, byte, byte) {
+				mockXIDErr1 := uint64(42)
+				mockXIDErr2 := uint64(46)
+
+				return MetricsByCounter{
+					mockDCGMXIDCounter: []Metric{
+						xidMetricsCreator(mockDCGMXIDCounter, gpuID1, "2", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockXIDErr1),
+						xidMetricsCreator(mockDCGMXIDCounter, gpuID1, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockXIDErr2),
+						xidMetricsCreator(mockDCGMXIDCounter, gpuID2, "1", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockXIDErr1),
+						xidMetricsCreator(mockDCGMXIDCounter, gpuID2, "3", mockHostname,
+							mockFieldName,
+							mockLabelValue, mockXIDErr2),
+					},
+				}, byte(mockXIDErr1), byte(mockXIDErr2)
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			want, gpu1Value, gpu2Value := tt.want() // want also supplies the XID bytes fed to the mocks
+			tt.conditions(mockDeviceWatcher, gpu1Value, gpu2Value)
+			c := tt.collector() // nil if NewXIDCollector failed, in which case GetMetrics below panics
+
+			got, err := c.GetMetrics()
+
+			if !tt.wantErr { // NOTE(review): nothing is asserted when wantErr is true
+				assert.NoError(t, err, "GetMetrics() failed")
+				assert.NotEmpty(t, got)
+
+				wantMetrics := want[mockDCGMXIDCounter]
+				gotMetrics := got[mockDCGMXIDCounter]
+
+				assert.Len(t, gotMetrics, len(wantMetrics), "GetMetrics() returned wrong number of metrics")
+
+				sortXIDMetrics(wantMetrics) // sort both sides so the comparison ignores result order
+				sortXIDMetrics(gotMetrics)
+
+				assert.Equalf(t, wantMetrics, gotMetrics, "GetMetrics()")
+			}
+		})
+	}
+}
diff --git a/pkg/dcgmexporter/xid_errors.go b/internal/pkg/collector/xid_errors.go
similarity index 99%
rename from pkg/dcgmexporter/xid_errors.go
rename to internal/pkg/collector/xid_errors.go
index 64be5363..b3e0cca4 100644
--- a/pkg/dcgmexporter/xid_errors.go
+++ b/internal/pkg/collector/xid_errors.go
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package collector
 
 // Based on this doc: https://docs.nvidia.com/deploy/xid-errors/#topic_4
 var xidErrCodeToText = []string{
diff --git a/internal/pkg/counters/const.go b/internal/pkg/counters/const.go
new file mode 100644
index 00000000..e6fb310f
--- /dev/null
+++ b/internal/pkg/counters/const.go
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package counters
+
+const (
+	undefinedConfigMapData = "none" // ConfigMapData value for which GetCounterSet falls back to the collectors file
+
+	cpuFieldsStart = 1100 // fieldIsSupported treats field IDs >= this as always supported
+	dcpFieldsStart = 1000 // fieldIsSupported treats field IDs < this as always supported
+
+	DCGMExpClockEventsCount = "DCGM_EXP_CLOCK_EVENTS_COUNT" // name returned by DCGMClockEventsCount.String()
+	DCGMExpXIDErrorsCount   = "DCGM_EXP_XID_ERRORS_COUNT"   // name returned by DCGMXIDErrorsCount.String()
+	DCGMExpGPUHealthStatus  = "DCGM_EXP_GPU_HEALTH_STATUS"  // name returned by DCGMGPUHealthStatus.String()
+)
diff --git a/pkg/dcgmexporter/parser.go b/internal/pkg/counters/counter_config.go
similarity index 74%
rename from pkg/dcgmexporter/parser.go
rename to internal/pkg/counters/counter_config.go
index f25036ab..8eb7050f 100644
--- a/pkg/dcgmexporter/parser.go
+++ b/internal/pkg/counters/counter_config.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,29 +14,25 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package counters
 
 import (
 	"context"
 	"encoding/csv"
 	"fmt"
+	"log/slog"
 	"strings"
 
 	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
-
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
-)
 
-const (
-	cpuFieldsStart = 1100
-	dcpFieldsStart = 1000
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
 )
 
-func GetCounterSet(c *Config) (*CounterSet, error) {
+func GetCounterSet(c *appconfig.Config) (*CounterSet, error) {
 	var (
 		err     error
 		records [][]string
@@ -48,27 +44,29 @@ func GetCounterSet(c *Config) (*CounterSet, error) {
 		var client kubernetes.Interface
 		client, err = getKubeClient()
 		if err != nil {
-			logrus.Fatal(err)
+			slog.Error(err.Error())
+			os.Exit(1)
 		}
 		records, err = readConfigMap(client, c)
 		if err != nil {
-			logrus.Fatal(err)
+			slog.Error(err.Error())
+			os.Exit(1)
 		}
 	} else {
 		err = fmt.Errorf("no configmap data specified")
 	}
 
 	if err != nil || c.ConfigMapData == undefinedConfigMapData {
-		logrus.Infof("Falling back to metric file '%s'", c.CollectorsFile)
+		slog.Info(fmt.Sprintf("Falling back to metric file '%s'", c.CollectorsFile))
 
 		records, err = ReadCSVFile(c.CollectorsFile)
 		if err != nil {
-			logrus.Errorf("Could not read metrics file '%s'; err: %v", c.CollectorsFile, err)
+			slog.Error(fmt.Sprintf("Could not read metrics file '%s'; err: %v", c.CollectorsFile, err))
 			return res, err
 		}
 	}
 
-	res, err = extractCounters(records, c)
+	res, err = ExtractCounters(records, c)
 	if err != nil {
 		return res, err
 	}
@@ -91,7 +89,7 @@ func ReadCSVFile(filename string) ([][]string, error) {
 	return records, err
 }
 
-func extractCounters(records [][]string, c *Config) (*CounterSet, error) {
+func ExtractCounters(records [][]string, c *appconfig.Config) (*CounterSet, error) {
 	res := CounterSet{}
 
 	for i, record := range records {
@@ -118,7 +116,13 @@ func extractCounters(records [][]string, c *Config) (*CounterSet, error) {
 			if err != nil {
 				return nil, fmt.Errorf("could not find DCGM field; err: %w", err)
 			} else if expField != DCGMFIUnknown {
-				res.ExporterCounters = append(res.ExporterCounters, Counter{dcgm.Short(expField), record[0], record[1], record[2]})
+				res.ExporterCounters = append(res.ExporterCounters,
+					Counter{
+						FieldID:   dcgm.Short(expField),
+						FieldName: record[0],
+						PromType:  record[1],
+						Help:      record[2],
+					})
 				continue
 			}
 		}
@@ -129,7 +133,7 @@ func extractCounters(records [][]string, c *Config) (*CounterSet, error) {
 
 		if !useOld {
 			if !fieldIsSupported(uint(fieldID), c) {
-				logrus.Warnf("Skipping line %d ('%s'): metric not enabled", i, record[0])
+				slog.Warn(fmt.Sprintf("Skipping line %d ('%s'): metric not enabled", i, record[0]))
 				continue
 			}
 
@@ -137,10 +141,11 @@ func extractCounters(records [][]string, c *Config) (*CounterSet, error) {
 				return nil, fmt.Errorf("could not find Prometheus metric type '%s'", record[1])
 			}
 
-			res.DCGMCounters = append(res.DCGMCounters, Counter{fieldID, record[0], record[1], record[2]})
+			res.DCGMCounters = append(res.DCGMCounters,
+				Counter{FieldID: fieldID, FieldName: record[0], PromType: record[1], Help: record[2]})
 		} else {
 			if !fieldIsSupported(uint(oldFieldID), c) {
-				logrus.Warnf("Skipping line %d ('%s'): metric not enabled", i, record[0])
+				slog.Warn(fmt.Sprintf("Skipping line %d ('%s'): metric not enabled", i, record[0]))
 				continue
 			}
 
@@ -148,14 +153,15 @@ func extractCounters(records [][]string, c *Config) (*CounterSet, error) {
 				return nil, fmt.Errorf("could not find Prometheus metric type '%s'", record[1])
 			}
 
-			res.DCGMCounters = append(res.DCGMCounters, Counter{oldFieldID, record[0], record[1], record[2]})
+			res.DCGMCounters = append(res.DCGMCounters,
+				Counter{FieldID: oldFieldID, FieldName: record[0], PromType: record[1], Help: record[2]})
 		}
 	}
 
 	return &res, nil
 }
 
-func fieldIsSupported(fieldID uint, c *Config) bool {
+func fieldIsSupported(fieldID uint, c *appconfig.Config) bool {
 	if fieldID < dcpFieldsStart || fieldID >= cpuFieldsStart {
 		return true
 	}
@@ -175,7 +181,7 @@ func fieldIsSupported(fieldID uint, c *Config) bool {
 	return false
 }
 
-func readConfigMap(kubeClient kubernetes.Interface, c *Config) ([][]string, error) {
+func readConfigMap(kubeClient kubernetes.Interface, c *appconfig.Config) ([][]string, error) {
 	parts := strings.Split(c.ConfigMapData, ":")
 	if len(parts) != 2 {
 		return nil, fmt.Errorf("malformed configmap-data '%s'", c.ConfigMapData)
diff --git a/pkg/dcgmexporter/parser_test.go b/internal/pkg/counters/counter_config_test.go
similarity index 81%
rename from pkg/dcgmexporter/parser_test.go
rename to internal/pkg/counters/counter_config_test.go
index 0f00e25b..ca852517 100644
--- a/pkg/dcgmexporter/parser_test.go
+++ b/internal/pkg/counters/counter_config_test.go
@@ -1,4 +1,20 @@
-package dcgmexporter
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package counters
 
 import (
 	"testing"
@@ -7,6 +23,8 @@ import (
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes/fake"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
 )
 
 func TestEmptyConfigMap(t *testing.T) {
@@ -19,7 +37,7 @@ func TestEmptyConfigMap(t *testing.T) {
 		Data: map[string]string{"metrics": ""},
 	})
 
-	c := Config{
+	c := appconfig.Config{
 		ConfigMapData: "default:configmap1",
 	}
 	records, err := readConfigMap(clientset, &c)
@@ -37,7 +55,7 @@ func TestValidConfigMap(t *testing.T) {
 		Data: map[string]string{"metrics": "DCGM_FI_DEV_GPU_TEMP, gauge, temperature"},
 	})
 
-	c := Config{
+	c := appconfig.Config{
 		ConfigMapData: "default:configmap1",
 	}
 	records, err := readConfigMap(clientset, &c)
@@ -55,7 +73,7 @@ func TestInvalidConfigMapData(t *testing.T) {
 		Data: map[string]string{"bad": "DCGM_FI_DEV_GPU_TEMP, gauge, temperature"},
 	})
 
-	c := Config{
+	c := appconfig.Config{
 		ConfigMapData: "default:configmap1",
 	}
 	records, err := readConfigMap(clientset, &c)
@@ -72,7 +90,7 @@ func TestInvalidConfigMapName(t *testing.T) {
 		},
 	})
 
-	c := Config{
+	c := appconfig.Config{
 		ConfigMapData: "default:configmap1",
 	}
 	records, err := readConfigMap(clientset, &c)
@@ -89,7 +107,7 @@ func TestInvalidConfigMapNamespace(t *testing.T) {
 		},
 	})
 
-	c := Config{
+	c := appconfig.Config{
 		ConfigMapData: "default:configmap1",
 	}
 	records, err := readConfigMap(clientset, &c)
@@ -142,7 +160,7 @@ func extractCountersHelper(t *testing.T, input string, valid bool) {
 		t.Fatalf("Cannot close temp file: %v", err)
 	}
 
-	c := Config{
+	c := appconfig.Config{
 		ConfigMapData:  undefinedConfigMapData,
 		CollectorsFile: tmpFile.Name(),
 	}
diff --git a/pkg/dcgmexporter/exporter_metrics.go b/internal/pkg/counters/exporter_counters.go
similarity index 82%
rename from pkg/dcgmexporter/exporter_metrics.go
rename to internal/pkg/counters/exporter_counters.go
index ecf7ab7f..f4471083 100644
--- a/pkg/dcgmexporter/exporter_metrics.go
+++ b/internal/pkg/counters/exporter_counters.go
@@ -14,30 +14,28 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package counters
 
 import "fmt"
 
-const (
-	dcgmExpClockEventsCount = "DCGM_EXP_CLOCK_EVENTS_COUNT"
-	dcgmExpXIDErrorsCount   = "DCGM_EXP_XID_ERRORS_COUNT"
-)
-
 type ExporterCounter uint16
 
 const (
 	DCGMFIUnknown        ExporterCounter = 0
 	DCGMXIDErrorsCount   ExporterCounter = iota + 9000
 	DCGMClockEventsCount ExporterCounter = iota + 9000
+	DCGMGPUHealthStatus  ExporterCounter = iota + 9000
 )
 
 // String method to convert the enum value to a string
 func (enm ExporterCounter) String() string {
 	switch enm {
 	case DCGMXIDErrorsCount:
-		return dcgmExpXIDErrorsCount
+		return DCGMExpXIDErrorsCount
 	case DCGMClockEventsCount:
-		return dcgmExpClockEventsCount
+		return DCGMExpClockEventsCount
+	case DCGMGPUHealthStatus:
+		return DCGMExpGPUHealthStatus
 	default:
 		return "DCGM_FI_UNKNOWN"
 	}
@@ -47,13 +45,14 @@ func (enm ExporterCounter) String() string {
 var DCGMFields = map[string]ExporterCounter{
 	DCGMXIDErrorsCount.String():   DCGMXIDErrorsCount,
 	DCGMClockEventsCount.String(): DCGMClockEventsCount,
+	DCGMGPUHealthStatus.String():  DCGMGPUHealthStatus,
 	DCGMFIUnknown.String():        DCGMFIUnknown,
 }
 
 func IdentifyMetricType(s string) (ExporterCounter, error) {
 	mv, ok := DCGMFields[s]
 	if !ok {
-		return mv, fmt.Errorf("Unknown ExporterCounter field '%s'", s)
+		return mv, fmt.Errorf("unknown ExporterCounter field '%s'", s)
 	}
 	return mv, nil
 }
diff --git a/pkg/dcgmexporter/exporter_metrics_test.go b/internal/pkg/counters/exporter_counters_test.go
similarity index 95%
rename from pkg/dcgmexporter/exporter_metrics_test.go
rename to internal/pkg/counters/exporter_counters_test.go
index 60710393..3e216ee0 100644
--- a/pkg/dcgmexporter/exporter_metrics_test.go
+++ b/internal/pkg/counters/exporter_counters_test.go
@@ -5,7 +5,7 @@
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package counters
 
 import (
 	"testing"
diff --git a/internal/pkg/counters/types.go b/internal/pkg/counters/types.go
new file mode 100644
index 00000000..2da015c8
--- /dev/null
+++ b/internal/pkg/counters/types.go
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package counters
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+)
+
+// Counter bundles a DCGM field ID with its field name, a PromType string and a
+// Help string.
+type Counter struct {
+	FieldID   dcgm.Short
+	FieldName string
+	PromType  string // the value "label" marks this counter as a label (see IsLabel)
+	Help      string
+}
+
+// IsLabel reports whether the counter's PromType is exactly "label".
+func (c Counter) IsLabel() bool {
+	return c.PromType == "label"
+}
+
+// CounterList is an ordered collection of Counter values.
+type CounterList []Counter
+
+// LabelCounters returns the counters in c for which IsLabel reports true, in
+// their original order. The result is nil when no counter qualifies.
+func (c CounterList) LabelCounters() CounterList {
+	var labels CounterList
+	for i := range c {
+		if !c[i].IsLabel() {
+			continue
+		}
+		labels = append(labels, c[i])
+	}
+	return labels
+}
+
+// CounterSet groups two counter lists: DCGMCounters and ExporterCounters.
+type CounterSet struct {
+	DCGMCounters     CounterList
+	ExporterCounters CounterList
+}
diff --git a/internal/pkg/counters/variables.go b/internal/pkg/counters/variables.go
new file mode 100644
index 00000000..95bcbd6f
--- /dev/null
+++ b/internal/pkg/counters/variables.go
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package counters
+
+import osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
+
+// os is this package's OS abstraction and defaults to osinterface.RealOS. Within
+// the package the identifier shadows the name of the standard library "os" package.
+// NOTE(review): presumably a variable so tests can substitute a fake — confirm.
+var os osinterface.OS = osinterface.RealOS{}
+
+// promMetricType is a set of type strings, each mapped to true.
+// NOTE(review): presumably used to validate Counter.PromType values; the lookup
+// site is not visible in this file — confirm against the caller.
+var promMetricType = map[string]bool{
+	"gauge":     true,
+	"counter":   true,
+	"histogram": true,
+	"summary":   true,
+	"label":     true,
+}
diff --git a/internal/pkg/dcgmprovider/dcgm.go b/internal/pkg/dcgmprovider/dcgm.go
new file mode 100644
index 00000000..175bd698
--- /dev/null
+++ b/internal/pkg/dcgmprovider/dcgm.go
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package dcgmprovider
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"time"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+)
+
+// dcgmInterface holds the package-wide DCGM client. It is read and written by the
+// functions below without any locking in this file.
+var dcgmInterface DCGM
+
+// Initialize sets up the Singleton DCGM interface using the provided configuration.
+// It stores whatever newDCGMProvider returns, which is the already-registered
+// client when one exists.
+func Initialize(config *appconfig.Config) {
+	dcgmInterface = newDCGMProvider(config)
+}
+
+// reset clears the current DCGM interface instance by setting it to nil.
+func reset() {
+	dcgmInterface = nil
+}
+
+// Client retrieves the current DCGM interface instance. The result is nil until
+// Initialize or SetClient has stored a client, and again after reset.
+func Client() DCGM {
+	return dcgmInterface
+}
+
+// SetClient sets the current DCGM interface instance to the provided one,
+// replacing any client stored previously.
+func SetClient(d DCGM) {
+	dcgmInterface = d
+}
+
+// dcgmProvider implements the DCGM interface by delegating to the go-dcgm
+// package-level functions.
+type dcgmProvider struct {
+	// shutdown is the cleanup func returned by dcgm.Init; Cleanup invokes it.
+	shutdown func()
+	// moduleCleanup is neither assigned nor called anywhere in this file.
+	moduleCleanup func()
+}
+
+// newDCGMProvider initializes a new DCGM provider based on the provided configuration.
+//
+// If a client is already registered it is returned unchanged. Otherwise DCGM is
+// initialized either against the remote host engine at config.RemoteHEInfo (when
+// config.UseRemoteHE is set) or in embedded mode, and the DCGM Fields module is
+// initialized afterwards. Every initialization failure is logged and ends the
+// process with exit status 1.
+func newDCGMProvider(config *appconfig.Config) DCGM {
+	// Check if a DCGM client already exists and return it if so.
+	if Client() != nil {
+		slog.Info("DCGM already initialized")
+		return Client()
+	}
+
+	client := dcgmProvider{}
+
+	// Connect to a remote DCGM host engine if configured.
+	if config.UseRemoteHE {
+		slog.Info("Attempting to connect to remote hostengine at " + config.RemoteHEInfo)
+		cleanup, err := dcgm.Init(dcgm.Standalone, config.RemoteHEInfo, "0")
+		if err != nil {
+			// A failed Init can hand back a nil cleanup func; calling it
+			// unconditionally would panic and hide the real error.
+			if cleanup != nil {
+				cleanup()
+			}
+			slog.Error(err.Error())
+			os.Exit(1)
+		}
+		client.shutdown = cleanup
+	} else {
+		if config.EnableDCGMLog {
+			// DCGM debug logging is configured through these environment
+			// variables before Init runs.
+			os.Setenv("__DCGM_DBG_FILE", "-")
+			os.Setenv("__DCGM_DBG_LVL", config.DCGMLogLevel)
+		}
+
+		// Initialize a local/embedded DCGM instance.
+		slog.Info("Attempting to initialize DCGM.")
+		cleanup, err := dcgm.Init(dcgm.Embedded)
+		if err != nil {
+			slog.Error(err.Error())
+			os.Exit(1)
+		}
+		client.shutdown = cleanup
+	}
+
+	// Initialize the DcgmFields module
+	if val := dcgm.FieldsInit(); val < 0 {
+		slog.Error(fmt.Sprintf("Failed to initialize DCGM Fields module; err: %d", val))
+		os.Exit(1)
+	}
+	slog.Info("Initialized DCGM Fields module.")
+
+	return client
+}
+
+// AddEntityToGroup forwards to dcgm.AddEntityToGroup.
+func (dcgmProvider) AddEntityToGroup(
+	groupId dcgm.GroupHandle,
+	entityGroupId dcgm.Field_Entity_Group,
+	entityId uint,
+) error {
+	return dcgm.AddEntityToGroup(groupId, entityGroupId, entityId)
+}
+
+// AddLinkEntityToGroup forwards to dcgm.AddLinkEntityToGroup.
+func (dcgmProvider) AddLinkEntityToGroup(groupId dcgm.GroupHandle, index uint, parentId uint) error {
+	return dcgm.AddLinkEntityToGroup(groupId, index, parentId)
+}
+
+// CreateFakeEntities forwards to dcgm.CreateFakeEntities.
+func (dcgmProvider) CreateFakeEntities(entities []dcgm.MigHierarchyInfo) ([]uint, error) {
+	return dcgm.CreateFakeEntities(entities)
+}
+
+// CreateGroup forwards to dcgm.CreateGroup.
+func (dcgmProvider) CreateGroup(groupName string) (dcgm.GroupHandle, error) {
+	return dcgm.CreateGroup(groupName)
+}
+
+// DestroyGroup forwards to dcgm.DestroyGroup.
+func (dcgmProvider) DestroyGroup(groupId dcgm.GroupHandle) error {
+	return dcgm.DestroyGroup(groupId)
+}
+
+// EntitiesGetLatestValues forwards to dcgm.EntitiesGetLatestValues.
+func (dcgmProvider) EntitiesGetLatestValues(
+	entities []dcgm.GroupEntityPair,
+	fields []dcgm.Short,
+	flags uint,
+) ([]dcgm.FieldValue_v2, error) {
+	return dcgm.EntitiesGetLatestValues(entities, fields, flags)
+}
+
+// EntityGetLatestValues forwards to dcgm.EntityGetLatestValues.
+func (dcgmProvider) EntityGetLatestValues(
+	entityGroup dcgm.Field_Entity_Group,
+	entityId uint,
+	fields []dcgm.Short,
+) ([]dcgm.FieldValue_v1, error) {
+	return dcgm.EntityGetLatestValues(entityGroup, entityId, fields)
+}
+
+// Fv2_String forwards to dcgm.Fv2_String.
+func (dcgmProvider) Fv2_String(fv dcgm.FieldValue_v2) string {
+	return dcgm.Fv2_String(fv)
+}
+
+// FieldGetById forwards to dcgm.FieldGetById.
+func (dcgmProvider) FieldGetById(fieldId dcgm.Short) dcgm.FieldMeta {
+	return dcgm.FieldGetById(fieldId)
+}
+
+// FieldGroupCreate forwards to dcgm.FieldGroupCreate.
+func (dcgmProvider) FieldGroupCreate(fieldsGroupName string, fields []dcgm.Short) (dcgm.FieldHandle, error) {
+	return dcgm.FieldGroupCreate(fieldsGroupName, fields)
+}
+
+// FieldGroupDestroy forwards to dcgm.FieldGroupDestroy.
+func (dcgmProvider) FieldGroupDestroy(fieldsGroup dcgm.FieldHandle) error {
+	return dcgm.FieldGroupDestroy(fieldsGroup)
+}
+
+// GetAllDeviceCount forwards to dcgm.GetAllDeviceCount.
+func (dcgmProvider) GetAllDeviceCount() (uint, error) {
+	return dcgm.GetAllDeviceCount()
+}
+
+// GetCpuHierarchy forwards to dcgm.GetCpuHierarchy.
+func (dcgmProvider) GetCpuHierarchy() (dcgm.CpuHierarchy_v1, error) {
+	return dcgm.GetCpuHierarchy()
+}
+
+// GetDeviceInfo forwards to dcgm.GetDeviceInfo.
+func (dcgmProvider) GetDeviceInfo(gpuId uint) (dcgm.Device, error) {
+	return dcgm.GetDeviceInfo(gpuId)
+}
+
+// GetEntityGroupEntities forwards to dcgm.GetEntityGroupEntities.
+func (dcgmProvider) GetEntityGroupEntities(entityGroup dcgm.Field_Entity_Group) ([]uint, error) {
+	return dcgm.GetEntityGroupEntities(entityGroup)
+}
+
+// GetGpuInstanceHierarchy forwards to dcgm.GetGpuInstanceHierarchy.
+func (dcgmProvider) GetGpuInstanceHierarchy() (dcgm.MigHierarchy_v2, error) {
+	return dcgm.GetGpuInstanceHierarchy()
+}
+
+// GetNvLinkLinkStatus forwards to dcgm.GetNvLinkLinkStatus.
+func (dcgmProvider) GetNvLinkLinkStatus() ([]dcgm.NvLinkStatus, error) {
+	return dcgm.GetNvLinkLinkStatus()
+}
+
+// GetSupportedDevices forwards to dcgm.GetSupportedDevices.
+func (dcgmProvider) GetSupportedDevices() ([]uint, error) {
+	return dcgm.GetSupportedDevices()
+}
+
+// GetSupportedMetricGroups forwards to dcgm.GetSupportedMetricGroups.
+func (dcgmProvider) GetSupportedMetricGroups(gpuId uint) ([]dcgm.MetricGroup, error) {
+	return dcgm.GetSupportedMetricGroups(gpuId)
+}
+
+// GetValuesSince forwards to dcgm.GetValuesSince.
+func (dcgmProvider) GetValuesSince(
+	gpuGroup dcgm.GroupHandle,
+	fieldGroup dcgm.FieldHandle,
+	sinceTime time.Time,
+) ([]dcgm.FieldValue_v2, time.Time, error) {
+	return dcgm.GetValuesSince(gpuGroup, fieldGroup, sinceTime)
+}
+
+// GroupAllGPUs forwards to dcgm.GroupAllGPUs.
+func (dcgmProvider) GroupAllGPUs() dcgm.GroupHandle {
+	return dcgm.GroupAllGPUs()
+}
+
+// InjectFieldValue forwards to dcgm.InjectFieldValue.
+func (dcgmProvider) InjectFieldValue(
+	gpu uint,
+	fieldID uint,
+	fieldType uint,
+	status int,
+	ts int64,
+	value interface{},
+) error {
+	return dcgm.InjectFieldValue(gpu, fieldID, fieldType, status, ts, value)
+}
+
+// LinkGetLatestValues forwards to dcgm.LinkGetLatestValues.
+func (dcgmProvider) LinkGetLatestValues(
+	index uint,
+	parentId uint,
+	fields []dcgm.Short,
+) ([]dcgm.FieldValue_v1, error) {
+	return dcgm.LinkGetLatestValues(index, parentId, fields)
+}
+
+// NewDefaultGroup forwards to dcgm.NewDefaultGroup.
+func (dcgmProvider) NewDefaultGroup(groupName string) (dcgm.GroupHandle, error) {
+	return dcgm.NewDefaultGroup(groupName)
+}
+
+// UpdateAllFields forwards to dcgm.UpdateAllFields.
+func (dcgmProvider) UpdateAllFields() error {
+	return dcgm.UpdateAllFields()
+}
+
+// WatchFieldsWithGroupEx forwards to dcgm.WatchFieldsWithGroupEx.
+func (dcgmProvider) WatchFieldsWithGroupEx(
+	fieldsGroup dcgm.FieldHandle,
+	group dcgm.GroupHandle,
+	updateFreq int64,
+	maxKeepAge float64,
+	maxKeepSamples int32,
+) error {
+	return dcgm.WatchFieldsWithGroupEx(fieldsGroup, group, updateFreq, maxKeepAge, maxKeepSamples)
+}
+
+// Cleanup tears the provider down in three steps: it terminates the DCGM Fields
+// module (logging, but not aborting on, a negative return code), runs the
+// shutdown func obtained from dcgm.Init, and finally clears the package-level
+// client via reset.
+func (d dcgmProvider) Cleanup() {
+	slog.Info("Attempting to terminate DCGM Fields module.")
+	if rc := dcgm.FieldsTerm(); rc < 0 {
+		slog.Error(fmt.Sprintf("Failed to terminate DCGM Fields module; err: %d", rc))
+	}
+
+	slog.Info("Attempting to terminate DCGM.")
+	d.shutdown()
+
+	// Drop the singleton so a later Initialize starts from scratch.
+	reset()
+}
+
+// HealthSet forwards to dcgm.HealthSet.
+func (dcgmProvider) HealthSet(groupID dcgm.GroupHandle, systems dcgm.HealthSystem) error {
+	return dcgm.HealthSet(groupID, systems)
+}
+
+// HealthGet forwards to dcgm.HealthGet.
+func (dcgmProvider) HealthGet(groupID dcgm.GroupHandle) (dcgm.HealthSystem, error) {
+	return dcgm.HealthGet(groupID)
+}
+
+// HealthCheck forwards to dcgm.HealthCheck.
+func (dcgmProvider) HealthCheck(groupID dcgm.GroupHandle) (dcgm.HealthResponse, error) {
+	return dcgm.HealthCheck(groupID)
+}
+
+// GetGroupInfo forwards to dcgm.GetGroupInfo.
+func (dcgmProvider) GetGroupInfo(groupID dcgm.GroupHandle) (*dcgm.GroupInfo, error) {
+	return dcgm.GetGroupInfo(groupID)
+}
diff --git a/internal/pkg/dcgmprovider/types.go b/internal/pkg/dcgmprovider/types.go
new file mode 100644
index 00000000..18dea167
--- /dev/null
+++ b/internal/pkg/dcgmprovider/types.go
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/dcgmprovider/mock_client.go -package=dcgmprovider -copyright_file=../../../hack/header.txt . DCGM
+
+package dcgmprovider
+
+import (
+	"time"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+)
+
+// DCGM is the set of go-dcgm operations this project calls, expressed as an
+// interface. Each method mirrors the signature of the dcgm package function of
+// the same name, except Cleanup, which belongs to the provider itself. The
+// go:generate directive at the top of this file produces a mock from it.
+type DCGM interface {
+	AddEntityToGroup(dcgm.GroupHandle, dcgm.Field_Entity_Group, uint) error
+	AddLinkEntityToGroup(dcgm.GroupHandle, uint, uint) error
+	CreateFakeEntities(entities []dcgm.MigHierarchyInfo) ([]uint, error)
+	CreateGroup(string) (dcgm.GroupHandle, error)
+	DestroyGroup(groupId dcgm.GroupHandle) error
+	EntitiesGetLatestValues([]dcgm.GroupEntityPair, []dcgm.Short, uint) ([]dcgm.FieldValue_v2, error)
+	EntityGetLatestValues(dcgm.Field_Entity_Group, uint, []dcgm.Short) ([]dcgm.FieldValue_v1, error)
+	Fv2_String(fv dcgm.FieldValue_v2) string
+	FieldGetById(dcgm.Short) dcgm.FieldMeta
+	FieldGroupCreate(string, []dcgm.Short) (dcgm.FieldHandle, error)
+	FieldGroupDestroy(dcgm.FieldHandle) error
+	GetAllDeviceCount() (uint, error)
+	GetCpuHierarchy() (dcgm.CpuHierarchy_v1, error)
+	GetDeviceInfo(uint) (dcgm.Device, error)
+	GetEntityGroupEntities(entityGroup dcgm.Field_Entity_Group) ([]uint, error)
+	GetGpuInstanceHierarchy() (dcgm.MigHierarchy_v2, error)
+	GetNvLinkLinkStatus() ([]dcgm.NvLinkStatus, error)
+	GetSupportedDevices() ([]uint, error)
+	GetSupportedMetricGroups(uint) ([]dcgm.MetricGroup, error)
+	GetValuesSince(dcgm.GroupHandle, dcgm.FieldHandle, time.Time) ([]dcgm.FieldValue_v2, time.Time, error)
+	GroupAllGPUs() dcgm.GroupHandle
+	InjectFieldValue(gpu uint, fieldID uint, fieldType uint, status int, ts int64, value interface{}) error
+	LinkGetLatestValues(uint, uint, []dcgm.Short) ([]dcgm.FieldValue_v1, error)
+	NewDefaultGroup(string) (dcgm.GroupHandle, error)
+	UpdateAllFields() error
+	WatchFieldsWithGroupEx(dcgm.FieldHandle, dcgm.GroupHandle, int64, float64, int32) error
+	// Cleanup releases the provider's resources.
+	Cleanup()
+	HealthSet(groupID dcgm.GroupHandle, systems dcgm.HealthSystem) error
+	HealthGet(groupID dcgm.GroupHandle) (dcgm.HealthSystem, error)
+	HealthCheck(groupID dcgm.GroupHandle) (dcgm.HealthResponse, error)
+	GetGroupInfo(groupID dcgm.GroupHandle) (*dcgm.GroupInfo, error)
+}
diff --git a/internal/pkg/deviceinfo/device_info.go b/internal/pkg/deviceinfo/device_info.go
new file mode 100644
index 00000000..04e9d269
--- /dev/null
+++ b/internal/pkg/deviceinfo/device_info.go
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package deviceinfo
+
+import (
+	"fmt"
+	"log/slog"
+	"slices"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/bits-and-blooms/bitset"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+)
+
+// deviceInitMessage is the format string for the debug log line written after a
+// successful initialization; %s receives the Info's entity type.
+const deviceInitMessage = "System entities of type %s initialized"
+
+// Info holds the entities discovered for one entity type together with the
+// device options used to select them. Initialize fills in only the fields that
+// belong to the requested entity type.
+type Info struct {
+	gpuCount uint                           // device count reported by GetAllDeviceCount
+	gpus     [dcgm.MAX_NUM_DEVICES]GPUInfo  // fixed-size table; entries at or beyond gpuCount stay zero-valued
+	switches []SwitchInfo                   // switches selected by sOpt, each with its matching links
+	cpus     []CPUInfo                      // CPUs selected by cOpt, each with its selected cores
+	gOpt     appconfig.DeviceOptions        // GPU options, stored by initializeGPUInfo
+	sOpt     appconfig.DeviceOptions        // switch options, stored by initializeNvSwitchInfo
+	cOpt     appconfig.DeviceOptions        // CPU options, stored by initializeCPUInfo
+	infoType dcgm.Field_Entity_Group        // entity type this Info was initialized for
+}
+
+// GPUCount returns the stored GPU count.
+func (s *Info) GPUCount() uint {
+	return s.gpuCount
+}
+
+// GPUs returns a slice over the whole fixed-size GPU table. Its length is always
+// dcgm.MAX_NUM_DEVICES, not GPUCount, and it aliases the table held by s.
+func (s *Info) GPUs() []GPUInfo {
+	return s.gpus[:]
+}
+
+// GPU returns a copy of table entry i. It panics when i is outside the table.
+func (s *Info) GPU(i uint) GPUInfo {
+	return s.gpus[i]
+}
+
+// Switches returns the stored switch slice without copying it.
+func (s *Info) Switches() []SwitchInfo {
+	return s.switches
+}
+
+// Switch returns element i of the switch slice. It panics when i is out of range.
+func (s *Info) Switch(i uint) SwitchInfo {
+	return s.switches[i]
+}
+
+// CPUs returns the stored CPU slice without copying it.
+func (s *Info) CPUs() []CPUInfo {
+	return s.cpus
+}
+
+// CPU returns element i of the CPU slice. It panics when i is out of range.
+func (s *Info) CPU(i uint) CPUInfo {
+	return s.cpus[i]
+}
+
+// GOpts returns the stored GPU device options.
+func (s *Info) GOpts() appconfig.DeviceOptions {
+	return s.gOpt
+}
+
+// SOpts returns the stored switch device options.
+func (s *Info) SOpts() appconfig.DeviceOptions {
+	return s.sOpt
+}
+
+// COpts returns the stored CPU device options.
+func (s *Info) COpts() appconfig.DeviceOptions {
+	return s.cOpt
+}
+
+// InfoType returns the entity type recorded by Initialize.
+func (s *Info) InfoType() dcgm.Field_Entity_Group {
+	return s.infoType
+}
+
+// Initialize builds an Info for the given entity type by querying the DCGM
+// client registered in dcgmprovider.
+//
+// FE_LINK and FE_SWITCH are populated using sOpt, FE_GPU using gOpt (honoring
+// useFakeGPUs), and FE_CPU / FE_CPU_CORE using cOpt. Any other entity type
+// yields an error and leaves the entity type of the result unset. The returned
+// *Info is never nil, even when the error is non-nil.
+func Initialize(
+	gOpt appconfig.DeviceOptions, sOpt appconfig.DeviceOptions, cOpt appconfig.DeviceOptions, useFakeGPUs bool,
+	entityType dcgm.Field_Entity_Group,
+) (*Info, error) {
+	slog.Info(fmt.Sprintf("Initializing system entities of type '%s'", entityType.String()))
+
+	info := new(Info)
+
+	var initErr error
+	switch entityType {
+	case dcgm.FE_LINK, dcgm.FE_SWITCH:
+		info.infoType = entityType
+		initErr = info.initializeNvSwitchInfo(sOpt)
+	case dcgm.FE_GPU:
+		info.infoType = entityType
+		initErr = info.initializeGPUInfo(gOpt, useFakeGPUs)
+	case dcgm.FE_CPU, dcgm.FE_CPU_CORE:
+		info.infoType = entityType
+		initErr = info.initializeCPUInfo(cOpt)
+	default:
+		initErr = fmt.Errorf("invalid entity type '%d'", entityType)
+	}
+
+	return info, initErr
+}
+
+// initializeGPUInfo fills the GPU table from the DCGM client.
+//
+// It records the device count, loads per-device info (substituting a
+// "fake<N>" UUID when GetDeviceInfo fails and useFakeGPUs is set), attaches GPU
+// instances and compute instances from the MIG hierarchy, resolves instance
+// profile names, stores gOpt, and finally verifies that every explicitly
+// requested GPU / GPU instance exists.
+func (s *Info) initializeGPUInfo(gOpt appconfig.DeviceOptions, useFakeGPUs bool) error {
+	client := dcgmprovider.Client()
+
+	count, err := client.GetAllDeviceCount()
+	if err != nil {
+		return err
+	}
+	s.gpuCount = count
+
+	for i := uint(0); i < s.gpuCount; i++ {
+		// TODO (roarora): Use of array to store GPUs makes it harder to ignore GPUs (including GPU Instances) which
+		//                 should be filtered out based on `Major` attribute in Device Options. Fix it!
+		gpu := &s.gpus[i]
+
+		// MIG starts out disabled; the hierarchy walk below turns it on.
+		gpu.MigEnabled = false
+		gpu.DeviceInfo, err = client.GetDeviceInfo(i)
+		if err == nil {
+			continue
+		}
+		if !useFakeGPUs {
+			return err
+		}
+		gpu.DeviceInfo.GPU = i
+		gpu.DeviceInfo.UUID = fmt.Sprintf("fake%d", i)
+	}
+
+	hierarchy, err := client.GetGpuInstanceHierarchy()
+	if err != nil {
+		return err
+	}
+
+	if hierarchy.Count > 0 {
+		var entities []dcgm.GroupEntityPair
+
+		// gpuID and instanceIndex remember the most recent GPU instance so
+		// that the compute instances following it can be attached to it.
+		gpuID := uint(0)
+		instanceIndex := 0
+		for i := uint(0); i < hierarchy.Count; i++ {
+			entry := hierarchy.EntityList[i]
+			entityID := entry.Entity.EntityId
+
+			switch entry.Parent.EntityGroupId {
+			case dcgm.FE_GPU:
+				// The parent is a GPU, so this entry is a GPU instance.
+				gpuID = entry.Parent.EntityId
+
+				parent := &s.gpus[gpuID]
+				parent.MigEnabled = true
+				parent.GPUInstances = append(parent.GPUInstances, GPUInstanceInfo{
+					Info:        entry.Info,
+					ProfileName: "",
+					EntityId:    entityID,
+				})
+				entities = append(entities, dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: entityID})
+				instanceIndex = len(parent.GPUInstances) - 1
+			case dcgm.FE_GPU_I:
+				// TODO (roarora): Fix this implementation as it expects Instances and Compute Instances to be reported
+				//                 in a certain sequence if, that is not the case results are incorrect.
+
+				// The parent is a GPU instance, so this entry is a compute
+				// instance of the GPU instance recorded previously.
+				instance := &s.gpus[gpuID].GPUInstances[instanceIndex]
+				instance.ComputeInstances = append(instance.ComputeInstances,
+					ComputeInstanceInfo{entry.Info, "", entityID})
+			}
+		}
+
+		if err = s.populateMigProfileNames(entities); err != nil {
+			return err
+		}
+	}
+
+	s.gOpt = gOpt
+	if err = s.verifyDevicePresence(); err != nil {
+		return err
+	}
+
+	slog.Debug(fmt.Sprintf(deviceInitMessage, s.infoType))
+	return nil
+}
+
+// initializeCPUInfo records the CPUs and cores selected by cOpt.
+//
+// A CPU is kept when cOpt.Flex is set or its ID passes shouldMonitor against
+// MajorRange; within a kept CPU the same rule is applied to each owned core
+// against MinorRange. It fails when DCGM reports no CPUs, or when an explicitly
+// requested CPU or core was not discovered.
+func (s *Info) initializeCPUInfo(cOpt appconfig.DeviceOptions) error {
+	hierarchy, err := dcgmprovider.Client().GetCpuHierarchy()
+	if err != nil {
+		return err
+	}
+	if hierarchy.NumCpus <= 0 {
+		return fmt.Errorf("no cpus to monitor")
+	}
+
+	for i := 0; i < int(hierarchy.NumCpus); i++ {
+		cpuID := hierarchy.Cpus[i].CpuId
+
+		// Skip CPUs that the device options do not select.
+		if !cOpt.Flex && !s.shouldMonitor(cOpt.MajorRange, cpuID) {
+			continue
+		}
+
+		monitoredCores := make([]uint, 0)
+		for _, core := range getCoreArray(hierarchy.Cpus[i].OwnedCores) {
+			// Keep only the cores that the device options select.
+			if cOpt.Flex || s.shouldMonitor(cOpt.MinorRange, core) {
+				monitoredCores = append(monitoredCores, core)
+			}
+		}
+
+		s.cpus = append(s.cpus, CPUInfo{
+			cpuID,
+			monitoredCores,
+		})
+	}
+
+	s.cOpt = cOpt
+
+	// Every explicitly requested CPU and core must have been discovered.
+	if err = s.verifyCPUDevicePresence(); err != nil {
+		return err
+	}
+
+	slog.Debug(fmt.Sprintf(deviceInitMessage, s.infoType))
+	return nil
+}
+
+// initializeNvSwitchInfo records the switches and links selected by sOpt.
+//
+// A switch is kept when sOpt.Flex is set or its ID passes shouldMonitor against
+// MajorRange. For each kept switch, a link is attached when it passes the same
+// rule against MinorRange and its parent is that switch. It fails when DCGM
+// reports no switches, or when an explicitly requested switch or link was not
+// discovered.
+func (s *Info) initializeNvSwitchInfo(sOpt appconfig.DeviceOptions) error {
+	client := dcgmprovider.Client()
+
+	switchIDs, err := client.GetEntityGroupEntities(dcgm.FE_SWITCH)
+	if err != nil {
+		return err
+	}
+	if len(switchIDs) <= 0 {
+		return fmt.Errorf("no switches to monitor")
+	}
+
+	links, err := client.GetNvLinkLinkStatus()
+	if err != nil {
+		return err
+	}
+
+	for _, switchID := range switchIDs {
+		// Skip switches that the device options do not select.
+		if !sOpt.Flex && !s.shouldMonitor(sOpt.MajorRange, switchID) {
+			continue
+		}
+
+		var matchingLinks []dcgm.NvLinkStatus
+		for _, link := range links {
+			// Skip links that the device options do not select.
+			if !sOpt.Flex && !s.shouldMonitor(sOpt.MinorRange, link.Index) {
+				continue
+			}
+			if link.ParentType == dcgm.FE_SWITCH && link.ParentId == switchID {
+				matchingLinks = append(matchingLinks, link)
+			}
+		}
+
+		s.switches = append(s.switches, SwitchInfo{
+			switchID,
+			matchingLinks,
+		})
+	}
+
+	s.sOpt = sOpt
+	if err = s.verifySwitchDevicePresence(); err != nil {
+		return err
+	}
+
+	slog.Debug(fmt.Sprintf(deviceInitMessage, s.infoType))
+	return nil
+}
+
+// setGPUInstanceProfileName assigns profileName to the first GPU instance whose
+// EntityId equals entityId, searching the first gpuCount GPUs in order. It
+// reports whether such an instance was found.
+func (s *Info) setGPUInstanceProfileName(entityId uint, profileName string) bool {
+	for i := uint(0); i < s.gpuCount; i++ {
+		// The slice shares its backing array with the table entry, so the
+		// assignment below updates the stored instance.
+		instances := s.gpus[i].GPUInstances
+		for j := range instances {
+			if instances[j].EntityId != entityId {
+				continue
+			}
+			instances[j].ProfileName = profileName
+			return true
+		}
+	}
+
+	return false
+}
+
+func (s *Info) setMigProfileNames(values []dcgm.FieldValue_v2) error {
+	var err error
+	var errFound bool
+	errStr := "cannot find match for entities:"
+
+	for _, v := range values {
+		if !s.setGPUInstanceProfileName(v.EntityId, dcgmprovider.Client().Fv2_String(v)) {
+			errStr = fmt.Sprintf("%s group %d, id %d", errStr, v.EntityGroupId, v.EntityId)
+			errFound = true
+		}
+	}
+
+	if errFound {
+		err = fmt.Errorf("%s", errStr)
+	}
+
+	return err
+}
+
+func (s *Info) populateMigProfileNames(entities []dcgm.GroupEntityPair) error {
+	if len(entities) == 0 {
+		// There are no entities to populate
+		return nil
+	}
+
+	var fields []dcgm.Short
+	fields = append(fields, dcgm.DCGM_FI_DEV_NAME)
+	flags := dcgm.DCGM_FV_FLAG_LIVE_DATA
+	values, err := dcgmprovider.Client().EntitiesGetLatestValues(entities, fields, flags)
+	if err != nil {
+		return err
+	}
+
+	return s.setMigProfileNames(values)
+}
+
+// gpuIDExists reports whether any of the first gpuCount GPUs has the given ID.
+func (s *Info) gpuIDExists(gpuId int) bool {
+	want := uint(gpuId)
+	for i := uint(0); i < s.gpuCount; i++ {
+		if s.gpus[i].DeviceInfo.GPU == want {
+			return true
+		}
+	}
+	return false
+}
+
+// gpuInstanceIDExists reports whether any GPU instance on the first gpuCount
+// GPUs has the given entity ID.
+func (s *Info) gpuInstanceIDExists(gpuInstanceId int) bool {
+	want := uint(gpuInstanceId)
+	matches := func(gi GPUInstanceInfo) bool { return gi.EntityId == want }
+
+	for i := uint(0); i < s.gpuCount; i++ {
+		if slices.ContainsFunc(s.gpus[i].GPUInstances, matches) {
+			return true
+		}
+	}
+	return false
+}
+
+// cpuIDExists reports whether a recorded CPU has the given entity ID.
+func (s *Info) cpuIDExists(cpuId int) bool {
+	want := uint(cpuId)
+	return slices.ContainsFunc(s.cpus, func(c CPUInfo) bool {
+		return c.EntityId == want
+	})
+}
+
+// cpuCoreIDExists reports whether any recorded CPU lists the given core ID.
+func (s *Info) cpuCoreIDExists(coreId int) bool {
+	want := uint(coreId)
+	for i := range s.cpus {
+		if slices.Contains(s.cpus[i].Cores, want) {
+			return true
+		}
+	}
+	return false
+}
+
+// switchIDExists reports whether a recorded switch has the given entity ID.
+func (s *Info) switchIDExists(switchId int) bool {
+	want := uint(switchId)
+	return slices.ContainsFunc(s.switches, func(sw SwitchInfo) bool {
+		return sw.EntityId == want
+	})
+}
+
+// linkIDExists reports whether any recorded switch has a link with the given index.
+func (s *Info) linkIDExists(linkId int) bool {
+	want := uint(linkId)
+	hasIndex := func(l dcgm.NvLinkStatus) bool { return l.Index == want }
+
+	for i := range s.switches {
+		if slices.ContainsFunc(s.switches[i].NvLinks, hasIndex) {
+			return true
+		}
+	}
+	return false
+}
+
+// verifyDevicePresence checks that every GPU ID in gOpt.MajorRange and every GPU
+// instance ID in gOpt.MinorRange was discovered. A range is skipped when it is
+// empty or starts with -1, and the whole check is skipped when gOpt.Flex is set.
+func (s *Info) verifyDevicePresence() error {
+	if s.gOpt.Flex {
+		return nil
+	}
+
+	if major := s.gOpt.MajorRange; len(major) > 0 && major[0] != -1 {
+		for _, id := range major {
+			if !s.gpuIDExists(id) {
+				return fmt.Errorf("couldn't find requested GPU ID '%d'", id)
+			}
+		}
+	}
+
+	if minor := s.gOpt.MinorRange; len(minor) > 0 && minor[0] != -1 {
+		for _, id := range minor {
+			if !s.gpuInstanceIDExists(id) {
+				return fmt.Errorf("couldn't find requested GPU instance ID '%d'", id)
+			}
+		}
+	}
+
+	return nil
+}
+
+// verifyCPUDevicePresence checks that every CPU ID in cOpt.MajorRange and every
+// core ID in cOpt.MinorRange was discovered. A range is skipped when it is empty
+// or starts with -1, and the whole check is skipped when cOpt.Flex is set.
+func (s *Info) verifyCPUDevicePresence() error {
+	if s.cOpt.Flex {
+		return nil
+	}
+
+	if major := s.cOpt.MajorRange; len(major) > 0 && major[0] != -1 {
+		for _, id := range major {
+			if !s.cpuIDExists(id) {
+				return fmt.Errorf("couldn't find requested CPU ID '%d'", id)
+			}
+		}
+	}
+
+	if minor := s.cOpt.MinorRange; len(minor) > 0 && minor[0] != -1 {
+		for _, id := range minor {
+			if !s.cpuCoreIDExists(id) {
+				return fmt.Errorf("couldn't find requested CPU core '%d'", id)
+			}
+		}
+	}
+
+	return nil
+}
+
+// shouldMonitor reports whether val is selected by monitoringRange: an empty
+// range selects nothing, a range whose first element is -1 selects everything,
+// and any other range selects exactly the values it contains.
+func (s *Info) shouldMonitor(monitoringRange []int, val uint) bool {
+	switch {
+	case len(monitoringRange) == 0:
+		return false
+	case monitoringRange[0] == -1:
+		return true
+	default:
+		return slices.Contains(monitoringRange, int(val))
+	}
+}
+
+// verifySwitchDevicePresence checks that every switch ID in sOpt.MajorRange and
+// every link index in sOpt.MinorRange was discovered. A range is skipped when it
+// is empty or starts with -1, and the whole check is skipped when sOpt.Flex is set.
+func (s *Info) verifySwitchDevicePresence() error {
+	if s.sOpt.Flex {
+		return nil
+	}
+
+	if major := s.sOpt.MajorRange; len(major) > 0 && major[0] != -1 {
+		for _, id := range major {
+			if !s.switchIDExists(id) {
+				return fmt.Errorf("couldn't find requested NvSwitch ID '%d'", id)
+			}
+		}
+	}
+
+	if minor := s.sOpt.MinorRange; len(minor) > 0 && minor[0] != -1 {
+		for _, id := range minor {
+			if !s.linkIDExists(id) {
+				return fmt.Errorf("couldn't find requested NvLink '%d'", id)
+			}
+		}
+	}
+
+	return nil
+}
+
+// IsCPUWatched reports whether cpuID is both a recorded CPU and selected by the
+// CPU options: always when cOpt.Flex is set or MajorRange starts with -1,
+// otherwise only when MajorRange contains cpuID.
+func (s *Info) IsCPUWatched(cpuID uint) bool {
+	known := slices.ContainsFunc(s.cpus, func(c CPUInfo) bool {
+		return c.EntityId == cpuID
+	})
+	major := s.cOpt.MajorRange
+
+	switch {
+	case !known:
+		return false
+	case s.cOpt.Flex:
+		return true
+	case len(major) > 0 && major[0] == -1:
+		return true
+	}
+
+	return slices.ContainsFunc(major, func(id int) bool {
+		return uint(id) == cpuID
+	})
+}
+
+// IsCoreWatched reports whether coreID on cpuID is selected by the CPU options.
+// With cOpt.Flex set every core is watched. Otherwise the CPU must be recorded
+// and watched, and MinorRange must either start with -1 or contain coreID.
+func (s *Info) IsCoreWatched(coreID uint, cpuID uint) bool {
+	if s.cOpt.Flex {
+		return true
+	}
+
+	cpuWatched := slices.ContainsFunc(s.cpus, func(c CPUInfo) bool {
+		return s.IsCPUWatched(c.EntityId) && c.EntityId == cpuID
+	})
+	if !cpuWatched {
+		return false
+	}
+
+	minor := s.cOpt.MinorRange
+	if len(minor) > 0 && minor[0] == -1 {
+		return true
+	}
+	return slices.Contains(minor, int(coreID))
+}
+
+// IsSwitchWatched reports whether switchID is selected by the switch options:
+// always when sOpt.Flex is set or MajorRange starts with -1, otherwise only when
+// MajorRange contains switchID.
+func (s *Info) IsSwitchWatched(switchID uint) bool {
+	major := s.sOpt.MajorRange
+
+	switch {
+	case s.sOpt.Flex:
+		return true
+	case len(major) > 0 && major[0] == -1:
+		// A leading -1 in MajorRange means all switches are monitored.
+		return true
+	default:
+		return slices.Contains(major, int(switchID))
+	}
+}
+
+// IsLinkWatched reports whether link linkIndex on switch switchID is selected by
+// the switch options. With sOpt.Flex set every link is watched. Otherwise the
+// switch must be recorded and watched; then the link is watched when MinorRange
+// starts with -1, or when the switch has a link with that index and MinorRange
+// contains it.
+func (s *Info) IsLinkWatched(linkIndex uint, switchID uint) bool {
+	if s.sOpt.Flex {
+		return true
+	}
+
+	idx := slices.IndexFunc(s.switches, func(sw SwitchInfo) bool {
+		return sw.EntityId == switchID && s.IsSwitchWatched(sw.EntityId)
+	})
+	if idx < 0 {
+		// The switch is unknown or not watched.
+		return false
+	}
+
+	minor := s.sOpt.MinorRange
+	if len(minor) > 0 && minor[0] == -1 {
+		return true
+	}
+
+	linkExists := slices.ContainsFunc(s.switches[idx].NvLinks, func(l dcgm.NvLinkStatus) bool {
+		return l.Index == linkIndex
+	})
+	return linkExists && slices.Contains(minor, int(linkIndex))
+}
+
+// getCoreArray expands a core bitmask into the ascending list of set bit
+// positions below dcgm.MAX_NUM_CPU_CORES.
+//
+// The mask is copied into a buffer of dcgm.MAX_CPU_CORE_BITMASK_COUNT words: a
+// shorter bitmask is zero-padded, and words beyond that count are ignored
+// rather than indexed past the end of the buffer. The result is nil when no bit
+// is set.
+func getCoreArray(bitmask []uint64) []uint {
+	bits := make([]uint64, dcgm.MAX_CPU_CORE_BITMASK_COUNT)
+	// copy stops at the shorter of the two slices, so an oversized bitmask
+	// cannot cause an out-of-range write.
+	copy(bits, bitmask)
+
+	b := bitset.From(bits)
+
+	var cores []uint
+	for i := uint(0); i < dcgm.MAX_NUM_CPU_CORES; i++ {
+		if b.Test(i) {
+			cores = append(cores, i)
+		}
+	}
+
+	return cores
+}
+
+// Helper Functions
+
+// GetGPUInstanceIdentifier returns "<gpu>-<gpuInstanceID>" for the first GPU in
+// deviceInfo whose UUID equals gpuuuid, where <gpu> is that GPU's ID. It returns
+// an empty string when no GPU has that UUID.
+func GetGPUInstanceIdentifier(deviceInfo Provider, gpuuuid string, gpuInstanceID uint) string {
+	for i := uint(0); i < deviceInfo.GPUCount(); i++ {
+		if deviceInfo.GPU(i).DeviceInfo.UUID != gpuuuid {
+			continue
+		}
+		return fmt.Sprintf("%d-%d", deviceInfo.GPU(i).DeviceInfo.GPU, gpuInstanceID)
+	}
+
+	return ""
+}
diff --git a/internal/pkg/deviceinfo/device_info_test.go b/internal/pkg/deviceinfo/device_info_test.go
new file mode 100644
index 00000000..f947b671
--- /dev/null
+++ b/internal/pkg/deviceinfo/device_info_test.go
@@ -0,0 +1,2749 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package deviceinfo
+
+import (
+	"fmt"
+	"slices"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/mock/gomock"
+
+	mockdcgm "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+)
+// fakeProfileName is the profile name given to GPU instances built by SpoofGPUDeviceInfo.
+var fakeProfileName = "2fake.4gb"
+
+// SpoofGPUDeviceInfo returns an Info describing two fake GPUs (IDs 0 and 1),
+// each carrying a single GPU instance that uses fakeProfileName.
+func SpoofGPUDeviceInfo() Info {
+	var info Info
+	info.gpuCount = 2
+	info.gpus[0].DeviceInfo.GPU = 0
+	info.gpus[0].GPUInstances = append(info.gpus[0].GPUInstances, GPUInstanceInfo{
+		Info:        dcgm.MigEntityInfo{GpuUuid: "fake", NvmlProfileSlices: 3},
+		ProfileName: fakeProfileName,
+		EntityId:    0,
+	})
+
+	info.gpus[1].DeviceInfo.GPU = 1
+	info.gpus[1].GPUInstances = append(info.gpus[1].GPUInstances, GPUInstanceInfo{
+		Info:        dcgm.MigEntityInfo{GpuUuid: "fake", NvmlInstanceId: 1, NvmlProfileSlices: 3},
+		ProfileName: fakeProfileName,
+		EntityId:    14,
+	})
+	return info
+}
+
+func TestGetters(t *testing.T) {
+	fakeDevices := SpoofGPUDevices()
+	fakeDeviceInfo := [dcgm.MAX_NUM_DEVICES]GPUInfo{}
+	fakeDeviceInfo[0] = GPUInfo{
+		DeviceInfo: fakeDevices[0],
+		MigEnabled: false,
+	}
+	fakeDeviceInfo[1] = GPUInfo{
+		DeviceInfo: fakeDevices[1],
+		MigEnabled: true,
+	}
+
+	fakeSwitches := []SwitchInfo{
+		{
+			EntityId: 0,
+			NvLinks:  nil,
+		},
+		{
+			EntityId: 1,
+			NvLinks:  nil,
+		},
+	}
+
+	fakeCPUs := []CPUInfo{
+		{
+			EntityId: 0,
+			Cores:    nil,
+		},
+		{
+			EntityId: 1,
+			Cores:    nil,
+		},
+	}
+
+	fakeGOpts := appconfig.DeviceOptions{
+		Flex: true,
+	}
+
+	fakeSOpts := appconfig.DeviceOptions{
+		Flex:       false,
+		MajorRange: []int{-1},
+		MinorRange: []int{1, 2, 3},
+	}
+
+	fakeCOpts := appconfig.DeviceOptions{
+		Flex:       false,
+		MajorRange: []int{0, 1},
+		MinorRange: []int{1, 2, 3},
+	}
+
+	fakeInfoType := dcgm.FE_GPU
+
+	deviceInfo := Info{
+		gpuCount: uint(len(fakeDevices)),
+		gpus:     fakeDeviceInfo,
+		switches: fakeSwitches,
+		cpus:     fakeCPUs,
+		gOpt:     fakeGOpts,
+		sOpt:     fakeSOpts,
+		cOpt:     fakeCOpts,
+		infoType: fakeInfoType,
+	}
+
+	assert.Equal(t, uint(len(fakeDevices)), deviceInfo.GPUCount(), "GPU count mismatch")
+	assert.Equal(t, fakeDeviceInfo[:], deviceInfo.GPUs(), "GPUs mismatch")
+	assert.Equal(t, fakeDeviceInfo[0], deviceInfo.GPU(uint(0)), "GPU mismatch")
+	assert.Equal(t, fakeSwitches, deviceInfo.Switches(), "Switches mismatch")
+	assert.Equal(t, fakeSwitches[1], deviceInfo.Switch(uint(1)), "Switch mismatch")
+	assert.Equal(t, fakeCPUs, deviceInfo.CPUs(), "CPUs mismatch")
+	assert.Equal(t, fakeCPUs[1], deviceInfo.CPU(uint(1)), "CPU mismatch")
+	assert.Equal(t, fakeGOpts, deviceInfo.GOpts(), "GPUs options mismatch")
+	assert.Equal(t, fakeSOpts, deviceInfo.SOpts(), "Switches options mismatch")
+	assert.Equal(t, fakeCOpts, deviceInfo.COpts(), "CPUs options mismatch")
+	assert.Equal(t, fakeInfoType, deviceInfo.InfoType(), "InfoType mismatch")
+}
+
+// TestInitialize drives Initialize once per entity group against a mocked DCGM
+// client: a success and a provider-failure case each, plus an unsupported group.
+func TestInitialize(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer dcgmprovider.SetClient(realDCGM)
+	dcgmprovider.SetClient(mockDCGMProvider)
+
+	fakeDevices := SpoofGPUDevices()
+	_, fakeGPUs, _, _ := SpoofMigHierarchy()
+
+	tests := []struct {
+		name           string
+		gOpts          appconfig.DeviceOptions
+		sOpts          appconfig.DeviceOptions
+		cOpts          appconfig.DeviceOptions
+		entityType     dcgm.Field_Entity_Group
+		mockCalls      func()
+		expectedOutput func() *Info
+		assertions     func(*testing.T, *Info, *Info)
+		wantErr        bool
+	}{
+		{
+			name:       "Initialize GPUs",
+			gOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_GPU,
+			mockCalls: func() {
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil)
+			},
+			expectedOutput: func() *Info {
+				return &Info{
+					gpuCount: 0,
+					gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+						{
+							DeviceInfo: fakeDevices[0],
+						},
+					},
+					switches: nil,
+					cpus:     nil,
+					gOpt:     appconfig.DeviceOptions{Flex: true},
+					sOpt:     appconfig.DeviceOptions{},
+					cOpt:     appconfig.DeviceOptions{},
+					infoType: dcgm.FE_GPU,
+				}
+			},
+			assertions: func(t *testing.T, expected, actual *Info) {
+				assert.Equal(t, expected.gpus[0].DeviceInfo, actual.gpus[0].DeviceInfo,
+					"GPU device info mismatch")
+
+				assert.Equal(t, expected.gpus[0].MigEnabled, actual.gpus[0].MigEnabled,
+					"MIG info mismatch")
+
+				assert.Equal(t, len(expected.gpus[0].GPUInstances), len(actual.gpus[0].GPUInstances),
+					"GPU Instances length mismatch")
+
+				assert.Equal(t, expected.gOpt, actual.gOpt, "GPU options mismatch")
+
+				assert.Equal(t, expected.infoType, actual.infoType, "GPU info type mismatch")
+			},
+			wantErr: false,
+		},
+		{
+			name:       "Initialize GPUs error",
+			gOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_GPU,
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(0), fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name:       "Initialize Switches",
+			sOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_SWITCH,
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return([]uint{1}, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return([]dcgm.NvLinkStatus{
+					{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				}, nil)
+			},
+			expectedOutput: func() *Info {
+				return &Info{
+					gpuCount: 0,
+					gpus:     [dcgm.MAX_NUM_DEVICES]GPUInfo{},
+					switches: []SwitchInfo{
+						{
+							EntityId: uint(1),
+							NvLinks: []dcgm.NvLinkStatus{
+								{
+									ParentId:   uint(1),
+									ParentType: dcgm.FE_SWITCH,
+									Index:      uint(1),
+								},
+							},
+						},
+					},
+					cpus:     nil,
+					gOpt:     appconfig.DeviceOptions{},
+					sOpt:     appconfig.DeviceOptions{Flex: true},
+					cOpt:     appconfig.DeviceOptions{},
+					infoType: dcgm.FE_SWITCH,
+				}
+			},
+			assertions: func(t *testing.T, expected, actual *Info) {
+				assert.Equal(t, len(expected.switches), len(actual.switches),
+					"Switches length mismatch")
+
+				assert.Equal(t, expected.switches[0].EntityId, actual.switches[0].EntityId,
+					"Switch Entity ID mismatch")
+
+				assert.Equal(t, len(expected.switches[0].NvLinks), len(actual.switches[0].NvLinks),
+					"Switches NV link length mismatch")
+
+				assert.Equal(t, expected.switches[0].NvLinks[0].Index, actual.switches[0].NvLinks[0].Index,
+					"Switches NV link Index mismatch")
+
+				assert.Equal(t, expected.sOpt, actual.sOpt, "Switch options mismatch")
+
+				assert.Equal(t, expected.infoType, actual.infoType, "Switch info type mismatch")
+			},
+			wantErr: false,
+		},
+		{
+			name:       "Initialize Switches error",
+			sOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_SWITCH,
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(dcgm.FE_SWITCH).Return([]uint{uint(0)},
+					fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name:       "Initialize NV Links",
+			sOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_LINK,
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return([]uint{1}, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return([]dcgm.NvLinkStatus{
+					{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				}, nil)
+			},
+			expectedOutput: func() *Info {
+				return &Info{
+					gpuCount: 0,
+					gpus:     [dcgm.MAX_NUM_DEVICES]GPUInfo{},
+					switches: []SwitchInfo{
+						{
+							EntityId: uint(1),
+							NvLinks: []dcgm.NvLinkStatus{
+								{
+									ParentId:   uint(1),
+									ParentType: dcgm.FE_SWITCH,
+									Index:      uint(1),
+								},
+							},
+						},
+					},
+					cpus:     nil,
+					gOpt:     appconfig.DeviceOptions{},
+					sOpt:     appconfig.DeviceOptions{Flex: true},
+					cOpt:     appconfig.DeviceOptions{},
+					infoType: dcgm.FE_LINK,
+				}
+			},
+			assertions: func(t *testing.T, expected, actual *Info) {
+				assert.Equal(t, len(expected.switches), len(actual.switches),
+					"Switches length mismatch")
+
+				assert.Equal(t, expected.switches[0].EntityId, actual.switches[0].EntityId,
+					"Switch Entity ID mismatch")
+
+				assert.Equal(t, len(expected.switches[0].NvLinks), len(actual.switches[0].NvLinks),
+					"Switches NV link length mismatch")
+
+				assert.Equal(t, expected.switches[0].NvLinks[0].Index, actual.switches[0].NvLinks[0].Index,
+					"Switches NV link Index mismatch")
+
+				assert.Equal(t, expected.sOpt, actual.sOpt, "NV Link options mismatch")
+
+				assert.Equal(t, expected.infoType, actual.infoType, "NV Link info type mismatch")
+			},
+			wantErr: false,
+		},
+		{
+			name:       "Initialize NV Link error",
+			sOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_LINK,
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(dcgm.FE_SWITCH).Return([]uint{uint(0)},
+					fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name:       "initialize CPUs",
+			cOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_CPU,
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 1,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{1, 2, 8},
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedOutput: func() *Info {
+				return &Info{
+					gpuCount: 0,
+					gpus:     [dcgm.MAX_NUM_DEVICES]GPUInfo{},
+					switches: nil,
+					cpus: []CPUInfo{
+						{
+							EntityId: uint(0), // same ID as the mocked CPU (CpuId: 0)
+							Cores:    []uint{0, 65, 131},
+						},
+					},
+					gOpt:     appconfig.DeviceOptions{},
+					sOpt:     appconfig.DeviceOptions{},
+					cOpt:     appconfig.DeviceOptions{Flex: true},
+					infoType: dcgm.FE_CPU,
+				}
+			},
+			assertions: func(t *testing.T, expected, actual *Info) {
+				assert.Equal(t, len(expected.cpus), len(actual.cpus),
+					"CPU length mismatch")
+
+				assert.Equal(t, expected.cpus[0].EntityId, actual.cpus[0].EntityId,
+					"CPU Entity ID mismatch")
+
+				assert.Equal(t, len(expected.cpus[0].Cores), len(actual.cpus[0].Cores),
+					"CPU Core length mismatch")
+
+				assert.True(t, slices.Equal(expected.cpus[0].Cores, actual.cpus[0].Cores),
+					"CPU Cores mismatch")
+
+				assert.Equal(t, expected.cOpt, actual.cOpt, "CPU options mismatch")
+
+				assert.Equal(t, expected.infoType, actual.infoType, "CPU info type mismatch")
+			},
+			wantErr: false,
+		},
+		{
+			name:       "Initialize CPU error",
+			cOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_CPU,
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(dcgm.CpuHierarchy_v1{}, fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name:       "Initialize CPU Cores",
+			cOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_CPU_CORE,
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 1,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{1, 2, 8},
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedOutput: func() *Info {
+				return &Info{
+					gpuCount: 0,
+					gpus:     [dcgm.MAX_NUM_DEVICES]GPUInfo{},
+					switches: nil,
+					cpus: []CPUInfo{
+						{
+							EntityId: uint(0), // same ID as the mocked CPU (CpuId: 0)
+							Cores:    []uint{0, 65, 131},
+						},
+					},
+					gOpt:     appconfig.DeviceOptions{},
+					sOpt:     appconfig.DeviceOptions{},
+					cOpt:     appconfig.DeviceOptions{Flex: true},
+					infoType: dcgm.FE_CPU_CORE,
+				}
+			},
+			assertions: func(t *testing.T, expected, actual *Info) {
+				assert.Equal(t, len(expected.cpus), len(actual.cpus),
+					"CPU length mismatch")
+
+				assert.Equal(t, expected.cpus[0].EntityId, actual.cpus[0].EntityId,
+					"CPU Entity ID mismatch")
+
+				assert.Equal(t, len(expected.cpus[0].Cores), len(actual.cpus[0].Cores),
+					"CPU Core length mismatch")
+
+				assert.True(t, slices.Equal(expected.cpus[0].Cores, actual.cpus[0].Cores),
+					"CPU Cores mismatch")
+
+				assert.Equal(t, expected.cOpt, actual.cOpt, "CPU options mismatch")
+
+				assert.Equal(t, expected.infoType, actual.infoType, "CPU info type mismatch")
+			},
+			wantErr: false,
+		},
+		{
+			name:       "Initialize CPU Cores error",
+			cOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_CPU_CORE,
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(dcgm.CpuHierarchy_v1{}, fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name:       "Initialize Invalid type error",
+			cOpts:      appconfig.DeviceOptions{Flex: true},
+			entityType: dcgm.FE_VGPU,
+			mockCalls:  func() {},
+			wantErr:    true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.mockCalls()
+
+			deviceInfo, err := Initialize(tt.gOpts, tt.sOpts, tt.cOpts, false, tt.entityType)
+			if tt.wantErr {
+				assert.Error(t, err, "Error expected")
+				return
+			}
+
+			// Stop on failure: the assertions below dereference deviceInfo.
+			require.NoError(t, err, "Error not expected")
+			require.NotNil(t, deviceInfo, "Expected output to be not nil")
+
+			tt.assertions(t, tt.expectedOutput(), deviceInfo)
+		})
+	}
+}
+
+func TestInitializeGPUInfo(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM)
+	}()
+	dcgmprovider.SetClient(mockDCGMProvider)
+
+	fakeDevices := SpoofGPUDevices()
+	fakeMigHierarchy, fakeGPUs, fakeGPUInstances, fakeGPUComputeInstances := SpoofMigHierarchy()
+
+	tests := []struct {
+		name           string
+		gOpts          appconfig.DeviceOptions
+		mockCalls      func()
+		expectedOutput map[uint]GPUInfo
+		wantErr        bool
+	}{
+		{
+			name: "GPU with 0 Device Count",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(0), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(dcgm.MigHierarchy_v2{
+					Count: 0,
+				}, nil)
+			},
+			expectedOutput: map[uint]GPUInfo{},
+			wantErr:        false,
+		},
+		{
+			name: "GPU with 0 Device Count with GetAllDeviceCount error",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(0), fmt.Errorf("some error"))
+			},
+			expectedOutput: map[uint]GPUInfo{},
+			wantErr:        true,
+		},
+		{
+			name: "GPU Count 1 with No Hierarchy",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil)
+			},
+			expectedOutput: map[uint]GPUInfo{
+				0: {
+					DeviceInfo: fakeDevices[0],
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "GPU count 2 GPU with No Hierarchy",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 2,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+				mockHierarchy.EntityList[0] = fakeGPUs[1]
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil)
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			expectedOutput: map[uint]GPUInfo{
+				0: {DeviceInfo: fakeDevices[0]},
+				1: {DeviceInfo: fakeDevices[1]},
+			},
+			wantErr: false,
+		},
+		{
+			name: "GPU Count 1 with No Hierarchy but GetDeviceInfo error",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], fmt.Errorf("some error"))
+			},
+			expectedOutput: map[uint]GPUInfo{},
+			wantErr:        true,
+		},
+		{
+			name: "GPU Count 1 with No Hierarchy but GetGpuInstanceHierarchy error",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(dcgm.MigHierarchy_v2{},
+					fmt.Errorf("some error"))
+			},
+			expectedOutput: map[uint]GPUInfo{},
+			wantErr:        true,
+		},
+		{
+			name: "GPU Count 1 with Hierarchy",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 6,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+				mockHierarchy.EntityList[1] = fakeGPUInstances[0]
+				mockHierarchy.EntityList[2] = fakeGPUComputeInstances[0]
+				mockHierarchy.EntityList[3] = fakeGPUComputeInstances[1]
+				mockHierarchy.EntityList[4] = fakeGPUInstances[1]
+				mockHierarchy.EntityList[5] = fakeGPUComputeInstances[2]
+
+				mockEntitiesInput := []dcgm.GroupEntityPair{
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+				}
+
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: mockEntitiesInput[0].EntityId},
+					{EntityId: mockEntitiesInput[1].EntityId},
+				}
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+					gomock.Any()).Return(mockEntitiesResult, nil)
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+			},
+			expectedOutput: map[uint]GPUInfo{
+				0: {
+					DeviceInfo: fakeDevices[0],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[0].Entity.EntityId,
+							Info:     fakeGPUInstances[0].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[0].Info,
+								},
+								{
+									EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[1].Info,
+								},
+							},
+							ProfileName: "instance_profile_0",
+						},
+						{
+							EntityId: fakeGPUInstances[1].Entity.EntityId,
+							Info:     fakeGPUInstances[1].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[2].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[2].Info,
+								},
+							},
+							ProfileName: "instance_profile_1",
+						},
+					},
+					MigEnabled: true,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "GPU Count 2 with Hierarchy",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockEntitiesInput := []dcgm.GroupEntityPair{
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+				}
+
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: mockEntitiesInput[0].EntityId},
+					{EntityId: mockEntitiesInput[1].EntityId},
+					{EntityId: mockEntitiesInput[2].EntityId},
+				}
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+					gomock.Any()).Return(mockEntitiesResult, nil)
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			expectedOutput: map[uint]GPUInfo{
+				0: {
+					DeviceInfo: fakeDevices[0],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[0].Entity.EntityId,
+							Info:     fakeGPUInstances[0].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[0].Info,
+								},
+								{
+									EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[1].Info,
+								},
+							},
+							ProfileName: "instance_profile_0",
+						},
+						{
+							EntityId: fakeGPUInstances[1].Entity.EntityId,
+							Info:     fakeGPUInstances[1].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[2].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[2].Info,
+								},
+							},
+							ProfileName: "instance_profile_1",
+						},
+					},
+					MigEnabled: true,
+				},
+				1: {
+					DeviceInfo: fakeDevices[1],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[2].Entity.EntityId,
+							Info:     fakeGPUInstances[2].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[3].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[3].Info,
+								},
+							},
+							ProfileName: "instance_profile_2",
+						},
+					},
+					MigEnabled: true,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "GPU Count 2 with Hierarchy but EntitiesGetLatestValues error",
+			gOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(gomock.Any(), gomock.Any(),
+					gomock.Any()).Return([]dcgm.FieldValue_v2{}, fmt.Errorf("some error"))
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			wantErr: true,
+		},
+		/*
+			// TODO (roarora): Today, a different sequence out of GetGpuInstanceHierarchy causes an error in exporter
+			{
+				name: "GPU Count 2 with Hierarchy Different MIG Hierarchy Sequence",
+				gOpts: appconfig.DeviceOptions{
+					Flex: true,
+				},
+				mockCalls: func() {
+					mockHierarchy := dcgm.MigHierarchy_v2{
+						Count: 9,
+					}
+					mockHierarchy.EntityList[0] = fakeGPUs[0]
+					mockHierarchy.EntityList[1] = fakeGPUInstances[0]
+					mockHierarchy.EntityList[2] = fakeGPUInstances[1]
+					mockHierarchy.EntityList[3] = fakeGPUComputeInstances[0]
+					mockHierarchy.EntityList[4] = fakeGPUComputeInstances[1]
+					mockHierarchy.EntityList[5] = fakeGPUComputeInstances[2]
+					mockHierarchy.EntityList[6] = fakeGPUs[1]
+					mockHierarchy.EntityList[7] = fakeGPUInstances[2]
+					mockHierarchy.EntityList[8] = fakeGPUComputeInstances[3]
+
+					mockEntitiesInput := []dcgm.GroupEntityPair{
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+					}
+
+					mockEntitiesResult := []dcgm.FieldValue_v2{
+						{EntityId: mockEntitiesInput[0].EntityId},
+						{EntityId: mockEntitiesInput[1].EntityId},
+						{EntityId: mockEntitiesInput[2].EntityId},
+					}
+
+					mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+					mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil)
+					mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+						gomock.Any()).Return(mockEntitiesResult, nil)
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+					for i := 0; i < len(fakeDevices); i++ {
+						mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+					}
+				},
+				expectedOutput: map[uint]GPUInfo{
+					0: {
+						DeviceInfo: fakeDevices[0],
+						GPUInstances: []GPUInstanceInfo{
+							{
+								EntityId: fakeGPUInstances[0].Entity.EntityId,
+								Info:     fakeGPUInstances[0].Info,
+								ComputeInstances: []ComputeInstanceInfo{
+									{
+										EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[0].Info,
+									},
+									{
+										EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[1].Info,
+									},
+								},
+								ProfileName: "instance_profile_0",
+							},
+							{
+								EntityId: fakeGPUInstances[1].Entity.EntityId,
+								Info:     fakeGPUInstances[1].Info,
+								ComputeInstances: []ComputeInstanceInfo{
+									{
+										EntityId:     fakeGPUComputeInstances[2].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[2].Info,
+									},
+								},
+								ProfileName: "instance_profile_1",
+							},
+						},
+						MigEnabled: true,
+					},
+					1: {
+						DeviceInfo: fakeDevices[1],
+						GPUInstances: []GPUInstanceInfo{
+							{
+								EntityId: fakeGPUInstances[2].Entity.EntityId,
+								Info:     fakeGPUInstances[2].Info,
+								ComputeInstances: []ComputeInstanceInfo{
+									{
+										EntityId:     fakeGPUComputeInstances[3].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[3].Info,
+									},
+								},
+								ProfileName: "instance_profile_2",
+							},
+						},
+						MigEnabled: true,
+					},
+				},
+				wantErr: false,
+			},*/
+		{
+			name: "GPU Count 2 with Hierarchy and device options",
+			gOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1},
+				MinorRange: []int{1, 2, 3},
+			},
+			mockCalls: func() {
+				mockEntitiesInput := []dcgm.GroupEntityPair{
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+				}
+
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: mockEntitiesInput[0].EntityId},
+					{EntityId: mockEntitiesInput[1].EntityId},
+					{EntityId: mockEntitiesInput[2].EntityId},
+				}
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+					gomock.Any()).Return(mockEntitiesResult, nil)
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			expectedOutput: map[uint]GPUInfo{
+				0: {
+					DeviceInfo: fakeDevices[0],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[0].Entity.EntityId,
+							Info:     fakeGPUInstances[0].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[0].Info,
+								},
+								{
+									EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[1].Info,
+								},
+							},
+							ProfileName: "instance_profile_0",
+						},
+						{
+							EntityId: fakeGPUInstances[1].Entity.EntityId,
+							Info:     fakeGPUInstances[1].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[2].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[2].Info,
+								},
+							},
+							ProfileName: "instance_profile_1",
+						},
+					},
+					MigEnabled: true,
+				},
+				1: {
+					DeviceInfo: fakeDevices[1],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[2].Entity.EntityId,
+							Info:     fakeGPUInstances[2].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[3].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[3].Info,
+								},
+							},
+							ProfileName: "instance_profile_2",
+						},
+					},
+					MigEnabled: true,
+				},
+			},
+			wantErr: false,
+		},
+		/*
+			// TODO (roarora): Today, Specifying Major range does not remove extra GPUs
+			{
+				name: "GPU Count 2 with Hierarchy and device options with extra GPU discovery",
+				gOpts: appconfig.DeviceOptions{
+					Flex:       false,
+					MajorRange: []int{0},
+					MinorRange: []int{1, 2},
+				},
+				mockCalls: func() {
+					mockEntitiesInput := []dcgm.GroupEntityPair{
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+					}
+
+					mockEntitiesResult := []dcgm.FieldValue_v2{
+						{EntityId: mockEntitiesInput[0].EntityId},
+						{EntityId: mockEntitiesInput[1].EntityId},
+						{EntityId: mockEntitiesInput[2].EntityId},
+					}
+
+					mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+					mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+					mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+						gomock.Any()).Return(mockEntitiesResult, nil)
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+					for i := 0; i < len(fakeDevices); i++ {
+						mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+					}
+				},
+				expectedOutput: map[uint]GPUInfo{
+					0: {
+						DeviceInfo: fakeDevices[0],
+						GPUInstances: []GPUInstanceInfo{
+							{
+								EntityId: fakeGPUInstances[0].Entity.EntityId,
+								Info:     fakeGPUInstances[0].Info,
+								ComputeInstances: []ComputeInstanceInfo{
+									{
+										EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[0].Info,
+									},
+									{
+										EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[1].Info,
+									},
+								},
+								ProfileName: "instance_profile_0",
+							},
+							{
+								EntityId: fakeGPUInstances[1].Entity.EntityId,
+								Info:     fakeGPUInstances[1].Info,
+								ComputeInstances: []ComputeInstanceInfo{
+									{
+										EntityId:     fakeGPUComputeInstances[2].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[2].Info,
+									},
+								},
+								ProfileName: "instance_profile_1",
+							},
+						},
+						MigEnabled: true,
+					},
+				},
+				wantErr: false,
+			},
+			// TODO (roarora): Today, Specifying Minor range does not remove extra GPU Instances
+			{
+				name: "GPU Count 2 with Hierarchy and device options with extra GPU Instance discovery",
+				gOpts: appconfig.DeviceOptions{
+					Flex:       false,
+					MajorRange: []int{0, 1},
+					MinorRange: []int{1, 3},
+				},
+				mockCalls: func() {
+					mockEntitiesInput := []dcgm.GroupEntityPair{
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+						{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+					}
+
+					mockEntitiesResult := []dcgm.FieldValue_v2{
+						{EntityId: mockEntitiesInput[0].EntityId},
+						{EntityId: mockEntitiesInput[1].EntityId},
+						{EntityId: mockEntitiesInput[2].EntityId},
+					}
+
+					mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+					mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+					mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+						gomock.Any()).Return(mockEntitiesResult, nil)
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+					mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+					for i := 0; i < len(fakeDevices); i++ {
+						mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+					}
+				},
+				expectedOutput: map[uint]GPUInfo{
+					0: {
+						DeviceInfo: fakeDevices[0],
+						GPUInstances: []GPUInstanceInfo{
+							{
+								EntityId: fakeGPUInstances[0].Entity.EntityId,
+								Info:     fakeGPUInstances[0].Info,
+								ComputeInstances: []ComputeInstanceInfo{
+									{
+										EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[0].Info,
+									},
+									{
+										EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[1].Info,
+									},
+								},
+								ProfileName: "instance_profile_0",
+							},
+						},
+						MigEnabled: true,
+					},
+					1: {
+						DeviceInfo: fakeDevices[1],
+						GPUInstances: []GPUInstanceInfo{
+							{
+								EntityId: fakeGPUInstances[2].Entity.EntityId,
+								Info:     fakeGPUInstances[2].Info,
+								ComputeInstances: []ComputeInstanceInfo{
+									{
+										EntityId:     fakeGPUComputeInstances[3].Entity.EntityId,
+										InstanceInfo: fakeGPUComputeInstances[3].Info,
+									},
+								},
+								ProfileName: "instance_profile_2",
+							},
+						},
+						MigEnabled: true,
+					},
+				},
+				wantErr: false,
+			},
+		*/
+		{
+			name: "GPU Count 2 with Hierarchy and device options Major -1",
+			gOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{-1},
+				MinorRange: []int{1, 2, 3},
+			},
+			mockCalls: func() {
+				mockEntitiesInput := []dcgm.GroupEntityPair{
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+				}
+
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: mockEntitiesInput[0].EntityId},
+					{EntityId: mockEntitiesInput[1].EntityId},
+					{EntityId: mockEntitiesInput[2].EntityId},
+				}
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+					gomock.Any()).Return(mockEntitiesResult, nil)
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			expectedOutput: map[uint]GPUInfo{
+				0: {
+					DeviceInfo: fakeDevices[0],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[0].Entity.EntityId,
+							Info:     fakeGPUInstances[0].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[0].Info,
+								},
+								{
+									EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[1].Info,
+								},
+							},
+							ProfileName: "instance_profile_0",
+						},
+						{
+							EntityId: fakeGPUInstances[1].Entity.EntityId,
+							Info:     fakeGPUInstances[1].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[2].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[2].Info,
+								},
+							},
+							ProfileName: "instance_profile_1",
+						},
+					},
+					MigEnabled: true,
+				},
+				1: {
+					DeviceInfo: fakeDevices[1],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[2].Entity.EntityId,
+							Info:     fakeGPUInstances[2].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[3].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[3].Info,
+								},
+							},
+							ProfileName: "instance_profile_2",
+						},
+					},
+					MigEnabled: true,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "GPU Count 2 with Hierarchy and device options Major -1 and Minor -1",
+			gOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{-1},
+				MinorRange: []int{-1},
+			},
+			mockCalls: func() {
+				mockEntitiesInput := []dcgm.GroupEntityPair{
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+				}
+
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: mockEntitiesInput[0].EntityId},
+					{EntityId: mockEntitiesInput[1].EntityId},
+					{EntityId: mockEntitiesInput[2].EntityId},
+				}
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+					gomock.Any()).Return(mockEntitiesResult, nil)
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			expectedOutput: map[uint]GPUInfo{
+				0: {
+					DeviceInfo: fakeDevices[0],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[0].Entity.EntityId,
+							Info:     fakeGPUInstances[0].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[0].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[0].Info,
+								},
+								{
+									EntityId:     fakeGPUComputeInstances[1].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[1].Info,
+								},
+							},
+							ProfileName: "instance_profile_0",
+						},
+						{
+							EntityId: fakeGPUInstances[1].Entity.EntityId,
+							Info:     fakeGPUInstances[1].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[2].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[2].Info,
+								},
+							},
+							ProfileName: "instance_profile_1",
+						},
+					},
+					MigEnabled: true,
+				},
+				1: {
+					DeviceInfo: fakeDevices[1],
+					GPUInstances: []GPUInstanceInfo{
+						{
+							EntityId: fakeGPUInstances[2].Entity.EntityId,
+							Info:     fakeGPUInstances[2].Info,
+							ComputeInstances: []ComputeInstanceInfo{
+								{
+									EntityId:     fakeGPUComputeInstances[3].Entity.EntityId,
+									InstanceInfo: fakeGPUComputeInstances[3].Info,
+								},
+							},
+							ProfileName: "instance_profile_2",
+						},
+					},
+					MigEnabled: true,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "GPU Count 2 with Hierarchy and missing GPU",
+			gOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1, 2},
+				MinorRange: []int{1, 2, 3},
+			},
+			mockCalls: func() {
+				mockEntitiesInput := []dcgm.GroupEntityPair{
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+				}
+
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: mockEntitiesInput[0].EntityId},
+					{EntityId: mockEntitiesInput[1].EntityId},
+					{EntityId: mockEntitiesInput[2].EntityId},
+				}
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+					gomock.Any()).Return(mockEntitiesResult, nil)
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			wantErr: true,
+		},
+		{
+			name: "GPU Count 2 with Hierarchy and missing GPU Instances",
+			gOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1},
+				MinorRange: []int{1, 2, 3, 4},
+			},
+			mockCalls: func() {
+				mockEntitiesInput := []dcgm.GroupEntityPair{
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[0].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[1].Entity.EntityId},
+					{EntityGroupId: dcgm.FE_GPU_I, EntityId: fakeGPUInstances[2].Entity.EntityId},
+				}
+
+				mockEntitiesResult := []dcgm.FieldValue_v2{
+					{EntityId: mockEntitiesInput[0].EntityId},
+					{EntityId: mockEntitiesInput[1].EntityId},
+					{EntityId: mockEntitiesInput[2].EntityId},
+				}
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(len(fakeDevices)), nil)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(fakeMigHierarchy, nil)
+				mockDCGMProvider.EXPECT().EntitiesGetLatestValues(mockEntitiesInput, gomock.Any(),
+					gomock.Any()).Return(mockEntitiesResult, nil)
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[0]).Return("instance_profile_0")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[1]).Return("instance_profile_1")
+				mockDCGMProvider.EXPECT().Fv2_String(mockEntitiesResult[2]).Return("instance_profile_2")
+
+				for i := 0; i < len(fakeDevices); i++ {
+					mockDCGMProvider.EXPECT().GetDeviceInfo(uint(i)).Return(fakeDevices[i], nil)
+				}
+			},
+			wantErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.mockCalls()
+
+			if !tt.wantErr {
+				deviceInfo := Info{}
+				err := deviceInfo.initializeGPUInfo(tt.gOpts, false)
+				assert.NoError(t, err, "Error not expected")
+				assert.Equal(t, len(tt.expectedOutput), int(deviceInfo.gpuCount), "GPU length mismatch")
+
+				for i := 0; i < int(deviceInfo.gpuCount); i++ {
+					actualGPU := deviceInfo.gpus[i]
+					expectedGPU := tt.expectedOutput[actualGPU.DeviceInfo.GPU]
+
+					assert.Equal(t, expectedGPU.DeviceInfo, actualGPU.DeviceInfo,
+						"GPU device info mismatch")
+
+					assert.Equal(t, expectedGPU.MigEnabled, actualGPU.MigEnabled,
+						"MIG info mismatch")
+
+					assert.Equal(t, len(expectedGPU.GPUInstances), len(actualGPU.GPUInstances),
+						"GPU Instances length mismatch")
+
+					// Ensure each GPU instance and its compute instances match
+					for _, expectedInstance := range expectedGPU.GPUInstances {
+						instanceExist := slices.ContainsFunc(actualGPU.GPUInstances,
+							func(actualInstance GPUInstanceInfo) bool {
+								return expectedInstance.Info == actualInstance.Info &&
+									expectedInstance.EntityId == actualInstance.EntityId &&
+									slices.Equal(expectedInstance.ComputeInstances, actualInstance.ComputeInstances)
+							})
+
+						assert.True(t, instanceExist, "Expected instance %+v not found", expectedInstance)
+					}
+				}
+			} else {
+				deviceInfo := Info{}
+				err := deviceInfo.initializeGPUInfo(tt.gOpts, false)
+				assert.Error(t, err, "Error expected")
+			}
+		})
+	}
+}
+
+// TestInitializeCPUInfo drives Info.initializeCPUInfo with mocked CPU hierarchies and checks,
+// for every CPU entity that is kept, the exact set of core indices retained for the device options.
+func TestInitializeCPUInfo(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+	// Swap the dcgmprovider client for the mock; the original is restored when the test returns.
+	realDCGM := dcgmprovider.Client()
+	defer dcgmprovider.SetClient(realDCGM)
+	dcgmprovider.SetClient(mockDCGMProvider)
+
+	tests := []struct {
+		name                  string
+		cOpts                 appconfig.DeviceOptions
+		mockCalls             func()
+		expectedCPUCoreOutput map[uint][]int
+		wantErr               bool
+	}{
+		{
+			name: "CPU Hierarchy with 0 CPUs",
+			cOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 0,
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			wantErr: true,
+		},
+		{
+			name: "CPU Hierarchy with 1 CPU",
+			cOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 1,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{1, 2, 8}, // expected below as cores 0, 65, 131: bit b of word w => 64*w+b
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedCPUCoreOutput: map[uint][]int{0: {0, 65, 131}},
+			wantErr:               false,
+		},
+		{
+			name: "CPU Hierarchy with 1 CPUs but GetCpuHierarchy error",
+			cOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 1,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{1, 2, 8},
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name: "CPU Hierarchy with 2 CPUs",
+			cOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 2,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{1, 2, 8},
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{8, 16, 32},
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedCPUCoreOutput: map[uint][]int{0: {0, 65, 131}, 1: {3, 68, 133}},
+			wantErr:               false,
+		},
+		{
+			name: "CPU Hierarchy with multiple CPUs and device options",
+			cOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1, 2, 3, 4},
+				MinorRange: []int{1, 2, 4, 8, 16, 32, 64, 128, 256},
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 5,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{0b10110}, // bits 1, 2 and 4 of word 0
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{0x100010100}, // bits 8, 16 and 32 of word 0
+						},
+						{
+							CpuId:      2,
+							OwnedCores: []uint64{0x0, 0x1, 0x1, 0x0}, // bit 0 of words 1 and 2: cores 64 and 128
+						},
+						{
+							CpuId:      3,
+							OwnedCores: []uint64{0x0, 0x0, 0x0, 0x0, 0x1}, // bit 0 of word 4: core 256
+						},
+						{
+							CpuId: 4,
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedCPUCoreOutput: map[uint][]int{0: {1, 2, 4}, 1: {8, 16, 32}, 2: {64, 128}, 3: {256}, 4: {}},
+			wantErr:               false,
+		},
+		{
+			name: "CPU Hierarchy with multiple CPUs and device options with extra CPU discovery",
+			cOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1, 2},
+				MinorRange: []int{1, 2, 4, 8, 16, 32, 64, 128},
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 5,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{0b10110},
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{0x100010100},
+						},
+						{
+							CpuId:      2,
+							OwnedCores: []uint64{0x0, 0x1, 0x1},
+						},
+						{
+							CpuId:      3,
+							OwnedCores: []uint64{0x0, 0x0, 0x0, 0x0, 0x1},
+						},
+						{
+							CpuId: 4,
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedCPUCoreOutput: map[uint][]int{0: {1, 2, 4}, 1: {8, 16, 32}, 2: {64, 128}},
+			wantErr:               false,
+		},
+		{
+			name: "CPU Hierarchy with multiple CPUs and device options with extra CPU core discovery",
+			cOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1, 2},
+				MinorRange: []int{1, 2, 4, 8, 16, 32, 64},
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 5,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{0b10110},
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{0x100010100},
+						},
+						{
+							CpuId:      2,
+							OwnedCores: []uint64{0x0, 0x1, 0x1, 0x1},
+						},
+						{
+							CpuId:      3,
+							OwnedCores: []uint64{0x0, 0x0, 0x0, 0x0, 0x1},
+						},
+						{
+							CpuId: 4,
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedCPUCoreOutput: map[uint][]int{0: {1, 2, 4}, 1: {8, 16, 32}, 2: {64}},
+			wantErr:               false,
+		},
+		{
+			name: "CPU Hierarchy with multiple CPUs and device options Major -1",
+			cOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{-1},
+				MinorRange: []int{1, 2, 4, 8, 16, 32, 64, 128, 256},
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 5,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{0b10110},
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{0x100010100},
+						},
+						{
+							CpuId:      2,
+							OwnedCores: []uint64{0x0, 0x1, 0x1, 0x0},
+						},
+						{
+							CpuId:      3,
+							OwnedCores: []uint64{0x0, 0x0, 0x0, 0x0, 0x1},
+						},
+						{
+							CpuId: 4,
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedCPUCoreOutput: map[uint][]int{0: {1, 2, 4}, 1: {8, 16, 32}, 2: {64, 128}, 3: {256}, 4: {}},
+			wantErr:               false,
+		},
+		{
+			name: "CPU Hierarchy with multiple CPUs and device options Major -1 and Minor -1",
+			cOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{-1},
+				MinorRange: []int{-1},
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 5,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{0b10110},
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{0x100010100},
+						},
+						{
+							CpuId:      2,
+							OwnedCores: []uint64{0x0, 0x1, 0x1, 0x0},
+						},
+						{
+							CpuId:      3,
+							OwnedCores: []uint64{0x0, 0x0, 0x0, 0x0, 0x1},
+						},
+						{
+							CpuId: 4,
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			expectedCPUCoreOutput: map[uint][]int{0: {1, 2, 4}, 1: {8, 16, 32}, 2: {64, 128}, 3: {256}, 4: {}},
+			wantErr:               false,
+		},
+		{
+			name: "CPU Hierarchy with multiple CPUs and missing CPU",
+			cOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1, 2, 3, 4, 5}, // CPU 5 is absent from the mocked hierarchy
+				MinorRange: []int{1, 2, 4, 8, 16, 32, 64, 128, 256},
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 5,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{0b10110},
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{0x100010100},
+						},
+						{
+							CpuId:      2,
+							OwnedCores: []uint64{0x0, 0x1, 0x1, 0x0},
+						},
+						{
+							CpuId:      3,
+							OwnedCores: []uint64{0x0, 0x0, 0x0, 0x0, 0x1},
+						},
+						{
+							CpuId: 4,
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			wantErr: true,
+		},
+		{
+			name: "CPU Hierarchy with multiple CPUs and missing CPU cores",
+			cOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{0, 1, 2, 3, 4},
+				MinorRange: []int{1, 2, 4, 8, 16, 32, 64, 128, 256, 1024}, // no mocked CPU owns core 1024
+			},
+			mockCalls: func() {
+				mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+					NumCpus: 5,
+					Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+						{
+							CpuId:      0,
+							OwnedCores: []uint64{0b10110},
+						},
+						{
+							CpuId:      1,
+							OwnedCores: []uint64{0x100010100},
+						},
+						{
+							CpuId:      2,
+							OwnedCores: []uint64{0x0, 0x1, 0x1, 0x0},
+						},
+						{
+							CpuId:      3,
+							OwnedCores: []uint64{0x0, 0x0, 0x0, 0x0, 0x1},
+						},
+						{
+							CpuId: 4,
+						},
+					},
+				}
+				mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil)
+			},
+			wantErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.mockCalls()
+
+			// Act once up front: the call under test is the same whether or not an error is expected.
+			deviceInfo := Info{}
+			err := deviceInfo.initializeCPUInfo(tt.cOpts)
+			if tt.wantErr {
+				assert.Error(t, err, "Error expected")
+				return
+			}
+			assert.NoError(t, err, "Error not expected")
+			assert.Equal(t, len(tt.expectedCPUCoreOutput), len(deviceInfo.cpus), "CPU length mismatch")
+
+			for _, cpu := range deviceInfo.cpus {
+				expectedCores, known := tt.expectedCPUCoreOutput[cpu.EntityId]
+				assert.True(t, known, "Unexpected CPU %d", cpu.EntityId)
+				// Compare order-insensitively but exactly, so duplicated or substituted cores are caught.
+				actualCores := make([]int, 0, len(cpu.Cores))
+				for _, core := range cpu.Cores {
+					actualCores = append(actualCores, int(core))
+				}
+				assert.ElementsMatch(t, expectedCores, actualCores, "Core mismatch")
+			}
+		})
+	}
+}
+
+func TestInitializeNvSwitchInfo(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM)
+	}()
+	dcgmprovider.SetClient(mockDCGMProvider)
+
+	tests := []struct {
+		name                    string
+		sOpts                   appconfig.DeviceOptions
+		switchOutput            []uint
+		linkStatusOutput        []dcgm.NvLinkStatus
+		mockCalls               func([]uint, []dcgm.NvLinkStatus)
+		expectedSwitchToLinkMap map[uint][]uint
+		wantErr                 bool
+	}{
+		{
+			name: "Zero Switches",
+			sOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			switchOutput: []uint{},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+			},
+			wantErr: true,
+		},
+		{
+			name: "Single switch Single Link",
+			sOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			switchOutput: []uint{1},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1}},
+			wantErr:                 false,
+		},
+		{
+			name: "Single switch Multiple Links",
+			sOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			switchOutput: []uint{1},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1, 2, 3}},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links",
+			sOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			switchOutput: []uint{1, 2, 3},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1, 2}, 2: {3}, 3: {}},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links with device options",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{1, 2, 3, 4, 5},
+				MinorRange: []int{1, 2, 3, 4, 5, 6, 7, 8, 9},
+			},
+			switchOutput: []uint{1, 2, 3, 4, 5},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1, 2, 3}, 2: {4, 5, 6}, 3: {7, 8}, 4: {9}, 5: {}},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links with device options with extra Switch discovery",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{1, 2, 3},
+				MinorRange: []int{1, 2, 3, 4, 5, 6, 7, 8},
+			},
+			switchOutput: []uint{1, 2, 3, 4, 5},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1, 2, 3}, 2: {4, 5, 6}, 3: {7, 8}},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links with device options with extra Link discovery",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{1, 2, 3},
+				MinorRange: []int{1, 2, 3, 4, 5, 6, 7},
+			},
+			switchOutput: []uint{1, 2, 3, 4},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1, 2, 3}, 2: {4, 5, 6}, 3: {7}},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links and device options Major -1",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{-1},
+				MinorRange: []int{1, 2, 3, 4, 5, 6, 7, 8, 9},
+			},
+			switchOutput: []uint{1, 2, 3, 4, 5},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1, 2, 3}, 2: {4, 5, 6}, 3: {7, 8}, 4: {9}, 5: {}},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links and device options Major empty",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{},
+				MinorRange: []int{-1},
+			},
+			switchOutput: []uint{1, 2, 3, 4, 5},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links and device options Major -1 and Minor -1",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{-1},
+				MinorRange: []int{-1},
+			},
+			switchOutput: []uint{1, 2, 3, 4, 5},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			expectedSwitchToLinkMap: map[uint][]uint{1: {1, 2, 3}, 2: {4, 5, 6}, 3: {7, 8}, 4: {9}, 5: {}},
+			wantErr:                 false,
+		},
+		{
+			name: "Multiple switch Multiple Links with missing switches",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{1, 2, 3, 4, 5, 6},
+				MinorRange: []int{1, 2, 3, 4, 5, 6, 7, 8, 9},
+			},
+			switchOutput: []uint{1, 2, 3, 4},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			wantErr: true,
+		},
+		{
+			name: "Multiple switch Multiple Links with missing links",
+			sOpts: appconfig.DeviceOptions{
+				Flex:       false,
+				MajorRange: []int{1, 2, 3, 4},
+				MinorRange: []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13},
+			},
+			switchOutput: []uint{1, 2, 3, 4},
+			linkStatusOutput: []dcgm.NvLinkStatus{
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(1)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(2)},
+				{ParentId: uint(1), ParentType: dcgm.FE_SWITCH, Index: uint(3)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(4)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(5)},
+				{ParentId: uint(2), ParentType: dcgm.FE_SWITCH, Index: uint(6)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(7)},
+				{ParentId: uint(3), ParentType: dcgm.FE_SWITCH, Index: uint(8)},
+				{ParentId: uint(4), ParentType: dcgm.FE_SWITCH, Index: uint(9)},
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, nil)
+			},
+			wantErr: true,
+		},
+		{
+			name: "Error GetEntityGroupEntities Response",
+			sOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name: "Error GetNvLinkLinkStatus Response",
+			sOpts: appconfig.DeviceOptions{
+				Flex: true,
+			},
+			switchOutput: []uint{1},
+			mockCalls: func(switchOutput []uint, linkStatusOutput []dcgm.NvLinkStatus) {
+				mockDCGMProvider.EXPECT().GetEntityGroupEntities(gomock.Any()).Return(
+					switchOutput, nil)
+				mockDCGMProvider.EXPECT().GetNvLinkLinkStatus().Return(linkStatusOutput, fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.mockCalls(tt.switchOutput, tt.linkStatusOutput)
+
+			if !tt.wantErr {
+				deviceInfo := Info{}
+				err := deviceInfo.initializeNvSwitchInfo(tt.sOpts)
+				assert.NoError(t, err, "Error not expected")
+				assert.Equal(t, len(tt.expectedSwitchToLinkMap), len(deviceInfo.switches), "Switch length mismatch")
+
+				for _, swInfo := range deviceInfo.switches {
+					assert.Equal(t, len(tt.expectedSwitchToLinkMap[swInfo.EntityId]), len(swInfo.NvLinks),
+						"NV Link length mismatch")
+
+					for _, nvLink := range swInfo.NvLinks {
+						assert.True(t, slices.Contains(tt.expectedSwitchToLinkMap[swInfo.EntityId], nvLink.Index),
+							"NV Link Index mismatch")
+					}
+				}
+			} else {
+				deviceInfo := Info{}
+				err := deviceInfo.initializeNvSwitchInfo(tt.sOpts)
+				assert.Error(t, err, "Error expected")
+			}
+		})
+	}
+}
+
+func TestVerifyDevicePresence(t *testing.T) { // exercises verifyDevicePresence on the spoofed GPU inventory
+	deviceInfo := SpoofGPUDeviceInfo()
+	deviceInfo.gOpt.Flex = true // flex mode: no error expected
+	err := deviceInfo.verifyDevicePresence()
+	require.NoError(t, err, "Expected to have no error, but found %s", err)
+
+	deviceInfo.gOpt.Flex = false
+	deviceInfo.gOpt.MajorRange = append(deviceInfo.gOpt.MajorRange, -1) // append -1 to both ranges: no error expected
+	deviceInfo.gOpt.MinorRange = append(deviceInfo.gOpt.MinorRange, -1)
+	err = deviceInfo.verifyDevicePresence()
+	require.NoError(t, err, "Expected to have no error, but found %s", err)
+
+	deviceInfo.gOpt.MinorRange[0] = 10 // this GPU instance doesn't exist
+	err = deviceInfo.verifyDevicePresence()
+	require.Error(t, err, "Expected to have an error for a non-existent GPU instance, but none found")
+
+	deviceInfo.gOpt.MajorRange[0] = 10 // this GPU doesn't exist
+	deviceInfo.gOpt.MinorRange[0] = -1
+	err = deviceInfo.verifyDevicePresence()
+	require.Error(t, err, "Expected to have an error for a non-existent GPU, but none found")
+
+	// Add gpus and instances that exist
+	deviceInfo.gOpt.MajorRange[0] = 0
+	deviceInfo.gOpt.MajorRange = append(deviceInfo.gOpt.MajorRange, 1)
+	deviceInfo.gOpt.MinorRange[0] = 0
+	deviceInfo.gOpt.MinorRange = append(deviceInfo.gOpt.MinorRange, 14)
+	err = deviceInfo.verifyDevicePresence()
+	require.NoError(t, err, "Expected to have no error, but found %s", err)
+}
+
+func TestIsSwitchWatched(t *testing.T) { // table-driven check of Info.IsSwitchWatched against sOpt settings
+	cases := []struct {
+		name       string
+		expected   bool
+		switchID   uint
+		deviceInfo Info
+	}{
+		{
+			name:     "Monitor all devices",
+			expected: true,
+			switchID: 1,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					Flex: true,
+				},
+			},
+		},
+		{
+			name:     "MajorRange empty",
+			expected: false,
+			switchID: 2,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					MajorRange: []int{},
+				},
+			},
+		},
+		{
+			name:     "MajorRange contains -1 to watch all devices",
+			expected: true,
+			switchID: 3,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+				},
+			},
+		},
+		{
+			name:     "SwitchID in MajorRange",
+			expected: true,
+			switchID: 4,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					MajorRange: []int{3, 4, 5},
+				},
+			},
+		},
+		{
+			name:     "SwitchID not in MajorRange",
+			expected: false,
+			switchID: 5,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					MajorRange: []int{3, 4, 6},
+				},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			watched := tc.deviceInfo.IsSwitchWatched(tc.switchID)
+			assert.Equal(t, tc.expected, watched)
+		})
+	}
+}
+
+// TestIsLinkWatched verifies IsLinkWatched for flex mode, an empty
+// configuration, and combinations of MajorRange/MinorRange settings against
+// the links registered on a switch.
+func TestIsLinkWatched(t *testing.T) {
+	cases := []struct {
+		name       string
+		expected   bool
+		switchID   uint
+		linkIndex  uint
+		deviceInfo Info
+	}{
+		{
+			name:       "Monitor all devices",
+			expected:   true,
+			linkIndex:  1,
+			deviceInfo: Info{sOpt: appconfig.DeviceOptions{Flex: true}},
+		},
+		{
+			name:       "No watched devices",
+			expected:   false,
+			linkIndex:  1,
+			deviceInfo: Info{},
+		},
+		{
+			// NOTE(review): switchID is left at its zero value here while the only
+			// registered switch has EntityId 1, so the lookup may fail before
+			// MinorRange is ever consulted; confirm this is the intended scenario.
+			name:      "Watched link with empty MinorRange",
+			expected:  false,
+			linkIndex: 2,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+				},
+				switches: []SwitchInfo{
+					{
+						EntityId: 1,
+						NvLinks:  []dcgm.NvLinkStatus{{Index: 2}},
+					},
+				},
+			},
+		},
+		{
+			name:      "MinorRange contains -1 to watch all links",
+			expected:  true,
+			switchID:  1,
+			linkIndex: 3,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+					MinorRange: []int{-1},
+				},
+				switches: []SwitchInfo{
+					{
+						EntityId: 1,
+						NvLinks:  []dcgm.NvLinkStatus{{Index: 3}},
+					},
+				},
+			},
+		},
+		{
+			name:      "The link not in the watched switch",
+			expected:  false,
+			switchID:  1,
+			linkIndex: 4,
+			deviceInfo: Info{
+				sOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+					MinorRange: []int{1, 2, 3},
+				},
+				switches: []SwitchInfo{
+					{
+						EntityId: 1,
+						NvLinks:  []dcgm.NvLinkStatus{{Index: 4}},
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			watched := tc.deviceInfo.IsLinkWatched(tc.linkIndex, tc.switchID)
+			assert.Equal(t, tc.expected, watched)
+		})
+	}
+}
+
+func TestIsCPUWatched(t *testing.T) { // table-driven check of Info.IsCPUWatched against cOpt settings
+	cases := []struct {
+		name       string
+		expected   bool
+		cpuID      uint
+		deviceInfo Info
+	}{
+		{
+			name:     "Monitor all devices",
+			expected: true,
+			cpuID:    1,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{Flex: true},
+				cpus: []CPUInfo{
+					{
+						EntityId: 1,
+					},
+				},
+			},
+		},
+		{
+			name:     "MajorRange Contains -1",
+			expected: true,
+			cpuID:    2,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{MajorRange: []int{-1}},
+				cpus: []CPUInfo{
+					{
+						EntityId: 2,
+					},
+				},
+			},
+		},
+		{
+			name:     "CPU ID in MajorRange",
+			expected: true,
+			cpuID:    3,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{MajorRange: []int{1, 2, 3}},
+				cpus: []CPUInfo{
+					{
+						EntityId: 3,
+					},
+				},
+			},
+		},
+		{
+			name:     "CPU ID Not in MajorRange",
+			expected: false,
+			cpuID:    4,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{MajorRange: []int{1, 2, 3}},
+				cpus: []CPUInfo{
+					{
+						EntityId: 4,
+					},
+				},
+			},
+		},
+		{
+			name:     "MajorRange Empty",
+			expected: false,
+			cpuID:    5,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{MajorRange: []int{}},
+				cpus: []CPUInfo{
+					{
+						EntityId: 5,
+					},
+				},
+			},
+		},
+		{
+			name:     "CPU not found",
+			expected: false,
+			cpuID:    6,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{MajorRange: []int{}},
+				cpus: []CPUInfo{
+					{
+						EntityId: 5,
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.expected, tc.deviceInfo.IsCPUWatched(tc.cpuID))
+		})
+	}
+}
+
+func TestIsCoreWatched(t *testing.T) { // table-driven check of Info.IsCoreWatched against cOpt settings
+	cases := []struct {
+		name       string
+		expected   bool
+		coreID     uint
+		cpuID      uint
+		deviceInfo Info
+	}{
+		{
+			name:     "Monitor all devices",
+			expected: true,
+			coreID:   1,
+			cpuID:    1,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{Flex: true},
+			},
+		},
+		{
+			name:     "Core in MinorRange",
+			expected: true,
+			coreID:   2,
+			cpuID:    1,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+					MinorRange: []int{1, 2, 3},
+				},
+				cpus: []CPUInfo{{EntityId: 1}},
+			},
+		},
+		{
+			name:     "Core Not in MinorRange",
+			expected: false,
+			coreID:   4,
+			cpuID:    1,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+					MinorRange: []int{1, 2, 3},
+				},
+				cpus: []CPUInfo{{EntityId: 1}},
+			},
+		},
+		{
+			name:     "MinorRange Contains -1",
+			expected: true,
+			coreID:   5,
+			cpuID:    1,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+					MinorRange: []int{-1},
+				},
+				cpus: []CPUInfo{{EntityId: 1}},
+			},
+		},
+		{
+			name:     "CPU Not Found",
+			expected: false,
+			coreID:   1,
+			cpuID:    2,
+			deviceInfo: Info{
+				cOpt: appconfig.DeviceOptions{
+					MajorRange: []int{-1},
+					MinorRange: []int{1, 2, 3},
+				},
+				cpus: []CPUInfo{{EntityId: 1}},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.expected, tc.deviceInfo.IsCoreWatched(tc.coreID, tc.cpuID))
+		})
+	}
+}
+
+// TestSetMigProfileNames initializes the DCGM provider, then feeds field values
+// to setMigProfileNames and checks whether each case reports an error.
+func TestSetMigProfileNames(t *testing.T) {
+	dcgmprovider.Initialize(&appconfig.Config{UseRemoteHE: false})
+	defer dcgmprovider.Client().Cleanup()
+
+	cases := []struct {
+		name       string
+		succeeds   bool
+		deviceInfo Info
+		values     []dcgm.FieldValue_v2
+	}{
+		{
+			name:     "MIG profile found",
+			succeeds: true,
+			deviceInfo: Info{
+				gpuCount: 1,
+				gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 1},
+						},
+					},
+				},
+			},
+			values: []dcgm.FieldValue_v2{
+				{
+					EntityId:    1,
+					FieldType:   dcgm.DCGM_FT_STRING,
+					StringValue: &fakeProfileName,
+				},
+			},
+		},
+		{
+			name:     "Multiple MIG gpus",
+			succeeds: true,
+			deviceInfo: Info{
+				gpuCount: 3,
+				gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 1},
+						},
+					},
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 2},
+						},
+					},
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 3},
+						},
+					},
+				},
+			},
+			values: []dcgm.FieldValue_v2{
+				{
+					EntityId:    2,
+					FieldType:   dcgm.DCGM_FT_STRING,
+					StringValue: &fakeProfileName,
+				},
+			},
+		},
+		{
+			name:     "Multiple MIG gpus and Values",
+			succeeds: true,
+			deviceInfo: Info{
+				gpuCount: 3,
+				gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 1},
+						},
+					},
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 2},
+						},
+					},
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 3},
+						},
+					},
+				},
+			},
+			values: []dcgm.FieldValue_v2{
+				{
+					EntityId:    2,
+					FieldType:   dcgm.DCGM_FT_STRING,
+					StringValue: &fakeProfileName,
+				},
+				{
+					EntityId:    3,
+					FieldType:   dcgm.DCGM_FT_STRING,
+					StringValue: &fakeProfileName,
+				},
+			},
+		},
+		{
+			name:     "MIG profile not found",
+			succeeds: false,
+			deviceInfo: Info{
+				gpuCount: 1,
+				gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 1},
+						},
+					},
+				},
+			},
+			values: []dcgm.FieldValue_v2{
+				{
+					EntityId:    2,
+					FieldType:   dcgm.DCGM_FT_STRING,
+					StringValue: &fakeProfileName,
+				},
+			},
+		},
+		{
+			name:     "MIG profile not string type",
+			succeeds: true,
+			deviceInfo: Info{
+				gpuCount: 1,
+				gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+					{
+						GPUInstances: []GPUInstanceInfo{
+							{EntityId: 1},
+						},
+					},
+				},
+			},
+			values: []dcgm.FieldValue_v2{
+				{
+					EntityId:    1,
+					FieldType:   dcgm.DCGM_FT_BINARY,
+					StringValue: &fakeProfileName,
+					Value:       [4096]byte{'1', '2', '3'},
+				},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			err := tc.deviceInfo.setMigProfileNames(tc.values)
+			if !tc.succeeds {
+				assert.Error(t, err, "Expected an error.")
+				return
+			}
+			assert.NoError(t, err, "Expected no error.")
+		})
+	}
+}
+
+func Test_getCoreArray(t *testing.T) { // table-driven check of getCoreArray's bitmask-to-core-index expansion
+	tests := []struct {
+		name    string
+		bitmask []uint64
+		want    []uint
+	}{
+		{
+			name:    "Empty bitmask",
+			bitmask: []uint64{},
+			want:    []uint{},
+		},
+		{
+			name:    "Single value - single core",
+			bitmask: []uint64{1},
+			want:    []uint{0},
+		},
+		{
+			name:    "Multiple values - multiple cores",
+			bitmask: []uint64{1, 2, 8},
+			want:    []uint{0, 65, 131}, // bit b of word i is expected as core 64*i+b
+		},
+		{
+			name:    "Single uint64 value - multiple cores",
+			bitmask: []uint64{0b1101},
+			want:    []uint{0, 2, 3},
+		},
+		{
+			name:    "Multiple uint64 values - multiple cores",
+			bitmask: []uint64{0b1101, 0b0111},
+			want:    []uint{0, 2, 3, 64, 65, 66},
+		},
+		{
+			name:    "Large bitmask",
+			bitmask: []uint64{0b1101, 0b1010, 0b1111000011110000},
+			want:    []uint{0, 2, 3, 65, 67, 132, 133, 134, 135, 140, 141, 142, 143},
+		},
+		{
+			name: "Overflow uint64 values",
+			bitmask: []uint64{ // 18 words: above the 16-word limit checked below, so a panic is expected
+				0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001,
+				0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001, 0b0001,
+			},
+			want: []uint{0, 64, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 1088},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if len(tt.bitmask) > 16 { // oversized masks only assert the panic; want is not compared
+				assert.Panics(t, func() { getCoreArray(tt.bitmask) }, "Expected getCoreArray to panic")
+			} else {
+				result := getCoreArray(tt.bitmask)
+				assert.True(t, slices.Equal(tt.want, result), "getCoreArray results not equal: want %v, got %v", tt.want, result)
+			}
+		})
+	}
+}
+
+func TestGetGPUInstanceIdentifier(t *testing.T) { // expects "<GPU>-<instance>" for a known UUID, "" otherwise
+	devices := SpoofGPUDevices()
+	instanceID := 3
+
+	type args struct {
+		deviceInfo    Provider
+		gpuUUID       string
+		gpuInstanceID uint
+	}
+	cases := []struct {
+		name           string
+		args           args
+		expectedOutput string
+	}{
+		{
+			name: "GPU UUID found",
+			args: args{
+				deviceInfo: &Info{
+					gpuCount: 2,
+					gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+						{
+							DeviceInfo: devices[0],
+						},
+						{
+							DeviceInfo: devices[1],
+						},
+					},
+				},
+				gpuUUID:       devices[1].UUID,
+				gpuInstanceID: uint(instanceID),
+			},
+			expectedOutput: fmt.Sprintf("%d-%d", devices[1].GPU, instanceID),
+		},
+		{
+			name: "GPU UUID not found",
+			args: args{
+				deviceInfo: &Info{
+					gpuCount: 2,
+					gpus: [dcgm.MAX_NUM_DEVICES]GPUInfo{
+						{
+							DeviceInfo: devices[0],
+						},
+						{
+							DeviceInfo: devices[1],
+						},
+					},
+				},
+				gpuUUID: "random",
+			},
+			expectedOutput: "",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := GetGPUInstanceIdentifier(tc.args.deviceInfo, tc.args.gpuUUID, tc.args.gpuInstanceID)
+			assert.Equalf(t, tc.expectedOutput, got, "GPU Instance Identifier mismatch")
+		})
+	}
+}
diff --git a/internal/pkg/deviceinfo/testutils.go b/internal/pkg/deviceinfo/testutils.go
new file mode 100644
index 00000000..4dad7540
--- /dev/null
+++ b/internal/pkg/deviceinfo/testutils.go
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package deviceinfo
+
+import (
+	"math"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+)
+
+// SpoofGPUDevices returns two fake GPU devices (GPU 0 and GPU 1) with fixed
+// UUIDs and model names, for use as test fixtures.
+func SpoofGPUDevices() []dcgm.Device {
+	return []dcgm.Device{
+		{
+			GPU:  0,
+			UUID: "000000000000",
+			Identifiers: dcgm.DeviceIdentifiers{
+				Model: "NVIDIA T400 4GB",
+			},
+		},
+		{
+			GPU:  1,
+			UUID: "11111111111",
+			Identifiers: dcgm.DeviceIdentifiers{
+				Model: "NVIDIA A100 40GB",
+			},
+		},
+	}
+}
+
+// SpoofMigHierarchy builds a fake MIG hierarchy for tests: two GPUs, three GPU
+// instances, and four compute instances. EntityList holds them parent-first,
+// each parent immediately followed by its children.
+//
+// Alongside the hierarchy it returns the same entities grouped by level, in
+// this order: GPUs, GPU instances, compute instances.
+func SpoofMigHierarchy() (dcgm.MigHierarchy_v2, []dcgm.MigHierarchyInfo_v2, []dcgm.MigHierarchyInfo_v2,
+	[]dcgm.MigHierarchyInfo_v2,
+) {
+	hierarchy := dcgm.MigHierarchy_v2{
+		Version: 2,
+		Count:   9,
+	}
+
+	// GPU 1 (entity 0), top of the first subtree
+	gpu1 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: 0},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_NONE, EntityId: math.MaxUint},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU1",
+			NvmlGpuIndex:          0,
+			NvmlInstanceId:        math.MaxUint,
+			NvmlComputeInstanceId: math.MaxUint,
+			NvmlMigProfileId:      math.MaxUint,
+			NvmlProfileSlices:     0,
+		},
+	}
+
+	// GPU 2 (entity 1), top of the second subtree
+	gpu2 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: 1},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_NONE, EntityId: math.MaxUint},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU2",
+			NvmlGpuIndex:          1,
+			NvmlInstanceId:        math.MaxUint,
+			NvmlComputeInstanceId: math.MaxUint,
+			NvmlMigProfileId:      math.MaxUint,
+			NvmlProfileSlices:     0,
+		},
+	}
+
+	// GPU 1 -> GPU instance 1
+	gpu1Inst1 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: 1},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: 0},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU1_I1",
+			NvmlGpuIndex:          0,
+			NvmlInstanceId:        0,
+			NvmlComputeInstanceId: math.MaxUint,
+			NvmlMigProfileId:      1,
+			NvmlProfileSlices:     4,
+		},
+	}
+
+	// GPU 1 -> GPU instance 2
+	gpu1Inst2 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: 2},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: 0},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU1_I2",
+			NvmlGpuIndex:          0,
+			NvmlInstanceId:        1,
+			NvmlComputeInstanceId: math.MaxUint,
+			NvmlMigProfileId:      2,
+			NvmlProfileSlices:     2,
+		},
+	}
+
+	// GPU 1 -> GPU instance 1 -> compute instance 1
+	gpu1Inst1CI1 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_CI, EntityId: 1},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: 1},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU1_I1_CI1",
+			NvmlGpuIndex:          0,
+			NvmlInstanceId:        0,
+			NvmlComputeInstanceId: 0,
+			NvmlMigProfileId:      3,
+			NvmlProfileSlices:     1,
+		},
+	}
+
+	// GPU 1 -> GPU instance 1 -> compute instance 2
+	gpu1Inst1CI2 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_CI, EntityId: 2},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: 1},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU1_I1_CI2",
+			NvmlGpuIndex:          0,
+			NvmlInstanceId:        0,
+			NvmlComputeInstanceId: 1,
+			NvmlMigProfileId:      4,
+			NvmlProfileSlices:     1,
+		},
+	}
+
+	// GPU 1 -> GPU instance 2 -> compute instance 1
+	gpu1Inst2CI1 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_CI, EntityId: 3},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: 2},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU1_I2_CI1",
+			NvmlGpuIndex:          0,
+			NvmlInstanceId:        1,
+			NvmlComputeInstanceId: 2,
+			NvmlMigProfileId:      5,
+			NvmlProfileSlices:     1,
+		},
+	}
+
+	// GPU 2 -> GPU instance 1
+	gpu2Inst1 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: 3},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: 1},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU2_I1",
+			NvmlGpuIndex:          1,
+			NvmlInstanceId:        0,
+			NvmlComputeInstanceId: math.MaxUint,
+			NvmlMigProfileId:      6,
+			NvmlProfileSlices:     4,
+		},
+	}
+
+	// GPU 2 -> GPU instance 1 -> compute instance 1
+	gpu2Inst1CI1 := dcgm.MigHierarchyInfo_v2{
+		Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_CI, EntityId: 4},
+		Parent: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: 3},
+		Info: dcgm.MigEntityInfo{
+			GpuUuid:               "FAKE_GPU2_I1_CI1",
+			NvmlGpuIndex:          1,
+			NvmlInstanceId:        0,
+			NvmlComputeInstanceId: 0,
+			NvmlMigProfileId:      7,
+			NvmlProfileSlices:     1,
+		},
+	}
+
+	copy(hierarchy.EntityList[:], []dcgm.MigHierarchyInfo_v2{
+		gpu1, gpu1Inst1, gpu1Inst1CI1, gpu1Inst1CI2, gpu1Inst2, gpu1Inst2CI1,
+		gpu2, gpu2Inst1, gpu2Inst1CI1,
+	})
+
+	gpus := []dcgm.MigHierarchyInfo_v2{gpu1, gpu2}
+	instances := []dcgm.MigHierarchyInfo_v2{gpu1Inst1, gpu1Inst2, gpu2Inst1}
+	computeInstances := []dcgm.MigHierarchyInfo_v2{gpu1Inst1CI1, gpu1Inst1CI2, gpu1Inst2CI1, gpu2Inst1CI1}
+
+	return hierarchy, gpus, instances, computeInstances
+}
diff --git a/internal/pkg/deviceinfo/types.go b/internal/pkg/deviceinfo/types.go
new file mode 100644
index 00000000..af52d27e
--- /dev/null
+++ b/internal/pkg/deviceinfo/types.go
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/deviceinfo/mock_device_info.go -package=deviceinfo -copyright_file=../../../hack/header.txt . Provider
+
+package deviceinfo
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+)
+
+type Provider interface { // Provider exposes GPU, switch and CPU inventory plus the per-entity "is watched" checks
+	GPUCount() uint
+	GPUs() []GPUInfo
+	GPU(i uint) GPUInfo
+	Switches() []SwitchInfo
+	Switch(i uint) SwitchInfo
+	CPUs() []CPUInfo
+	CPU(i uint) CPUInfo
+	GOpts() appconfig.DeviceOptions // presumably the GPU device options (Info.gOpt) - TODO confirm
+	SOpts() appconfig.DeviceOptions // presumably the switch device options (Info.sOpt) - TODO confirm
+	COpts() appconfig.DeviceOptions // presumably the CPU device options (Info.cOpt) - TODO confirm
+	InfoType() dcgm.Field_Entity_Group
+	IsCPUWatched(cpuID uint) bool
+	IsCoreWatched(coreID uint, cpuID uint) bool
+	IsSwitchWatched(switchID uint) bool
+	IsLinkWatched(linkIndex uint, switchID uint) bool
+}
+
+type GPUInfo struct { // GPUInfo pairs one DCGM device record with its GPU instances
+	DeviceInfo   dcgm.Device       // DCGM device record (GPU ID, UUID, identifiers)
+	GPUInstances []GPUInstanceInfo // GPU instances listed under this GPU
+	MigEnabled   bool              // presumably true when MIG mode is enabled on this GPU - TODO confirm
+}
+
+type GPUInstanceInfo struct { // GPUInstanceInfo describes one GPU instance and its compute instances
+	Info             dcgm.MigEntityInfo
+	ProfileName      string // presumably filled in by setMigProfileNames - TODO confirm
+	EntityId         uint   // entity ID of this GPU instance
+	ComputeInstances []ComputeInstanceInfo
+}
+
+type ComputeInstanceInfo struct { // ComputeInstanceInfo describes one compute instance inside a GPU instance
+	InstanceInfo dcgm.MigEntityInfo
+	ProfileName  string
+	EntityId     uint // entity ID of this compute instance
+}
+
+type CPUInfo struct { // CPUInfo describes one CPU entity and its cores
+	EntityId uint   // entity ID of the CPU
+	Cores    []uint // core IDs; presumably derived from a bitmask via getCoreArray - TODO confirm
+}
+
+type SwitchInfo struct { // SwitchInfo describes one NvSwitch entity and its NvLinks
+	EntityId uint                // entity ID of the switch
+	NvLinks  []dcgm.NvLinkStatus // link statuses grouped under this switch
+}
diff --git a/internal/pkg/devicemonitoring/const.go b/internal/pkg/devicemonitoring/const.go
new file mode 100644
index 00000000..847ec613
--- /dev/null
+++ b/internal/pkg/devicemonitoring/const.go
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicemonitoring
+
+const (
+	PARENT_ID_IGNORED = 0 // ParentId placeholder for entities whose parent is not tracked (GPUs, GPU instances, CPUs, switches)
+)
diff --git a/internal/pkg/devicemonitoring/device_monitoring.go b/internal/pkg/devicemonitoring/device_monitoring.go
new file mode 100644
index 00000000..f062d404
--- /dev/null
+++ b/internal/pkg/devicemonitoring/device_monitoring.go
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicemonitoring
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
+
+func GetMonitoredEntities(deviceInfo deviceinfo.Provider) []Info {
+	var monitoring []Info
+
+	switch deviceInfo.InfoType() {
+	case dcgm.FE_SWITCH:
+		monitoring = monitorAllSwitches(deviceInfo)
+	case dcgm.FE_LINK:
+		monitoring = monitorAllLinks(deviceInfo)
+	case dcgm.FE_CPU:
+		monitoring = monitorAllCPUs(deviceInfo)
+	case dcgm.FE_CPU_CORE:
+		monitoring = monitorAllCPUCores(deviceInfo)
+	default:
+		if deviceInfo.GOpts().Flex {
+			monitoring = monitorAllGPUInstances(deviceInfo, true)
+		} else {
+			monitoring = handleGPUOptions(deviceInfo)
+		}
+	}
+
+	return monitoring
+}
+
+// handleGPUOptions turns GOpts().MajorRange (GPU IDs) and GOpts().MinorRange
+// (GPU-instance entity IDs) into monitoring entries. A range whose first element
+// is -1 selects every entity of that kind; IDs that match no device are skipped.
+func handleGPUOptions(deviceInfo deviceinfo.Provider) []Info {
+	opts := deviceInfo.GOpts()
+	var monitoring []Info
+	if len(opts.MajorRange) > 0 && opts.MajorRange[0] == -1 {
+		monitoring = monitorAllGPUs(deviceInfo)
+	} else {
+		for _, gpuID := range opts.MajorRange {
+			if mi := monitorGPU(deviceInfo, gpuID); mi != nil {
+				monitoring = append(monitoring, *mi)
+			}
+		}
+	}
+
+	if len(opts.MinorRange) > 0 && opts.MinorRange[0] == -1 {
+		monitoring = append(monitoring, monitorAllGPUInstances(deviceInfo, false)...)
+	} else {
+		for _, gpuInstanceID := range opts.MinorRange {
+			if mi := monitorGPUInstance(deviceInfo, gpuInstanceID); mi != nil {
+				monitoring = append(monitoring, *mi)
+			}
+		}
+	}
+
+	return monitoring
+}
+
+// monitorAllGPUs returns one FE_GPU entry, without instance info or parent, for
+// every GPU index below deviceInfo.GPUCount().
+func monitorAllGPUs(deviceInfo deviceinfo.Provider) []Info {
+	var monitoring []Info // stays nil when there are no GPUs
+	for i := uint(0); i < deviceInfo.GPUCount(); i++ {
+		gpu := deviceInfo.GPU(i) // single lookup per GPU instead of one per field
+		monitoring = append(monitoring, Info{
+			Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: gpu.DeviceInfo.GPU},
+			DeviceInfo:   gpu.DeviceInfo,
+			InstanceInfo: nil,
+			ParentId:     PARENT_ID_IGNORED,
+		})
+	}
+	return monitoring
+}
+
+// monitorAllGPUInstances returns one FE_GPU_I entry for every GPU instance of
+// every GPU. With addFlexibly set, a GPU that has no instances contributes a
+// single FE_GPU entry instead of nothing.
+func monitorAllGPUInstances(deviceInfo deviceinfo.Provider, addFlexibly bool) []Info {
+	var monitoring []Info
+	for i := uint(0); i < deviceInfo.GPUCount(); i++ {
+		gpu := deviceInfo.GPU(i) // single lookup per GPU
+		if addFlexibly && len(gpu.GPUInstances) == 0 {
+			// No instances on this GPU: monitor the GPU itself.
+			monitoring = append(monitoring, Info{
+				Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: gpu.DeviceInfo.GPU},
+				DeviceInfo:   gpu.DeviceInfo,
+				InstanceInfo: nil,
+				ParentId:     PARENT_ID_IGNORED,
+			})
+			continue
+		}
+
+		for j := range gpu.GPUInstances {
+			instance := &gpu.GPUInstances[j] // points at the slice element, not at a copy
+			monitoring = append(monitoring, Info{
+				Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: instance.EntityId},
+				DeviceInfo:   gpu.DeviceInfo,
+				InstanceInfo: instance,
+				ParentId:     PARENT_ID_IGNORED,
+			})
+		}
+	}
+
+	return monitoring
+}
+
+// monitorAllCPUs returns one FE_CPU entry for each CPU in deviceInfo.CPUs()
+// that deviceInfo.IsCPUWatched accepts. The entries carry an empty
+// dcgm.Device and no parent.
+func monitorAllCPUs(deviceInfo deviceinfo.Provider) []Info {
+	var watched []Info
+
+	for _, cpu := range deviceInfo.CPUs() {
+		if deviceInfo.IsCPUWatched(cpu.EntityId) {
+			watched = append(watched, Info{
+				Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU, EntityId: cpu.EntityId},
+				DeviceInfo:   dcgm.Device{},
+				InstanceInfo: nil,
+				ParentId:     PARENT_ID_IGNORED,
+			})
+		}
+	}
+
+	return watched
+}
+
+// monitorAllCPUCores returns one FE_CPU_CORE entry for every watched core of
+// every watched CPU. Each entry's ParentId is the EntityId of the CPU that owns
+// the core.
+func monitorAllCPUCores(deviceInfo deviceinfo.Provider) []Info {
+	var watched []Info
+
+	for _, cpu := range deviceInfo.CPUs() {
+		if !deviceInfo.IsCPUWatched(cpu.EntityId) {
+			continue
+		}
+
+		for _, coreID := range cpu.Cores {
+			if deviceInfo.IsCoreWatched(coreID, cpu.EntityId) {
+				watched = append(watched, Info{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: coreID},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     cpu.EntityId,
+				})
+			}
+		}
+	}
+
+	return watched
+}
+
+// monitorAllSwitches returns one FE_SWITCH entry for each switch in
+// deviceInfo.Switches() that deviceInfo.IsSwitchWatched accepts. The entries
+// carry an empty dcgm.Device and no parent.
+func monitorAllSwitches(deviceInfo deviceinfo.Provider) []Info {
+	var watched []Info
+
+	for _, nvSwitch := range deviceInfo.Switches() {
+		if deviceInfo.IsSwitchWatched(nvSwitch.EntityId) {
+			watched = append(watched, Info{
+				Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_SWITCH, EntityId: nvSwitch.EntityId},
+				DeviceInfo:   dcgm.Device{},
+				InstanceInfo: nil,
+				ParentId:     PARENT_ID_IGNORED,
+			})
+		}
+	}
+
+	return watched
+}
+
+// monitorAllLinks returns one FE_LINK entry for every NvLink that is up and
+// watched, considering only switches that are themselves watched. Links are
+// identified by their Index.
+func monitorAllLinks(deviceInfo deviceinfo.Provider) []Info {
+	var watched []Info
+
+	for _, nvSwitch := range deviceInfo.Switches() {
+		if !deviceInfo.IsSwitchWatched(nvSwitch.EntityId) {
+			continue
+		}
+
+		for _, nvLink := range nvSwitch.NvLinks {
+			// Skip links that are down; only links that are up are checked against the watch list.
+			if nvLink.State != dcgm.LS_UP || !deviceInfo.IsLinkWatched(nvLink.Index, nvSwitch.EntityId) {
+				continue
+			}
+
+			// ParentId is taken from the link status itself, not from the enclosing switch entry.
+			watched = append(watched, Info{
+				Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: nvLink.Index},
+				DeviceInfo:   dcgm.Device{},
+				InstanceInfo: nil,
+				ParentId:     nvLink.ParentId,
+			})
+		}
+	}
+
+	return watched
+}
+
+// monitorGPU returns the FE_GPU entry for the GPU whose ID equals gpuID, or nil when there is none.
+func monitorGPU(deviceInfo deviceinfo.Provider, gpuID int) *Info {
+	for i := uint(0); i < deviceInfo.GPUCount(); i++ {
+		if gpu := deviceInfo.GPU(i); gpu.DeviceInfo.GPU == uint(gpuID) { // one lookup per GPU
+			return &Info{
+				Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: gpu.DeviceInfo.GPU},
+				DeviceInfo:   gpu.DeviceInfo,
+				InstanceInfo: nil,
+				ParentId:     PARENT_ID_IGNORED,
+			}
+		}
+	}
+	return nil
+}
+
+// monitorGPUInstance returns the FE_GPU_I entry for entity ID gpuInstanceID, or nil when no GPU has it.
+func monitorGPUInstance(deviceInfo deviceinfo.Provider, gpuInstanceID int) *Info {
+	for gpuIdx := uint(0); gpuIdx < deviceInfo.GPUCount(); gpuIdx++ {
+		for _, candidate := range deviceInfo.GPU(gpuIdx).GPUInstances {
+			if candidate.EntityId == uint(gpuInstanceID) {
+				return &Info{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: uint(gpuInstanceID)},
+					DeviceInfo:   deviceInfo.GPU(gpuIdx).DeviceInfo,
+					InstanceInfo: &candidate,
+					ParentId:     PARENT_ID_IGNORED,
+				}
+			}
+		}
+	}
+	return nil
+}
diff --git a/internal/pkg/devicemonitoring/device_monitoring_test.go b/internal/pkg/devicemonitoring/device_monitoring_test.go
new file mode 100644
index 00000000..91444243
--- /dev/null
+++ b/internal/pkg/devicemonitoring/device_monitoring_test.go
@@ -0,0 +1,1610 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicemonitoring
+
+import (
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/mock/gomock"
+
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+)
+
+// TestGetMonitoredEntities checks entity selection for GPU, switch, link, CPU and CPU-core providers.
+func TestGetMonitoredEntities(t *testing.T) {
+	tests := []struct {
+		name     string
+		mockFunc func() *mockdeviceinfo.MockProvider
+		want     []Info
+	}{
+		{
+			name: "GPU Count 2, Flex = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, Flex = false, Major -1, Minor -1",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex:       false,
+					MajorRange: []int{-1},
+					MinorRange: []int{-1},
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, Flex = false, Major -1, Minor 14",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex:       false,
+					MajorRange: []int{-1},
+					MinorRange: []int{14},
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, Flex = false, Major 1, Minor -1",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex:       false,
+					MajorRange: []int{1},
+					MinorRange: []int{-1},
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, Flex = false, Major 0, Minor 14",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex:       false,
+					MajorRange: []int{0},
+					MinorRange: []int{14},
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, Flex = false, Minor -1",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex:       false,
+					MinorRange: []int{-1},
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, GPU Instance Count 1 each, Flex = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, GPU Instance Count 2 and 0, Flex = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{
+					testutils.MockGPUInstanceInfo1,
+					testutils.MockGPUInstanceInfo2,
+				}
+
+				ctrl := gomock.NewController(t)
+
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "Switch Count 2, Watched 1",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedSwitches := map[uint]bool{0: false, 1: true}
+				return testutils.MockSwitchDeviceInfo(ctrl, 2, nil, watchedSwitches, nil, dcgm.FE_SWITCH)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_SWITCH, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "Switch Count 5, Link Count 4, Switch Watched = true, Link Watched = true, link-up = mix",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{0, 0}: true,
+					{0, 1}: true,
+					{1, 0}: true,
+					{1, 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     1,
+				},
+			},
+		},
+		{
+			name: "CPU Count 3, watched = mix",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedCPUs := map[uint]bool{0: false, 1: true, 2: false}
+				return testutils.MockCPUDeviceInfo(ctrl, 3, nil, watchedCPUs, nil, dcgm.FE_CPU)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "CPU Count 2, Core Count 4, CPU Watched = true, Core Watched = mix",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{0, 0}: true,
+					{0, 1}: false,
+					{1, 0}: false,
+					{1, 1}: true,
+				}
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores, dcgm.FE_CPU_CORE)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     1,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Build the mock inside the subtest so setup runs only for the selected case.
+			assert.Equalf(t, tt.want, GetMonitoredEntities(tt.mockFunc()), "Unexpected Output")
+		})
+	}
+}
+
+// Test_monitorAllGPUs checks that monitorAllGPUs reports one FE_GPU entry per GPU
+// and leaves GPU instances out of the result.
+func Test_monitorAllGPUs(t *testing.T) {
+	cases := []struct {
+		desc        string
+		newProvider func() *mockdeviceinfo.MockProvider
+		expected    []Info
+	}{
+		{
+			desc: "GPU Count 0",
+			newProvider: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 0, nil)
+			},
+			expected: nil,
+		},
+		{
+			desc: "GPU Count 1",
+			newProvider: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			expected: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			desc: "GPU Count 2",
+			newProvider: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+			},
+			expected: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			assert.Equalf(t, tc.expected, monitorAllGPUs(tc.newProvider()), "Unexpected Output")
+		})
+	}
+}
+
+func Test_monitorAllGPUInstances(t *testing.T) {
+	tests := []struct {
+		name        string
+		mockFunc    func() *mockdeviceinfo.MockProvider
+		addFlexibly bool
+		want        []Info
+	}{
+		{
+			name: "GPU Count 0, addFlexibly true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 0, nil)
+			},
+			addFlexibly: true,
+			want:        nil,
+		},
+		{
+			name: "GPU Count 0, addFlexibly false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 0, nil)
+			},
+			addFlexibly: false,
+			want:        nil,
+		},
+		{
+			name: "GPU Count 1, addFlexibly true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, nil)
+			},
+			addFlexibly: true,
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 1, addFlexibly false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, nil)
+			},
+			addFlexibly: false,
+			want:        nil,
+		},
+		{
+			name: "GPU Count 2, addFlexibly true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+			},
+			addFlexibly: true,
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, addFlexibly false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+			},
+			addFlexibly: false,
+			want:        nil,
+		},
+		{
+			name: "GPU Count 1, GPU Instance Count 1",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			addFlexibly: true,
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 1, GPU Instance Count 2",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{
+					testutils.MockGPUInstanceInfo1,
+					testutils.MockGPUInstanceInfo2,
+				}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			addFlexibly: true,
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, GPU Instance Count 1 each",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+			},
+			addFlexibly: true,
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, GPU Instance Count 2 and 0, addFlexibly true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{
+					testutils.MockGPUInstanceInfo1,
+					testutils.MockGPUInstanceInfo2,
+				}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+			},
+			addFlexibly: true,
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(1),
+					},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "GPU Count 2, GPU Instance Count 2 and 0, addFlexibly false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{
+					testutils.MockGPUInstanceInfo1,
+					testutils.MockGPUInstanceInfo2,
+				}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+			},
+			addFlexibly: false,
+			want: []Info{
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo1,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity: dcgm.GroupEntityPair{
+						EntityGroupId: dcgm.FE_GPU_I,
+						EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+					},
+					DeviceInfo: dcgm.Device{
+						GPU: uint(0),
+					},
+					InstanceInfo: &testutils.MockGPUInstanceInfo2,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			deviceInfo := tt.mockFunc()
+			got := monitorAllGPUInstances(deviceInfo, tt.addFlexibly)
+			assert.Equalf(t, tt.want, got, "Unexpected Output")
+		})
+	}
+}
+
+func Test_monitorAllSwitches(t *testing.T) {
+	tests := []struct { // one entry per scenario: mock setup plus the exact slice monitorAllSwitches must return
+		name     string
+		mockFunc func() *mockdeviceinfo.MockProvider
+		want     []Info
+	}{
+		{
+			name: "Switch Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockSwitchDeviceInfo(ctrl, 0, nil, nil, nil, dcgm.FE_SWITCH)
+			},
+			want: nil,
+		},
+		{
+			name: "Switch Count 1, watched = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedSwitches := map[uint]bool{0: true}
+				return testutils.MockSwitchDeviceInfo(ctrl, 1, nil, watchedSwitches, nil, dcgm.FE_SWITCH)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_SWITCH, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "Switch Count 1, watched = false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedSwitches := map[uint]bool{0: false}
+				return testutils.MockSwitchDeviceInfo(ctrl, 1, nil, watchedSwitches, nil, dcgm.FE_SWITCH)
+			},
+			want: nil, // the single switch is flagged false in watchedSwitches
+		},
+		{
+			name: "Switch Count 2, watched = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				return testutils.MockSwitchDeviceInfo(ctrl, 2, nil, watchedSwitches, nil, dcgm.FE_SWITCH)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_SWITCH, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_SWITCH, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "Switch Count 2, watched = false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedSwitches := map[uint]bool{0: false, 1: false}
+				return testutils.MockSwitchDeviceInfo(ctrl, 2, nil, watchedSwitches, nil, dcgm.FE_SWITCH)
+			},
+			want: nil,
+		},
+		{
+			name: "Switch Count 3, watched = mix",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedSwitches := map[uint]bool{0: false, 1: true, 2: false} // only switch 1 is flagged true
+				return testutils.MockSwitchDeviceInfo(ctrl, 3, nil, watchedSwitches, nil, dcgm.FE_SWITCH)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_SWITCH, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		deviceInfo := tt.mockFunc() // built before t.Run; mockFunc's gomock controller was created with the enclosing test's t
+		t.Run(tt.name, func(t *testing.T) {
+			got := monitorAllSwitches(deviceInfo)
+			assert.Equalf(t, tt.want, got, "Unexpected Output")
+		})
+	}
+}
+
+func Test_monitorAllLinks(t *testing.T) {
+	tests := []struct { // one entry per scenario: mock setup plus the exact slice monitorAllLinks must return
+		name     string
+		mockFunc func() *mockdeviceinfo.MockProvider
+		want     []Info
+	}{
+		{
+			name: "Switch Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockSwitchDeviceInfo(ctrl, 0, nil, nil, nil, dcgm.FE_SWITCH)
+			},
+			want: nil,
+		},
+		{
+			name: "Switch Count 2, Link Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				return testutils.MockSwitchDeviceInfo(ctrl, 2, nil, watchedSwitches, nil, dcgm.FE_SWITCH)
+			},
+			want: nil,
+		},
+		{
+			name: "Switch Count 1, Link Count 1, Switch Watched = true, Link Watched = true, Link Up = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{0, 1}: true, // NOTE(review): key presumably is {switch ID, link ID} — confirm in testutils.WatchedEntityKey
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 1, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0, // same index the link was registered under in switchToNvLinks
+				},
+			},
+		},
+		{
+			name: "Switch Count 1, Link Count 1, Switch Watched = false, Link Watched = true, Link Up = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: false}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{0, 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 1, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			want: nil,
+		},
+		{
+			name: "Switch Count 1, Link Count 1, Switch Watched = true, Link Watched = false, Link Up = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{0, 1}: false,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 1, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			want: nil,
+		},
+		{
+			name: "Switch Count 1, Link Count 1, Switch Watched = true, Link Watched = true, Link Up = false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1} // NOTE(review): MockNVLinkVal1 presumably models a link that is not up — confirm in testutils
+
+				watchedSwitches := map[uint]bool{0: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{0, 0}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 1, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			want: nil,
+		},
+		{
+			name: "Switch Count 2, Link Count 2, Switch Watched = true, Link Watched = true, Link Up = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{0, 1}: true,
+					{1, 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 2, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     1,
+				},
+			},
+		},
+		{
+			name: "Switch Count 5, Link Count 4, Switch Watched = true, Link Watched = mix, Link Up = mix",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{0, 0}: true,
+					{0, 1}: false,
+					{1, 0}: true,
+					{1, 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     1, // only the {1, 1} link is expected back from the mixed setup
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		deviceInfo := tt.mockFunc() // built before t.Run; mockFunc's gomock controller was created with the enclosing test's t
+		t.Run(tt.name, func(t *testing.T) {
+			got := monitorAllLinks(deviceInfo)
+			assert.Equalf(t, tt.want, got, "Unexpected Output")
+		})
+	}
+}
+
+func Test_monitorAllCPUs(t *testing.T) {
+	tests := []struct { // one entry per scenario: mock setup plus the exact slice monitorAllCPUs must return
+		name     string
+		mockFunc func() *mockdeviceinfo.MockProvider
+		want     []Info
+	}{
+		{
+			name: "CPU Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockCPUDeviceInfo(ctrl, 0, nil, nil, nil, dcgm.FE_CPU)
+			},
+			want: nil,
+		},
+		{
+			name: "CPU Count 1, watched = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedCPUs := map[uint]bool{0: true}
+				return testutils.MockCPUDeviceInfo(ctrl, 1, nil, watchedCPUs, nil, dcgm.FE_CPU)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "CPU Count 1, watched = false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedCPUs := map[uint]bool{0: false}
+				return testutils.MockCPUDeviceInfo(ctrl, 1, nil, watchedCPUs, nil, dcgm.FE_CPU)
+			},
+			want: nil, // the single CPU is flagged false in watchedCPUs
+		},
+		{
+			name: "CPU Count 2, watched = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				return testutils.MockCPUDeviceInfo(ctrl, 2, nil, watchedCPUs, nil, dcgm.FE_CPU)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+		{
+			name: "CPU Count 2, watched = false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedCPUs := map[uint]bool{0: false, 1: false}
+				return testutils.MockCPUDeviceInfo(ctrl, 2, nil, watchedCPUs, nil, dcgm.FE_CPU)
+			},
+			want: nil,
+		},
+		{
+			name: "CPU Count 3, watched = mix",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedCPUs := map[uint]bool{0: false, 1: true, 2: false} // only CPU 1 is flagged true
+				return testutils.MockCPUDeviceInfo(ctrl, 3, nil, watchedCPUs, nil, dcgm.FE_CPU)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     PARENT_ID_IGNORED,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		deviceInfo := tt.mockFunc() // built before t.Run; mockFunc's gomock controller was created with the enclosing test's t
+		t.Run(tt.name, func(t *testing.T) {
+			got := monitorAllCPUs(deviceInfo)
+			assert.Equalf(t, tt.want, got, "Unexpected Output")
+		})
+	}
+}
+
+func Test_monitorAllCPUCores(t *testing.T) {
+	tests := []struct { // one entry per scenario: mock setup plus the exact slice monitorAllCPUCores must return
+		name     string
+		mockFunc func() *mockdeviceinfo.MockProvider
+		want     []Info
+	}{
+		{
+			name: "CPU Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockCPUDeviceInfo(ctrl, 0, nil, nil, nil, dcgm.FE_CPU_CORE)
+			},
+			want: nil,
+		},
+		{
+			name: "CPU Count 2, Core Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				return testutils.MockCPUDeviceInfo(ctrl, 2, nil, watchedCPUs, nil, dcgm.FE_CPU_CORE)
+			},
+			want: nil,
+		},
+		{
+			name: "CPU Count 1, Core Count 1, CPU Watched = true, Core Watched = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{1}
+
+				watchedCPUs := map[uint]bool{0: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{0, 1}: true, // NOTE(review): key presumably is {CPU ID, core ID} — confirm in testutils.WatchedEntityKey
+				}
+				return testutils.MockCPUDeviceInfo(ctrl, 1, cpuToCores, watchedCPUs, watchedCores, dcgm.FE_CPU_CORE)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0, // same index the core was registered under in cpuToCores
+				},
+			},
+		},
+		{
+			name: "CPU Count 1, Core Count 1, CPU Watched = false, Core Watched = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{1}
+
+				watchedCPUs := map[uint]bool{0: false}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{0, 1}: true,
+				}
+				return testutils.MockCPUDeviceInfo(ctrl, 1, cpuToCores, watchedCPUs, watchedCores, dcgm.FE_CPU_CORE)
+			},
+			want: nil,
+		},
+		{
+			name: "CPU Count 1, Core Count 1, CPU Watched = true, Core Watched = false",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{1}
+
+				watchedCPUs := map[uint]bool{0: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{0, 1}: false,
+				}
+				return testutils.MockCPUDeviceInfo(ctrl, 1, cpuToCores, watchedCPUs, watchedCores, dcgm.FE_CPU_CORE)
+			},
+			want: nil,
+		},
+		{
+			name: "CPU Count 2, Core Count 4, CPU Watched = true, Core Watched = true",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{0, 0}: true,
+					{0, 1}: true,
+					{1, 0}: true,
+					{1, 1}: true,
+				}
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores, dcgm.FE_CPU_CORE)
+			},
+			want: []Info{
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     1,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     1,
+				},
+			},
+		},
+		{
+			name: "CPU Count 2, Core Count 4, CPU Watched = true, Core Watched = mix",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{0, 0}: true,
+					{0, 1}: false,
+					{1, 0}: false,
+					{1, 1}: true,
+				}
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores, dcgm.FE_CPU_CORE)
+			},
+			want: []Info{ // only the two entries flagged true in watchedCores are expected
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(0)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     0,
+				},
+				{
+					Entity:       dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: uint(1)},
+					DeviceInfo:   dcgm.Device{},
+					InstanceInfo: nil,
+					ParentId:     1,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		deviceInfo := tt.mockFunc() // built before t.Run; mockFunc's gomock controller was created with the enclosing test's t
+		t.Run(tt.name, func(t *testing.T) {
+			got := monitorAllCPUCores(deviceInfo)
+			assert.Equalf(t, tt.want, got, "Unexpected Output")
+		})
+	}
+}
+
+func Test_monitorGPU(t *testing.T) {
+	tests := []struct { // one entry per scenario: mock setup, the GPU ID to look up, and the exact *Info expected
+		name     string
+		mockFunc func() *mockdeviceinfo.MockProvider
+		gpuID    int
+		want     *Info
+	}{
+		{
+			name: "GPU Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 0, nil)
+			},
+			gpuID: 0,
+			want:  nil,
+		},
+		{
+			name: "GPU Count 1",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			gpuID: 0,
+			want: &Info{
+				Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(0)},
+				DeviceInfo: dcgm.Device{
+					GPU: uint(0),
+				},
+				InstanceInfo: nil, // the mock carries a GPU instance, yet the GPU-level entry is expected
+				ParentId:     PARENT_ID_IGNORED,
+			},
+		},
+		{
+			name: "GPU Count 1, gpuID mismatch",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			gpuID: 1000, // not present in the one-GPU mock, so nil is expected
+			want:  nil,
+		},
+		{
+			name: "GPU Count 2, one GPU ID match",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+			},
+			gpuID: 1,
+			want: &Info{
+				Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: uint(1)},
+				DeviceInfo: dcgm.Device{
+					GPU: uint(1),
+				},
+				InstanceInfo: nil,
+				ParentId:     PARENT_ID_IGNORED,
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		deviceInfo := tt.mockFunc() // built before t.Run; mockFunc's gomock controller was created with the enclosing test's t
+		t.Run(tt.name, func(t *testing.T) {
+			got := monitorGPU(deviceInfo, tt.gpuID)
+			assert.Equalf(t, tt.want, got, "Unexpected Output")
+		})
+	}
+}
+
+func Test_monitorGPUInstance(t *testing.T) {
+	tests := []struct { // one entry per scenario: mock setup, the GPU instance ID to look up, and the exact *Info expected
+		name          string
+		mockFunc      func() *mockdeviceinfo.MockProvider
+		gpuInstanceID int
+		want          *Info
+	}{
+		{
+			name: "GPU Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 0, nil)
+			},
+			gpuInstanceID: 0,
+			want:          nil,
+		},
+		{
+			name: "GPU Count 1, GPU Instance Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, nil)
+			},
+			gpuInstanceID: 0,
+			want:          nil,
+		},
+		{
+			name: "GPU Count 2, GPU Instance Count 0",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+			},
+			gpuInstanceID: 0,
+			want:          nil,
+		},
+		{
+			name: "GPU Count 1, GPU Instance Count 1",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			gpuInstanceID: 0,
+			want: &Info{
+				Entity: dcgm.GroupEntityPair{
+					EntityGroupId: dcgm.FE_GPU_I,
+					EntityId:      testutils.MockGPUInstanceInfo1.EntityId,
+				},
+				DeviceInfo: dcgm.Device{
+					GPU: uint(0),
+				},
+				InstanceInfo: &testutils.MockGPUInstanceInfo1,
+				ParentId:     PARENT_ID_IGNORED,
+			},
+		},
+		{
+			name: "GPU Count 1, GPU Instance Count 1, GPU Instance ID mismatch",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			gpuInstanceID: 1000,
+			want:          nil,
+		},
+		{
+			name: "GPU Count 1, GPU Instance Count 2, one match",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{
+					testutils.MockGPUInstanceInfo1,
+					testutils.MockGPUInstanceInfo2,
+				}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 1, gpuInstanceInfos)
+			},
+			gpuInstanceID: 14, // NOTE(review): presumably the EntityId of testutils.MockGPUInstanceInfo2 — confirm
+			want: &Info{
+				Entity: dcgm.GroupEntityPair{
+					EntityGroupId: dcgm.FE_GPU_I,
+					EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+				},
+				DeviceInfo: dcgm.Device{
+					GPU: uint(0),
+				},
+				InstanceInfo: &testutils.MockGPUInstanceInfo2,
+				ParentId:     PARENT_ID_IGNORED,
+			},
+		},
+		{
+			name: "GPU Count 2, GPU Instance Count 1 each, one match",
+			mockFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				ctrl := gomock.NewController(t)
+				return testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+			},
+			gpuInstanceID: 14,
+			want: &Info{
+				Entity: dcgm.GroupEntityPair{
+					EntityGroupId: dcgm.FE_GPU_I,
+					EntityId:      testutils.MockGPUInstanceInfo2.EntityId,
+				},
+				DeviceInfo: dcgm.Device{
+					GPU: uint(1), // the matching instance is registered under GPU 1 in gpuInstanceInfos
+				},
+				InstanceInfo: &testutils.MockGPUInstanceInfo2,
+				ParentId:     PARENT_ID_IGNORED,
+			},
+		},
+	}
+	for _, tt := range tests {
+		deviceInfo := tt.mockFunc() // built before t.Run; mockFunc's gomock controller was created with the enclosing test's t
+		t.Run(tt.name, func(t *testing.T) {
+			got := monitorGPUInstance(deviceInfo, tt.gpuInstanceID)
+			assert.Equalf(t, tt.want, got, "Unexpected Output")
+		})
+	}
+}
diff --git a/pkg/dcgmexporter/test_utils.go b/internal/pkg/devicemonitoring/types.go
similarity index 60%
rename from pkg/dcgmexporter/test_utils.go
rename to internal/pkg/devicemonitoring/types.go
index 6c13aeaf..eb94c8dc 100644
--- a/pkg/dcgmexporter/test_utils.go
+++ b/internal/pkg/devicemonitoring/types.go
@@ -14,29 +14,17 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package devicemonitoring
 
 import (
-	"testing"
-
 	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/stretchr/testify/assert"
-)
-
-func setupTest(t *testing.T) func(t *testing.T) {
-	cleanup, err := dcgm.Init(dcgm.Embedded)
-	assert.NoError(t, err)
 
-	return func(t *testing.T) {
-		defer cleanup()
-	}
-}
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
 
-func runOnlyWithLiveGPUs(t *testing.T) {
-	t.Helper()
-	gpus, err := dcgm.GetSupportedDevices()
-	assert.NoError(t, err)
-	if len(gpus) < 1 {
-		t.Skip("Skipping test that requires live GPUs. None were found")
-	}
+type Info struct {
+	Entity       dcgm.GroupEntityPair
+	DeviceInfo   dcgm.Device
+	InstanceInfo *deviceinfo.GPUInstanceInfo
+	ParentId     uint
 }
diff --git a/internal/pkg/devicewatcher/const.go b/internal/pkg/devicewatcher/const.go
new file mode 100644
index 00000000..a6a1d994
--- /dev/null
+++ b/internal/pkg/devicewatcher/const.go
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicewatcher
+
+const (
+	DCGM_ST_NOT_CONFIGURED = "Setting not configured" // NOTE(review): presumably compared against DCGM error text — confirm at call sites
+
+	maxKeepAge     = 600.0 // How long to keep data for this field in seconds
+	maxKeepSamples = 0     // Maximum number of samples to keep. 0=no limit
+)
diff --git a/internal/pkg/devicewatcher/device_watcher.go b/internal/pkg/devicewatcher/device_watcher.go
new file mode 100644
index 00000000..56d78b07
--- /dev/null
+++ b/internal/pkg/devicewatcher/device_watcher.go
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicewatcher
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"strings"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicemonitoring"
+	. "github.com/NVIDIA/dcgm-exporter/internal/pkg/logging"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/utils"
+)
+
+// DeviceWatcher selects counter fields per entity type and sets up DCGM watches for them.
+type DeviceWatcher struct{}
+
+// NewDeviceWatcher returns a new DeviceWatcher; the type carries no state.
+func NewDeviceWatcher() *DeviceWatcher { return &DeviceWatcher{} }
+
+// GetDeviceFields returns, in input order, the field IDs of the counters whose
+// DCGM field entity level applies to entityType (see shouldIncludeField).
+func (d *DeviceWatcher) GetDeviceFields(counters []counters.Counter, entityType dcgm.Field_Entity_Group) []dcgm.Short {
+	var selected []dcgm.Short
+	for i := range counters {
+		id := counters[i].FieldID
+		if meta := dcgmprovider.Client().FieldGetById(id); shouldIncludeField(entityType, meta.EntityLevel) {
+			selected = append(selected, id)
+		}
+	}
+	return selected
+}
+
+// shouldIncludeField reports whether a field defined at fieldLevel is collected for entityType.
+// A field matches its own level, FE_NONE, or one of the extra levels accepted per entity type:
+// FE_GPU accepts FE_GPU_I, FE_GPU_CI and FE_VGPU; FE_CPU accepts FE_CPU_CORE; FE_SWITCH accepts FE_LINK.
+func shouldIncludeField(entityType, fieldLevel dcgm.Field_Entity_Group) bool {
+	switch {
+	case fieldLevel == entityType, fieldLevel == dcgm.FE_NONE:
+		return true
+	case entityType == dcgm.FE_GPU:
+		return fieldLevel == dcgm.FE_GPU_CI || fieldLevel == dcgm.FE_GPU_I || fieldLevel == dcgm.FE_VGPU
+	case entityType == dcgm.FE_CPU:
+		return fieldLevel == dcgm.FE_CPU_CORE
+	case entityType == dcgm.FE_SWITCH:
+		return fieldLevel == dcgm.FE_LINK
+	}
+	return false
+}
+
+// WatchDeviceFields builds the entity groups for deviceInfo, creates one field group
+// from deviceFields and watches it on every entity group. With no groups to watch it
+// returns early with a nil error; on failure the collected cleanups are handed to
+// utils.CleanupOnError and its result is returned alongside the error.
+func (d *DeviceWatcher) WatchDeviceFields(
+	deviceFields []dcgm.Short, deviceInfo deviceinfo.Provider, updateFreqInUsec int64,
+) ([]dcgm.GroupHandle, dcgm.FieldHandle, []func(), error) {
+	var groups []dcgm.GroupHandle
+	var cleanups []func()
+	var err error
+
+	switch deviceInfo.InfoType() {
+	case dcgm.FE_LINK: // NV links only
+		groups, cleanups, err = d.createNVLinkGroups(deviceInfo)
+	case dcgm.FE_CPU_CORE: // CPU cores only
+		groups, cleanups, err = d.createCPUCoreGroups(deviceInfo)
+	default: // GPUs (including GPU instances), CPUs and switches
+		groups, cleanups, err = d.createGroups(deviceInfo)
+	}
+	if err != nil {
+		return nil, dcgm.FieldHandle{}, utils.CleanupOnError(cleanups), err
+	}
+	if len(groups) == 0 {
+		return nil, dcgm.FieldHandle{}, cleanups, nil
+	}
+
+	fieldGroup, fieldGroupCleanup, err := newFieldGroup(deviceFields)
+	if err != nil {
+		return nil, dcgm.FieldHandle{}, utils.CleanupOnError(cleanups), err
+	}
+	cleanups = append(cleanups, fieldGroupCleanup)
+
+	for i := range groups {
+		if err = watchFieldGroup(groups[i], fieldGroup, updateFreqInUsec); err != nil {
+			return nil, dcgm.FieldHandle{}, utils.CleanupOnError(cleanups), err
+		}
+	}
+	return groups, fieldGroup, cleanups, nil
+}
+
+// createGroups returns createGenericGroup's result as slices; with no group and no error both are empty.
+func (d *DeviceWatcher) createGroups(deviceInfo deviceinfo.Provider) ([]dcgm.GroupHandle, []func(), error) {
+	group, cleanup, err := d.createGenericGroup(deviceInfo)
+	if err != nil {
+		return []dcgm.GroupHandle{}, []func(){cleanup}, err
+	}
+	if group == nil {
+		return []dcgm.GroupHandle{}, []func(){}, nil
+	}
+	return []dcgm.GroupHandle{*group}, []func(){cleanup}, nil
+}
+
+// createGenericGroup creates one DCGM group holding every entity that
+// devicemonitoring.GetMonitoredEntities reports for deviceInfo. With nothing to monitor
+// it returns a nil group and doNothing. If adding an entity fails, the group handle and
+// its cleanup are still returned together with the error.
+func (d *DeviceWatcher) createGenericGroup(deviceInfo deviceinfo.Provider) (*dcgm.GroupHandle, func(), error) {
+	entities := devicemonitoring.GetMonitoredEntities(deviceInfo)
+	if len(entities) == 0 {
+		return nil, doNothing, nil
+	}
+
+	groupID, cleanup, err := createGroup()
+	if err != nil {
+		return nil, cleanup, err
+	}
+	for _, monitored := range entities {
+		pair := monitored.Entity
+		if err = dcgmprovider.Client().AddEntityToGroup(groupID, pair.EntityGroupId, pair.EntityId); err != nil {
+			return &groupID, cleanup, err
+		}
+	}
+	return &groupID, cleanup, nil
+}
+
+// createCPUCoreGroups creates DCGM groups for the watched cores of every watched CPU.
+// Each CPU gets its own groups: one is started at the CPU's first watched core and a new
+// one after every dcgm.DCGM_GROUP_MAX_ENTITIES cores added for that CPU.
+//
+// On error the cleanups collected so far are returned; the caller decides what to do with them.
+func (d *DeviceWatcher) createCPUCoreGroups(deviceInfo deviceinfo.Provider) ([]dcgm.GroupHandle, []func(), error) {
+	var (
+		groups   []dcgm.GroupHandle
+		cleanups []func()
+	)
+
+	for _, cpu := range deviceInfo.CPUs() {
+		if !deviceInfo.IsCPUWatched(cpu.EntityId) {
+			continue
+		}
+
+		var current dcgm.GroupHandle
+		added := 0
+		for _, core := range cpu.Cores {
+			if !deviceInfo.IsCoreWatched(core, cpu.EntityId) {
+				continue
+			}
+
+			// Start a new group for the first core and whenever the current group is full.
+			if added%dcgm.DCGM_GROUP_MAX_ENTITIES == 0 {
+				group, cleanup, err := createGroup()
+				if err != nil {
+					return nil, cleanups, err
+				}
+				current = group
+				cleanups = append(cleanups, cleanup)
+				groups = append(groups, group)
+			}
+			added++
+
+			if err := dcgmprovider.Client().AddEntityToGroup(current, dcgm.FE_CPU_CORE, core); err != nil {
+				return groups, cleanups, err
+			}
+		}
+	}
+
+	return groups, cleanups, nil
+}
+
+// createNVLinkGroups creates one DCGM group per watched switch and adds to it every
+// link of that switch that is in the dcgm.LS_UP state and watched. The group is created
+// on the first qualifying link, so a switch without such links gets no group.
+//
+// On error the cleanups collected so far are returned; the caller decides what to do
+// with them.
+func (d *DeviceWatcher) createNVLinkGroups(deviceInfo deviceinfo.Provider) ([]dcgm.GroupHandle, []func(), error) {
+	var (
+		groups   []dcgm.GroupHandle
+		cleanups []func()
+	)
+
+	for _, sw := range deviceInfo.Switches() {
+		if !deviceInfo.IsSwitchWatched(sw.EntityId) {
+			continue
+		}
+
+		var current dcgm.GroupHandle
+		haveGroup := false
+		for _, link := range sw.NvLinks {
+			// Only links in the LS_UP state that are watched are added.
+			if link.State != dcgm.LS_UP {
+				continue
+			}
+			if !deviceInfo.IsLinkWatched(link.Index, sw.EntityId) {
+				continue
+			}
+
+			// Create the per-switch group lazily, on its first qualifying link.
+			if !haveGroup {
+				group, cleanup, err := createGroup()
+				if err != nil {
+					return nil, cleanups, err
+				}
+				current, haveGroup = group, true
+				cleanups = append(cleanups, cleanup)
+				groups = append(groups, group)
+			}
+
+			err := dcgmprovider.Client().AddLinkEntityToGroup(current, link.Index, link.ParentId)
+			if err != nil {
+				return groups, cleanups, err
+			}
+		}
+	}
+
+	return groups, cleanups, nil
+}
+
+// createGroup creates a DCGM group named "gpu-collector-group-<number from utils.RandUint64>"
+// and returns its handle plus a cleanup that destroys it; a destroy error is logged as a
+// warning unless its text contains DCGM_ST_NOT_CONFIGURED. On failure the cleanup is doNothing.
+func createGroup() (dcgm.GroupHandle, func(), error) {
+	suffix, err := utils.RandUint64()
+	if err != nil {
+		return dcgm.GroupHandle{}, doNothing, err
+	}
+	groupID, err := dcgmprovider.Client().CreateGroup(fmt.Sprintf("gpu-collector-group-%d", suffix))
+	if err != nil {
+		return dcgm.GroupHandle{}, doNothing, err
+	}
+
+	return groupID, func() {
+		err := dcgmprovider.Client().DestroyGroup(groupID)
+		if err == nil || strings.Contains(err.Error(), DCGM_ST_NOT_CONFIGURED) {
+			return
+		}
+		slog.LogAttrs(context.Background(), slog.LevelWarn, "cannot destroy group",
+			slog.Any(GroupIDKey, groupID), slog.String(ErrorKey, err.Error()))
+	}, nil
+}
+
+// newFieldGroup creates a DCGM field group for deviceFields, named
+// "gpu-collector-fieldgroup-<number from utils.RandUint64>". It returns the field
+// group handle and a cleanup that destroys the field group, logging a warning if
+// that fails. On failure it returns a zero handle and doNothing.
+func newFieldGroup(deviceFields []dcgm.Short) (dcgm.FieldHandle, func(), error) {
+	suffix, err := utils.RandUint64()
+	if err != nil {
+		return dcgm.FieldHandle{}, doNothing, err
+	}
+
+	groupName := fmt.Sprintf("gpu-collector-fieldgroup-%d", suffix)
+	fieldGroup, err := dcgmprovider.Client().FieldGroupCreate(groupName, deviceFields)
+	if err != nil {
+		return dcgm.FieldHandle{}, doNothing, err
+	}
+
+	destroy := func() {
+		if destroyErr := dcgmprovider.Client().FieldGroupDestroy(fieldGroup); destroyErr != nil {
+			slog.Warn("Cannot destroy field group.", slog.String(ErrorKey, destroyErr.Error()))
+		}
+	}
+	return fieldGroup, destroy, nil
+}
+
+// watchFieldGroup watches the given field group on the given entity group by calling
+// WatchFieldsWithGroupEx on the DCGM client.
+//
+// updateFreq is forwarded unchanged; the remaining two arguments are the package
+// constants maxKeepAge and maxKeepSamples. The client's error is returned as is.
+func watchFieldGroup(
+	group dcgm.GroupHandle, field dcgm.FieldHandle, updateFreq int64,
+) error {
+	return dcgmprovider.Client().WatchFieldsWithGroupEx(field, group, updateFreq, maxKeepAge, maxKeepSamples)
+}
diff --git a/internal/pkg/devicewatcher/device_watcher_test.go b/internal/pkg/devicewatcher/device_watcher_test.go
new file mode 100644
index 00000000..123d914e
--- /dev/null
+++ b/internal/pkg/devicewatcher/device_watcher_test.go
@@ -0,0 +1,1951 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicewatcher
+
+import (
+	"crypto/rand"
+	"fmt"
+	"slices"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/mock/gomock"
+
+	mockdcgm "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+)
+
+func TestDeviceWatcher_WatchDeviceFields(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM)
+	}()
+	dcgmprovider.SetClient(mockDCGM)
+
+	tests := []struct {
+		name               string
+		mockDeviceInfoFunc func() *mockdeviceinfo.MockProvider
+		mockDCGMFunc       func([]dcgm.GroupHandle, dcgm.FieldHandle)
+		expectGroupIDs     func() []dcgm.GroupHandle
+		expectFieldGroupID func() dcgm.FieldHandle
+		wantErr            bool
+	}{
+		{
+			name: "Watch Switch Links",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(0), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(0), uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(1), uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(nil)
+
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[0], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[1], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+			},
+			wantErr: false,
+		},
+		{
+			name: "Watch Switch Links when No Switches watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				watchedSwitches := map[uint]bool{0: false, 1: false}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, nil, watchedSwitches, nil,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return nil
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {},
+			wantErr:      false,
+		},
+		{
+			name: "Watch Switch Links but got AddLinkEntityToGroup Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(0), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(0), uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(1),
+					uint(1)).Return(fmt.Errorf("some error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(fmt.Errorf("some other error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+			},
+			wantErr: true,
+		},
+		{
+			name: "Watch Switch Links but got FieldGroupCreate Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(0), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(0), uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(1), uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle,
+					fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name: "Watch Switch Links but got WatchFieldsWithGroupEx Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(0), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(0), uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(1), uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(nil)
+
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[0], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[1], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(fmt.Errorf("some error"))
+			},
+			wantErr: true,
+		},
+		{
+			name: "Watch GPUs",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_GPU, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_GPU, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(nil)
+
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[0], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+			},
+			wantErr: false,
+		},
+		{
+			name: "Watch GPUs when No GPUs or GPU Instances to monitor",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 0, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return []dcgm.GroupHandle{}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(_ []dcgm.GroupHandle, _ dcgm.FieldHandle) {},
+			wantErr:      false,
+		},
+		{
+			name: "Watch GPUs but got AddEntityToGroup Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, _ dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_GPU, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_GPU,
+					uint(1)).Return(fmt.Errorf("some error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(fmt.Errorf("some other error"))
+			},
+			wantErr: true,
+		},
+		{
+			name: "Watch CPU Cores",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[1], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[1], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(nil)
+
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[0], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[1], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+			},
+			wantErr: false,
+		},
+		{
+			name: "No CPU cores to watch",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				watchedCPUs := map[uint]bool{0: false, 1: false}
+				mockGPUDeviceInfo := testutils.MockCPUDeviceInfo(ctrl, 2, nil, watchedCPUs, nil,
+					dcgm.FE_CPU_CORE)
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return nil
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {},
+			wantErr:      false,
+		},
+		{
+			name: "Watch CPU cores when Create Group Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], fmt.Errorf("random error"))
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+			},
+			wantErr: true,
+		},
+		{
+			name: "Watch CPUs",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle1}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(nil)
+
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[0], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+			},
+			wantErr: false,
+		},
+		{
+			name: "Watch CPUs when CPUs to monitor",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+
+				watchedCPUs := map[uint]bool{0: false, 1: false}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return []dcgm.GroupHandle{}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(_ []dcgm.GroupHandle, _ dcgm.FieldHandle) {},
+			wantErr:      false,
+		},
+		{
+			name: "Watch CPUs but got AddEntityToGroup Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+
+				watchedCPUs := map[uint]bool{0: false, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, _ dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU,
+					uint(1)).Return(fmt.Errorf("some error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(fmt.Errorf("some other error"))
+			},
+			wantErr: true,
+		},
+		{
+			name: "Watch Switches",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_SWITCH)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle1}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_SWITCH, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_SWITCH, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(nil)
+
+				mockDCGM.EXPECT().WatchFieldsWithGroupEx(mockFieldGroupHandle, mockGroupHandles[0], gomock.Any(),
+					gomock.Any(), gomock.Any()).Return(nil)
+			},
+			wantErr: false,
+		},
+		{
+			name: "Watch CPUs when no switches available",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				return testutils.MockSwitchDeviceInfo(ctrl, 0, nil, nil, nil,
+					dcgm.FE_SWITCH)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return []dcgm.GroupHandle{}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(_ []dcgm.GroupHandle, _ dcgm.FieldHandle) {},
+			wantErr:      false,
+		},
+		{
+			name: "Watch CPUs when Create Group error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_SWITCH)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle1}
+			},
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle, mockFieldGroupHandle dcgm.FieldHandle) {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], fmt.Errorf("random error"))
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockDeviceInfo := tt.mockDeviceInfoFunc()
+			mockGroupIDs := tt.expectGroupIDs()
+			mockFieldGroupIDs := tt.expectFieldGroupID()
+			tt.mockDCGMFunc(mockGroupIDs, mockFieldGroupIDs)
+
+			d := NewDeviceWatcher()
+			inputFields := []dcgm.Short{1, 2, 3, 4}
+			_, _, gotFuncs, err := d.WatchDeviceFields(inputFields, mockDeviceInfo, 1000000)
+			// Ensure DestroyGroup functions gets called
+			for _, gotFunc := range gotFuncs {
+				gotFunc()
+			}
+
+			if !tt.wantErr {
+				assert.Nil(t, err, "expected no error")
+			} else {
+				assert.NotNil(t, err, "expected no error.")
+				assert.Nil(t, gotFuncs, "expected cleanup functions to be nil")
+			}
+		})
+	}
+}
+
+func TestDeviceWatcher_createGenericGroup(t *testing.T) {
+	// Each case programs the mock DCGM client, then calls createGenericGroup and its cleanup func.
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+
+	// Route dcgmprovider calls to the mock and restore the previous client afterwards.
+	realDCGM := dcgmprovider.Client()
+	t.Cleanup(func() { dcgmprovider.SetClient(realDCGM) })
+	dcgmprovider.SetClient(mockDCGM)
+
+	tests := []struct {
+		name               string
+		mockDeviceInfoFunc func() *mockdeviceinfo.MockProvider
+		mockDCGMFunc       func(dcgm.GroupHandle) func()
+		expectGroupID      func() *dcgm.GroupHandle
+		wantErr            bool
+	}{
+		{
+			name: "Create Group for GPUs",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return &mockGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandle dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandle).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Create Group for GPU Instances",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gpuInstanceInfos := make(map[int][]deviceinfo.GPUInstanceInfo)
+				gpuInstanceInfos[0] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo1}
+				gpuInstanceInfos[1] = []deviceinfo.GPUInstanceInfo{testutils.MockGPUInstanceInfo2}
+
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, gpuInstanceInfos)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return &mockGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandle dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU_I,
+					testutils.MockGPUInstanceInfo1.EntityId).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU_I,
+					testutils.MockGPUInstanceInfo2.EntityId).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandle).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Create Group for CPUs",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU)
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return &mockGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandle dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_CPU, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_CPU, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandle).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Create Group for Switches",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_SWITCH)
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return &mockGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandle dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_SWITCH, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_SWITCH, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandle).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "No GPUs or GPU Instances to monitor",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 0, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(_ dcgm.GroupHandle) func() {
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Random Unit Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(_ dcgm.GroupHandle) func() {
+				// Simulate a failure in rand.Reader using mock rand.Reader
+				mockReader := &testutils.MockReader{Err: fmt.Errorf("mock error")}
+
+				originalReader := rand.Reader
+				rand.Reader = mockReader
+				return func() {
+					rand.Reader = originalReader
+				}
+			},
+			wantErr: true,
+		},
+		{
+			name: "Create Group Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(_ dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(dcgm.GroupHandle{}, fmt.Errorf("random error"))
+
+				return doNothing
+			},
+			wantErr: true,
+		},
+		{
+			name: "AddEntityToGroup Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return &mockGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandle dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU,
+					uint(1)).Return(fmt.Errorf("some error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandle).Return(fmt.Errorf("some other error"))
+
+				return doNothing
+			},
+			wantErr: true,
+		},
+		{
+			name: "DestroyGroup Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				gOpts := appconfig.DeviceOptions{
+					Flex: true,
+				}
+
+				mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil)
+				mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()
+
+				return mockGPUDeviceInfo
+			},
+			expectGroupID: func() *dcgm.GroupHandle {
+				mockGroupHandle := dcgm.GroupHandle{}
+				mockGroupHandle.SetHandle(uintptr(1))
+
+				return &mockGroupHandle
+			},
+			mockDCGMFunc: func(mockGroupHandle dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandle, dcgm.FE_GPU, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandle).Return(fmt.Errorf("some error"))
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockDeviceInfo := tt.mockDeviceInfoFunc()
+			mockGroupID := tt.expectGroupID()
+			inputGroupID := dcgm.GroupHandle{}
+			if mockGroupID != nil {
+				inputGroupID = *mockGroupID
+			}
+
+			// Register expectations now; the returned teardown (e.g. rand.Reader restore) runs on exit.
+			defer tt.mockDCGMFunc(inputGroupID)()
+
+			d := &DeviceWatcher{}
+			gotGroupID, gotFunc, err := d.createGenericGroup(mockDeviceInfo)
+			gotFunc() // Ensure DestroyGroup function gets called
+
+			if tt.wantErr {
+				assert.NotNil(t, err, "expected an error")
+				return
+			}
+			assert.Nil(t, err, "expected no error")
+			assert.Equal(t, mockGroupID, gotGroupID, "expected group IDs to be the same.")
+		})
+	}
+}
+
+func TestDeviceWatcher_createCPUCoreGroups(t *testing.T) {
+	// Each case programs the mock DCGM client, then calls createCPUCoreGroups and its cleanup funcs.
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+
+	// Route dcgmprovider calls to the mock and restore the previous client afterwards.
+	realDCGM := dcgmprovider.Client()
+	t.Cleanup(func() { dcgmprovider.SetClient(realDCGM) })
+	dcgmprovider.SetClient(mockDCGM)
+
+	tests := []struct {
+		name               string
+		mockDeviceInfoFunc func() *mockdeviceinfo.MockProvider
+		mockDCGMFunc       func(mockGroupHandles []dcgm.GroupHandle) func()
+		expectGroupIDs     func() []dcgm.GroupHandle
+		wantErr            bool
+	}{
+		{
+			name: "Create Group for CPU Cores",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[1], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[1], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "No CPU watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				watchedCPUs := map[uint]bool{0: false, 1: false}
+				mockCPUDeviceInfo := testutils.MockCPUDeviceInfo(ctrl, 2, nil, watchedCPUs, nil,
+					dcgm.FE_CPU_CORE)
+
+				return mockCPUDeviceInfo
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(_ []dcgm.GroupHandle) func() {
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Only CPUs watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				return testutils.MockCPUDeviceInfo(ctrl, 2, nil, watchedCPUs, nil,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(_ []dcgm.GroupHandle) func() {
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Only 1 Core watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: false,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: false,
+					{ParentID: 1, ChildID: 1}: false,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle1}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "One Core Each watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: false,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: false,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[1], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Random Unit Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(_ []dcgm.GroupHandle) func() {
+				// Simulate a failure in rand.Reader using mock rand.Reader
+				mockReader := &testutils.MockReader{Err: fmt.Errorf("mock error")}
+
+				originalReader := rand.Reader
+				rand.Reader = mockReader
+				return func() {
+					rand.Reader = originalReader
+				}
+			},
+			wantErr: true,
+		},
+		{
+			name: "Create Group Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], fmt.Errorf("random error"))
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				return doNothing
+			},
+			wantErr: true,
+		},
+		{
+			name: "AddEntityToGroup Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				cpuToCores := make(map[int][]uint)
+				cpuToCores[0] = []uint{0, 1}
+				cpuToCores[1] = []uint{0, 1}
+
+				watchedCPUs := map[uint]bool{0: true, 1: true}
+				watchedCores := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+
+				return testutils.MockCPUDeviceInfo(ctrl, 2, cpuToCores, watchedCPUs, watchedCores,
+					dcgm.FE_CPU_CORE)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[0], dcgm.FE_CPU_CORE, uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[1], dcgm.FE_CPU_CORE, uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddEntityToGroup(mockGroupHandles[1], dcgm.FE_CPU_CORE,
+					uint(1)).Return(fmt.Errorf("some error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(fmt.Errorf("some other error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+				return doNothing
+			},
+			wantErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockDeviceInfo := tt.mockDeviceInfoFunc()
+			mockGroupIDs := tt.expectGroupIDs()
+			// Register expectations now; the returned teardown (e.g. rand.Reader restore) runs on exit.
+			defer tt.mockDCGMFunc(mockGroupIDs)()
+
+			d := &DeviceWatcher{}
+			gotGroupIDs, gotFuncs, err := d.createCPUCoreGroups(mockDeviceInfo)
+			// Run every returned cleanup so the DestroyGroup expectations are satisfied.
+			for _, gotFunc := range gotFuncs {
+				gotFunc()
+			}
+
+			if tt.wantErr {
+				assert.NotNil(t, err, "expected an error")
+				return
+			}
+			assert.Nil(t, err, "expected no error")
+			assert.Equal(t, mockGroupIDs, gotGroupIDs, "expected group IDs to be the same.")
+		})
+	}
+}
+
+func TestDeviceWatcher_createNVLinkGroups(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM)
+	}()
+	dcgmprovider.SetClient(mockDCGM)
+
+	tests := []struct {
+		name               string
+		mockDeviceInfoFunc func() *mockdeviceinfo.MockProvider
+		mockDCGMFunc       func(mockGroupHandles []dcgm.GroupHandle) func()
+		expectGroupIDs     func() []dcgm.GroupHandle
+		wantErr            bool
+	}{
+		{
+			name: "Create Group for Switch Links",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(0), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(0), uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(1), uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "No Switches watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				watchedSwitches := map[uint]bool{0: false, 1: false}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, nil, watchedSwitches, nil,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Only Switches watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, nil, watchedSwitches, nil,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Only 1 NV Link watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: false,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: false,
+					{ParentID: 1, ChildID: 1}: false,
+				}
+
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle1}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "One NV Link Each watched",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: false,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: false,
+				}
+
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(0), uint(1)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "One NV Link Each watched but one link down",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: false,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: false,
+				}
+
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				return []dcgm.GroupHandle{mockGroupHandle1}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "One NV Link Each watched but all watched NV links down",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				mockLink2 := testutils.MockNVLinkVal2
+				mockLink2.State = 2
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, mockLink2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{testutils.MockNVLinkVal1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: false,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: false,
+				}
+
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				return nil
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Random Unit Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(_ []dcgm.GroupHandle) func() {
+				// Simulate a failure in rand.Reader using mock rand.Reader
+				mockReader := &testutils.MockReader{Err: fmt.Errorf("mock error")}
+
+				originalReader := rand.Reader
+				rand.Reader = mockReader
+				return func() {
+					rand.Reader = originalReader
+				}
+			},
+			wantErr: true,
+		},
+		{
+			name: "Create Group Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], fmt.Errorf("random error"))
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(0), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(nil)
+				return doNothing
+			},
+			wantErr: true,
+		},
+		{
+			name: "AddLinkEntityToGroup Error",
+			mockDeviceInfoFunc: func() *mockdeviceinfo.MockProvider {
+				mockLink1 := testutils.MockNVLinkVal1
+				mockLink1.State = 3
+
+				switchToNvLinks := make(map[int][]dcgm.NvLinkStatus)
+				switchToNvLinks[0] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+				switchToNvLinks[1] = []dcgm.NvLinkStatus{mockLink1, testutils.MockNVLinkVal2}
+
+				watchedSwitches := map[uint]bool{0: true, 1: true}
+				watchedLinks := map[testutils.WatchedEntityKey]bool{
+					{ParentID: 0, ChildID: 0}: true,
+					{ParentID: 0, ChildID: 1}: true,
+					{ParentID: 1, ChildID: 0}: true,
+					{ParentID: 1, ChildID: 1}: true,
+				}
+				return testutils.MockSwitchDeviceInfo(ctrl, 5, switchToNvLinks, watchedSwitches, watchedLinks,
+					dcgm.FE_LINK)
+			},
+			expectGroupIDs: func() []dcgm.GroupHandle {
+				mockGroupHandle1 := dcgm.GroupHandle{}
+				mockGroupHandle1.SetHandle(uintptr(1))
+
+				mockGroupHandle2 := dcgm.GroupHandle{}
+				mockGroupHandle2.SetHandle(uintptr(2))
+
+				return []dcgm.GroupHandle{mockGroupHandle1, mockGroupHandle2}
+			},
+			mockDCGMFunc: func(mockGroupHandles []dcgm.GroupHandle) func() {
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[0], nil)
+				mockDCGM.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandles[1], nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(0), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[0], uint(1), uint(0)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(0), uint(1)).Return(nil)
+				mockDCGM.EXPECT().AddLinkEntityToGroup(mockGroupHandles[1], uint(1),
+					uint(1)).Return(fmt.Errorf("some error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[0]).Return(fmt.Errorf("some other error"))
+				mockDCGM.EXPECT().DestroyGroup(mockGroupHandles[1]).Return(nil)
+				return doNothing
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockDeviceInfo := tt.mockDeviceInfoFunc()
+			mockGroupIDs := tt.expectGroupIDs()
+			f := tt.mockDCGMFunc(mockGroupIDs)
+			defer f()
+
+			d := &DeviceWatcher{}
+			gotGroupIDs, gotFuncs, err := d.createNVLinkGroups(mockDeviceInfo)
+			// Ensure DestroyGroup functions gets called
+			for _, gotFunc := range gotFuncs {
+				gotFunc()
+			}
+
+			if !tt.wantErr {
+				assert.Nil(t, err, "expected no error")
+				assert.Equal(t, mockGroupIDs, gotGroupIDs, "expected group IDs to be the same.")
+			} else {
+				assert.NotNil(t, err, "expected no error.")
+			}
+		})
+	}
+}
+
+func Test_newFieldGroup(t *testing.T) { // table-driven: success, rand.Reader failure, create failure, destroy failure
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM) // put the previous client back when the test returns
+	}()
+	dcgmprovider.SetClient(mockDCGM)
+
+	tests := []struct {
+		name               string
+		mockDCGMFunc       func(dcgm.FieldHandle) func() // registers expectations; returns a teardown func
+		expectFieldGroupID func() dcgm.FieldHandle       // builds the handle handed to mockDCGMFunc
+		wantErr            bool
+	}{
+		{
+			name: "Create Group for Switch Links",
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockFieldGroupHandle dcgm.FieldHandle) func() {
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(nil)
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+		{
+			name: "Random Unit Error",
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				return dcgm.FieldHandle{}
+			},
+			mockDCGMFunc: func(_ dcgm.FieldHandle) func() {
+				// Simulate a failure in rand.Reader using mock rand.Reader
+				mockReader := &testutils.MockReader{Err: fmt.Errorf("mock error")}
+
+				originalReader := rand.Reader
+				rand.Reader = mockReader
+				return func() {
+					rand.Reader = originalReader
+				}
+			},
+			wantErr: true,
+		},
+		{
+			name: "Field Group Create Error",
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockFieldGroupHandle dcgm.FieldHandle) func() {
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle,
+					fmt.Errorf("random error"))
+
+				return doNothing
+			},
+			wantErr: true,
+		},
+		{
+			name: "Field Group Destroy Error",
+			expectFieldGroupID: func() dcgm.FieldHandle {
+				mockFieldGroupHandle := dcgm.FieldHandle{}
+				mockFieldGroupHandle.SetHandle(uintptr(1))
+
+				return mockFieldGroupHandle
+			},
+			mockDCGMFunc: func(mockFieldGroupHandle dcgm.FieldHandle) func() {
+				mockDCGM.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldGroupHandle, nil)
+				mockDCGM.EXPECT().FieldGroupDestroy(mockFieldGroupHandle).Return(fmt.Errorf("some other error"))
+
+				return doNothing
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockFieldGroupIDs := tt.expectFieldGroupID()
+			f := tt.mockDCGMFunc(mockFieldGroupIDs)
+			defer f()
+
+			input := []dcgm.Short{1, 2, 3, 4}
+			gotFieldGroupIDs, gotFunc, err := newFieldGroup(input)
+			gotFunc() // Run the returned cleanup so the FieldGroupDestroy expectation gets exercised
+
+			if !tt.wantErr {
+				assert.Nil(t, err, "expected no error")
+				assert.Equal(t, mockFieldGroupIDs, gotFieldGroupIDs, "expected field group IDs to be the same.")
+			} else {
+				assert.NotNil(t, err, "expected an error")
+			}
+		})
+	}
+}
+
+func TestDeviceWatcher_GetDeviceFields(t *testing.T) { // checks the field IDs selected per entity type
+	ctrl := gomock.NewController(t)
+	mockDCGM := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client()
+	defer func() {
+		dcgmprovider.SetClient(realDCGM) // put the previous client back when the test returns
+	}()
+	dcgmprovider.SetClient(mockDCGM)
+
+	type args struct {
+		counterList []counters.Counter
+		entityType  dcgm.Field_Entity_Group
+	}
+	tests := []struct {
+		name         string
+		args         args
+		mockDCGMFunc func([]dcgm.Short)  // registers FieldGetById expectations for the given field IDs
+		want         func() []dcgm.Short // expected field IDs; compared after sorting
+	}{
+		{
+			name: "GPU, GPU Instance and VGPU Counters",
+			args: args{
+				counterList: testutils.SampleCounters,
+				entityType:  dcgm.FE_GPU,
+			},
+			mockDCGMFunc: func(fieldIDs []dcgm.Short) {
+				for _, fieldID := range fieldIDs {
+					mockDCGM.EXPECT().FieldGetById(fieldID).Return(testutils.SampleFieldIDToFieldMeta[fieldID])
+				}
+			},
+			want: func() []dcgm.Short { // clone first: the result is sorted in place and must not alias the shared fixture
+				return append(slices.Clone(testutils.SampleGPUFieldIDs), testutils.SampleDriverVersionCounter.FieldID)
+			},
+		},
+		{
+			name: "GPU Instance Counters",
+			args: args{
+				counterList: testutils.SampleCounters,
+				entityType:  dcgm.FE_GPU_I,
+			},
+			mockDCGMFunc: func(fieldIDs []dcgm.Short) {
+				for _, fieldID := range fieldIDs {
+					mockDCGM.EXPECT().FieldGetById(fieldID).Return(testutils.SampleFieldIDToFieldMeta[fieldID])
+				}
+			},
+			want: func() []dcgm.Short {
+				return []dcgm.Short{
+					testutils.SampleGPUPowerUsageCounter.FieldID,
+					testutils.SampleDriverVersionCounter.FieldID,
+				}
+			},
+		},
+		{
+			name: "VGPU Counters",
+			args: args{
+				counterList: testutils.SampleCounters,
+				entityType:  dcgm.FE_VGPU,
+			},
+			mockDCGMFunc: func(fieldIDs []dcgm.Short) {
+				for _, fieldID := range fieldIDs {
+					mockDCGM.EXPECT().FieldGetById(fieldID).Return(testutils.SampleFieldIDToFieldMeta[fieldID])
+				}
+			},
+			want: func() []dcgm.Short {
+				return []dcgm.Short{
+					testutils.SampleVGPULicenseStatusCounter.FieldID,
+					testutils.SampleDriverVersionCounter.FieldID,
+				}
+			},
+		},
+		{
+			name: "CPU and CPU Core Counters",
+			args: args{
+				counterList: testutils.SampleCounters,
+				entityType:  dcgm.FE_CPU,
+			},
+			mockDCGMFunc: func(fieldIDs []dcgm.Short) {
+				for _, fieldID := range fieldIDs {
+					mockDCGM.EXPECT().FieldGetById(fieldID).Return(testutils.SampleFieldIDToFieldMeta[fieldID])
+				}
+			},
+			want: func() []dcgm.Short {
+				return []dcgm.Short{
+					testutils.SampleCPUUtilTotalCounter.FieldID,
+					testutils.SampleDriverVersionCounter.FieldID,
+				}
+			},
+		},
+		{
+			name: "Switch and NV Link Counters",
+			args: args{
+				counterList: testutils.SampleCounters,
+				entityType:  dcgm.FE_SWITCH,
+			},
+			mockDCGMFunc: func(fieldIDs []dcgm.Short) {
+				for _, fieldID := range fieldIDs {
+					mockDCGM.EXPECT().FieldGetById(fieldID).Return(testutils.SampleFieldIDToFieldMeta[fieldID])
+				}
+			},
+			want: func() []dcgm.Short {
+				return []dcgm.Short{
+					testutils.SampleSwitchCurrentTempCounter.FieldID,
+					testutils.SampleSwitchLinkFlitErrorsCounter.FieldID,
+					testutils.SampleDriverVersionCounter.FieldID,
+				}
+			},
+		},
+		{
+			name: "NV Link Counters",
+			args: args{
+				counterList: testutils.SampleCounters,
+				entityType:  dcgm.FE_LINK,
+			},
+			mockDCGMFunc: func(fieldIDs []dcgm.Short) {
+				for _, fieldID := range fieldIDs {
+					mockDCGM.EXPECT().FieldGetById(fieldID).Return(testutils.SampleFieldIDToFieldMeta[fieldID])
+				}
+			},
+			want: func() []dcgm.Short {
+				return []dcgm.Short{
+					testutils.SampleSwitchLinkFlitErrorsCounter.FieldID,
+					testutils.SampleDriverVersionCounter.FieldID,
+				}
+			},
+		},
+		{
+			name: "Invalid Entity Type",
+			args: args{
+				counterList: testutils.SampleCounters,
+				entityType:  dcgm.FE_COUNT,
+			},
+			mockDCGMFunc: func(fieldIDs []dcgm.Short) {
+				for _, fieldID := range fieldIDs {
+					mockDCGM.EXPECT().FieldGetById(fieldID).Return(testutils.SampleFieldIDToFieldMeta[fieldID])
+				}
+			},
+			want: func() []dcgm.Short {
+				return []dcgm.Short{
+					testutils.SampleDriverVersionCounter.FieldID,
+				}
+			},
+		},
+		{
+			name: "No Counters",
+			args: args{
+				counterList: []counters.Counter{},
+				entityType:  dcgm.FE_GPU,
+			},
+			mockDCGMFunc: func(_ []dcgm.Short) {},
+			want: func() []dcgm.Short {
+				return nil
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.mockDCGMFunc(testutils.SampleAllFieldIDs)
+
+			d := &DeviceWatcher{}
+			want := tt.want()
+			got := d.GetDeviceFields(tt.args.counterList, tt.args.entityType)
+
+			slices.Sort(want) // order is not part of the contract under test, so compare sorted copies
+			slices.Sort(got)
+			assert.Equal(t, want, got, "Device fields mismatch")
+		})
+	}
+}
diff --git a/internal/pkg/devicewatcher/types.go b/internal/pkg/devicewatcher/types.go
new file mode 100644
index 00000000..53e0a205
--- /dev/null
+++ b/internal/pkg/devicewatcher/types.go
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/devicewatcher/mock_device_watcher.go -package=devicewatcher -copyright_file=../../../hack/header.txt . Watcher
+
+package devicewatcher
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
+
+type Watcher interface { // Watcher selects DCGM field IDs for an entity type and sets up watches on them.
+	GetDeviceFields([]counters.Counter, dcgm.Field_Entity_Group) []dcgm.Short // field IDs for the given counters and entity group
+	WatchDeviceFields([]dcgm.Short, deviceinfo.Provider, int64) ([]dcgm.GroupHandle, dcgm.FieldHandle, []func(), error) // returns groups, field group, cleanup funcs, error; the int64 is presumably an update interval (units not visible here; confirm)
+}
diff --git a/internal/pkg/devicewatcher/variables.go b/internal/pkg/devicewatcher/variables.go
new file mode 100644
index 00000000..d209d996
--- /dev/null
+++ b/internal/pkg/devicewatcher/variables.go
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicewatcher
+
+var doNothing = func() { // no-op func value for places that need a func() but have nothing to do
+	// This function is intentionally left blank
+}
diff --git a/internal/pkg/devicewatchlistmanager/device_watchlist_manager.go b/internal/pkg/devicewatchlistmanager/device_watchlist_manager.go
new file mode 100644
index 00000000..00a9f6c9
--- /dev/null
+++ b/internal/pkg/devicewatchlistmanager/device_watchlist_manager.go
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicewatchlistmanager
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher"
+)
+
+// DeviceTypesToWatch lists the supported entity group types.
+var DeviceTypesToWatch = []dcgm.Field_Entity_Group{
+	dcgm.FE_GPU,
+	dcgm.FE_SWITCH,
+	dcgm.FE_LINK,
+	dcgm.FE_CPU,
+	dcgm.FE_CPU_CORE,
+}
+
+type WatchList struct { // WatchList holds what is needed to watch the fields of one entity type.
+	deviceInfo        deviceinfo.Provider   // device information provider handed to the watcher
+	deviceFields      []dcgm.Short          // field IDs to watch; IsEmpty reports whether there are none
+	deviceGroups      []dcgm.GroupHandle    // stored by Watch from the watcher's result
+	deviceFieldGroup  dcgm.FieldHandle      // stored by Watch from the watcher's result
+	labelDeviceFields []dcgm.Short          // field IDs resolved from the label counters
+	watcher           devicewatcher.Watcher // performs the actual watch when Watch is called
+	collectInterval   int64                 // multiplied by 1000 before being passed to the watcher
+}
+
+// NewWatchList builds a WatchList from the given device info, fields, watcher and interval.
+func NewWatchList(
+	deviceInfo deviceinfo.Provider, deviceFields, labelDeviceFields []dcgm.Short,
+	watcher devicewatcher.Watcher, collectInterval int64,
+) *WatchList {
+	wl := new(WatchList)
+	wl.deviceInfo, wl.watcher = deviceInfo, watcher
+	wl.deviceFields, wl.labelDeviceFields = deviceFields, labelDeviceFields
+	wl.collectInterval = collectInterval
+	// deviceGroups and deviceFieldGroup keep their zero values until Watch stores a result.
+	return wl
+}
+
+// DeviceInfo returns the device information provider held by this
+// WatchList.
+func (d *WatchList) DeviceInfo() deviceinfo.Provider { return d.deviceInfo }
+
+// DeviceFields returns the field IDs currently held by this WatchList, as
+// given at construction or last stored by SetDeviceFields.
+func (d *WatchList) DeviceFields() []dcgm.Short { return d.deviceFields }
+
+// SetDeviceFields replaces the WatchList's field IDs with deviceFields.
+// The slice is stored as given, without copying.
+func (d *WatchList) SetDeviceFields(deviceFields []dcgm.Short) { d.deviceFields = deviceFields }
+
+// LabelDeviceFields returns the label field IDs held by this
+// WatchList.
+func (d *WatchList) LabelDeviceFields() []dcgm.Short { return d.labelDeviceFields }
+
+// IsEmpty reports whether the WatchList has no device fields. Label device
+// fields are not taken into account.
+func (d *WatchList) IsEmpty() bool { return len(d.deviceFields) == 0 }
+
+func (d *WatchList) Watch() ([]func(), error) {
+	var cleanups []func()
+	var err error
+
+	d.deviceGroups, d.deviceFieldGroup, cleanups, err = d.watcher.WatchDeviceFields(d.deviceFields, d.deviceInfo,
+		d.collectInterval*1000)
+	return cleanups, err
+}
+
+// DeviceGroups returns the group handles held by this WatchList; Watch
+// stores them from the watcher's result.
+func (d *WatchList) DeviceGroups() []dcgm.GroupHandle { return d.deviceGroups }
+
+// DeviceFieldGroup returns the field group handle held by this WatchList;
+// Watch stores it from the watcher's result.
+func (d *WatchList) DeviceFieldGroup() dcgm.FieldHandle { return d.deviceFieldGroup }
+
+// WatchListManager manages multiple entities and their corresponding WatchLists, the counters to
+// watch and the device options.
+type WatchListManager struct {
+	entityWatchLists map[dcgm.Field_Entity_Group]WatchList // WatchLists keyed by entity type, stored by value
+	counters         counters.CounterList                  // counters used to resolve device and label fields
+	gOpts            appconfig.DeviceOptions               // taken from Config.GPUDeviceOptions
+	sOpts            appconfig.DeviceOptions               // taken from Config.SwitchDeviceOptions
+	cOpts            appconfig.DeviceOptions               // taken from Config.CPUDeviceOptions
+	useFakeGPUs      bool                                  // taken from Config.UseFakeGPUs
+}
+
+// NewWatchListManager creates a WatchListManager that starts with no WatchLists and takes
+// its device options and fake-GPU flag from config.
+func NewWatchListManager(
+	counters counters.CounterList, config *appconfig.Config,
+) *WatchListManager {
+	m := new(WatchListManager)
+	m.entityWatchLists = make(map[dcgm.Field_Entity_Group]WatchList)
+	m.counters, m.useFakeGPUs = counters, config.UseFakeGPUs
+	// GPU, switch and CPU options are kept as three separate values.
+	m.gOpts, m.sOpts, m.cOpts = config.GPUDeviceOptions, config.SwitchDeviceOptions, config.CPUDeviceOptions
+
+	return m
+}
+
+// CreateEntityWatchList resolves the device fields and label fields to monitor for an entity
+// type, loads its device information and stores the resulting WatchList under that type.
+func (e *WatchListManager) CreateEntityWatchList(
+	entityType dcgm.Field_Entity_Group, watcher devicewatcher.Watcher, collectInterval int64,
+) error {
+	// Device fields come from all counters, label fields from the label counters only.
+	fields := watcher.GetDeviceFields(e.counters, entityType)
+	labelFields := watcher.GetDeviceFields(e.counters.LabelCounters(), entityType)
+
+	info, initErr := deviceinfo.Initialize(e.gOpts, e.sOpts, e.cOpts, e.useFakeGPUs, entityType)
+	if initErr != nil {
+		// Nothing is stored when the device information cannot be loaded.
+		return initErr
+	}
+
+	// The WatchList is stored by value; any entry already present for entityType
+	// is replaced.
+	watchList := NewWatchList(info, fields, labelFields, watcher, collectInterval)
+	e.entityWatchLists[entityType] = *watchList
+
+	// Initialization succeeded if this point is reached.
+	return nil
+}
+
+// EntityWatchList looks up the WatchList stored for deviceType. It returns that WatchList and
+// true when one exists, otherwise a zero-value WatchList and false.
+func (e *WatchListManager) EntityWatchList(deviceType dcgm.Field_Entity_Group) (WatchList, bool) {
+	wl, found := e.entityWatchLists[deviceType]
+	return wl, found
+}
diff --git a/internal/pkg/devicewatchlistmanager/device_watchlist_manager_test.go b/internal/pkg/devicewatchlistmanager/device_watchlist_manager_test.go
new file mode 100644
index 00000000..88ee975b
--- /dev/null
+++ b/internal/pkg/devicewatchlistmanager/device_watchlist_manager_test.go
@@ -0,0 +1,780 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package devicewatchlistmanager
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/mock/gomock"
+
+	mockdcgm "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	mockdevicewatcher "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/devicewatcher"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+)
+
+var ( // shared fixtures for the tests in this file
+	deviceOptionFalse = appconfig.DeviceOptions{ // Flex disabled, no ranges
+		Flex:       false,
+		MajorRange: nil,
+		MinorRange: nil,
+	}
+
+	deviceOptionTrue = appconfig.DeviceOptions{ // Flex enabled, no ranges
+		Flex:       true,
+		MajorRange: nil,
+		MinorRange: nil,
+	}
+
+	deviceOptionOther = appconfig.DeviceOptions{ // Flex disabled, explicit major and minor ranges
+		Flex:       false,
+		MajorRange: []int{1},
+		MinorRange: []int{-1},
+	}
+
+	mockDeviceInfoFunc = func(ctrl *gomock.Controller) *mockdeviceinfo.MockProvider { // builds a GPU device-info mock
+		gOpts := appconfig.DeviceOptions{
+			Flex: true,
+		}
+
+		mockGPUDeviceInfo := testutils.MockGPUDeviceInfo(ctrl, 2, nil) // 2 is presumably the GPU count; confirm in testutils
+		mockGPUDeviceInfo.EXPECT().GOpts().Return(gOpts).AnyTimes()    // GOpts may be called any number of times
+
+		return mockGPUDeviceInfo
+	}
+)
+
+func TestNewWatchList(t *testing.T) { // covers the WatchList constructor, getters, Watch and SetDeviceFields
+	ctrl := gomock.NewController(t)
+
+	type args struct {
+		deviceInfo        deviceinfo.Provider
+		deviceFields      []dcgm.Short
+		labelDeviceFields []dcgm.Short
+		newDeviceFields   []dcgm.Short // when non-nil, passed to SetDeviceFields after Watch
+		collectInterval   int64
+	}
+	tests := []struct {
+		name         string
+		args         args
+		wantEmpty    bool // expected IsEmpty() result
+		wantWatchErr bool // when true, the mocked watcher returns an error from WatchDeviceFields
+	}{
+		{
+			name: "New Watch List",
+			args: args{
+				deviceInfo:        mockDeviceInfoFunc(ctrl),
+				deviceFields:      []dcgm.Short{1, 2, 3, 4},
+				labelDeviceFields: []dcgm.Short{100, 101},
+				collectInterval:   int64(1),
+			},
+			wantEmpty:    false,
+			wantWatchErr: false,
+		},
+		{
+			name: "Empty Device Fields",
+			args: args{
+				deviceInfo:        mockDeviceInfoFunc(ctrl),
+				deviceFields:      nil,
+				labelDeviceFields: []dcgm.Short{100, 101},
+				collectInterval:   int64(1),
+			},
+			wantEmpty:    true,
+			wantWatchErr: false,
+		},
+		{
+			name: "SetDevice Fields",
+			args: args{
+				deviceInfo:        mockDeviceInfoFunc(ctrl),
+				deviceFields:      []dcgm.Short{1, 2, 3, 4},
+				labelDeviceFields: []dcgm.Short{100, 101},
+				newDeviceFields:   []dcgm.Short{1000},
+				collectInterval:   int64(1),
+			},
+			wantEmpty:    false,
+			wantWatchErr: false,
+		},
+		{
+			name: "Watch Error",
+			args: args{
+				deviceInfo:        mockDeviceInfoFunc(ctrl),
+				deviceFields:      nil,
+				labelDeviceFields: []dcgm.Short{100, 101},
+				collectInterval:   int64(1),
+			},
+			wantEmpty:    true,
+			wantWatchErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mockDeviceWatcher := mockdevicewatcher.NewMockWatcher(ctrl)
+
+			var err error // stays nil unless the case wants Watch to fail
+			if tt.wantWatchErr {
+				err = fmt.Errorf("some error")
+			}
+
+			mockDeviceWatcher.EXPECT().WatchDeviceFields(tt.args.deviceFields, tt.args.deviceInfo,
+				tt.args.collectInterval*1000).Return([]dcgm.GroupHandle{}, dcgm.FieldHandle{}, []func(){}, err) // Watch must pass the interval multiplied by 1000
+
+			got := NewWatchList(tt.args.deviceInfo, tt.args.deviceFields, tt.args.labelDeviceFields, mockDeviceWatcher,
+				tt.args.collectInterval)
+
+			assert.Equal(t, tt.args.deviceInfo, got.DeviceInfo(), "Unexpected DeviceInfo() output.")
+			assert.Equal(t, tt.args.deviceFields, got.DeviceFields(), "Unexpected DeviceFields() output.")
+			assert.Equal(t, tt.args.labelDeviceFields, got.LabelDeviceFields(),
+				"Unexpected LabelDeviceFields() output.")
+			assert.Equal(t, tt.wantEmpty, got.IsEmpty(), "Unexpected IsEmpty() output.")
+
+			_, err = got.Watch() // err is reused for the error returned by Watch
+			if !tt.wantWatchErr {
+				assert.Nil(t, err, "expected no error")
+			} else {
+				assert.NotNil(t, err, "expected error")
+			}
+
+			if tt.args.newDeviceFields != nil {
+				got.SetDeviceFields(tt.args.newDeviceFields)
+				assert.Equal(t, tt.args.newDeviceFields, got.DeviceFields(),
+					"Unexpected DeviceFields() output after SetDeviceFields().")
+				assert.NotEqual(t, tt.args.deviceFields, got.DeviceFields(),
+					"Unexpected DeviceFields() output after SetDeviceFields().")
+			}
+		})
+	}
+}
+
+func TestNewWatchListManager(t *testing.T) { // checks that the constructor copies counters and options from the config
+	type args struct {
+		counters counters.CounterList
+		config   *appconfig.Config
+	}
+	tests := []struct {
+		name string
+		args args
+		want *WatchListManager // expected manager, compared field by field via assert.Equalf
+	}{
+		{
+			name: "New Watch List Manager",
+			args: args{
+				counters: testutils.SampleCounters,
+				config: &appconfig.Config{
+					GPUDeviceOptions:    deviceOptionFalse,
+					SwitchDeviceOptions: deviceOptionTrue,
+					CPUDeviceOptions:    deviceOptionOther,
+					UseFakeGPUs:         false,
+				},
+			},
+			want: &WatchListManager{
+				entityWatchLists: make(map[dcgm.Field_Entity_Group]WatchList), // a new manager starts with an empty map
+				counters:         testutils.SampleCounters,
+				gOpts:            deviceOptionFalse,
+				sOpts:            deviceOptionTrue,
+				cOpts:            deviceOptionOther,
+				useFakeGPUs:      false,
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equalf(t, tt.want, NewWatchListManager(tt.args.counters, tt.args.config),
+				"Unexpected NewWatchListManager output")
+		})
+	}
+}
+
+func TestWatchListManager_CreateEntityWatchList(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	mockDCGMProvider := mockdcgm.NewMockDCGM(ctrl)
+
+	realDCGM := dcgmprovider.Client() // remember the current client so the deferred func can restore it
+	defer func() {
+		dcgmprovider.SetClient(realDCGM)
+	}()
+	dcgmprovider.SetClient(mockDCGMProvider) // from here on dcgmprovider.Client() is the mock
+
+	type fields struct {
+		entityWatchLists      map[dcgm.Field_Entity_Group]WatchList
+		entityWatchListsCount int
+		counters              counters.CounterList
+		gOpts                 appconfig.DeviceOptions
+		sOpts                 appconfig.DeviceOptions
+		cOpts                 appconfig.DeviceOptions
+		useFakeGPUs           bool
+	}
+	type args struct {
+		entityType      dcgm.Field_Entity_Group
+		watcher         *mockdevicewatcher.MockWatcher
+		collectInterval int64
+	}
+	tests := []struct {
+		name         string
+		fields       fields
+		args         args
+		deviceFields []dcgm.Short
+		mockFunc     func(
+			*mockdevicewatcher.MockWatcher, counters.CounterList, counters.CounterList,
+			dcgm.Field_Entity_Group, []dcgm.Short, []dcgm.Short,
+		)
+		wantFunc func(
+			*WatchListManager, dcgm.Field_Entity_Group, []dcgm.Short, []dcgm.Short,
+			*mockdevicewatcher.MockWatcher, int64,
+		) map[dcgm.Field_Entity_Group]WatchList
+		wantErr bool
+	}{
+		{
+			name: "Create GPU WatchList",
+			fields: fields{
+				entityWatchLists:      make(map[dcgm.Field_Entity_Group]WatchList),
+				entityWatchListsCount: 1,
+				counters:              testutils.SampleCounters,
+				gOpts:                 deviceOptionFalse,
+				sOpts:                 deviceOptionTrue,
+				cOpts:                 deviceOptionOther,
+				useFakeGPUs:           false,
+			},
+			args: args{
+				entityType:      dcgm.FE_GPU,
+				watcher:         mockdevicewatcher.NewMockWatcher(ctrl),
+				collectInterval: 1,
+			},
+			deviceFields: testutils.SampleGPUFieldIDs,
+			mockFunc: func(
+				watcher *mockdevicewatcher.MockWatcher, counters, labelCounters counters.CounterList,
+				entityType dcgm.Field_Entity_Group, deviceFields, labelDeviceFields []dcgm.Short,
+			) {
+				watcher.EXPECT().GetDeviceFields(counters, entityType).Return(deviceFields)
+				watcher.EXPECT().GetDeviceFields(labelCounters, entityType).Return(labelDeviceFields)
+
+				fakeDevices := deviceinfo.SpoofGPUDevices()
+				_, fakeGPUs, _, _ := deviceinfo.SpoofMigHierarchy()
+
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				// Times(2): once for CreateEntityWatchList, once for deviceinfo.Initialize in wantFunc
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).Times(2)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil).Times(2)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil).Times(2)
+			},
+			wantFunc: func(
+				e *WatchListManager, entityType dcgm.Field_Entity_Group, deviceFields,
+				labelDeviceFields []dcgm.Short, watcher *mockdevicewatcher.MockWatcher, collectInterval int64,
+			) map[dcgm.Field_Entity_Group]WatchList {
+				watchList := make(map[dcgm.Field_Entity_Group]WatchList)
+
+				mockDeviceInfo, _ := deviceinfo.Initialize(e.gOpts, e.sOpts, e.cOpts, e.useFakeGPUs, entityType)
+				watchList[entityType] = *NewWatchList(mockDeviceInfo, deviceFields, labelDeviceFields, watcher,
+					collectInterval)
+
+				return watchList
+			},
+		},
+		{
+			name: "Override existing GPU WatchList",
+			fields: fields{
+				entityWatchLists: map[dcgm.Field_Entity_Group]WatchList{
+					dcgm.FE_GPU: {
+						deviceInfo:        &deviceinfo.Info{},
+						deviceFields:      []dcgm.Short{10, 20, 30},
+						labelDeviceFields: []dcgm.Short{100, 200, 300},
+						watcher:           nil,
+						collectInterval:   10000,
+					},
+				},
+				entityWatchListsCount: 1,
+				counters:              testutils.SampleCounters,
+				gOpts:                 deviceOptionFalse,
+				sOpts:                 deviceOptionTrue,
+				cOpts:                 deviceOptionOther,
+				useFakeGPUs:           false,
+			},
+			args: args{
+				entityType:      dcgm.FE_GPU,
+				watcher:         mockdevicewatcher.NewMockWatcher(ctrl),
+				collectInterval: 1,
+			},
+			deviceFields: testutils.SampleGPUFieldIDs,
+			mockFunc: func(
+				watcher *mockdevicewatcher.MockWatcher, counters, labelCounters counters.CounterList,
+				entityType dcgm.Field_Entity_Group, deviceFields, labelDeviceFields []dcgm.Short,
+			) {
+				watcher.EXPECT().GetDeviceFields(counters, entityType).Return(deviceFields)
+				watcher.EXPECT().GetDeviceFields(labelCounters, entityType).Return(labelDeviceFields)
+
+				fakeDevices := deviceinfo.SpoofGPUDevices()
+				_, fakeGPUs, _, _ := deviceinfo.SpoofMigHierarchy()
+
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				// Times(2): once for CreateEntityWatchList, once for deviceinfo.Initialize in wantFunc
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).Times(2)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil).Times(2)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil).Times(2)
+			},
+			wantFunc: func(
+				e *WatchListManager, entityType dcgm.Field_Entity_Group, deviceFields,
+				labelDeviceFields []dcgm.Short, watcher *mockdevicewatcher.MockWatcher, collectInterval int64,
+			) map[dcgm.Field_Entity_Group]WatchList {
+				watchList := make(map[dcgm.Field_Entity_Group]WatchList)
+
+				mockDeviceInfo, _ := deviceinfo.Initialize(e.gOpts, e.sOpts, e.cOpts, e.useFakeGPUs, entityType)
+				watchList[entityType] = *NewWatchList(mockDeviceInfo, deviceFields, labelDeviceFields, watcher,
+					collectInterval)
+
+				return watchList
+			},
+		},
+		{
+			name: "Multiple Type WatchList",
+			fields: fields{
+				entityWatchLists: map[dcgm.Field_Entity_Group]WatchList{
+					dcgm.FE_GPU: {
+						deviceInfo:        &deviceinfo.Info{},
+						deviceFields:      []dcgm.Short{10, 20, 30},
+						labelDeviceFields: []dcgm.Short{100, 200, 300},
+						watcher:           nil,
+						collectInterval:   10000,
+					},
+					dcgm.FE_CPU: {
+						deviceInfo:        &deviceinfo.Info{},
+						deviceFields:      []dcgm.Short{11, 21, 31},
+						labelDeviceFields: []dcgm.Short{110, 210, 310},
+						watcher:           nil,
+						collectInterval:   10000,
+					},
+				},
+				entityWatchListsCount: 2,
+				counters:              testutils.SampleCounters,
+				gOpts:                 deviceOptionFalse,
+				sOpts:                 deviceOptionTrue,
+				cOpts:                 deviceOptionOther,
+				useFakeGPUs:           false,
+			},
+			args: args{
+				entityType:      dcgm.FE_GPU,
+				watcher:         mockdevicewatcher.NewMockWatcher(ctrl),
+				collectInterval: 1,
+			},
+			deviceFields: testutils.SampleGPUFieldIDs,
+			mockFunc: func(
+				watcher *mockdevicewatcher.MockWatcher, counters, labelCounters counters.CounterList,
+				entityType dcgm.Field_Entity_Group, deviceFields, labelDeviceFields []dcgm.Short,
+			) {
+				watcher.EXPECT().GetDeviceFields(counters, entityType).Return(deviceFields)
+				watcher.EXPECT().GetDeviceFields(labelCounters, entityType).Return(labelDeviceFields)
+
+				fakeDevices := deviceinfo.SpoofGPUDevices()
+				_, fakeGPUs, _, _ := deviceinfo.SpoofMigHierarchy()
+
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				// Times(2): once for CreateEntityWatchList, once for deviceinfo.Initialize in wantFunc
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).Times(2)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil).Times(2)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil).Times(2)
+			},
+			wantFunc: func(
+				e *WatchListManager, entityType dcgm.Field_Entity_Group, deviceFields,
+				labelDeviceFields []dcgm.Short, watcher *mockdevicewatcher.MockWatcher, collectInterval int64,
+			) map[dcgm.Field_Entity_Group]WatchList {
+				watchList := make(map[dcgm.Field_Entity_Group]WatchList)
+				for entity, existingWatchList := range e.entityWatchLists {
+					watchList[entity] = existingWatchList
+				}
+
+				mockDeviceInfo, _ := deviceinfo.Initialize(e.gOpts, e.sOpts, e.cOpts, e.useFakeGPUs, entityType)
+				watchList[entityType] = *NewWatchList(mockDeviceInfo, deviceFields, labelDeviceFields, watcher,
+					collectInterval)
+
+				return watchList
+			},
+		},
+		{
+			name: "Multiple Type WatchList and different type",
+			fields: fields{
+				entityWatchLists: map[dcgm.Field_Entity_Group]WatchList{
+					dcgm.FE_SWITCH: {
+						deviceInfo:        &deviceinfo.Info{},
+						deviceFields:      []dcgm.Short{10, 20, 30},
+						labelDeviceFields: []dcgm.Short{100, 200, 300},
+						watcher:           nil,
+						collectInterval:   10000,
+					},
+					dcgm.FE_CPU: {
+						deviceInfo:        &deviceinfo.Info{},
+						deviceFields:      []dcgm.Short{11, 21, 31},
+						labelDeviceFields: []dcgm.Short{110, 210, 310},
+						watcher:           nil,
+						collectInterval:   10000,
+					},
+				},
+				entityWatchListsCount: 3,
+				counters:              testutils.SampleCounters,
+				gOpts:                 deviceOptionFalse,
+				sOpts:                 deviceOptionTrue,
+				cOpts:                 deviceOptionOther,
+				useFakeGPUs:           false,
+			},
+			args: args{
+				entityType:      dcgm.FE_GPU,
+				watcher:         mockdevicewatcher.NewMockWatcher(ctrl),
+				collectInterval: 1,
+			},
+			deviceFields: testutils.SampleGPUFieldIDs,
+			mockFunc: func(
+				watcher *mockdevicewatcher.MockWatcher, counters, labelCounters counters.CounterList,
+				entityType dcgm.Field_Entity_Group, deviceFields, labelDeviceFields []dcgm.Short,
+			) {
+				watcher.EXPECT().GetDeviceFields(counters, entityType).Return(deviceFields)
+				watcher.EXPECT().GetDeviceFields(labelCounters, entityType).Return(labelDeviceFields)
+
+				fakeDevices := deviceinfo.SpoofGPUDevices()
+				_, fakeGPUs, _, _ := deviceinfo.SpoofMigHierarchy()
+
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				// Times(2): once for CreateEntityWatchList, once for deviceinfo.Initialize in wantFunc
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).Times(2)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil).Times(2)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil).Times(2)
+			},
+			wantFunc: func(
+				e *WatchListManager, entityType dcgm.Field_Entity_Group, deviceFields,
+				labelDeviceFields []dcgm.Short, watcher *mockdevicewatcher.MockWatcher, collectInterval int64,
+			) map[dcgm.Field_Entity_Group]WatchList {
+				watchList := make(map[dcgm.Field_Entity_Group]WatchList)
+				for entity, existingWatchList := range e.entityWatchLists {
+					watchList[entity] = existingWatchList
+				}
+
+				mockDeviceInfo, _ := deviceinfo.Initialize(e.gOpts, e.sOpts, e.cOpts, e.useFakeGPUs, entityType)
+				watchList[entityType] = *NewWatchList(mockDeviceInfo, deviceFields, labelDeviceFields, watcher,
+					collectInterval)
+
+				return watchList
+			},
+		},
+		{
+			name: "Device Info initialize error",
+			fields: fields{
+				entityWatchLists: make(map[dcgm.Field_Entity_Group]WatchList),
+				counters:         testutils.SampleCounters,
+				gOpts:            deviceOptionFalse,
+				sOpts:            deviceOptionTrue,
+				cOpts:            deviceOptionOther,
+				useFakeGPUs:      false,
+			},
+			args: args{
+				entityType:      dcgm.FE_GPU,
+				watcher:         mockdevicewatcher.NewMockWatcher(ctrl),
+				collectInterval: 1,
+			},
+			deviceFields: testutils.SampleGPUFieldIDs,
+			mockFunc: func(
+				watcher *mockdevicewatcher.MockWatcher, counters, labelCounters counters.CounterList,
+				entityType dcgm.Field_Entity_Group, deviceFields, labelDeviceFields []dcgm.Short,
+			) {
+				watcher.EXPECT().GetDeviceFields(counters, entityType).Return(deviceFields)
+				watcher.EXPECT().GetDeviceFields(labelCounters, entityType).Return(labelDeviceFields)
+
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(0), fmt.Errorf("some error"))
+			},
+			wantFunc: func(
+				e *WatchListManager, entityType dcgm.Field_Entity_Group, deviceFields,
+				labelDeviceFields []dcgm.Short, watcher *mockdevicewatcher.MockWatcher, collectInterval int64,
+			) map[dcgm.Field_Entity_Group]WatchList {
+				return nil
+			},
+			wantErr: true,
+		},
+		{
+			name: "No GPU WatchList",
+			fields: fields{
+				entityWatchLists:      make(map[dcgm.Field_Entity_Group]WatchList),
+				entityWatchListsCount: 1,
+				counters:              []counters.Counter{},
+				gOpts:                 deviceOptionFalse,
+				sOpts:                 deviceOptionTrue,
+				cOpts:                 deviceOptionOther,
+				useFakeGPUs:           false,
+			},
+			args: args{
+				entityType:      dcgm.FE_GPU,
+				watcher:         mockdevicewatcher.NewMockWatcher(ctrl),
+				collectInterval: 1,
+			},
+			deviceFields: []dcgm.Short{},
+			mockFunc: func(
+				watcher *mockdevicewatcher.MockWatcher, counters, labelCounters counters.CounterList,
+				entityType dcgm.Field_Entity_Group, deviceFields, labelDeviceFields []dcgm.Short,
+			) {
+				watcher.EXPECT().GetDeviceFields(counters, entityType).Return(deviceFields).Times(1)
+				watcher.EXPECT().GetDeviceFields(labelCounters, entityType).Return(deviceFields).Times(1)
+
+				fakeDevices := deviceinfo.SpoofGPUDevices()
+				_, fakeGPUs, _, _ := deviceinfo.SpoofMigHierarchy()
+
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				// Times(2): once for CreateEntityWatchList, once for deviceinfo.Initialize in wantFunc
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).Times(2)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil).Times(2)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil).Times(2)
+			},
+			wantFunc: func(
+				e *WatchListManager,
+				entityType dcgm.Field_Entity_Group,
+				deviceFields,
+				labelDeviceFields []dcgm.Short,
+				watcher *mockdevicewatcher.MockWatcher,
+				collectInterval int64,
+			) map[dcgm.Field_Entity_Group]WatchList {
+				watchList := make(map[dcgm.Field_Entity_Group]WatchList)
+
+				mockDeviceInfo, _ := deviceinfo.Initialize(e.gOpts, e.sOpts, e.cOpts, e.useFakeGPUs, entityType)
+				watchList[entityType] = *NewWatchList(mockDeviceInfo, deviceFields, []dcgm.Short{}, watcher,
+					collectInterval)
+
+				return watchList
+			},
+			wantErr: false,
+		},
+		{
+			name: "Only Driver Version to Watch",
+			fields: fields{
+				entityWatchLists:      make(map[dcgm.Field_Entity_Group]WatchList),
+				entityWatchListsCount: 1,
+				counters:              []counters.Counter{},
+				gOpts:                 deviceOptionFalse,
+				sOpts:                 deviceOptionTrue,
+				cOpts:                 deviceOptionOther,
+				useFakeGPUs:           false,
+			},
+			args: args{
+				entityType:      dcgm.FE_GPU,
+				watcher:         mockdevicewatcher.NewMockWatcher(ctrl),
+				collectInterval: 1,
+			},
+			deviceFields: []dcgm.Short{testutils.SampleDriverVersionCounter.FieldID},
+			mockFunc: func(
+				watcher *mockdevicewatcher.MockWatcher,
+				counters, labelCounters counters.CounterList,
+				entityType dcgm.Field_Entity_Group,
+				deviceFields, labelDeviceFields []dcgm.Short,
+			) {
+				watcher.EXPECT().GetDeviceFields(counters, entityType).Return(deviceFields).Times(1)
+				watcher.EXPECT().GetDeviceFields(labelCounters, entityType).Return(deviceFields).Times(1)
+
+				fakeDevices := deviceinfo.SpoofGPUDevices()
+				_, fakeGPUs, _, _ := deviceinfo.SpoofMigHierarchy()
+
+				mockHierarchy := dcgm.MigHierarchy_v2{
+					Count: 1,
+				}
+				mockHierarchy.EntityList[0] = fakeGPUs[0]
+
+				// Times(2): once for CreateEntityWatchList, once for deviceinfo.Initialize in wantFunc
+				mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).Times(2)
+				mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(fakeDevices[0], nil).Times(2)
+				mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockHierarchy, nil).Times(2)
+			},
+			wantFunc: func(
+				e *WatchListManager,
+				entityType dcgm.Field_Entity_Group,
+				deviceFields,
+				labelDeviceFields []dcgm.Short,
+				watcher *mockdevicewatcher.MockWatcher,
+				collectInterval int64,
+			) map[dcgm.Field_Entity_Group]WatchList {
+				watchList := make(map[dcgm.Field_Entity_Group]WatchList)
+
+				mockDeviceInfo, _ := deviceinfo.Initialize(e.gOpts, e.sOpts, e.cOpts, e.useFakeGPUs, entityType)
+				watchList[entityType] = *NewWatchList(mockDeviceInfo, deviceFields, labelDeviceFields, watcher,
+					collectInterval)
+
+				return watchList
+			},
+			wantErr: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			e := &WatchListManager{
+				entityWatchLists: tt.fields.entityWatchLists,
+				counters:         tt.fields.counters,
+				gOpts:            tt.fields.gOpts,
+				sOpts:            tt.fields.sOpts,
+				cOpts:            tt.fields.cOpts,
+				useFakeGPUs:      tt.fields.useFakeGPUs,
+			}
+
+			tt.mockFunc(
+				tt.args.watcher,
+				tt.fields.counters,
+				tt.fields.counters.LabelCounters(),
+				tt.args.entityType,
+				tt.deviceFields,
+				[]dcgm.Short{testutils.SampleDriverVersionCounter.FieldID},
+			)
+
+			want := tt.wantFunc(
+				e,
+				tt.args.entityType,
+				tt.deviceFields,
+				[]dcgm.Short{testutils.SampleDriverVersionCounter.FieldID},
+				tt.args.watcher,
+				tt.args.collectInterval,
+			)
+
+			err := e.CreateEntityWatchList(tt.args.entityType, tt.args.watcher, tt.args.collectInterval)
+			got := e.entityWatchLists
+			gotEntityWatchList, exist := e.EntityWatchList(tt.args.entityType)
+
+			if !tt.wantErr {
+				assert.Nil(t, err, "expected no error")
+				wantEntityWatchList := want[tt.args.entityType]
+
+				assert.True(t, exist, "expected entity to exist")
+				assert.Equal(t, want, got, "expected output to be equal")
+				assert.Equal(t, tt.fields.entityWatchListsCount, len(got),
+					"expected entityWatchLists count to be equal")
+				assert.Equal(t, wantEntityWatchList, gotEntityWatchList, "expected entity results to be equal")
+			} else {
+				assert.NotNil(t, err, "expected an error.")
+				assert.Equal(t, 0, len(got), "expected output to be zero")
+				assert.False(t, exist, "expected entity to not exist")
+			}
+		})
+	}
+}
+
+// TestWatchListManager_EntityWatchList checks lookups against a pre-populated entityWatchLists map.
+func TestWatchListManager_EntityWatchList(t *testing.T) {
+	tests := []struct {
+		name             string
+		deviceType       dcgm.Field_Entity_Group
+		entityWatchLists map[dcgm.Field_Entity_Group]WatchList
+		wantWatchList    WatchList
+		wantExist        bool
+		override         bool
+	}{
+		{
+			name:       "Get GPU WatchList",
+			deviceType: dcgm.FE_GPU,
+			entityWatchLists: map[dcgm.Field_Entity_Group]WatchList{
+				dcgm.FE_GPU: {
+					deviceInfo:        &deviceinfo.Info{},
+					deviceFields:      []dcgm.Short{10, 20, 30},
+					labelDeviceFields: []dcgm.Short{100, 200, 300},
+					watcher:           nil,
+					collectInterval:   10000,
+				},
+			},
+			wantWatchList: WatchList{
+				deviceInfo:        &deviceinfo.Info{},
+				deviceFields:      []dcgm.Short{10, 20, 30},
+				labelDeviceFields: []dcgm.Short{100, 200, 300},
+				watcher:           nil,
+				collectInterval:   10000,
+			},
+			wantExist: true,
+		},
+		{
+			name:       "Get latest GPU WatchList",
+			deviceType: dcgm.FE_GPU,
+			entityWatchLists: map[dcgm.Field_Entity_Group]WatchList{
+				dcgm.FE_GPU: {
+					deviceInfo:        &deviceinfo.Info{},
+					deviceFields:      []dcgm.Short{10, 20, 30},
+					labelDeviceFields: []dcgm.Short{100, 200, 300},
+					watcher:           nil,
+					collectInterval:   10000,
+				},
+			},
+			wantWatchList: WatchList{
+				deviceInfo:        &deviceinfo.Info{},
+				deviceFields:      []dcgm.Short{101, 201, 301},
+				labelDeviceFields: []dcgm.Short{1001, 2001, 3001},
+				watcher:           nil,
+				collectInterval:   10000,
+			},
+			wantExist: true,
+			override:  true,
+		},
+		{
+			name:             "Empty WatchList",
+			deviceType:       dcgm.FE_GPU,
+			entityWatchLists: map[dcgm.Field_Entity_Group]WatchList{},
+			wantWatchList:    WatchList{},
+			wantExist:        false,
+		},
+		{
+			name:       "Get GPU WatchList when only CPU Entity exist",
+			deviceType: dcgm.FE_GPU,
+			entityWatchLists: map[dcgm.Field_Entity_Group]WatchList{
+				dcgm.FE_CPU: {
+					deviceInfo:        &deviceinfo.Info{},
+					deviceFields:      []dcgm.Short{10, 20, 30},
+					labelDeviceFields: []dcgm.Short{100, 200, 300},
+					watcher:           nil,
+					collectInterval:   10000,
+				},
+			},
+			wantWatchList: WatchList{},
+			wantExist:     false,
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			// Only the map is populated; every other manager field keeps its zero value.
+			manager := &WatchListManager{entityWatchLists: tc.entityWatchLists}
+			if tc.override {
+				// Swap the stored entry so the lookup has to return the replacement.
+				manager.entityWatchLists[tc.deviceType] = tc.wantWatchList
+			}
+
+			got, found := manager.EntityWatchList(tc.deviceType)
+			assert.Equal(t, tc.wantExist, found, "expected entity exist value to be equal")
+			assert.Equal(t, tc.wantWatchList, got, "expected output to be equal")
+		})
+	}
+}
diff --git a/internal/pkg/devicewatchlistmanager/types.go b/internal/pkg/devicewatchlistmanager/types.go
new file mode 100644
index 00000000..e856fd81
--- /dev/null
+++ b/internal/pkg/devicewatchlistmanager/types.go
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/devicewatchlistmanager/mock_device_watchlist_manager.go -package=devicewatchlistmanager -copyright_file=../../../hack/header.txt . Manager
+
+package devicewatchlistmanager
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher"
+)
+
+type Manager interface { // Manager creates and looks up watch lists keyed by DCGM entity group.
+	CreateEntityWatchList(dcgm.Field_Entity_Group, devicewatcher.Watcher, int64) error // int64: collect interval
+	EntityWatchList(dcgm.Field_Entity_Group) (WatchList, bool) // bool reports whether an entry exists
+}
diff --git a/internal/pkg/elf/README.md b/internal/pkg/elf/README.md
new file mode 100644
index 00000000..fa1e8378
--- /dev/null
+++ b/internal/pkg/elf/README.md
@@ -0,0 +1,3 @@
+# ELF - wrapper package for the system debug/elf package
+
+This package allows debug/elf package functions to be mocked for testing purposes.
diff --git a/internal/pkg/elf/elf.go b/internal/pkg/elf/elf.go
new file mode 100644
index 00000000..a547943b
--- /dev/null
+++ b/internal/pkg/elf/elf.go
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package elf
+
+import (
+	"debug/elf"
+)
+
+var _ ELF = (*RealELF)(nil) // compile-time check that *RealELF satisfies ELF
+
+type RealELF struct{} // RealELF is the ELF implementation backed by the standard debug/elf package.
+
+func (r RealELF) Open(name string) (*elf.File, error) { // Open forwards to elf.Open unchanged.
+	return elf.Open(name)
+}
diff --git a/internal/pkg/elf/types.go b/internal/pkg/elf/types.go
new file mode 100644
index 00000000..f55cf498
--- /dev/null
+++ b/internal/pkg/elf/types.go
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package elf
+
+import "debug/elf"
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/elf/mock_elf.go -package=elf -copyright_file=../../../hack/header.txt . ELF
+type ELF interface { // ELF is the mockable seam over debug/elf (mock generated by the go:generate line above).
+	Open(name string) (*elf.File, error) // same signature as elf.Open
+}
diff --git a/internal/pkg/exec/README.md b/internal/pkg/exec/README.md
new file mode 100644
index 00000000..85171b19
--- /dev/null
+++ b/internal/pkg/exec/README.md
@@ -0,0 +1,3 @@
+# Exec - wrapper package for the system os/exec package
+
+This package allows os/exec package functions to be mocked for testing purposes.
diff --git a/internal/pkg/exec/exec.go b/internal/pkg/exec/exec.go
new file mode 100644
index 00000000..1af40c62
--- /dev/null
+++ b/internal/pkg/exec/exec.go
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package exec
+
+import "os/exec"
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/exec/mock_exec.go -package=exec -copyright_file=../../../hack/header.txt . Exec
+type Exec interface { // Exec is the mockable seam over os/exec command creation.
+	Command(name string, arg ...string) Cmd // like exec.Command, but returns the Cmd interface
+}
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/exec/mock_cmd.go -package=exec -copyright_file=../../../hack/header.txt . Cmd
+type Cmd interface { // Cmd exposes the part of a command that this wrapper makes mockable.
+	Output() ([]byte, error) // same signature as (*exec.Cmd).Output
+}
+
+var ( // compile-time checks that the real implementations satisfy the interfaces
+	_ Exec = (*RealExec)(nil)
+	_ Cmd  = (*RealCmd)(nil)
+)
+
+type RealExec struct{} // RealExec implements Exec with the standard os/exec package.
+
+func (r RealExec) Command(name string, arg ...string) Cmd { // Command wraps exec.Command's result in a RealCmd.
+	return &RealCmd{cmd: exec.Command(name, arg...)}
+}
+
+type RealCmd struct { // RealCmd adapts an *exec.Cmd to the Cmd interface.
+	cmd *exec.Cmd // the wrapped command; set by RealExec.Command
+}
+
+func (r *RealCmd) Output() ([]byte, error) { // Output delegates to the wrapped (*exec.Cmd).Output.
+	return r.cmd.Output()
+}
diff --git a/internal/pkg/hostname/hostname.go b/internal/pkg/hostname/hostname.go
new file mode 100644
index 00000000..c714929d
--- /dev/null
+++ b/internal/pkg/hostname/hostname.go
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package hostname
+
+import (
+	"net"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
+)
+
+var os osinterface.OS = osinterface.RealOS{} // package-level seam: tests swap in a mock OS and restore RealOS
+
+// GetHostname returns the host parsed from config.RemoteHEInfo when config.UseRemoteHE is set, else the local one.
+func GetHostname(config *appconfig.Config) (string, error) {
+	if !config.UseRemoteHE {
+		return getLocalHostname()
+	}
+	return parseRemoteHostname(config)
+}
+
+// parseRemoteHostname returns the host part of config.RemoteHEInfo; the returned error is always nil.
+func parseRemoteHostname(config *appconfig.Config) (string, error) {
+	remote := config.RemoteHEInfo
+	// Typical values are "localhost:5555", "example.com:5555", or "192.168.1.1:5555".
+	if host, _, err := net.SplitHostPort(remote); err == nil {
+		return host, nil
+	}
+	// SplitHostPort rejected the value, most likely because it carries no port,
+	// so the configured string is used as is.
+	return remote, nil
+}
+
+func getLocalHostname() (string, error) {
+	if nodeName := os.Getenv("NODE_NAME"); nodeName != "" { // a non-empty NODE_NAME takes precedence
+		return nodeName, nil
+	}
+	hostname, err := os.Hostname() // os is the package-level osinterface.OS, not the standard library package
+	if err != nil {
+		return "", err
+	}
+	return hostname, nil
+}
diff --git a/internal/pkg/hostname/hostname_test.go b/internal/pkg/hostname/hostname_test.go
new file mode 100644
index 00000000..78c9afc8
--- /dev/null
+++ b/internal/pkg/hostname/hostname_test.go
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package hostname
+
+import (
+	"errors"
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/mock/gomock"
+
+	osmock "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/os"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
+)
+
+func TestGetHostname(t *testing.T) {
+	tests := []struct {
+		name    string
+		config  *appconfig.Config
+		hook    func(t *testing.T) func() // installs a mock os for the subtest; returns the restore func
+		want    string
+		wantErr assert.ErrorAssertionFunc // nil skips the error assertion
+	}{
+		{
+			name:   "When os.Hostname() return hostname",
+			config: &appconfig.Config{UseRemoteHE: false},
+			hook: func(t *testing.T) func() {
+				ctrl := gomock.NewController(t)
+				m := osmock.NewMockOS(ctrl)
+				m.EXPECT().Getenv(gomock.Eq("NODE_NAME"))
+				m.EXPECT().Hostname().Return("test-hostname", nil).AnyTimes()
+				os = m
+				return func() {
+					os = osinterface.RealOS{}
+				}
+			},
+			want: "test-hostname",
+		},
+		{
+			name:   "When GetHostname uses the NODE_NAME env variable",
+			config: &appconfig.Config{UseRemoteHE: false},
+			hook: func(t *testing.T) func() {
+				ctrl := gomock.NewController(t)
+				m := osmock.NewMockOS(ctrl)
+				m.EXPECT().Getenv(gomock.Eq("NODE_NAME")).Return("test-hostname")
+				os = m
+				return func() {
+					os = osinterface.RealOS{}
+				}
+			},
+			want: "test-hostname",
+		},
+		{
+			name: "When appconfig.UseRemoteHE is true the local OS is not consulted",
+			config: &appconfig.Config{
+				UseRemoteHE:  true,
+				RemoteHEInfo: "localhost:5555",
+			},
+			hook: func(t *testing.T) func() {
+				// No expectations: any Getenv/Hostname call on the mock fails the test.
+				os = osmock.NewMockOS(gomock.NewController(t))
+				return func() {
+					os = osinterface.RealOS{}
+				}
+			},
+			want: "localhost",
+		},
+		{
+			name:   "When os.Hostname() return error",
+			config: &appconfig.Config{UseRemoteHE: false},
+			hook: func(t *testing.T) func() {
+				ctrl := gomock.NewController(t)
+				m := osmock.NewMockOS(ctrl)
+				m.EXPECT().Getenv(gomock.Eq("NODE_NAME"))
+				m.EXPECT().Hostname().Return("", errors.New("Boom!")).AnyTimes()
+				os = m
+				return func() {
+					os = osinterface.RealOS{}
+				}
+			},
+			want:    "",
+			wantErr: assert.Error,
+		},
+		{
+			name: "When appconfig.UseRemoteHE is true and remote hostname is name",
+			config: &appconfig.Config{
+				UseRemoteHE:  true,
+				RemoteHEInfo: "example.com:5555",
+			},
+			want: "example.com",
+		},
+		{
+			name: "When appconfig.UseRemoteHE is true and hostname is IP address",
+			config: &appconfig.Config{
+				UseRemoteHE:  true,
+				RemoteHEInfo: "127.0.0.1",
+			},
+			want: "127.0.0.1",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.hook != nil {
+				cleanup := tt.hook(t)
+				defer cleanup()
+			}
+			got, err := GetHostname(tt.config)
+			if tt.wantErr != nil && !tt.wantErr(t, err, fmt.Sprintf("GetHostname(%v)", tt.config)) {
+				return
+			}
+			assert.Equalf(t, tt.want, got, "GetHostname(%v)", tt.config)
+		})
+	}
+}
diff --git a/internal/pkg/integration_test/collector_test.go b/internal/pkg/integration_test/collector_test.go
new file mode 100644
index 00000000..357bbdf9
--- /dev/null
+++ b/internal/pkg/integration_test/collector_test.go
@@ -0,0 +1,1117 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package integration_test
+
+import (
+	"bytes"
+	"fmt"
+	"reflect"
+	"slices"
+	"strconv"
+	"testing"
+	"time"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	io_prometheus_client "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/mock/gomock"
+	"google.golang.org/grpc"
+	"k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
+	"k8s.io/utils/ptr"
+
+	mockdcgmprovider "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/rendermetrics"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+)
+
+var deviceWatcher = devicewatcher.NewDeviceWatcher() // shared by the tests in this file when creating entity watch lists
+
+var expectedGPUMetrics = map[string]bool{ // field names testDCGMGPUCollector requires in the GPU collector's output
+	testutils.SampleGPUTempCounter.FieldName:           true,
+	testutils.SampleGPUTotalEnergyCounter.FieldName:    true,
+	testutils.SampleGPUPowerUsageCounter.FieldName:     true,
+	testutils.SampleVGPULicenseStatusCounter.FieldName: true,
+}
+
+var expectedCPUMetrics = map[string]bool{ // NOTE(review): presumably the CPU counterpart checked by testDCGMCPUCollector — confirm
+	testutils.SampleCPUUtilTotalCounter.FieldName: true,
+}
+
+// setupTest initializes the DCGM provider with UseRemoteHE disabled and
+// returns a teardown function that cleans up the provider's client.
+func setupTest() func() {
+	dcgmprovider.Initialize(&appconfig.Config{UseRemoteHE: false})
+
+	// The tests in this file defer the returned function immediately.
+	teardown := func() {
+		dcgmprovider.Client().Cleanup()
+	}
+	return teardown
+}
+// runOnlyWithLiveGPUs skips the calling test when DCGM reports no supported GPUs.
+func runOnlyWithLiveGPUs(t *testing.T) {
+	t.Helper()
+	supported, err := dcgmprovider.Client().GetSupportedDevices()
+	assert.NoError(t, err)
+	if len(supported) == 0 {
+		t.Skip("Skipping test that requires live GPUs. None were found")
+	}
+}
+
+func mockDCGM(ctrl *gomock.Controller) *mockdcgmprovider.MockDCGM {
+	// Mock results outputs
+	mockDevice := dcgm.Device{
+		GPU:  0,
+		UUID: "fake1",
+		PCI: dcgm.PCIInfo{
+			BusID: "00000000:0000:0000.0",
+		},
+	}
+
+	mockMigHierarchy := dcgm.MigHierarchy_v2{
+		Count: 0,
+	}
+
+	mockCPUHierarchy := dcgm.CpuHierarchy_v1{
+		Version: 0,
+		NumCpus: 1,
+		Cpus: [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{
+			{
+				CpuId:      0,
+				OwnedCores: []uint64{0, 18446744073709551360, 65535},
+			},
+		},
+	}
+
+	mockGroupHandle := dcgm.GroupHandle{}
+	mockGroupHandle.SetHandle(1)
+
+	mockFieldHandle := dcgm.FieldHandle{}
+	mockFieldHandle.SetHandle(1)
+
+	mockDCGMProvider := mockdcgmprovider.NewMockDCGM(ctrl)
+	mockDCGMProvider.EXPECT().GetAllDeviceCount().Return(uint(1), nil).AnyTimes()
+	mockDCGMProvider.EXPECT().AddEntityToGroup(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().GetGpuInstanceHierarchy().Return(mockMigHierarchy, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().GetCpuHierarchy().Return(mockCPUHierarchy, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().CreateGroup(gomock.Any()).Return(mockGroupHandle, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().DestroyGroup(gomock.Any()).Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().FieldGroupCreate(gomock.Any(), gomock.Any()).Return(mockFieldHandle, nil).AnyTimes()
+	mockDCGMProvider.EXPECT().FieldGroupDestroy(gomock.Any()).Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().WatchFieldsWithGroupEx(gomock.Any(), gomock.Any(), gomock.Any(),
+		gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
+	mockDCGMProvider.EXPECT().GetDeviceInfo(gomock.Any()).Return(mockDevice, nil).AnyTimes()
+
+	return mockDCGMProvider
+}
+
+// TestClockEventsCollector_NewClocksThrottleReasonsCollector checks which
+// counter lists collector.NewClockEventsCollector accepts.
+func TestClockEventsCollector_NewClocksThrottleReasonsCollector(t *testing.T) {
+	cfg := &appconfig.Config{
+		UseRemoteHE: false,
+		GPUDeviceOptions: appconfig.DeviceOptions{
+			Flex:       true,
+			MajorRange: []int{-1},
+			MinorRange: []int{-1},
+		},
+	}
+
+	dcgmprovider.Initialize(cfg)
+	defer dcgmprovider.Client().Cleanup()
+
+	// Build the GPU watch list that every subtest hands to the constructor.
+	watched := []counters.Counter{{FieldID: dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS}}
+	manager := devicewatchlistmanager.NewWatchListManager(watched, cfg)
+	err := manager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher, int64(cfg.CollectInterval))
+	require.NoError(t, err)
+	gpuWatchList, _ := manager.EntityWatchList(dcgm.FE_GPU)
+
+	t.Run("Should Return Error When DCGM_EXP_CLOCK_EVENTS_COUNT is not present", func(t *testing.T) {
+		// The only record is a label, so no exporter counter is expected.
+		parsed, err := counters.ExtractCounters([][]string{
+			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
+		}, cfg)
+		require.NoError(t, err)
+		require.Len(t, parsed.ExporterCounters, 0)
+		require.Len(t, parsed.DCGMCounters, 1)
+
+		got, err := collector.NewClockEventsCollector(parsed.DCGMCounters, "", cfg, gpuWatchList)
+		require.Error(t, err)
+		require.Nil(t, got)
+	})
+
+	t.Run("Should Return Error When Counter Param Is Empty", func(t *testing.T) {
+		got, err := collector.NewClockEventsCollector([]counters.Counter{}, "", cfg, gpuWatchList)
+		require.Error(t, err)
+		require.Nil(t, got)
+	})
+
+	t.Run("Should Not Return Error When DCGM_EXP_CLOCK_EVENTS_COUNT Present More Than Once", func(t *testing.T) {
+		parsed, err := counters.ExtractCounters([][]string{
+			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
+			{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
+			{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
+			{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
+		}, cfg)
+		require.NoError(t, err)
+
+		// Hand the label counters to the constructor together with the exporter counters.
+		labelled := parsed.ExporterCounters
+		for _, c := range parsed.DCGMCounters {
+			if c.PromType == "label" {
+				labelled = append(labelled, c)
+			}
+		}
+
+		got, err := collector.NewClockEventsCollector(labelled, "", cfg, gpuWatchList)
+		require.NoError(t, err)
+		require.NotNil(t, got)
+	})
+}
+
+// TestClockEventsCollector_Gather injects clock-event reasons into three fake GPUs and verifies the per-reason counts.
+func TestClockEventsCollector_Gather(t *testing.T) {
+	teardownTest := setupTest()
+	defer teardownTest()
+	runOnlyWithLiveGPUs(t)
+	testutils.RequireLinux(t)
+
+	hostname := "local-test"
+	config := &appconfig.Config{
+		GPUDeviceOptions: appconfig.DeviceOptions{
+			Flex:       true,
+			MajorRange: []int{-1},
+			MinorRange: []int{-1},
+		},
+		ClockEventsCountWindowSize: int(time.Duration(5) * time.Minute),
+	}
+
+	records := [][]string{
+		{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
+		{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
+	}
+
+	cc, err := counters.ExtractCounters(records, config)
+	require.NoError(t, err)
+	require.Len(t, cc.ExporterCounters, 1)
+	require.Len(t, cc.DCGMCounters, 1)
+
+	cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters.LabelCounters()...)
+
+	// Create fake GPUs, unless the device limit leaves no room for them
+	numGPUs, err := dcgmprovider.Client().GetAllDeviceCount()
+	require.NoError(t, err)
+
+	if numGPUs+1 > dcgm.MAX_NUM_DEVICES {
+		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
+	}
+
+	entityList := []dcgm.MigHierarchyInfo{
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+	}
+
+	gpuIDs, err := dcgmprovider.Client().CreateFakeEntities(entityList)
+	require.NoError(t, err)
+	require.NotEmpty(t, gpuIDs)
+
+	type clockEventsCountExpectation map[string]string
+	expectations := map[string]clockEventsCountExpectation{}
+
+	for i, gpuID := range gpuIDs {
+		err = dcgmprovider.Client().InjectFieldValue(gpuID,
+			dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+			dcgm.DCGM_FT_INT64,
+			0,
+			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
+			int64(collector.DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|collector.DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL),
+		)
+		require.NoError(t, err)
+		// Same reasons again: SW_THERMAL and HW_THERMAL are expected to reach a count of 2.
+		err = dcgmprovider.Client().InjectFieldValue(gpuID,
+			dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+			dcgm.DCGM_FT_INT64,
+			0,
+			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
+			int64(collector.DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|collector.DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL),
+		)
+		require.NoError(t, err)
+		// One sample carrying only GPU_IDLE.
+		err = dcgmprovider.Client().InjectFieldValue(gpuID,
+			dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+			dcgm.DCGM_FT_INT64,
+			0,
+			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
+			int64(collector.DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE),
+		)
+		require.NoError(t, err)
+		// Expected count per reason name for this GPU.
+		expectations[fmt.Sprint(gpuID)] = clockEventsCountExpectation{
+			collector.DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL.String(): "2",
+			collector.DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL.String(): "2",
+			collector.DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE.String():   "1",
+		}
+	}
+
+	// Create a fake K8S to emulate work on K8S environment
+	tmpDir, cleanup := testutils.CreateTmpDir(t)
+	defer cleanup()
+	socketPath := tmpDir + "/kubelet.sock"
+	server := grpc.NewServer() // NOTE(review): never served or stopped in this function — confirm intent
+
+	gpuIDsAsString := make([]string, len(gpuIDs))
+
+	for i, g := range gpuIDs {
+		gpuIDsAsString[i] = fmt.Sprint(g)
+	}
+
+	v1alpha1.RegisterPodResourcesListerServer(server,
+		testutils.NewMockPodResourcesServer(appconfig.NvidiaResourceName, gpuIDsAsString))
+	// Tell that the app is running on K8S
+	config.Kubernetes = true
+	config.PodResourcesKubeletSocket = socketPath
+
+	allCounters := []counters.Counter{
+		{
+			FieldID: dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+		},
+	}
+
+	allCounters = append(allCounters, cc.ExporterCounters.LabelCounters()...)
+
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(allCounters, config)
+	err = deviceWatchListManager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	item, _ := deviceWatchListManager.EntityWatchList(dcgm.FE_GPU)
+
+	clockEventCollector, err := collector.NewClockEventsCollector(cc.ExporterCounters, hostname, config, item)
+	require.NoError(t, err)
+
+	defer func() {
+		clockEventCollector.Cleanup()
+	}()
+
+	metrics, err := clockEventCollector.GetMetrics()
+	require.NoError(t, err)
+	require.NotEmpty(t, metrics)
+	// We expect 1 metric: DCGM_EXP_CLOCK_EVENTS_COUNT
+	require.Len(t, metrics, 1)
+	// The map holds a single entry (checked above); take its values
+	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)]
+
+	// Keep only the fake GPUs' values; values whose GPU id is not numeric are kept as well.
+	// slices.DeleteFunc tests every element, whereas deleting by index inside a
+	// forward loop skips the value that slides into the freed slot.
+	metricValues = slices.DeleteFunc(metricValues, func(m collector.Metric) bool {
+		gpuID, err := strconv.ParseUint(m.GPU, 10, 64)
+		return err == nil && !slices.Contains(gpuIDs, uint(gpuID))
+	})
+
+	// We expect 9 records, because we have 3 fake GPU and each GPU experienced 3 CLOCK_EVENTS
+	require.Len(t, metricValues, 9)
+	for _, val := range metricValues {
+		require.Contains(t, val.Labels, "window_size_in_ms")
+		require.Equal(t, fmt.Sprint(config.ClockEventsCountWindowSize), val.Labels["window_size_in_ms"])
+		expected, exists := expectations[val.GPU]
+		require.True(t, exists)
+		actualReason, exists := val.Labels["clock_event"]
+		require.True(t, exists)
+		expectedVal, exists := expected[actualReason]
+		require.True(t, exists)
+		require.Equal(t, expectedVal, val.Value)
+	}
+}
+
+func TestClockEventsCollector_Gather_AllTheThings(t *testing.T) {
+	teardownTest := setupTest()
+	defer teardownTest()
+	runOnlyWithLiveGPUs(t)
+	// Scenario: one fake GPU gets a single injected sample carrying nine clock-event reasons at once.
+	hostname := "local-test"
+	config := &appconfig.Config{
+		GPUDeviceOptions: appconfig.DeviceOptions{
+			Flex:       true,
+			MajorRange: []int{-1},
+			MinorRange: []int{-1},
+		},
+		ClockEventsCountWindowSize: int(time.Duration(5) * time.Minute),
+	}
+
+	records := [][]string{
+		{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
+		{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
+	}
+
+	cc, err := counters.ExtractCounters(records, config)
+	require.NoError(t, err)
+	require.Len(t, cc.ExporterCounters, 1)
+	require.Len(t, cc.DCGMCounters, 1)
+	// Hand the label counters to the collector together with the exporter counter.
+	for i := range cc.DCGMCounters {
+		if cc.DCGMCounters[i].PromType == "label" {
+			cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
+		}
+	}
+
+	// Create fake GPU
+	numGPUs, err := dcgmprovider.Client().GetAllDeviceCount()
+	require.NoError(t, err)
+
+	if numGPUs+1 > dcgm.MAX_NUM_DEVICES {
+		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
+	}
+
+	entityList := []dcgm.MigHierarchyInfo{
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+	}
+
+	gpuIDs, err := dcgmprovider.Client().CreateFakeEntities(entityList)
+	require.NoError(t, err)
+	require.NotEmpty(t, gpuIDs)
+	// Expected count per clock-event reason, keyed by the GPU id rendered as a string.
+	type clockThrottleReasonExpectation map[string]string
+	expectations := map[string]clockThrottleReasonExpectation{}
+
+	require.Len(t, gpuIDs, 1)
+	gpuID := gpuIDs[0]
+	err = dcgmprovider.Client().InjectFieldValue(gpuID,
+		dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+		dcgm.DCGM_FT_INT64,
+		0,
+		time.Now().Add(-time.Duration(1)*time.Second).UnixMicro(),
+		int64(collector.DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS),
+	)
+
+	require.NoError(t, err)
+
+	expectations[fmt.Sprint(gpuID)] = clockThrottleReasonExpectation{
+		collector.DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE.String():       "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING.String(): "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP.String():   "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN.String():    "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST.String():     "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL.String():     "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL.String():     "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE.String(): "1",
+		collector.DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS.String(): "1",
+	}
+	// The watch list covers the clock-event-reasons field plus the label counters.
+	allCounters := []counters.Counter{
+		{
+			FieldID: dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+		},
+	}
+
+	allCounters = append(allCounters, cc.ExporterCounters.LabelCounters()...)
+
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(allCounters, config)
+
+	err = deviceWatchListManager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	item, _ := deviceWatchListManager.EntityWatchList(dcgm.FE_GPU)
+
+	clockEventCollector, err := collector.NewClockEventsCollector(cc.ExporterCounters, hostname, config, item)
+	require.NoError(t, err)
+
+	defer func() {
+		clockEventCollector.Cleanup()
+	}()
+
+	metrics, err := clockEventCollector.GetMetrics()
+	require.NoError(t, err)
+	require.NotEmpty(t, metrics)
+	// We expect 1 metric: DCGM_EXP_CLOCK_EVENTS_COUNT
+	require.Len(t, metrics, 1)
+	// The map holds a single entry (checked above); take its values
+	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)]
+	// Filter the values down to the fake GPU created above.
+	metricValues = getFakeGPUMetrics(metricValues, gpuIDs)
+
+	// Expected 9 metric values, because we injected 9 reasons
+	require.Len(t, metricValues, 9)
+	for _, val := range metricValues {
+		require.Contains(t, val.Labels, "window_size_in_ms")
+		require.Equal(t, fmt.Sprint(config.ClockEventsCountWindowSize), val.Labels["window_size_in_ms"])
+		expected, exists := expectations[val.GPU]
+		require.True(t, exists)
+		actualReason, exists := val.Labels["clock_event"]
+		require.True(t, exists)
+		expectedVal, exists := expected[actualReason]
+		require.True(t, exists)
+		require.Equal(t, expectedVal, val.Value)
+	}
+}
+
+func TestClockEventsCollector_Gather_AllTheThings_WhenNoLabels(t *testing.T) {
+	teardownTest := setupTest()
+	defer teardownTest()
+	runOnlyWithLiveGPUs(t)
+	// Scenario: only the DCGM_EXP_CLOCK_EVENTS_COUNT record is configured, so there are no label counters.
+	hostname := "local-test"
+	config := &appconfig.Config{
+		GPUDeviceOptions: appconfig.DeviceOptions{
+			Flex:       true,
+			MajorRange: []int{-1},
+			MinorRange: []int{-1},
+		},
+		ClockEventsCountWindowSize: int(time.Duration(5) * time.Minute),
+	}
+
+	records := [][]string{
+		{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
+	}
+
+	cc, err := counters.ExtractCounters(records, config)
+	require.NoError(t, err)
+	require.Len(t, cc.ExporterCounters, 1)
+	require.Len(t, cc.DCGMCounters, 0)
+
+	// Create fake GPU
+	numGPUs, err := dcgmprovider.Client().GetAllDeviceCount()
+	require.NoError(t, err)
+
+	if numGPUs+1 > dcgm.MAX_NUM_DEVICES {
+		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
+	}
+
+	entityList := []dcgm.MigHierarchyInfo{
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+	}
+
+	gpuIDs, err := dcgmprovider.Client().CreateFakeEntities(entityList)
+	require.NoError(t, err)
+	require.NotEmpty(t, gpuIDs)
+	// Inject one sample with nine clock-event reasons set into the first fake GPU.
+	gpuID := gpuIDs[0]
+	err = dcgmprovider.Client().InjectFieldValue(gpuID,
+		dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+		dcgm.DCGM_FT_INT64,
+		0,
+		time.Now().Add(-time.Duration(1)*time.Second).UnixMicro(),
+		int64(collector.DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE|
+			collector.DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS),
+	)
+
+	require.NoError(t, err)
+	// Only the clock-event-reasons field is watched; no label counters are appended here.
+	allCounters := []counters.Counter{
+		{
+			FieldID: dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+		},
+	}
+
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(allCounters, config)
+
+	err = deviceWatchListManager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	item, _ := deviceWatchListManager.EntityWatchList(dcgm.FE_GPU)
+
+	clockEventCollector, err := collector.NewClockEventsCollector(cc.ExporterCounters, hostname, config, item)
+	require.NoError(t, err)
+
+	defer func() {
+		clockEventCollector.Cleanup()
+	}()
+
+	metrics, err := clockEventCollector.GetMetrics()
+	require.NoError(t, err)
+	require.NotEmpty(t, metrics)
+	// We expect 1 metric: DCGM_EXP_CLOCK_EVENTS_COUNT
+	require.Len(t, metrics, 1)
+	// The map holds a single entry (checked above); take its values
+	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)]
+	// Exclude the real GPU from the test
+	metricValues = getFakeGPUMetrics(metricValues, gpuIDs)
+	// Expected 9 metric values, because we injected 9 reasons
+	require.Len(t, metricValues, 9)
+}
+
+// getFakeGPUMetrics returns metricValues without the metrics that belong to
+// GPUs outside gpuIDs. A metric whose GPU field does not parse as an unsigned
+// decimal number is kept. The input slice's backing array is reused.
+func getFakeGPUMetrics(metricValues []collector.Metric, gpuIDs []uint) []collector.Metric {
+	// DeleteFunc tests every element; deleting by index inside a forward loop
+	// would skip the element that slides into the freed slot.
+	return slices.DeleteFunc(metricValues, func(m collector.Metric) bool {
+		gpuID, err := strconv.ParseUint(m.GPU, 10, 64)
+		return err == nil && !slices.Contains(gpuIDs, uint(gpuID))
+	})
+}
+
+func TestXIDCollector_Gather_Encode(t *testing.T) {
+	teardownTest := setupTest()
+	defer teardownTest()
+	runOnlyWithLiveGPUs(t)
+	// Scenario: XID errors are injected into three fake GPUs, then the collector output is checked and rendered.
+	hostname := "local-test"
+	config := &appconfig.Config{
+		GPUDeviceOptions: appconfig.DeviceOptions{
+			Flex:       true,
+			MajorRange: []int{-1},
+			MinorRange: []int{-1},
+		},
+		XIDCountWindowSize: int(time.Duration(5) * time.Minute),
+	}
+
+	records := [][]string{
+		{
+			"DCGM_EXP_XID_ERRORS_COUNT",
+			"gauge",
+			"Count of XID Errors within user-specified time window (see xid-count-window-size param).",
+		},
+		{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
+	}
+
+	cc, err := counters.ExtractCounters(records, config)
+	require.NoError(t, err)
+	require.Len(t, cc.ExporterCounters, 1)
+	require.Len(t, cc.DCGMCounters, 1)
+
+	for i := range cc.DCGMCounters {
+		if cc.DCGMCounters[i].PromType == "label" {
+			cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
+		}
+	}
+
+	// Get the device count DCGM currently reports
+	hardwareGPUs, err := dcgmprovider.Client().GetAllDeviceCount()
+	require.NoError(t, err)
+
+	if hardwareGPUs+1 > dcgm.MAX_NUM_DEVICES {
+		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
+	}
+
+	entityList := []dcgm.MigHierarchyInfo{
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+	}
+
+	// Create three fake GPUs
+	fakeGPUIDs, err := dcgmprovider.Client().CreateFakeEntities(entityList)
+	require.NoError(t, err)
+	require.NotEmpty(t, fakeGPUIDs)
+	// Inject XID 42 twice and XID 46 once into every fake GPU.
+	for i, gpuID := range fakeGPUIDs {
+		err = dcgmprovider.Client().InjectFieldValue(gpuID,
+			dcgm.DCGM_FI_DEV_XID_ERRORS,
+			dcgm.DCGM_FT_INT64,
+			0,
+			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
+			int64(42),
+		)
+		require.NoError(t, err)
+
+		err = dcgmprovider.Client().InjectFieldValue(gpuID,
+			dcgm.DCGM_FI_DEV_XID_ERRORS,
+			dcgm.DCGM_FT_INT64,
+			0,
+			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
+			int64(42),
+		)
+		require.NoError(t, err)
+
+		err = dcgmprovider.Client().InjectFieldValue(gpuID,
+			dcgm.DCGM_FI_DEV_XID_ERRORS,
+			dcgm.DCGM_FT_INT64,
+			0,
+			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
+			int64(46),
+		)
+		require.NoError(t, err)
+
+	}
+	// NOTE(review): the watch list names DCGM_FI_DEV_CLOCKS_EVENT_REASONS although this test injects DCGM_FI_DEV_XID_ERRORS — confirm intent.
+	allCounters := []counters.Counter{
+		{
+			FieldID: dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+		},
+	}
+
+	allCounters = append(allCounters, cc.ExporterCounters.LabelCounters()...)
+
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(allCounters, config)
+	err = deviceWatchListManager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	item, exists := deviceWatchListManager.EntityWatchList(dcgm.FE_GPU)
+	require.True(t, exists)
+
+	xidCollector, err := collector.NewXIDCollector(cc.ExporterCounters, hostname, config, item)
+	require.NoError(t, err)
+
+	defer func() {
+		xidCollector.Cleanup()
+	}()
+
+	metrics, err := xidCollector.GetMetrics()
+	require.NoError(t, err)
+	require.NotEmpty(t, metrics)
+	// We expect 1 metric: DCGM_EXP_XID_ERRORS_COUNT
+	require.Len(t, metrics, 1)
+	// The map holds a single entry (checked above); take its values
+	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)]
+
+	fakeGPUIDMap := map[string]struct{}{}
+	for _, fakeGPUID := range fakeGPUIDs {
+		fakeGPUIDMap[fmt.Sprint(fakeGPUID)] = struct{}{}
+	}
+
+	conditionFakeGPUOnly := func(m collector.Metric) bool {
+		_, exists := fakeGPUIDMap[m.GPU]
+		return exists
+	}
+
+	// We want to filter out physical GPU and keep fake only
+	metricValues = filterMetrics(metricValues, conditionFakeGPUOnly)
+	require.Len(t, metricValues, len(fakeGPUIDs)*2) // two values expected per fake GPU: XID 42 (injected twice) and XID 46
+
+	for _, val := range metricValues {
+		require.Contains(t, val.Labels, "window_size_in_ms")
+		require.Equal(t, fmt.Sprint(config.XIDCountWindowSize), val.Labels["window_size_in_ms"])
+	}
+
+	// We inject a new error (XID 19) into the first fake GPU
+	err = dcgmprovider.Client().InjectFieldValue(fakeGPUIDs[0],
+		dcgm.DCGM_FI_DEV_XID_ERRORS,
+		dcgm.DCGM_FT_INT64,
+		0,
+		time.Now().UnixMicro(),
+		int64(19),
+	)
+	require.NoError(t, err)
+
+	// Wait for 1 second
+	time.Sleep(1 * time.Second)
+
+	metrics, err = xidCollector.GetMetrics()
+	require.NoError(t, err)
+	require.NotEmpty(t, metrics)
+
+	// We expect 1 metric: DCGM_EXP_XID_ERRORS_COUNT
+	require.Len(t, metrics, 1)
+	// The map holds a single entry (checked above); take its values
+	metricValues = metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)]
+	// We want to filter out physical GPU and keep fake only
+	metricValues = filterMetrics(metricValues, conditionFakeGPUOnly)
+	// We update metrics with slice, that doesn't contain physical GPU
+	metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)] = metricValues
+
+	// Expect two values per fake GPU (XID 42 and 46) plus one more for XID 19 on the first fake GPU
+	require.Len(t, metricValues, 1+(len(fakeGPUIDs)*2))
+	for _, val := range metricValues {
+		require.Contains(t, val.Labels, "window_size_in_ms")
+		require.Equal(t, fmt.Sprint(config.XIDCountWindowSize), val.Labels["window_size_in_ms"])
+	}
+
+	// Now we check the metric rendering
+	var b bytes.Buffer
+	err = rendermetrics.RenderGroup(&b, dcgm.FE_GPU, metrics)
+	require.NoError(t, err)
+	require.NotEmpty(t, b)
+
+	var parser expfmt.TextParser
+	mf, err := parser.TextToMetricFamilies(&b)
+	require.NoError(t, err)
+	require.NotEmpty(t, mf)
+	require.Len(t, mf, 1)
+	metricFamily := mf[reflect.ValueOf(mf).MapKeys()[0].Interface().(string)]
+	require.NotNil(t, metricFamily.Name)
+	assert.Equal(t, "DCGM_EXP_XID_ERRORS_COUNT", *metricFamily.Name)
+	assert.Equal(t, "Count of XID Errors within user-specified time window (see xid-count-window-size param).",
+		*metricFamily.Help)
+	assert.Equal(t, io_prometheus_client.MetricType_GAUGE, *metricFamily.Type)
+	// The rendered family must hold the same number of samples as the filtered metric values
+	require.Len(t, metricFamily.Metric, 1+(len(fakeGPUIDs)*2))
+	for _, mv := range metricFamily.Metric {
+		require.NotNil(t, mv.Gauge.Value)
+		if *(mv.Gauge.Value) == 0 {
+			// We don't inject XID errors into the hardware GPU, so we do not expect XID label
+			assert.Len(t, mv.Label, 7)
+			assert.False(t, slices.ContainsFunc(mv.Label, func(lp *io_prometheus_client.LabelPair) bool {
+				return ptr.Deref(lp.Name, "") == "xid"
+			}))
+			continue
+		}
+		assert.Len(t, mv.Label, 9)
+		assert.Equal(t, "gpu", *mv.Label[0].Name)
+		assert.Equal(t, "UUID", *mv.Label[1].Name)
+		assert.Equal(t, "pci_bus_id", *mv.Label[2].Name)
+		assert.NotEmpty(t, *mv.Label[2].Value)
+		assert.Equal(t, "device", *mv.Label[3].Name)
+		assert.Equal(t, "modelName", *mv.Label[4].Name)
+		assert.Equal(t, "Hostname", *mv.Label[5].Name)
+		assert.Equal(t, "DCGM_FI_DRIVER_VERSION", *mv.Label[6].Name)
+		assert.Equal(t, "window_size_in_ms", *mv.Label[7].Name)
+		assert.Equal(t, "xid", *mv.Label[8].Name)
+		assert.NotEmpty(t, *mv.Label[8].Value)
+	}
+}
+
+func TestXIDCollector_NewXIDCollector(t *testing.T) {
+	config := &appconfig.Config{
+		UseRemoteHE: false,
+		GPUDeviceOptions: appconfig.DeviceOptions{
+			Flex:       true,
+			MajorRange: []int{-1},
+			MinorRange: []int{-1},
+		},
+	}
+
+	dcgmprovider.Initialize(config)
+	defer dcgmprovider.Client().Cleanup()
+
+	allCounters := []counters.Counter{
+		{
+			FieldID: dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
+		},
+	}
+
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(allCounters, config)
+	err := deviceWatchListManager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	item, _ := deviceWatchListManager.EntityWatchList(dcgm.FE_GPU)
+
+	t.Run("Should Return Error When DCGM_EXP_XID_ERRORS_COUNT is not present", func(t *testing.T) {
+		records := [][]string{
+			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
+		}
+		cc, err := counters.ExtractCounters(records, config)
+		require.NoError(t, err)
+		require.Len(t, cc.ExporterCounters, 0)
+		require.Len(t, cc.DCGMCounters, 1)
+
+		xidCollector, err := collector.NewXIDCollector(cc.DCGMCounters, "", config, item)
+		require.Error(t, err)
+		require.Nil(t, xidCollector)
+	})
+
+	t.Run("Should Return Error When Counters Param Is Empty", func(t *testing.T) {
+		emptyCounters := make([]counters.Counter, 0)
+		xidCollector, err := collector.NewXIDCollector(emptyCounters, "", config, item)
+		require.Error(t, err)
+		require.Nil(t, xidCollector)
+	})
+
+	t.Run("Should Not Return Error When DCGM_EXP_XID_ERRORS_COUNT Present More Than Once", func(t *testing.T) {
+		records := [][]string{
+			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
+			{
+				"DCGM_EXP_XID_ERRORS_COUNT",
+				"gauge",
+				"Count of XID Errors within user-specified time window (see xid-count-window-size param).",
+			},
+			{
+				"DCGM_EXP_XID_ERRORS_COUNT",
+				"gauge",
+				"Count of XID Errors within user-specified time window (see xid-count-window-size param).",
+			},
+			{
+				"DCGM_EXP_XID_ERRORS_COUNT",
+				"gauge",
+				"Count of XID Errors within user-specified time window (see xid-count-window-size param).",
+			},
+		}
+		cc, err := counters.ExtractCounters(records, config)
+		require.NoError(t, err)
+		for i := range cc.DCGMCounters {
+			if cc.DCGMCounters[i].PromType == "label" {
+				cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
+			}
+		}
+		xidCollector, err := collector.NewXIDCollector(cc.ExporterCounters, "", config, item)
+		require.NoError(t, err)
+		require.NotNil(t, xidCollector)
+	})
+}
+// filterMetrics returns, in input order, the metrics for which condition reports true (nil when none do).
+func filterMetrics(metricValues []collector.Metric, condition func(metric collector.Metric) bool) []collector.Metric {
+	var kept []collector.Metric
+	for i := range metricValues {
+		if m := metricValues[i]; condition(m) {
+			kept = append(kept, m)
+		}
+	}
+	return kept
+}
+
+// TestDCGMCollector runs the GPU and then the CPU collector scenario against
+// one initialized DCGM provider, cleaning up each returned collector.
+func TestDCGMCollector(t *testing.T) {
+	dcgmprovider.Initialize(&appconfig.Config{UseRemoteHE: false})
+	defer dcgmprovider.Client().Cleanup()
+
+	// Each helper returns the collector it built; release it before moving on.
+	gpuCollector := testDCGMGPUCollector(t, testutils.SampleCounters)
+	gpuCollector.Cleanup()
+
+	cpuCollector := testDCGMCPUCollector(t, testutils.SampleCounters)
+	cpuCollector.Cleanup()
+}
+
+// testDCGMGPUCollector builds a GPU DCGMCollector on top of a mocked DCGM client, checks that the constructor
+// rejects the absent switch and CPU watch lists, validates the GPU metrics and returns the GPU collector.
+func testDCGMGPUCollector(t *testing.T, counters []counters.Counter) *collector.DCGMCollector {
+	dOpt := appconfig.DeviceOptions{
+		Flex:       true,
+		MajorRange: []int{-1},
+		MinorRange: []int{-1},
+	}
+	config := appconfig.Config{
+		GPUDeviceOptions: dOpt,
+		NoHostname:       false,
+		UseOldNamespace:  false,
+		UseFakeGPUs:      false,
+		CollectInterval:  1,
+	}
+
+	// Store actual dcgm provider and restore it when this helper returns
+	realDCGMProvider := dcgmprovider.Client()
+	defer dcgmprovider.SetClient(realDCGMProvider)
+
+	ctrl := gomock.NewController(t)
+	mockDCGMProvider := mockDCGM(ctrl)
+
+	// Calls where actual API calls and results are desirable
+	mockDCGMProvider.EXPECT().FieldGetById(gomock.Any()).
+		DoAndReturn(func(fieldID dcgm.Short) dcgm.FieldMeta {
+			return realDCGMProvider.FieldGetById(fieldID)
+		}).AnyTimes()
+
+	mockDCGMProvider.EXPECT().EntityGetLatestValues(gomock.Any(), gomock.Any(), gomock.Any()).
+		DoAndReturn(func(group dcgm.Field_Entity_Group, id uint, fields []dcgm.Short) ([]dcgm.FieldValue_v1, error) {
+			return realDCGMProvider.EntityGetLatestValues(group, id, fields)
+		}).AnyTimes()
+
+	// Set mock DCGM provider
+	dcgmprovider.SetClient(mockDCGMProvider)
+
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(counters, &config)
+
+	err := deviceWatchListManager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	gpuItem, exists := deviceWatchListManager.EntityWatchList(dcgm.FE_GPU)
+	require.True(t, exists)
+
+	g, err := collector.NewDCGMCollector(counters, "", &config, gpuItem)
+	require.NoError(t, err)
+
+	/* Test for error when no switches are available to monitor. */
+	switchItem, exists := deviceWatchListManager.EntityWatchList(dcgm.FE_SWITCH)
+	assert.False(t, exists, "dcgm.FE_SWITCH should not be available")
+
+	_, err = collector.NewDCGMCollector(counters, "", &config, switchItem)
+	require.Error(t, err, "NewDCGMCollector should return error")
+
+	/* Test for error when no cpus are available to monitor. */
+	cpuItem, exist := deviceWatchListManager.EntityWatchList(dcgm.FE_CPU)
+	require.False(t, exist, "dcgm.FE_CPU should not be available")
+
+	_, err = collector.NewDCGMCollector(counters, "", &config, cpuItem)
+	require.Error(t, err, "NewDCGMCollector should return error")
+
+	out, err := g.GetMetrics()
+	require.NoError(t, err)
+	require.Greater(t, len(out), 0, "Check that you have a GPU on this node")
+	require.Len(t, out, len(expectedGPUMetrics),
+		fmt.Sprintf("Expected: %+v \nGot: %+v", expectedGPUMetrics, out))
+
+	seenMetrics := map[string]bool{}
+	for _, metrics := range out {
+		for _, metric := range metrics {
+			seenMetrics[metric.Counter.FieldName] = true
+			require.NotEmpty(t, metric.GPU)
+			require.NotEmpty(t, metric.GPUUUID)
+			require.NotEmpty(t, metric.GPUPCIBusID)
+			require.NotEmpty(t, metric.Value)
+			require.NotEqual(t, collector.FailedToConvert, metric.Value) // expected value first, as testify labels it
+		}
+	}
+	require.Equal(t, expectedGPUMetrics, seenMetrics) // expected value first, so failure output is labelled correctly
+
+	return g
+}
+
+func testDCGMCPUCollector(t *testing.T, counters []counters.Counter) *collector.DCGMCollector {
+	dOpt := appconfig.DeviceOptions{Flex: true, MajorRange: []int{-1}, MinorRange: []int{-1}}
+	config := appconfig.Config{
+		CPUDeviceOptions: dOpt,
+		NoHostname:       false,
+		UseOldNamespace:  false,
+		UseFakeGPUs:      false,
+	}
+
+	realDCGMProvider := dcgmprovider.Client()
+	defer dcgmprovider.SetClient(realDCGMProvider)
+
+	ctrl := gomock.NewController(t)
+	mockDCGMProvider := mockDCGM(ctrl)
+
+	// Calls where actual API calls and results are desirable
+	mockDCGMProvider.EXPECT().FieldGetById(gomock.Any()).
+		DoAndReturn(func(fieldID dcgm.Short) dcgm.FieldMeta {
+			return realDCGMProvider.FieldGetById(fieldID)
+		}).AnyTimes()
+
+	mockDCGMProvider.EXPECT().EntityGetLatestValues(gomock.Any(), gomock.Any(), gomock.Any()).
+		DoAndReturn(func(entityGroup dcgm.Field_Entity_Group, entityId uint, fields []dcgm.Short) ([]dcgm.FieldValue_v1,
+			error,
+		) {
+			return realDCGMProvider.EntityGetLatestValues(entityGroup, entityId, fields)
+		}).AnyTimes()
+
+	dcgmprovider.SetClient(mockDCGMProvider)
+
+	/* Test that only cpu metrics are collected for cpu entities. */
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(counters, &config)
+	err := deviceWatchListManager.CreateEntityWatchList(dcgm.FE_CPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	err = deviceWatchListManager.CreateEntityWatchList(dcgm.FE_CPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	cpuItem, cpuItemExist := deviceWatchListManager.EntityWatchList(dcgm.FE_CPU)
+	require.True(t, cpuItemExist)
+
+	c, err := collector.NewDCGMCollector(counters, "", &config, cpuItem)
+	require.NoError(t, err)
+
+	out, err := c.GetMetrics()
+	require.NoError(t, err)
+	require.Greater(t, len(out), 0, "Check that the fake CPU has been registered")
+
+	for _, dev := range out {
+		seenMetrics := map[string]bool{}
+		for _, metric := range dev {
+			seenMetrics[metric.Counter.FieldName] = true
+			require.NotEmpty(t, metric.GPU)
+
+			require.NotEmpty(t, metric.Value)
+			require.NotEqual(t, metric.Value, collector.FailedToConvert)
+		}
+		require.Equal(t, seenMetrics, expectedCPUMetrics)
+	}
+
+	return c
+}
+
+// TestGPUCollector_GetMetrics checks that one SM clock sample is reported per GPU after fake GPUs are added.
+func TestGPUCollector_GetMetrics(t *testing.T) {
+	teardownTest := setupTest()
+	defer teardownTest()
+
+	runOnlyWithLiveGPUs(t)
+	// Fake GPUs to create; declared first so the capacity guard below uses the real count.
+	entityList := []dcgm.MigHierarchyInfo{
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
+	}
+
+	numGPUs, err := dcgmprovider.Client().GetAllDeviceCount()
+	require.NoError(t, err)
+
+	if numGPUs+uint(len(entityList)) > dcgm.MAX_NUM_DEVICES {
+		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
+	}
+
+	gpuIDs, err := dcgmprovider.Client().CreateFakeEntities(entityList)
+	require.NoError(t, err)
+	require.NotEmpty(t, gpuIDs)
+
+	numGPUs, err = dcgmprovider.Client().GetAllDeviceCount()
+	require.NoError(t, err)
+
+	inputCounters := []counters.Counter{
+		{
+			FieldID:   100,
+			FieldName: "DCGM_FI_DEV_SM_CLOCK",
+			PromType:  "gauge",
+			Help:      "SM clock frequency (in MHz).",
+		},
+	}
+
+	dOpt := appconfig.DeviceOptions{
+		Flex:       true,
+		MajorRange: []int{-1},
+		MinorRange: []int{-1},
+	}
+	config := appconfig.Config{
+		GPUDeviceOptions: dOpt,
+		NoHostname:       false,
+		UseOldNamespace:  false,
+		UseFakeGPUs:      false,
+	}
+
+	deviceWatchListManager := devicewatchlistmanager.NewWatchListManager(inputCounters, &config)
+	err = deviceWatchListManager.CreateEntityWatchList(dcgm.FE_GPU, deviceWatcher,
+		int64(config.CollectInterval))
+	require.NoError(t, err)
+
+	gpuItem, exists := deviceWatchListManager.EntityWatchList(dcgm.FE_GPU)
+	require.True(t, exists)
+
+	c, err := collector.NewDCGMCollector(inputCounters, "", &config, gpuItem)
+	require.NoError(t, err)
+	defer c.Cleanup()
+
+	out, err := c.GetMetrics()
+	require.NoError(t, err)
+	require.Len(t, out, 1)
+
+	values := out[inputCounters[0]]
+
+	require.Equal(t, numGPUs, uint(len(values)))
+}
diff --git a/internal/pkg/integration_test/transformation_test.go b/internal/pkg/integration_test/transformation_test.go
new file mode 100644
index 00000000..1c93a432
--- /dev/null
+++ b/internal/pkg/integration_test/transformation_test.go
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package integration_test
+
+import (
+	"fmt"
+	"reflect"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"google.golang.org/grpc"
+	"k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/transformation"
+)
+
+const (
+	// Note standard resource attributes
+	podAttribute       = "pod"
+	namespaceAttribute = "namespace"
+	containerAttribute = "container"
+)
+
+// TestProcessPodMapper checks that after PodMapper.Process every metric has pod, namespace and container attributes.
+func TestProcessPodMapper(t *testing.T) {
+	testutils.RequireLinux(t)
+
+	tmpDir, cleanup := testutils.CreateTmpDir(t)
+	defer cleanup()
+
+	config := &appconfig.Config{UseRemoteHE: false}
+
+	dcgmprovider.Initialize(config)
+	defer dcgmprovider.Client().Cleanup()
+
+	c := testDCGMGPUCollector(t, testutils.SampleCounters)
+	defer c.Cleanup()
+
+	out, err := c.GetMetrics()
+	require.NoError(t, err)
+	require.NotEmpty(t, out) // MapKeys()[0] below would panic on an empty result
+
+	// Maps are reference types: record the size now, an alias of out could never differ from it.
+	originalLen := len(out)
+	arbitraryMetric := out[reflect.ValueOf(out).MapKeys()[0].Interface().(counters.Counter)]
+
+	socketPath := tmpDir + "/kubelet.sock"
+	server := grpc.NewServer()
+	gpus := getGPUUUIDs(arbitraryMetric)
+	v1alpha1.RegisterPodResourcesListerServer(server,
+		testutils.NewMockPodResourcesServer(appconfig.NvidiaResourceName, gpus))
+
+	cleanup = testutils.StartMockServer(t, server, socketPath)
+	defer cleanup()
+
+	podMapper := transformation.NewPodMapper(&appconfig.Config{
+		KubernetesGPUIdType:       appconfig.GPUUID,
+		PodResourcesKubeletSocket: socketPath,
+	})
+
+	var deviceInfo deviceinfo.Provider
+	err = podMapper.Process(out, deviceInfo)
+	require.NoError(t, err)
+
+	require.Len(t, out, originalLen)
+	for _, metrics := range out {
+		for _, metric := range metrics {
+			require.Contains(t, metric.Attributes, podAttribute)
+			require.Contains(t, metric.Attributes, namespaceAttribute)
+			require.Contains(t, metric.Attributes, containerAttribute)
+			require.Equal(t, fmt.Sprintf("gpu-pod-%s", metric.GPU), metric.Attributes[podAttribute])
+			require.Equal(t, "default", metric.Attributes[namespaceAttribute])
+			require.Equal(t, "default", metric.Attributes[containerAttribute])
+		}
+	}
+}
+
+// getGPUUUIDs returns the GPUUUID of every metric, in input order.
+func getGPUUUIDs(metrics []collector.Metric) []string {
+	uuids := make([]string, 0, len(metrics))
+	for idx := range metrics {
+		uuids = append(uuids, metrics[idx].GPUUUID)
+	}
+	return uuids
+}
diff --git a/internal/pkg/logging/const.go b/internal/pkg/logging/const.go
new file mode 100644
index 00000000..95a0a482
--- /dev/null
+++ b/internal/pkg/logging/const.go
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package logging
+
+// Constants for logging fields
+const (
+	GroupIDKey          = "groupID"
+	DumpKey             = "dump"
+	StackTrace          = "stacktrace"
+	FieldEntityGroupKey = "fieldEntityGroup"
+	MetricsKey          = "metrics"
+	DeviceInfoKey       = "deviceInfo"
+	ErrorKey            = "error"
+)
diff --git a/internal/pkg/logging/logger_adapter.go b/internal/pkg/logging/logger_adapter.go
deleted file mode 100644
index 19374778..00000000
--- a/internal/pkg/logging/logger_adapter.go
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package logging
-
-import (
-	"fmt"
-
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-	"github.com/sirupsen/logrus"
-)
-
-// LogrusAdapter is an adapter that allows logrus Logger to be used as a go-kit/log Logger.
-type LogrusAdapter struct {
-	Logger *logrus.Logger
-}
-
-// NewLogrusAdapter creates a new LogrusAdapter with the provided logrus.Logger.
-func NewLogrusAdapter(logger *logrus.Logger) log.Logger {
-	return &LogrusAdapter{
-		Logger: logger,
-	}
-}
-
-// Log implements the go-kit/log Logger interface.
-func (la *LogrusAdapter) Log(keyvals ...interface{}) error {
-	// keyvals is a slice of interfaces, that represents a key-value pairs.
-	if len(keyvals)%2 != 0 {
-		keyvals = append(keyvals, "MISSING")
-	}
-
-	fields := logrus.Fields{}
-	for i := 0; i < len(keyvals); i += 2 {
-		key, ok := keyvals[i].(string)
-		if !ok {
-			// If the key is not la string, use la default key
-			key = "missing_key"
-		}
-		fields[key] = keyvals[i+1]
-	}
-
-	// The go-kit/log uses msg field to keep log message, we don't want to use message as field in the logrus.
-	msg, exists := fields["msg"]
-	if exists {
-		delete(fields, "msg")
-	}
-
-	// The go-kit/log uses level fields to keep log level. We need to convert this field into logrus value.
-	lvl, exists := fields["level"]
-	if !exists {
-		fields["level"] = level.InfoValue()
-	}
-	delete(fields, "level")
-	parsedLvl, err := logrus.ParseLevel(fmt.Sprint(lvl))
-	if err != nil {
-		parsedLvl = logrus.InfoLevel
-	}
-
-	la.Logger.WithFields(fields).Log(parsedLvl, msg)
-
-	return nil
-}
diff --git a/internal/pkg/logging/logger_adapter_test.go b/internal/pkg/logging/logger_adapter_test.go
deleted file mode 100644
index b4eb38bd..00000000
--- a/internal/pkg/logging/logger_adapter_test.go
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package logging
-
-import (
-	"testing"
-
-	"github.com/go-kit/log/level"
-	"github.com/sirupsen/logrus"
-	"github.com/sirupsen/logrus/hooks/test"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-func TestLogrusAdapter_Log(t *testing.T) {
-	type testCase struct {
-		name    string
-		keyvals []interface{}
-		assert  func(*testing.T, *logrus.Entry)
-	}
-
-	//"msg", "Listening on", "address"
-	testCases := []testCase{
-		{
-			name: "Success",
-			keyvals: []interface{}{
-				"level",
-				level.InfoValue,
-				"msg",
-				"Listening on",
-				"address",
-				"127.0.0.0.1:8080",
-			},
-			assert: func(t *testing.T, entry *logrus.Entry) {
-				t.Helper()
-				require.NotNil(t, entry)
-				assert.Equal(t, "Listening on", entry.Message)
-				require.Contains(t, entry.Data, "address")
-				assert.Equal(t, "127.0.0.0.1:8080", entry.Data["address"])
-			},
-		},
-		{
-			name: "When no Level",
-			keyvals: []interface{}{
-				"msg",
-				"Listening on",
-				"address",
-				"127.0.0.0.1:8080",
-			},
-			assert: func(t *testing.T, entry *logrus.Entry) {
-				t.Helper()
-				require.NotNil(t, entry)
-				assert.Equal(t, "Listening on", entry.Message)
-				require.Contains(t, entry.Data, "address")
-				assert.Equal(t, "127.0.0.0.1:8080", entry.Data["address"])
-			},
-		},
-		{
-			name: "When key is not string",
-			keyvals: []interface{}{
-				"msg",
-				"Listening on",
-				42,
-				"127.0.0.0.1:8080",
-			},
-			assert: func(t *testing.T, entry *logrus.Entry) {
-				t.Helper()
-				require.NotNil(t, entry)
-				assert.Equal(t, "Listening on", entry.Message)
-				require.Contains(t, entry.Data, "missing_key")
-				assert.Equal(t, "127.0.0.0.1:8080", entry.Data["missing_key"])
-			},
-		},
-		{
-			name: "When value is missing",
-			keyvals: []interface{}{
-				"msg",
-				"Listening on",
-				"address",
-			},
-			assert: func(t *testing.T, entry *logrus.Entry) {
-				t.Helper()
-				require.NotNil(t, entry)
-				assert.Equal(t, "Listening on", entry.Message)
-				require.Contains(t, entry.Data, "address")
-				assert.Equal(t, "MISSING", entry.Data["address"])
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			logrusLogger, logHook := test.NewNullLogger()
-			logger := NewLogrusAdapter(logrusLogger)
-			err := logger.Log(tc.keyvals...)
-			require.NoError(t, err)
-			tc.assert(t, logHook.LastEntry())
-		})
-	}
-}
diff --git a/internal/pkg/nvmlprovider/provider.go b/internal/pkg/nvmlprovider/provider.go
index 32678933..786ba936 100644
--- a/internal/pkg/nvmlprovider/provider.go
+++ b/internal/pkg/nvmlprovider/provider.go
@@ -19,96 +19,151 @@ package nvmlprovider
 import (
 	"errors"
 	"fmt"
+	"log/slog"
 	"strconv"
 	"strings"
-	"sync"
 
 	"github.com/NVIDIA/go-nvml/pkg/nvml"
-	"github.com/sirupsen/logrus"
 )
 
-var nvmlOnce *sync.Once = new(sync.Once)
-
 type MIGDeviceInfo struct {
 	ParentUUID        string
 	GPUInstanceID     int
 	ComputeInstanceID int
 }
 
+var nvmlInterface NVML
+
+// Initialize sets up the Singleton NVML interface.
+func Initialize() {
+	nvmlInterface = newNVMLProvider()
+}
+
+// reset clears the current NVML interface instance.
+func reset() {
+	nvmlInterface = nil
+}
+
+// Client retrieves the current NVML interface instance.
+func Client() NVML {
+	return nvmlInterface
+}
+
+// SetClient sets the current NVML interface instance to the provided one.
+func SetClient(n NVML) {
+	nvmlInterface = n
+}
+
+// nvmlProvider implements NVML Interface
+type nvmlProvider struct {
+	initialized bool
+}
+
+// newNVMLProvider reuses the current client when it is an initialized nvmlProvider, otherwise it runs nvml.Init.
+func newNVMLProvider() NVML {
+	// Comma-ok assertion: a nil client or another NVML implementation (e.g. a mock) must not panic here.
+	if current, ok := Client().(nvmlProvider); ok && current.initialized {
+		slog.Info("NVML already initialized.")
+		return current
+	}
+
+	slog.Info("Attempting to initialize NVML library.")
+	if ret := nvml.Init(); ret != nvml.SUCCESS {
+		err := errors.New(nvml.ErrorString(ret))
+		slog.Error(fmt.Sprintf("Cannot init NVML library; err: %v", err))
+		return nvmlProvider{initialized: false}
+	}
+
+	return nvmlProvider{initialized: true}
+}
+
+func (n nvmlProvider) preCheck() error {
+	if !n.initialized {
+		return fmt.Errorf("NVML library not initialized")
+	}
+
+	return nil
+}
+
 // GetMIGDeviceInfoByID returns information about MIG DEVICE by ID
-func GetMIGDeviceInfoByID(uuid string) (*MIGDeviceInfo, error) {
-	var err error
-
-	nvmlOnce.Do(func() {
-		ret := nvml.Init()
-		if ret != nvml.SUCCESS {
-			err = errors.New(nvml.ErrorString(ret))
-			logrus.Error("Can not init NVML library.")
-		}
-	})
-	if err != nil {
+func (n nvmlProvider) GetMIGDeviceInfoByID(uuid string) (*MIGDeviceInfo, error) {
+	if err := n.preCheck(); err != nil {
+		slog.Error(fmt.Sprintf("failed to get MIG Device Info; err: %v", err))
 		return nil, err
 	}
 
-	// 	1. With drivers >= R470 (470.42.01+), each MIG device is assigned a GPU UUID starting
-	//  with MIG-<UUID>.
-
 	device, ret := nvml.DeviceGetHandleByUUID(uuid)
 	if ret == nvml.SUCCESS {
-		parentDevice, ret := device.GetDeviceHandleFromMigDeviceHandle()
-		if ret != nvml.SUCCESS {
-			return nil, errors.New(nvml.ErrorString(ret))
-		}
-
-		parentUUID, ret := parentDevice.GetUUID()
-		if ret != nvml.SUCCESS {
-			return nil, errors.New(nvml.ErrorString(ret))
-		}
-
-		gi, ret := device.GetGpuInstanceId()
-		if ret != nvml.SUCCESS {
-			return nil, errors.New(nvml.ErrorString(ret))
-		}
-
-		ci, ret := device.GetComputeInstanceId()
-		if ret != nvml.SUCCESS {
-			return nil, errors.New(nvml.ErrorString(ret))
-		}
-
-		return &MIGDeviceInfo{
-			ParentUUID:        parentUUID,
-			GPUInstanceID:     gi,
-			ComputeInstanceID: ci,
-		}, nil
+		return getMIGDeviceInfoForNewDriver(device)
 	}
 
-	//  2. With drivers < R470 (e.g. R450 and R460), each MIG device is enumerated by
-	// specifying the CI and the corresponding parent GI. The format follows this
-	// convention: MIG-<GPU-UUID>/<GPU instance ID>/<compute instance ID>.
+	return getMIGDeviceInfoForOldDriver(uuid)
+}
+
+// getMIGDeviceInfoForNewDriver identifies MIG Device Information for drivers >= R470 (470.42.01+),
+// each MIG device is assigned a GPU UUID starting with MIG-<UUID>.
+func getMIGDeviceInfoForNewDriver(device nvml.Device) (*MIGDeviceInfo, error) {
+	parentDevice, ret := device.GetDeviceHandleFromMigDeviceHandle()
+	if ret != nvml.SUCCESS {
+		return nil, errors.New(nvml.ErrorString(ret))
+	}
 
+	parentUUID, ret := parentDevice.GetUUID()
+	if ret != nvml.SUCCESS {
+		return nil, errors.New(nvml.ErrorString(ret))
+	}
+
+	gi, ret := device.GetGpuInstanceId()
+	if ret != nvml.SUCCESS {
+		return nil, errors.New(nvml.ErrorString(ret))
+	}
+
+	ci, ret := device.GetComputeInstanceId()
+	if ret != nvml.SUCCESS {
+		return nil, errors.New(nvml.ErrorString(ret))
+	}
+
+	return &MIGDeviceInfo{
+		ParentUUID:        parentUUID,
+		GPUInstanceID:     gi,
+		ComputeInstanceID: ci,
+	}, nil
+}
+
+// getMIGDeviceInfoForOldDriver identifies MIG Device Information for drivers < R470 (e.g. R450 and R460),
+// each MIG device is enumerated by specifying the CI and the corresponding parent GI. The format follows this
+// convention: MIG-<GPU-UUID>/<GPU instance ID>/<Compute instance ID>.
+func getMIGDeviceInfoForOldDriver(uuid string) (*MIGDeviceInfo, error) {
 	tokens := strings.SplitN(uuid, "-", 2)
 	if len(tokens) != 2 || tokens[0] != "MIG" {
-		return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
+		return nil, fmt.Errorf("unable to parse '%s' as MIG device UUID", uuid)
 	}
 
-	tokens = strings.SplitN(tokens[1], "/", 3)
-	if len(tokens) != 3 || !strings.HasPrefix(tokens[0], "GPU-") {
-		return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
+	gpuTokens := strings.SplitN(tokens[1], "/", 3)
+	if len(gpuTokens) != 3 || !strings.HasPrefix(gpuTokens[0], "GPU-") {
+		return nil, fmt.Errorf("invalid MIG device UUID '%s'", uuid)
 	}
 
-	gi, err := strconv.Atoi(tokens[1])
+	gi, err := strconv.Atoi(gpuTokens[1])
 	if err != nil {
-		return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
+		return nil, fmt.Errorf("invalid GPU instance ID '%s' for MIG device '%s'", gpuTokens[1], uuid)
 	}
 
-	ci, err := strconv.Atoi(tokens[2])
+	ci, err := strconv.Atoi(gpuTokens[2])
 	if err != nil {
-		return nil, fmt.Errorf("unable to parse UUID '%s' as MIG device", uuid)
+		return nil, fmt.Errorf("invalid Compute instance ID '%s' for MIG device '%s'", gpuTokens[2], uuid)
 	}
 
 	return &MIGDeviceInfo{
-		ParentUUID:        tokens[0],
+		ParentUUID:        gpuTokens[0],
 		GPUInstanceID:     gi,
 		ComputeInstanceID: ci,
 	}, nil
 }
+
+// Cleanup performs cleanup operations for the NVML provider
+func (n nvmlProvider) Cleanup() {
+	if err := n.preCheck(); err == nil {
+		reset()
+	}
+}
diff --git a/internal/pkg/nvmlprovider/provider_test.go b/internal/pkg/nvmlprovider/provider_test.go
index 0b63a7f4..fde0a76d 100644
--- a/internal/pkg/nvmlprovider/provider_test.go
+++ b/internal/pkg/nvmlprovider/provider_test.go
@@ -22,21 +22,34 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+func TestGetMIGDeviceInfoByID_When_NVML_Not_Initialized(t *testing.T) {
+	validMIGUUID := "MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/5"
+	newNvmlProvider := nvmlProvider{}
+
+	deviceInfo, err := newNvmlProvider.GetMIGDeviceInfoByID(validMIGUUID)
+	assert.Error(t, err, "uuid: %v, Device Info: %+v", validMIGUUID, deviceInfo)
+}
+
 func TestGetMIGDeviceInfoByID_When_DriverVersion_Below_R470(t *testing.T) {
+	Initialize()
+	assert.NotNil(t, Client(), "expected NVML Client to be not nil")
+	assert.True(t, Client().(nvmlProvider).initialized, "expected Client to be initialized")
+	defer Client().Cleanup()
+
 	tests := []struct {
-		name          string
-		uuid          string
-		expectedGPU   string
-		expectedGi    int
-		expectedCi    int
-		expectedError bool
+		name            string
+		uuid            string
+		expectedMIGInfo *MIGDeviceInfo
+		expectedError   bool
 	}{
 		{
-			name:        "Successfull Parsing",
-			uuid:        "MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/5",
-			expectedGPU: "GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			expectedGi:  1,
-			expectedCi:  5,
+			name: "Successful Parsing",
+			uuid: "MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/5",
+			expectedMIGInfo: &MIGDeviceInfo{
+				ParentUUID:        "GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5",
+				GPUInstanceID:     1,
+				ComputeInstanceID: 5,
+			},
 		},
 		{
 			name:          "Fail, Missing MIG at the beginning of UUID",
@@ -62,41 +75,41 @@ func TestGetMIGDeviceInfoByID_When_DriverVersion_Below_R470(t *testing.T) {
 
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
-			deviceInfo, err := GetMIGDeviceInfoByID(tc.uuid)
-			if tc.expectedError && err != nil {
-				return
-			}
-			if tc.expectedError && err == nil {
-				t.Fatalf("Expected an error, but didn't get one: uuid: %v, (gpu: %v, gi: %v, ci: %v)",
-					tc.uuid,
-					deviceInfo.ParentUUID,
-					deviceInfo.GPUInstanceID,
-					deviceInfo.ComputeInstanceID)
-			}
-			if !tc.expectedError && err != nil {
-				t.Fatalf("Unexpected error: %v, uuid: %v, (gpu: %v, gi: %v, ci: %v)",
-					err,
-					tc.uuid,
-					deviceInfo.ParentUUID,
-					deviceInfo.GPUInstanceID,
-					deviceInfo.ComputeInstanceID)
+			deviceInfo, err := Client().GetMIGDeviceInfoByID(tc.uuid)
+			if tc.expectedError {
+				assert.Error(t, err, "uuid: %v, Device Info: %+v", tc.uuid, deviceInfo)
+			} else {
+				assert.Nil(t, err, "err: %v, uuid: %v", err, tc.uuid)
+				assert.Equal(t, tc.expectedMIGInfo, deviceInfo, "MIG uuid '%v' parsed incorrectly", tc.uuid)
 			}
+		})
+	}
+}
 
-			assert.Equal(t, tc.expectedGPU, deviceInfo.ParentUUID, "MIG UUID parsed incorrectly: uuid: %v, (gpu: %v, gi: %v, ci: %v)",
-				tc.uuid,
-				deviceInfo.ParentUUID,
-				deviceInfo.GPUInstanceID,
-				deviceInfo.ComputeInstanceID)
-			assert.Equal(t, tc.expectedGi, deviceInfo.GPUInstanceID, "MIG UUID parsed incorrectly: uuid: %v, (gpu: %v, gi: %v, ci: %v)",
-				tc.uuid,
-				deviceInfo.ParentUUID,
-				deviceInfo.GPUInstanceID,
-				deviceInfo.ComputeInstanceID)
-			assert.Equal(t, tc.expectedCi, deviceInfo.ComputeInstanceID, "MIG UUID parsed incorrectly: uuid: %v, (gpu: %v, gi: %v, ci: %v)",
-				tc.uuid,
-				deviceInfo.ParentUUID,
-				deviceInfo.GPUInstanceID,
-				deviceInfo.ComputeInstanceID)
+func Test_newNVMLProvider(t *testing.T) {
+	tests := []struct {
+		name       string
+		preRunFunc func() NVML
+	}{
+		{
+			name: "NVML not initialized",
+			preRunFunc: func() NVML {
+				return nvmlProvider{initialized: true}
+			},
+		},
+		{
+			name: "NVML already initialized",
+			preRunFunc: func() NVML {
+				Initialize()
+				return Client()
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			want := tt.preRunFunc()
+			defer reset()
+			assert.Equalf(t, want, newNVMLProvider(), "Unexpected Output")
 		})
 	}
 }
diff --git a/internal/pkg/nvmlprovider/types.go b/internal/pkg/nvmlprovider/types.go
new file mode 100644
index 00000000..507b7afd
--- /dev/null
+++ b/internal/pkg/nvmlprovider/types.go
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/nvmlprovider/mock_client.go -package=nvmlprovider -copyright_file=../../../hack/header.txt . NVML
+
+package nvmlprovider
+
+type NVML interface {
+	GetMIGDeviceInfoByID(string) (*MIGDeviceInfo, error)
+	Cleanup()
+}
diff --git a/internal/pkg/os/os.go b/internal/pkg/os/os.go
index 891d3893..2e676fce 100644
--- a/internal/pkg/os/os.go
+++ b/internal/pkg/os/os.go
@@ -18,9 +18,9 @@ package os
 
 import "os"
 
-//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/os/os.go -package=os -copyright_file=../../../hack/header.txt . OS
-//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/os/dir_entry.go -package=os -copyright_file=../../../hack/header.txt os DirEntry
-//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/os/file_info.go -package=os -copyright_file=../../../hack/header.txt io/fs FileInfo
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/os/mock_os.go -package=os -copyright_file=../../../hack/header.txt . OS
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/os/mock_dir_entry.go -package=os -copyright_file=../../../hack/header.txt os DirEntry
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/os/mock_file_info.go -package=os -copyright_file=../../../hack/header.txt io/fs FileInfo
 type OS interface {
 	CreateTemp(dir, pattern string) (*os.File, error)
 	Getenv(key string) string
@@ -33,6 +33,7 @@ type OS interface {
 	Stat(name string) (os.FileInfo, error)
 	TempDir() string
 	ReadDir(name string) ([]os.DirEntry, error)
+	Exit(code int)
 }
 
 type RealOS struct{}
@@ -80,3 +81,5 @@ func (RealOS) Remove(name string) error {
 func (RealOS) ReadDir(name string) ([]os.DirEntry, error) {
 	return os.ReadDir(name)
 }
+
+func (RealOS) Exit(code int) { os.Exit(code) }
diff --git a/internal/pkg/prerequisites/dcgmlib_rule.go b/internal/pkg/prerequisites/dcgmlib_rule.go
new file mode 100644
index 00000000..5bd8ce0f
--- /dev/null
+++ b/internal/pkg/prerequisites/dcgmlib_rule.go
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package prerequisites
+
+import (
+	debugelf "debug/elf"
+	"fmt"
+	"log/slog"
+	"strings"
+)
+
+const (
+	libdcgmco     = "libdcgm.so.4"
+	procSelfExe   = "/proc/self/exe"
+	ldconfig      = "ldconfig"
+	ldconfigParam = "-p"
+)
+
+type dcgmLibExistsRule struct{}
+
+// Validate checks if libdcgm.so.4 exists and matches with the machine architecture.
+func (c dcgmLibExistsRule) Validate() error {
+	// On Ubuntu, ldconfig is a wrapper around ldconfig.real
+	ldconfigPath := fmt.Sprintf("/sbin/%s.real", ldconfig)
+	if _, err := os.Stat(ldconfigPath); err != nil {
+		ldconfigPath = "/sbin/" + ldconfig
+	}
+	// Get list of shared libraries. See: man ldconfig
+	out, err := exec.Command(ldconfigPath, ldconfigParam).Output()
+	if err != nil {
+		return err
+	}
+
+	for _, match := range rxLDCacheEntry.FindAllSubmatch(out, -1) {
+		libName := strings.TrimSpace(string(match[1]))
+		if libName == libdcgmco {
+			libPath := strings.TrimSpace(string(match[2]))
+			selfMachine, err := c.readELF(procSelfExe)
+			if err != nil {
+				return err
+			}
+			libMachine, err := c.readELF(libPath)
+			if err != nil {
+				// When datacenter-gpu-manager uninstalled, the ldconfig -p may return that the libdcgm.so.4 is present,
+				// but the library file was removed.
+				slog.Error(err.Error())
+				return errLibdcgmNotFound
+			}
+
+			if selfMachine != libMachine {
+				return fmt.Errorf("the %s library architecture mismatch with the system; wanted: %s, received: %s",
+					libdcgmco, selfMachine, libMachine)
+			}
+
+			return nil
+		}
+	}
+
+	return errLibdcgmNotFound
+}
+
+// readELF opens the ELF file at name and returns the Machine value of its header; a close failure is only logged.
+func (c dcgmLibExistsRule) readELF(name string) (debugelf.Machine, error) {
+	elfFile, err := elf.Open(name)
+	if err != nil {
+		return 0, fmt.Errorf("could not open %s: %w", name, err)
+	}
+	if err := elfFile.Close(); err != nil {
+		slog.Warn(fmt.Sprintf("could not close ELF: %v", err))
+	}
+	return elfFile.Machine, nil
+}
diff --git a/internal/pkg/prerequisites/dcgmlib_rule_test.go b/internal/pkg/prerequisites/dcgmlib_rule_test.go
new file mode 100644
index 00000000..d96f0c83
--- /dev/null
+++ b/internal/pkg/prerequisites/dcgmlib_rule_test.go
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package prerequisites
+
+import (
+	"errors"
+	"testing"
+
+	"go.uber.org/mock/gomock"
+
+	debugelf "debug/elf"
+
+	"github.com/stretchr/testify/require"
+
+	mockelf "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/elf"
+	mockexec "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/exec"
+)
+
+// Test_dcgmLibExistsRule_Validate drives Validate with mocked exec and elf implementations.
+// NOTE(review): os is not mocked, so the chosen ldconfig path depends on the host's /sbin/ldconfig.real.
+func Test_dcgmLibExistsRule_Validate(t *testing.T) {
+	ldconfigPath := "/sbin/ldconfig.real"
+	type testCase struct {
+		Name                 string
+		ExecMockExpectations func(*gomock.Controller, *mockexec.MockExec)
+		ELFMockExpectations  func(*gomock.Controller, *mockelf.MockELF)
+		AssertErr            func(t *testing.T, err error)
+	}
+
+	testCases := []testCase{
+		{
+			Name: "no error",
+			ExecMockExpectations: func(ctrl *gomock.Controller, mockExec *mockexec.MockExec) {
+				output := `1211 libs found in cache '/etc/ld.so.cache'
+				libdcgm.so.4 (libc6,x86-64) => /lib/x86_64-linux-gnu/libdcgm.so.4
+			Cache generated by: ldconfig (Ubuntu GLIBC 2.35-0ubuntu3.7) stable release version 2.35`
+				cmd := mockexec.NewMockCmd(ctrl)
+				cmd.EXPECT().Output().AnyTimes().Return([]byte(output), nil)
+				mockExec.EXPECT().Command(gomock.Eq(ldconfigPath), gomock.Eq(ldconfigParam)).AnyTimes().Return(cmd)
+			},
+			ELFMockExpectations: func(c *gomock.Controller, mockELF *mockelf.MockELF) {
+				self := &debugelf.File{
+					FileHeader: debugelf.FileHeader{
+						Machine: debugelf.EM_X86_64,
+					},
+				}
+				mockELF.EXPECT().Open(gomock.Eq("/proc/self/exe")).AnyTimes().Return(self, nil)
+
+				libdcgm := &debugelf.File{
+					FileHeader: debugelf.FileHeader{
+						Machine: debugelf.EM_X86_64,
+					},
+				}
+				mockELF.EXPECT().Open(gomock.Eq("/lib/x86_64-linux-gnu/libdcgm.so.4")).AnyTimes().Return(libdcgm, nil)
+			},
+			AssertErr: func(t *testing.T, err error) {
+				require.NoError(t, err)
+			},
+		},
+		{
+			Name: "returns error when library is not found",
+			ExecMockExpectations: func(ctrl *gomock.Controller, mockExec *mockexec.MockExec) {
+				output := `1211 libs found in cache '/etc/ld.so.cache'
+				libcuda.so (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so
+			Cache generated by: ldconfig (Ubuntu GLIBC 2.35-0ubuntu3.7) stable release version 2.35`
+				cmd := mockexec.NewMockCmd(ctrl)
+				cmd.EXPECT().Output().AnyTimes().Return([]byte(output), nil)
+				mockExec.EXPECT().Command(gomock.Eq(ldconfigPath), gomock.Eq(ldconfigParam)).AnyTimes().Return(cmd)
+			},
+			ELFMockExpectations: func(c *gomock.Controller, mockELF *mockelf.MockELF) {
+				self := &debugelf.File{
+					FileHeader: debugelf.FileHeader{
+						Machine: debugelf.EM_X86_64,
+					},
+				}
+				mockELF.EXPECT().Open(gomock.Eq("/proc/self/exe")).AnyTimes().Return(self, nil)
+
+				libdcgm := &debugelf.File{
+					FileHeader: debugelf.FileHeader{
+						Machine: debugelf.EM_X86_64,
+					},
+				}
+				mockELF.EXPECT().Open(gomock.Eq("/lib/x86_64-linux-gnu/libdcgm.so.4")).AnyTimes().Return(libdcgm, nil)
+			},
+			AssertErr: func(t *testing.T, err error) {
+				require.Error(t, err)
+				require.ErrorContains(t, err, "the libdcgm.so.4 library was not found. Install Data Center GPU Manager (DCGM).")
+			},
+		},
+		{
+			Name: "returns error when can not execute command",
+			ExecMockExpectations: func(ctrl *gomock.Controller, mockExec *mockexec.MockExec) {
+				cmd := mockexec.NewMockCmd(ctrl)
+				cmd.EXPECT().Output().AnyTimes().Return([]byte{}, errors.New("boom!"))
+				mockExec.EXPECT().Command(gomock.Eq(ldconfigPath), gomock.Eq(ldconfigParam)).AnyTimes().Return(cmd)
+			},
+			AssertErr: func(t *testing.T, err error) {
+				require.Error(t, err)
+			},
+		},
+		{
+			Name: "error when can not open /proc/self/exe",
+			ExecMockExpectations: func(ctrl *gomock.Controller, mockExec *mockexec.MockExec) {
+				output := `1211 libs found in cache '/etc/ld.so.cache'
+				libdcgm.so.4 (libc6,x86-64) => /lib/x86_64-linux-gnu/libdcgm.so.4
+			Cache generated by: ldconfig (Ubuntu GLIBC 2.35-0ubuntu3.7) stable release version 2.35`
+				cmd := mockexec.NewMockCmd(ctrl)
+				cmd.EXPECT().Output().AnyTimes().Return([]byte(output), nil)
+				mockExec.EXPECT().Command(gomock.Eq(ldconfigPath), gomock.Eq(ldconfigParam)).AnyTimes().Return(cmd)
+			},
+			ELFMockExpectations: func(c *gomock.Controller, mockELF *mockelf.MockELF) {
+				mockELF.EXPECT().Open(gomock.Eq("/proc/self/exe")).AnyTimes().Return(nil, errors.New("boom!"))
+			},
+			AssertErr: func(t *testing.T, err error) {
+				require.Error(t, err)
+			},
+		},
+		{
+			Name: "returns error when library architecture missmatch",
+			ExecMockExpectations: func(ctrl *gomock.Controller, mockExec *mockexec.MockExec) {
+				output := `1211 libs found in cache '/etc/ld.so.cache'
+				libdcgm.so.4 (libc6,x86-64) => /lib/x86_64-linux-gnu/libdcgm.so.4
+			Cache generated by: ldconfig (Ubuntu GLIBC 2.35-0ubuntu3.7) stable release version 2.35`
+				cmd := mockexec.NewMockCmd(ctrl)
+				cmd.EXPECT().Output().AnyTimes().Return([]byte(output), nil)
+				mockExec.EXPECT().Command(gomock.Eq(ldconfigPath), gomock.Eq(ldconfigParam)).AnyTimes().Return(cmd)
+			},
+			ELFMockExpectations: func(c *gomock.Controller, mockELF *mockelf.MockELF) {
+				self := &debugelf.File{
+					FileHeader: debugelf.FileHeader{
+						Machine: debugelf.EM_X86_64,
+					},
+				}
+				mockELF.EXPECT().Open(gomock.Eq("/proc/self/exe")).AnyTimes().Return(self, nil)
+
+				libdcgm := &debugelf.File{
+					FileHeader: debugelf.FileHeader{
+						Machine: debugelf.EM_AARCH64,
+					},
+				}
+				mockELF.EXPECT().Open(gomock.Eq("/lib/x86_64-linux-gnu/libdcgm.so.4")).AnyTimes().Return(libdcgm, nil)
+			},
+			AssertErr: func(t *testing.T, err error) {
+				require.Error(t, err)
+				require.ErrorContains(t, err,
+					"the libdcgm.so.4 library architecture mismatch with the system; wanted: EM_X86_64, received: EM_AARCH64")
+			},
+		},
+		{
+			Name: "returns error when library file can not be open",
+			ExecMockExpectations: func(ctrl *gomock.Controller, mockExec *mockexec.MockExec) {
+				output := `1211 libs found in cache '/etc/ld.so.cache'
+				libdcgm.so.4 (libc6,x86-64) => /lib/x86_64-linux-gnu/libdcgm.so.4
+			Cache generated by: ldconfig (Ubuntu GLIBC 2.35-0ubuntu3.7) stable release version 2.35`
+				cmd := mockexec.NewMockCmd(ctrl)
+				cmd.EXPECT().Output().AnyTimes().Return([]byte(output), nil)
+				mockExec.EXPECT().Command(gomock.Eq(ldconfigPath), gomock.Eq(ldconfigParam)).AnyTimes().Return(cmd)
+			},
+			ELFMockExpectations: func(c *gomock.Controller, mockELF *mockelf.MockELF) {
+				self := &debugelf.File{
+					FileHeader: debugelf.FileHeader{
+						Machine: debugelf.EM_X86_64,
+					},
+				}
+				mockELF.EXPECT().Open(gomock.Eq("/proc/self/exe")).AnyTimes().Return(self, nil)
+
+				mockELF.EXPECT().Open(gomock.Eq("/lib/x86_64-linux-gnu/libdcgm.so.4")).AnyTimes().Return(nil, errors.New("boom!"))
+			},
+			AssertErr: func(t *testing.T, err error) {
+				require.Error(t, err)
+			},
+		},
+	}
+
+	origExec, origELF := exec, elf
+	t.Cleanup(func() { exec, elf = origExec, origELF }) // restore the package-level implementations
+
+	for _, tc := range testCases {
+		t.Run(tc.Name, func(t *testing.T) {
+			ctrl := gomock.NewController(t)
+			executor := mockexec.NewMockExec(ctrl)
+			if tc.ExecMockExpectations != nil {
+				tc.ExecMockExpectations(ctrl, executor)
+			}
+			exec = executor
+
+			elfreader := mockelf.NewMockELF(ctrl)
+			if tc.ELFMockExpectations != nil {
+				tc.ELFMockExpectations(ctrl, elfreader)
+			}
+			elf = elfreader
+			// Assert on the subtest's own t: failing the parent t from a subtest is invalid.
+			tc.AssertErr(t, dcgmLibExistsRule{}.Validate())
+		})
+	}
+}
diff --git a/internal/pkg/prerequisites/types.go b/internal/pkg/prerequisites/types.go
new file mode 100644
index 00000000..c5e39156
--- /dev/null
+++ b/internal/pkg/prerequisites/types.go
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package prerequisites
+
+type rule interface { // rule is a single check run by the package-level Validate
+	Validate() error // a non-nil error stops Validate and is returned to its caller
+}
diff --git a/internal/pkg/prerequisites/validation.go b/internal/pkg/prerequisites/validation.go
new file mode 100644
index 00000000..d5e42aab
--- /dev/null
+++ b/internal/pkg/prerequisites/validation.go
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package prerequisites
+
+// rules lists the checks that Validate runs, in order.
+var rules = []rule{
+	dcgmLibExistsRule{},
+}
+
+// Validate runs every rule in order and returns the first error it encounters, or nil.
+func Validate() error {
+	for _, r := range rules {
+		if err := r.Validate(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/internal/pkg/prerequisites/validation_test.go b/internal/pkg/prerequisites/validation_test.go
new file mode 100644
index 00000000..21e85be9
--- /dev/null
+++ b/internal/pkg/prerequisites/validation_test.go
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package prerequisites
+
+import (
+	debugelf "debug/elf"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"go.uber.org/mock/gomock"
+
+	mockelf "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/elf"
+	mockexec "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/exec"
+	mockos "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/os"
+
+	realos "os"
+)
+
+// TestValidate checks that Validate runs ldconfig from /sbin/ldconfig.real when that file exists
+// and from /sbin/ldconfig otherwise. Every case runs as a subtest named after tc.Name.
+func TestValidate(t *testing.T) {
+	type testCase struct {
+		Name               string
+		OSMockExpectations func(*gomock.Controller, *mockos.MockOS)
+		LDConfigPath       string
+	}
+
+	tests := []testCase{
+		{
+			Name: "Ubuntu-based system with /sbin/ldconfig.real",
+			OSMockExpectations: func(ctrl *gomock.Controller, mo *mockos.MockOS) {
+				mfi := mockos.NewMockFileInfo(ctrl)
+				mo.EXPECT().Stat("/sbin/ldconfig.real").Return(mfi, nil)
+			},
+			LDConfigPath: "/sbin/ldconfig.real",
+		},
+		{
+			Name: "Linux system without /sbin/ldconfig.real",
+			OSMockExpectations: func(ctrl *gomock.Controller, mo *mockos.MockOS) {
+				mo.EXPECT().Stat("/sbin/ldconfig.real").Return(nil, &realos.PathError{})
+			},
+			LDConfigPath: "/sbin/ldconfig",
+		},
+	}
+
+	origOS, origExec, origELF := os, exec, elf
+	t.Cleanup(func() { os, exec, elf = origOS, origExec, origELF }) // restore the real implementations
+
+	for _, tc := range tests {
+		t.Run(tc.Name, func(t *testing.T) {
+			ctrl := gomock.NewController(t)
+
+			osinstance := mockos.NewMockOS(ctrl)
+			tc.OSMockExpectations(ctrl, osinstance)
+			os = osinstance
+
+			output := `1211 libs found in cache '/etc/ld.so.cache'
+					libdcgm.so.4 (libc6,x86-64) => /lib/x86_64-linux-gnu/libdcgm.so.4
+				Cache generated by: ldconfig (Ubuntu GLIBC 2.35-0ubuntu3.7) stable release version 2.35`
+			cmd := mockexec.NewMockCmd(ctrl)
+			cmd.EXPECT().Output().AnyTimes().Return([]byte(output), nil)
+			executor := mockexec.NewMockExec(ctrl)
+			executor.EXPECT().Command(gomock.Eq(tc.LDConfigPath), gomock.Eq(ldconfigParam)).AnyTimes().Return(cmd)
+			exec = executor
+
+			self := &debugelf.File{
+				FileHeader: debugelf.FileHeader{
+					Machine: debugelf.EM_X86_64,
+				},
+			}
+			libdcgm := &debugelf.File{
+				FileHeader: debugelf.FileHeader{
+					Machine: debugelf.EM_X86_64,
+				},
+			}
+			elfreader := mockelf.NewMockELF(ctrl)
+			elfreader.EXPECT().Open(gomock.Eq("/proc/self/exe")).AnyTimes().Return(self, nil)
+			elfreader.EXPECT().Open(gomock.Eq("/lib/x86_64-linux-gnu/libdcgm.so.4")).AnyTimes().Return(libdcgm, nil)
+			elf = elfreader
+
+			err := Validate()
+			require.NoError(t, err)
+		})
+	}
+}
diff --git a/internal/pkg/prerequisites/variables.go b/internal/pkg/prerequisites/variables.go
new file mode 100644
index 00000000..32c6c452
--- /dev/null
+++ b/internal/pkg/prerequisites/variables.go
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package prerequisites
+
+import (
+	"fmt"
+	"regexp"
+
+	elfinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/elf"
+	execinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/exec"
+	osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
+)
+
+var (
+	os osinterface.OS = osinterface.RealOS{} // swapped for a mock by the package tests
+
+	exec execinterface.Exec = execinterface.RealExec{} // swapped for a mock by the package tests
+
+	elf elfinterface.ELF = elfinterface.RealELF{} // swapped for a mock by the package tests
+
+	// rxLDCacheEntry captures the name (group 1) and path (group 2) of ldconfig cache entries such as:
+	//	libdcgm.so.4 (libc6,x86-64) => /lib/x86_64-linux-gnu/libdcgm.so.4
+	//	ld-linux.so.2 (ELF) => /lib/ld-linux.so.2
+	//	ld-linux-x86-64.so.2 (libc6,x86-64) => /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
+	rxLDCacheEntry = regexp.MustCompile(`(?m)^(.*)\s*\(.*\)\s*=>\s*(.*)$`)
+
+	errLibdcgmNotFound = fmt.Errorf("the %s library was not found. Install Data Center GPU Manager (DCGM).", libdcgmco)
+)
diff --git a/internal/pkg/registry/registry.go b/internal/pkg/registry/registry.go
new file mode 100644
index 00000000..40065cc5
--- /dev/null
+++ b/internal/pkg/registry/registry.go
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package registry
+
+import (
+	"sync"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"golang.org/x/sync/errgroup"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+)
+
+// groupCounterTuple represents a composite key that consists of a Group and a Counter.
+// The groupCounterTuple is necessary to maintain the uniqueness of Group and Counter pairs.
+type groupCounterTuple struct {
+	Group   dcgm.Field_Entity_Group
+	Counter counters.Counter
+}
+
+type Registry struct {
+	collectorGroups     map[dcgm.Field_Entity_Group][]collector.Collector // collectors added by Register, per entity group
+	collectorGroupsSeen map[collector.EntityCollectorTuple]struct{}       // tuples already passed to Register
+	mtx                 sync.RWMutex                                      // write-locked by Gather for its whole run
+}
+
+// NewRegistry returns a Registry with empty, initialized collector maps.
+func NewRegistry() *Registry {
+	return &Registry{
+		collectorGroups:     make(map[dcgm.Field_Entity_Group][]collector.Collector),
+		collectorGroupsSeen: make(map[collector.EntityCollectorTuple]struct{}),
+	}
+}
+
+// Register adds the tuple's collector to its entity group; an already registered tuple is ignored.
+func (r *Registry) Register(entityCollectorTuples collector.EntityCollectorTuple) {
+	if _, seen := r.collectorGroupsSeen[entityCollectorTuples]; seen {
+		return
+	}
+	entity := entityCollectorTuples.Entity()
+	r.collectorGroups[entity] = append(r.collectorGroups[entity], entityCollectorTuples.Collector())
+	r.collectorGroupsSeen[entityCollectorTuples] = struct{}{}
+}
+
+// Gather gathers metrics from all registered collectors.
+//
+// Every collector runs in its own goroutine. The results are merged under a mutex because the
+// former sync.Map load/append/store sequence was not atomic: collectors reporting the same
+// counter for the same entity group could drop each other's metrics.
+//
+// If any collector fails, Gather returns a nil result and the first error.
+func (r *Registry) Gather() (MetricsByCounterGroup, error) {
+	r.mtx.Lock()
+	defer r.mtx.Unlock()
+
+	g := new(errgroup.Group)
+	// mergedMtx guards merged while collector goroutines are running.
+	var mergedMtx sync.Mutex
+	merged := map[groupCounterTuple][]collector.Metric{}
+
+	for group, collectors := range r.collectorGroups {
+		for _, c := range collectors {
+			c := c // creates new c, see https://golang.org/doc/faq#closures_and_goroutines
+			group := group
+			g.Go(func() error {
+				metrics, err := c.GetMetrics()
+				if err != nil {
+					return err
+				}
+
+				mergedMtx.Lock()
+				defer mergedMtx.Unlock()
+				for counter, metricVals := range metrics {
+					key := groupCounterTuple{Group: group, Counter: counter}
+					merged[key] = append(merged[key], metricVals...)
+				}
+				return nil
+			})
+		}
+	}
+
+	if err := g.Wait(); err != nil {
+		return nil, err
+	}
+
+	output := MetricsByCounterGroup{}
+	for key, metricVals := range merged {
+		if _, exists := output[key.Group]; !exists {
+			output[key.Group] = map[counters.Counter][]collector.Metric{}
+		}
+		output[key.Group][key.Counter] = metricVals
+	}
+
+	return output, nil
+}
+
+// Cleanup calls Cleanup on every registered collector. NOTE(review): r.mtx is not taken here.
+func (r *Registry) Cleanup() {
+	for _, collectors := range r.collectorGroups {
+		for _, c := range collectors {
+			c.Cleanup()
+		}
+	}
+}
diff --git a/internal/pkg/registry/registry_test.go b/internal/pkg/registry/registry_test.go
new file mode 100644
index 00000000..9844e934
--- /dev/null
+++ b/internal/pkg/registry/registry_test.go
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package registry
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/mock"
+	"github.com/stretchr/testify/require"
+
+	collectorpkg "github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+)
+
+// mockCollector is a testify-based collector stub used by the registry tests.
+type mockCollector struct {
+	mock.Mock
+}
+
+// GetMetrics returns the values configured for the recorded "GetMetrics" call.
+func (m *mockCollector) GetMetrics() (collectorpkg.MetricsByCounter, error) {
+	result := m.Called()
+	return result.Get(0).(collectorpkg.MetricsByCounter), result.Error(1)
+}
+
+func (m *mockCollector) Cleanup() { m.Called() } // Cleanup only records that it was called
+
+// TestRegistry_Gather checks Gather for a collector that succeeds and for one that fails.
+// The assert callbacks receive the subtest's own t; failing the parent t from a subtest is invalid.
+func TestRegistry_Gather(t *testing.T) {
+	collector := new(mockCollector)
+	metrics := collectorpkg.MetricsByCounter{}
+	counterA := counters.Counter{
+		FieldID:   155,
+		FieldName: "DCGM_FI_DEV_POWER_USAGE",
+		PromType:  "gauge",
+	}
+
+	metrics[counterA] = append(metrics[counterA], collectorpkg.Metric{
+		GPU:        "0",
+		Counter:    counterA,
+		Attributes: map[string]string{},
+	})
+
+	counterB := counters.Counter{
+		FieldName: "DCGM_FI_EXP_CLOCK_THROTTLE_REASONS_COUNT",
+		PromType:  "gauge",
+	}
+
+	metrics[counterB] = append(metrics[counterB], collectorpkg.Metric{
+		GPU:        "0",
+		Counter:    counterB,
+		Value:      "42",
+		Attributes: map[string]string{},
+	})
+
+	type test struct {
+		name           string
+		collectorState func() *mock.Call
+		assert         func(*testing.T, MetricsByCounterGroup, error)
+	}
+	tests := []test{
+		{
+			name: "When collector return no errors",
+			collectorState: func() *mock.Call {
+				cs := collector.On("GetMetrics").Return(metrics, nil)
+				cs.On("Cleanup").Return()
+				return cs
+			},
+			assert: func(t *testing.T, mbcg MetricsByCounterGroup, err error) {
+				require.NoError(t, err)
+				require.Contains(t, mbcg, dcgm.FE_GPU)
+				require.Len(t, mbcg, 1)
+				require.Len(t, mbcg[dcgm.FE_GPU], 2)
+			},
+		},
+		{
+			name: "When collector return errors",
+			collectorState: func() *mock.Call {
+				cs := collector.On("GetMetrics").Return(collectorpkg.MetricsByCounter{}, errors.New("Boom!"))
+				cs.On("Cleanup").Return()
+				return cs
+			},
+			assert: func(t *testing.T, mbcg MetricsByCounterGroup, err error) {
+				require.Error(t, err)
+				require.Len(t, mbcg, 0)
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			reg := NewRegistry()
+			newEntityCollectorTuple := collectorpkg.EntityCollectorTuple{}
+			newEntityCollectorTuple.SetEntity(dcgm.FE_GPU)
+			newEntityCollectorTuple.SetCollector(collector)
+			reg.Register(newEntityCollectorTuple)
+			mockCall := tc.collectorState()
+			got, err := reg.Gather()
+			tc.assert(t, got, err)
+			mockCall.Unset()
+			reg.Cleanup()
+		})
+	}
+}
+
+// TestRegistry_Register_Accepts_Duplicates_ registers two equal tuples and verifies that the
+// second registration is accepted without adding the collector a second time.
+func TestRegistry_Register_Accepts_Duplicates_(t *testing.T) {
+	reg := NewRegistry()
+	collector := new(mockCollector)
+	for i := 0; i < 2; i++ {
+		tuple := collectorpkg.EntityCollectorTuple{}
+		tuple.SetEntity(dcgm.FE_GPU)
+		tuple.SetCollector(collector)
+		reg.Register(tuple)
+	}
+
+	assert.Len(t, reg.collectorGroups, 1)
+	// Both tuples share one entity group, so only the slice length can reveal a duplicate.
+	assert.Len(t, reg.collectorGroups[dcgm.FE_GPU], 1)
+	assert.Len(t, reg.collectorGroupsSeen, 1)
+}
diff --git a/internal/pkg/registry/types.go b/internal/pkg/registry/types.go
new file mode 100644
index 00000000..d4faf7d9
--- /dev/null
+++ b/internal/pkg/registry/types.go
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package registry
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+)
+
+// MetricsByCounterGroup maps an entity group to the metrics gathered for it, keyed by counter.
+type MetricsByCounterGroup map[dcgm.Field_Entity_Group]collector.MetricsByCounter
diff --git a/internal/pkg/rendermetrics/render_metrics.go b/internal/pkg/rendermetrics/render_metrics.go
new file mode 100644
index 00000000..f99af1d8
--- /dev/null
+++ b/internal/pkg/rendermetrics/render_metrics.go
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rendermetrics
+
+import (
+	"fmt"
+	"io"
+	"sync"
+	"text/template"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+)
+
+/*
+* The goal here is to get to the following format:
+* ```
+* # HELP FIELD_ID HELP_MSG
+* # TYPE FIELD_ID PROM_TYPE
+* FIELD_ID{gpu="GPU_INDEX_0",uuid="GPU_UUID", attr...} VALUE
+* FIELD_ID{gpu="GPU_INDEX_N",uuid="GPU_UUID", attr...} VALUE
+* ...
+* ```
+ */
+
+var ( // text/template sources; RenderGroup selects one per entity group
+	gpuMetricsFormat = `
+{{- range $counter, $metrics := . -}}
+# HELP {{ $counter.FieldName }} {{ $counter.Help }}
+# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
+{{- range $metric := $metrics }}
+{{ $counter.FieldName }}{gpu="{{ $metric.GPU }}",{{ $metric.UUID }}="{{ $metric.GPUUUID }}",pci_bus_id="{{ $metric.GPUPCIBusID }}",device="{{ $metric.GPUDevice }}",modelName="{{ $metric.GPUModelName }}"{{if $metric.MigProfile}},GPU_I_PROFILE="{{ $metric.MigProfile }}",GPU_I_ID="{{ $metric.GPUInstanceID }}"{{end}}{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
+
+{{- range $k, $v := $metric.Labels -}}
+	,{{ $k }}="{{ $v }}"
+{{- end -}}
+{{- range $k, $v := $metric.Attributes -}}
+	,{{ $k }}="{{ $v }}"
+{{- end -}}
+
+} {{ $metric.Value -}}
+{{- end }}
+{{ end }}`
+	// switchMetricsFormat emits the metric's GPU field as the nvswitch label; Attributes are not rendered.
+	switchMetricsFormat = `
+{{- range $counter, $metrics := . -}}
+# HELP {{ $counter.FieldName }} {{ $counter.Help }}
+# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
+{{- range $metric := $metrics }}
+{{ $counter.FieldName }}{nvswitch="{{ $metric.GPU }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
+
+{{- range $k, $v := $metric.Labels -}}
+	,{{ $k }}="{{ $v }}"
+{{- end -}}
+} {{ $metric.Value -}}
+{{- end }}
+{{ end }}`
+	// linkMetricsFormat emits the GPU field as the nvlink label and the GPUDevice field as nvswitch.
+	linkMetricsFormat = `
+{{- range $counter, $metrics := . -}}
+# HELP {{ $counter.FieldName }} {{ $counter.Help }}
+# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
+{{- range $metric := $metrics }}
+{{ $counter.FieldName }}{nvlink="{{ $metric.GPU }}",nvswitch="{{ $metric.GPUDevice }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
+
+{{- range $k, $v := $metric.Labels -}}
+	,{{ $k }}="{{ $v }}"
+{{- end -}}
+} {{ $metric.Value -}}
+{{- end }}
+{{ end }}`
+	// cpuMetricsFormat emits the metric's GPU field as the cpu label; Attributes are not rendered.
+	cpuMetricsFormat = `
+{{- range $counter, $metrics := . -}}
+# HELP {{ $counter.FieldName }} {{ $counter.Help }}
+# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
+{{- range $metric := $metrics }}
+{{ $counter.FieldName }}{cpu="{{ $metric.GPU }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
+
+{{- range $k, $v := $metric.Labels -}}
+	,{{ $k }}="{{ $v }}"
+{{- end -}}
+} {{ $metric.Value -}}
+{{- end }}
+{{ end }}`
+	// cpuCoreMetricsFormat emits the GPU field as the cpucore label and the GPUDevice field as cpu.
+	cpuCoreMetricsFormat = `
+{{- range $counter, $metrics := . -}}
+# HELP {{ $counter.FieldName }} {{ $counter.Help }}
+# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
+{{- range $metric := $metrics }}
+{{ $counter.FieldName }}{cpucore="{{ $metric.GPU }}",cpu="{{ $metric.GPUDevice }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
+
+{{- range $k, $v := $metric.Labels -}}
+	,{{ $k }}="{{ $v }}"
+{{- end -}}
+} {{ $metric.Value -}}
+{{- end }}
+{{ end }}`
+)
+
+// Each getter parses its metrics format on first use and then returns the cached template.
+var (
+	getGPUMetricsTemplate = sync.OnceValue(func() *template.Template {
+		return template.Must(template.New("gpuMetricsFormat").Parse(gpuMetricsFormat))
+	})
+	getSwitchMetricsTemplate = sync.OnceValue(func() *template.Template {
+		return template.Must(template.New("switchMetricsFormat").Parse(switchMetricsFormat))
+	})
+	getLinkMetricsTemplate = sync.OnceValue(func() *template.Template {
+		return template.Must(template.New("linkMetricsFormat").Parse(linkMetricsFormat))
+	})
+	getCPUMetricsTemplate = sync.OnceValue(func() *template.Template {
+		return template.Must(template.New("cpuMetricsFormat").Parse(cpuMetricsFormat))
+	})
+	// Named after its own format; this template previously reused the name "cpuMetricsFormat".
+	getCPUCoreMetricsTemplate = sync.OnceValue(func() *template.Template {
+		return template.Must(template.New("cpuCoreMetricsFormat").Parse(cpuCoreMetricsFormat))
+	})
+)
+
+// RenderGroup writes metrics to w using the template of the given entity group; unknown groups fail.
+func RenderGroup(w io.Writer, group dcgm.Field_Entity_Group, metrics collector.MetricsByCounter) error {
+	var pick func() *template.Template
+	switch group {
+	case dcgm.FE_GPU:
+		pick = getGPUMetricsTemplate
+	case dcgm.FE_SWITCH:
+		pick = getSwitchMetricsTemplate
+	case dcgm.FE_LINK:
+		pick = getLinkMetricsTemplate
+	case dcgm.FE_CPU:
+		pick = getCPUMetricsTemplate
+	case dcgm.FE_CPU_CORE:
+		pick = getCPUCoreMetricsTemplate
+	default:
+		return fmt.Errorf("unexpected group: %s", group.String())
+	}
+	return pick().Execute(w, metrics)
+}
diff --git a/internal/pkg/rendermetrics/render_metrics_test.go b/internal/pkg/rendermetrics/render_metrics_test.go
new file mode 100644
index 00000000..548dbb76
--- /dev/null
+++ b/internal/pkg/rendermetrics/render_metrics_test.go
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rendermetrics
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+)
+
+// getMetricsByCounterWithTestMetric returns a map holding one metric for the getTestMetric counter.
+func getMetricsByCounterWithTestMetric() collector.MetricsByCounter {
+	counter := getTestMetric()
+	metric := collector.Metric{
+		GPU:          "0",
+		GPUDevice:    "nvidia0",
+		GPUModelName: "NVIDIA T400 4GB",
+		Hostname:     "testhost",
+		UUID:         "UUID",
+		GPUUUID:      "GPU-00000000-0000-0000-0000-000000000000",
+		Counter:      counter,
+		Value:        "42",
+		Attributes:   map[string]string{},
+	}
+	return collector.MetricsByCounter{counter: {metric}}
+}
+
+// getTestMetric returns the gauge counter TEST_METRIC (field ID 2000) shared by the render tests.
+func getTestMetric() counters.Counter {
+	return counters.Counter{
+		FieldID:   2000,
+		FieldName: "TEST_METRIC",
+		PromType:  "gauge",
+	}
+}
+
+// Test_render renders one metric per entity group; cases without wantErr must succeed without an error.
+func Test_render(t *testing.T) {
+	metrics := getMetricsByCounterWithTestMetric()
+	tests := []struct {
+		name    string
+		group   dcgm.Field_Entity_Group
+		metrics collector.MetricsByCounter
+		want    string
+		wantErr assert.ErrorAssertionFunc
+	}{
+		{
+			name:    fmt.Sprintf("Render %s", dcgm.FE_GPU.String()),
+			group:   dcgm.FE_GPU,
+			metrics: metrics,
+			want: `# HELP TEST_METRIC 
+# TYPE TEST_METRIC gauge
+TEST_METRIC{gpu="0",UUID="GPU-00000000-0000-0000-0000-000000000000",pci_bus_id="",device="nvidia0",modelName="NVIDIA T400 4GB",Hostname="testhost"} 42
+`,
+		},
+		{
+			name:    fmt.Sprintf("Render %s", dcgm.FE_SWITCH.String()),
+			group:   dcgm.FE_SWITCH,
+			metrics: metrics,
+			want: `# HELP TEST_METRIC 
+# TYPE TEST_METRIC gauge
+TEST_METRIC{nvswitch="0",Hostname="testhost"} 42
+`,
+		},
+		{
+			name:    fmt.Sprintf("Render %s", dcgm.FE_LINK.String()),
+			group:   dcgm.FE_LINK,
+			metrics: metrics,
+			want: `# HELP TEST_METRIC 
+# TYPE TEST_METRIC gauge
+TEST_METRIC{nvlink="0",nvswitch="nvidia0",Hostname="testhost"} 42
+`,
+		},
+		{
+			name:    fmt.Sprintf("Render %s", dcgm.FE_CPU.String()),
+			group:   dcgm.FE_CPU,
+			metrics: metrics,
+			want: `# HELP TEST_METRIC 
+# TYPE TEST_METRIC gauge
+TEST_METRIC{cpu="0",Hostname="testhost"} 42
+`,
+		},
+		{
+			name:    fmt.Sprintf("Render %s", dcgm.FE_CPU_CORE.String()),
+			group:   dcgm.FE_CPU_CORE,
+			metrics: metrics,
+			want: `# HELP TEST_METRIC 
+# TYPE TEST_METRIC gauge
+TEST_METRIC{cpucore="0",cpu="nvidia0",Hostname="testhost"} 42
+`,
+		},
+		{
+			name:    "Render unknown group",
+			group:   42,
+			metrics: metrics,
+			want:    ``,
+			wantErr: assert.Error,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			w := &bytes.Buffer{}
+			err := RenderGroup(w, tt.group, tt.metrics)
+			if tt.wantErr == nil {
+				assert.NoErrorf(t, err, "RenderGroup(w, %v, %v)", tt.group, tt.metrics)
+			} else if !tt.wantErr(t, err, fmt.Sprintf("RenderGroup(w, %v, %v)", tt.group, tt.metrics)) {
+				return
+			}
+			assert.Equalf(t, tt.want, w.String(), "RenderGroup(w, %v, %v)", tt.group, tt.metrics)
+		})
+	}
+}
diff --git a/internal/pkg/server/server.go b/internal/pkg/server/server.go
new file mode 100644
index 00000000..9fe1c32f
--- /dev/null
+++ b/internal/pkg/server/server.go
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package server
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"log/slog"
+	"net/http"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/gorilla/mux"
+	"github.com/prometheus/exporter-toolkit/web"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/logging"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/registry"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/rendermetrics"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/transformation"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/utils"
+)
+
+const internalServerError = "internal server error"
+
+// NewMetricsServer builds the server and registers the "/", "/health" and "/metrics" routes; the returned cleanup func is a no-op and the error is always nil.
+func NewMetricsServer(
+	c *appconfig.Config,
+	metrics chan string,
+	deviceWatchListManager devicewatchlistmanager.Manager,
+	registry *registry.Registry,
+) (*MetricsServer, func(), error) {
+	router := mux.NewRouter()
+	serverv1 := &MetricsServer{
+		server: &http.Server{
+			Addr:         c.Address,
+			Handler:      router,
+			ReadTimeout:  10 * time.Second,
+			WriteTimeout: 10 * time.Second,
+		},
+		webConfig: &web.FlagConfig{
+			WebListenAddresses: &[]string{c.Address},
+			WebSystemdSocket:   &c.WebSystemdSocket,
+			WebConfigFile:      &c.WebConfigFile,
+		},
+		metricsChan:            metrics,
+		metrics:                "",
+		registry:               registry,
+		config:                 c,
+		transformations:        transformation.GetTransformations(c),
+		deviceWatchListManager: deviceWatchListManager,
+	}
+	// Landing page: a small static HTML document that links to ./metrics.
+	router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("X-Content-Type-Options", "nosniff")
+		w.WriteHeader(http.StatusOK)
+		_, err := w.Write([]byte(`<html>
+			<head><title>GPU Exporter</title></head>
+			<body>
+			<h1>GPU Exporter</h1>
+			<p><a href="./metrics">Metrics</a></p>
+			</body>
+			</html>`))
+		if err != nil {
+			slog.Error("Failed to write response.", slog.String(logging.ErrorKey, err.Error()))
+			http.Error(w, internalServerError, http.StatusInternalServerError)
+			return
+		}
+	})
+	router.HandleFunc("/health", serverv1.Health)
+	router.HandleFunc("/metrics", serverv1.Metrics)
+
+	return serverv1, func() {}, nil
+}
+
+// Run serves HTTP until stop is signalled, then shuts the server down.
+//
+// wg is marked done on return. The process exits with status 1 when the
+// listener fails with anything but http.ErrServerClosed, when Shutdown
+// fails, or when WaitWithTimeout reports an error for the serving goroutine.
+func (s *MetricsServer) Run(stop chan interface{}, wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	var httpwg sync.WaitGroup
+	httpwg.Add(1)
+	go func() {
+		defer httpwg.Done()
+		slog.Info("Starting webserver")
+		if err := web.ListenAndServe(s.server, s.webConfig, slog.Default()); err != nil && err != http.ErrServerClosed {
+			slog.Error("Failed to Listen and Server HTTP server.", slog.String(logging.ErrorKey, err.Error()))
+			os.Exit(1)
+		}
+	}()
+
+	// Keep this the only receiver on stop. A helper goroutine that also waits
+	// on stop and is tracked by httpwg adds nothing when the channel is closed,
+	// and when a single value is sent only one receiver wakes up: the other
+	// blocks forever, so shutdown ends in a hang or in the fatal timeout below.
+	<-stop
+	if err := s.server.Shutdown(context.Background()); err != nil {
+		slog.Error("Failed to shutdown HTTP server.", slog.String(logging.ErrorKey, err.Error()))
+		s.fatal()
+	}
+
+	// Wait, for at most the timeout handed to WaitWithTimeout, until the
+	// serving goroutine has returned.
+	if err := utils.WaitWithTimeout(&httpwg, 3*time.Second); err != nil {
+		slog.Error("Failed waiting for HTTP server to shutdown.", slog.String(logging.ErrorKey, err.Error()))
+		s.fatal()
+	}
+}
+
+// fatal terminates the process with exit status 1; Run calls it when Shutdown
+// or the wait for the serving goroutine fails.
+func (s *MetricsServer) fatal() { os.Exit(1) }
+
+// Metrics handles /metrics: it gathers from the registry, renders the result
+// into a buffer and writes it out. A gather, render or write failure is
+// answered with HTTP 500 via http.Error.
+func (s *MetricsServer) Metrics(w http.ResponseWriter, _ *http.Request) {
+	w.Header().Set("X-Content-Type-Options", "nosniff")
+	metricGroups, err := s.registry.Gather()
+	if err != nil {
+		slog.Error("Failed to gather metrics from collectors", slog.String(logging.ErrorKey, err.Error()))
+		http.Error(w, internalServerError, http.StatusInternalServerError)
+		return
+	}
+	var buf bytes.Buffer
+	if err = s.render(&buf, metricGroups); err != nil {
+		http.Error(w, internalServerError, http.StatusInternalServerError)
+		return
+	}
+	if _, err = w.Write(buf.Bytes()); err != nil {
+		slog.Error("Failed to write response.", slog.String(logging.ErrorKey, err.Error()))
+		http.Error(w, "failed to write response", http.StatusInternalServerError)
+	}
+}
+
+// render runs the configured transformations over every metric group that has a device watch list and writes it to w.
+// Groups without a watch list are skipped; the first transformation or rendering error is logged and returned.
+func (s *MetricsServer) render(w io.Writer, metricGroups registry.MetricsByCounterGroup) error {
+	for group, metrics := range metricGroups {
+		deviceWatchList, exists := s.deviceWatchListManager.EntityWatchList(group)
+		if !exists {
+			continue
+		}
+		for _, transform := range s.transformations {
+			if err := transform.Process(metrics, deviceWatchList.DeviceInfo()); err != nil {
+				slog.LogAttrs(context.Background(), slog.LevelError, "Failed to apply transformations on metrics",
+					slog.String(logging.ErrorKey, err.Error()),
+					slog.String(logging.FieldEntityGroupKey, group.String()),
+					slog.Any(logging.MetricsKey, metrics),
+					slog.Any(logging.DeviceInfoKey, deviceWatchList.DeviceInfo()),
+				)
+				return err
+			}
+		}
+		if err := rendermetrics.RenderGroup(w, group, metrics); err != nil {
+			slog.LogAttrs(context.Background(), slog.LevelError, "Failed to renderGroup metrics",
+				slog.String(logging.ErrorKey, err.Error()),
+				slog.String(logging.FieldEntityGroupKey, group.String()),
+				slog.Any(logging.MetricsKey, metrics),
+				slog.Any(logging.DeviceInfoKey, deviceWatchList.DeviceInfo()),
+			)
+			return err
+		}
+	}
+	return nil
+}
+
+// Health is the liveness endpoint: it replies with the body "OK" and falls back to http.Error if the write fails.
+func (s *MetricsServer) Health(w http.ResponseWriter, _ *http.Request) {
+	w.Header().Set("X-Content-Type-Options", "nosniff")
+	if _, err := w.Write([]byte("OK")); err != nil {
+		slog.Error("Failed to write response.", slog.String(logging.ErrorKey, err.Error()))
+		http.Error(w, "failed to write response", http.StatusInternalServerError)
+	}
+}
diff --git a/internal/pkg/server/server_test.go b/internal/pkg/server/server_test.go
new file mode 100644
index 00000000..186efab1
--- /dev/null
+++ b/internal/pkg/server/server_test.go
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package server
+
+import (
+	"errors"
+	"net"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"syscall"
+	"testing"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"go.uber.org/mock/gomock"
+
+	mockcollectorpkg "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/collector"
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	mockdevicewatchlistmanager "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/devicewatchlistmanager"
+	mocktransformation "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/transformation"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/registry"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/transformation"
+)
+
+const expectedResponse = `# HELP TEST_METRIC 
+# TYPE TEST_METRIC gauge
+TEST_METRIC{gpu="0",UUID="GPU-00000000-0000-0000-0000-000000000000",pci_bus_id="",device="nvidia0",modelName="NVIDIA T400 4GB",Hostname="testhost"} 42
+`
+
+var deviceWatcher = devicewatcher.NewDeviceWatcher()
+
+// getMetricsByCounterWithTestMetric returns a MetricsByCounter holding a single
+// GPU sample (value "42", host "testhost") for the counter from getTestMetric.
+func getMetricsByCounterWithTestMetric() collector.MetricsByCounter {
+	counter := getTestMetric()
+	sample := collector.Metric{
+		GPU:          "0",
+		GPUDevice:    "nvidia0",
+		GPUModelName: "NVIDIA T400 4GB",
+		Hostname:     "testhost",
+		UUID:         "UUID",
+		GPUUUID:      "GPU-00000000-0000-0000-0000-000000000000",
+		Counter:      counter,
+		Value:        "42",
+		Attributes:   map[string]string{},
+	}
+	return collector.MetricsByCounter{counter: {sample}}
+}
+
+// getTestMetric returns the TEST_METRIC gauge counter (field ID 2000) used by the server tests.
+func getTestMetric() counters.Counter {
+	return counters.Counter{
+		FieldID:   2000,
+		FieldName: "TEST_METRIC",
+		PromType:  "gauge",
+	}
+}
+
+// TestMetrics drives the Metrics handler through its success path and its collector, transformer and unknown-group failure paths.
+func TestMetrics(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	metrics := getMetricsByCounterWithTestMetric()
+
+	tests := []struct {
+		name        string
+		group       dcgm.Field_Entity_Group
+		collector   func() collector.Collector
+		transformer func() transformation.Transform
+		assert      func(*testing.T, *httptest.ResponseRecorder)
+	}{
+		{
+			name:  "Returns 200",
+			group: dcgm.FE_GPU,
+			collector: func() collector.Collector {
+				mockCollector := mockcollectorpkg.NewMockCollector(ctrl)
+				mockCollector.EXPECT().GetMetrics().Return(metrics, nil).AnyTimes()
+				return mockCollector
+			},
+			transformer: func() transformation.Transform {
+				mockTransformation := mocktransformation.NewMockTransform(ctrl)
+				mockTransformation.EXPECT().Process(gomock.Any(), gomock.Any())
+				return mockTransformation
+			},
+			assert: func(t *testing.T, recorder *httptest.ResponseRecorder) {
+				assert.Equal(t, http.StatusOK, recorder.Code)
+				assert.Equal(t, expectedResponse, recorder.Body.String())
+			},
+		},
+		{
+			name:  "Returns 500 when Collector return error",
+			group: dcgm.FE_GPU,
+			collector: func() collector.Collector {
+				mockCollector := mockcollectorpkg.NewMockCollector(ctrl)
+				mockCollector.EXPECT().GetMetrics().Return(nil, errors.New("boom")).AnyTimes()
+				return mockCollector
+			},
+			transformer: func() transformation.Transform {
+				return mocktransformation.NewMockTransform(ctrl)
+			},
+			assert: func(t *testing.T, recorder *httptest.ResponseRecorder) {
+				assert.Equal(t, http.StatusInternalServerError, recorder.Code)
+				assert.Equal(t, internalServerError, strings.TrimSpace(recorder.Body.String()))
+			},
+		},
+		{
+			name:  "Returns 500 when Transformer returns error",
+			group: dcgm.FE_GPU,
+			collector: func() collector.Collector {
+				mockCollector := mockcollectorpkg.NewMockCollector(ctrl)
+				mockCollector.EXPECT().GetMetrics().Return(metrics, nil).AnyTimes()
+				return mockCollector
+			},
+			transformer: func() transformation.Transform {
+				mockTransformation := mocktransformation.NewMockTransform(ctrl)
+				mockTransformation.EXPECT().Process(gomock.Any(), gomock.Any()).Return(errors.New("boom")).AnyTimes()
+				return mockTransformation
+			},
+			assert: func(t *testing.T, recorder *httptest.ResponseRecorder) {
+				assert.Equal(t, http.StatusInternalServerError, recorder.Code)
+				assert.Equal(t, internalServerError, strings.TrimSpace(recorder.Body.String()))
+			},
+		},
+		{
+			name:  "Returns 500 when group is unknown",
+			group: dcgm.FE_NONE,
+			collector: func() collector.Collector {
+				mockCollector := mockcollectorpkg.NewMockCollector(ctrl)
+				mockCollector.EXPECT().GetMetrics().Return(metrics, nil).AnyTimes()
+				return mockCollector
+			},
+			transformer: func() transformation.Transform {
+				mockTransformation := mocktransformation.NewMockTransform(ctrl)
+				mockTransformation.EXPECT().Process(gomock.Any(), gomock.Any())
+				return mockTransformation
+			},
+			assert: func(t *testing.T, recorder *httptest.ResponseRecorder) {
+				assert.Equal(t, http.StatusInternalServerError, recorder.Code)
+				assert.Equal(t, internalServerError, strings.TrimSpace(recorder.Body.String()))
+			},
+		},
+	}
+	// Each case builds its own registry, watch list and server; only the gomock controller is shared.
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			reg := registry.NewRegistry()
+			entityCollectorTuple := collector.EntityCollectorTuple{}
+			entityCollectorTuple.SetEntity(tt.group)
+			entityCollectorTuple.SetCollector(tt.collector())
+			reg.Register(entityCollectorTuple)
+
+			mockDeviceInfo := mockdeviceinfo.NewMockProvider(ctrl)
+			mockDeviceInfo.EXPECT().InfoType().Return(tt.group).AnyTimes()
+			mockDeviceInfo.EXPECT().GOpts().Return(appconfig.DeviceOptions{}).AnyTimes()
+
+			defaultDeviceWatchList := *devicewatchlistmanager.NewWatchList(
+				mockDeviceInfo,
+				[]dcgm.Short{42},
+				nil,
+				deviceWatcher,
+				1,
+			)
+
+			metricServer := &MetricsServer{
+				registry: reg,
+				deviceWatchListManager: func(group dcgm.Field_Entity_Group) devicewatchlistmanager.Manager {
+					mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+					mockDeviceWatchListManager.EXPECT().EntityWatchList(group).Return(defaultDeviceWatchList,
+						true).AnyTimes()
+					return mockDeviceWatchListManager
+				}(tt.group),
+				transformations: []transformation.Transform{
+					tt.transformer(),
+				},
+			}
+			// Metrics ignores its request argument, so nil is sufficient here.
+			recorder := httptest.NewRecorder()
+			metricServer.Metrics(recorder, nil)
+			if tt.assert != nil {
+				tt.assert(t, recorder)
+			}
+		})
+	}
+}
+
+// mockResponseWriter is a ResponseRecorder whose Write always fails, simulating a client that went away.
+type mockResponseWriter struct {
+	httptest.ResponseRecorder
+}
+
+func (m *mockResponseWriter) Write([]byte) (int, error) {
+	// Report zero bytes written and a "write tcp" OpError wrapping EPIPE (broken pipe).
+	return 0, &net.OpError{
+		Op:     "write",
+		Net:    "tcp",
+		Source: nil,
+		Addr:   nil,
+		Err:    syscall.EPIPE,
+	}
+}
+
+// TestMetricsReturnsErrorWhenClientClosedConnection checks that Metrics answers with
+// HTTP 500 when writing the response body fails, as it always does with mockResponseWriter.
+func TestMetricsReturnsErrorWhenClientClosedConnection(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	metrics := getMetricsByCounterWithTestMetric()
+	mockCollector := mockcollectorpkg.NewMockCollector(ctrl)
+	mockCollector.EXPECT().GetMetrics().Return(metrics, nil).AnyTimes()
+
+	reg := registry.NewRegistry()
+	entityCollectorTuple := collector.EntityCollectorTuple{}
+	entityCollectorTuple.SetEntity(dcgm.FE_GPU)
+	entityCollectorTuple.SetCollector(mockCollector)
+	reg.Register(entityCollectorTuple)
+
+	mockDeviceInfo := mockdeviceinfo.NewMockProvider(ctrl)
+	mockDeviceInfo.EXPECT().InfoType().Return(dcgm.FE_CPU).AnyTimes()
+	mockDeviceInfo.EXPECT().GOpts().Return(appconfig.DeviceOptions{}).AnyTimes()
+
+	defaultDeviceWatchList := *devicewatchlistmanager.NewWatchList(
+		mockDeviceInfo,
+		[]dcgm.Short{42},
+		nil,
+		deviceWatcher,
+		1,
+	)
+	// Only FE_CPU has a watch list; the FE_GPU group registered above presumably hits the Any() fallback and is not rendered.
+	metricServer := &MetricsServer{
+		registry: reg,
+		deviceWatchListManager: func() devicewatchlistmanager.Manager {
+			mockDeviceWatchListManager := mockdevicewatchlistmanager.NewMockManager(ctrl)
+			mockDeviceWatchListManager.EXPECT().EntityWatchList(dcgm.FE_CPU).Return(defaultDeviceWatchList,
+				true).AnyTimes()
+			mockDeviceWatchListManager.EXPECT().EntityWatchList(gomock.Any()).Return(devicewatchlistmanager.WatchList{},
+				false).AnyTimes()
+			return mockDeviceWatchListManager
+		}(),
+		transformations: []transformation.Transform{},
+	}
+	recorder := &mockResponseWriter{}
+	metricServer.Metrics(recorder, nil)
+	assert.Equal(t, http.StatusInternalServerError, recorder.Code)
+	assert.Nil(t, recorder.Body)
+}
+
+// TestHealthReturnsOK checks that the health endpoint answers with HTTP 200.
+func TestHealthReturnsOK(t *testing.T) {
+	recorder := httptest.NewRecorder()
+	(&MetricsServer{}).Health(recorder, nil)
+	assert.Equal(t, http.StatusOK, recorder.Code)
+}
+
+// TestHealthReturnsOKWhenWriteReturnsError expects HTTP 500, despite its name, when the body write fails.
+func TestHealthReturnsOKWhenWriteReturnsError(t *testing.T) {
+	recorder := &mockResponseWriter{}
+	(&MetricsServer{}).Health(recorder, nil)
+	assert.Equal(t, http.StatusInternalServerError, recorder.Code)
+}
diff --git a/internal/pkg/server/types.go b/internal/pkg/server/types.go
new file mode 100644
index 00000000..0c355992
--- /dev/null
+++ b/internal/pkg/server/types.go
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package server
+
+import (
+	"net/http"
+	"sync"
+
+	"github.com/prometheus/exporter-toolkit/web"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/registry"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/transformation"
+)
+
+// MetricsServer serves the exporter's landing page, health check and metrics over HTTP.
+type MetricsServer struct {
+	sync.Mutex
+	server                 *http.Server
+	webConfig              *web.FlagConfig
+	metrics                string
+	metricsChan            chan string
+	registry               *registry.Registry
+	config                 *appconfig.Config
+	transformations        []transformation.Transform
+	deviceWatchListManager devicewatchlistmanager.Manager
+}
diff --git a/pkg/stdout/capture.go b/internal/pkg/stdout/capture.go
similarity index 84%
rename from pkg/stdout/capture.go
rename to internal/pkg/stdout/capture.go
index d1854911..4817f0de 100644
--- a/pkg/stdout/capture.go
+++ b/internal/pkg/stdout/capture.go
@@ -19,13 +19,12 @@ package stdout
 import (
 	"bufio"
 	"context"
+	"log/slog"
 	"os"
 	"syscall"
-
-	"github.com/sirupsen/logrus"
 )
 
-// Capture go and C stdout and stderr and writes to logrus.StandardLogger
+// Capture go and C stdout and stderr and writes to std output
 func Capture(ctx context.Context, inner func() error) error {
 	stdout, err := syscall.Dup(syscall.Stdout)
 	if err != nil {
@@ -63,13 +62,13 @@ func Capture(ctx context.Context, inner func() error) error {
 			logEntry := scanner.Text()
 			parsedLogEntry := parseOutputEntry(logEntry)
 			if parsedLogEntry.IsRawString {
-				_, err := logrus.StandardLogger().Out.Write([]byte(parsedLogEntry.Message + "\n"))
+				_, err := os.Stdout.Write([]byte(parsedLogEntry.Message + "\n"))
 				if err != nil {
 					return
 				}
 				continue
 			}
-			logrus.WithField("dcgm_level", parsedLogEntry.Level).Info(parsedLogEntry.Message)
+			slog.LogAttrs(ctx, slog.LevelInfo, parsedLogEntry.Message, slog.String("dcgm_level", parsedLogEntry.Level))
 		}
 	}()
 
diff --git a/pkg/stdout/capture_test.go b/internal/pkg/stdout/capture_test.go
similarity index 76%
rename from pkg/stdout/capture_test.go
rename to internal/pkg/stdout/capture_test.go
index 3c1d9a0a..89a2a888 100644
--- a/pkg/stdout/capture_test.go
+++ b/internal/pkg/stdout/capture_test.go
@@ -20,11 +20,10 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"os"
 	"strings"
 	"testing"
-	"time"
 
-	"github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 )
 
@@ -61,19 +60,36 @@ func TestCapture(t *testing.T) {
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			ctx, cancel := context.WithCancel(context.Background())
+			// Create a buffer to capture stdout output
+			var buf bytes.Buffer
+
+			// Save the original stdout
+			stdout := os.Stdout
+
+			// Create a pipe to redirect stdout
+			r, w, err := os.Pipe()
+			assert.NoError(t, err)
 
-			buf := &bytes.Buffer{}
-			logrus.SetOutput(buf)
+			os.Stdout = w // Redirect stdout to the write end of the pipe
 
-			err := Capture(ctx, func() error {
+			ctx, cancel := context.WithCancel(context.Background())
+			err = Capture(ctx, func() error {
 				fmt.Println(tc.logMessage)
 				return nil
 			})
 
 			assert.NoError(t, err)
-			time.Sleep(1 * time.Millisecond)
-			tc.assert(t, buf.String())
+
+			// Close the write end of the pipe to allow reading all data
+			_ = w.Close()
+			os.Stdout = stdout // Restore original stdout
+
+			// Read from the pipe directly into the buffer
+			_, err = buf.ReadFrom(r)
+			assert.NoError(t, err)
+			if tc.assert != nil {
+				tc.assert(t, buf.String())
+			}
 			cancel()
 		})
 	}
diff --git a/pkg/stdout/capture_test_wrapper.go b/internal/pkg/stdout/capture_test_wrapper.go
similarity index 56%
rename from pkg/stdout/capture_test_wrapper.go
rename to internal/pkg/stdout/capture_test_wrapper.go
index 2d9b645c..8b8b76fa 100644
--- a/pkg/stdout/capture_test_wrapper.go
+++ b/internal/pkg/stdout/capture_test_wrapper.go
@@ -24,34 +24,52 @@ void printBoom() {
 }
 */
 import "C"
+
 import (
 	"bytes"
 	"context"
+	"os"
 	"strings"
 	"testing"
-	"time"
 
-	"github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
 func testCaptureWithCGO(t *testing.T) {
 	t.Helper()
+	// Create a buffer to capture stdout output
+	var buf bytes.Buffer
 
-	ctx, cancel := context.WithCancel(context.Background())
+	// Save the original stdout
+	stdout := os.Stdout
+
+	// Create a pipe to redirect stdout
+	r, w, err := os.Pipe()
+	assert.NoError(t, err)
+
+	os.Stdout = w // Redirect stdout to the write end of the pipe
 
-	buf := &bytes.Buffer{}
-	logrus.SetOutput(buf)
+	ctx, cancel := context.WithCancel(context.Background())
 
-	err := Capture(ctx, func() error {
+	err = Capture(ctx, func() error {
 		C.printBoom()
 		return nil
 	})
 	assert.NoError(t, err)
-
-	time.Sleep(10 * time.Millisecond)
+	// It takes a time before CGO flushes logs to the std output
+	// We need to wait until we start to receive the data
+	// Create temporary buffer to detect data
+	var tempBuf [1]byte
+	// Read from the pipe to ensure data is available
+	_, err = r.Read(tempBuf[:]) // Block until data is written
+	assert.NoError(t, err)
+	buf.Write(tempBuf[:]) // Start capturing the data
+	// Close the write end of the pipe to allow reading all data
+	_ = w.Close()
+	_, err = buf.ReadFrom(r) // Read the remaining data
+	assert.NoError(t, err)
 	require.Equal(t, "Boom", strings.TrimSpace(buf.String()))
-
+	os.Stdout = stdout // Restore original stdout
 	cancel()
 }
diff --git a/pkg/stdout/stdoutprocessor.go b/internal/pkg/stdout/stdoutprocessor.go
similarity index 100%
rename from pkg/stdout/stdoutprocessor.go
rename to internal/pkg/stdout/stdoutprocessor.go
diff --git a/internal/pkg/testutils/const.go b/internal/pkg/testutils/const.go
new file mode 100644
index 00000000..54ef7fd3
--- /dev/null
+++ b/internal/pkg/testutils/const.go
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package testutils
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
+
+var fakeProfileName = "2fake.4gb" // profile name shared by both mock GPU instances
+
+var (
+	MockGPUInstanceInfo1 = deviceinfo.GPUInstanceInfo{
+		Info:        dcgm.MigEntityInfo{GpuUuid: "fake", NvmlProfileSlices: 3},
+		ProfileName: fakeProfileName,
+		EntityId:    0,
+	}
+	// MockGPUInstanceInfo2 sits on the same "fake" GPU as MockGPUInstanceInfo1 but uses NVML instance 1 and entity 14.
+	MockGPUInstanceInfo2 = deviceinfo.GPUInstanceInfo{
+		Info:        dcgm.MigEntityInfo{GpuUuid: "fake", NvmlInstanceId: 1, NvmlProfileSlices: 3},
+		ProfileName: fakeProfileName,
+		EntityId:    14,
+	}
+	// MockNVLinkVal1 and MockNVLinkVal2 are NVLink statuses for link indexes 0 and 1.
+	MockNVLinkVal1 = dcgm.NvLinkStatus{
+		State: 2,
+		Index: 0,
+	}
+
+	MockNVLinkVal2 = dcgm.NvLinkStatus{
+		State: 3,
+		Index: 1,
+	}
+)
diff --git a/internal/pkg/testutils/test_utils.go b/internal/pkg/testutils/test_utils.go
new file mode 100644
index 00000000..782ac7b8
--- /dev/null
+++ b/internal/pkg/testutils/test_utils.go
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package testutils
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"os"
+	"reflect"
+	"runtime"
+	"testing"
+	"time"
+	"unsafe"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/mock/gomock"
+	"google.golang.org/grpc"
+	"k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
+
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
+
+// MockReader is an io.Reader stand-in (e.g. for rand.Reader) whose Read yields no data and returns Err.
+type MockReader struct {
+	Err error
+}
+
+func (r *MockReader) Read(_ []byte) (n int, err error) {
+	return 0, r.Err
+}
+
+// RequireLinux skips the calling test unless it is running on Linux.
+func RequireLinux(t *testing.T) {
+	t.Helper()
+	if goos := runtime.GOOS; goos != "linux" {
+		t.Skipf("Test is not supported on %q", goos)
+	}
+}
+
+// MockGPUDeviceInfo returns a provider mock that reports gpuCount GPUs with IDs 0..gpuCount-1, attaching the
+// GPU instances listed for each index in gpuToGpuInstanceInfos. Its InfoType always yields dcgm.FE_NONE.
+func MockGPUDeviceInfo(
+	ctrl *gomock.Controller, gpuCount int, gpuToGpuInstanceInfos map[int][]deviceinfo.GPUInstanceInfo,
+) *mockdeviceinfo.MockProvider {
+	mockSystemInfo := mockdeviceinfo.NewMockProvider(ctrl)
+	mockGPUs := make([]deviceinfo.GPUInfo, 0)
+
+	for i := range gpuCount {
+		gpuInfo := deviceinfo.GPUInfo{}
+		gpuInfo.DeviceInfo.GPU = uint(i)
+		if gpuInstanceInfos, exist := gpuToGpuInstanceInfos[i]; exist {
+			gpuInfo.GPUInstances = gpuInstanceInfos
+		}
+
+		mockGPUs = append(mockGPUs, gpuInfo)
+		mockSystemInfo.EXPECT().GPU(uint(i)).Return(gpuInfo).AnyTimes()
+	}
+
+	mockSystemInfo.EXPECT().GPUCount().Return(uint(gpuCount)).AnyTimes()
+	mockSystemInfo.EXPECT().GPUs().Return(mockGPUs).AnyTimes()
+	mockSystemInfo.EXPECT().InfoType().Return(dcgm.FE_NONE).AnyTimes()
+
+	return mockSystemInfo
+}
+
+// MockCPUDeviceInfo returns a provider mock that reports cpuCount CPUs with entity IDs 0..cpuCount-1 and the
+// cores listed per CPU index in cpuToCores. IsCPUWatched answers from watchedCPUs, IsCoreWatched from
+// watchedCores keyed by {CPU ID, core ID}, and InfoType returns infoType.
+func MockCPUDeviceInfo(
+	ctrl *gomock.Controller, cpuCount int, cpuToCores map[int][]uint, watchedCPUs map[uint]bool,
+	watchedCores map[WatchedEntityKey]bool, infoType dcgm.Field_Entity_Group,
+) *mockdeviceinfo.MockProvider {
+	mockSystemInfo := mockdeviceinfo.NewMockProvider(ctrl)
+	mockCPUs := make([]deviceinfo.CPUInfo, 0)
+
+	for i := range cpuCount {
+		cpuInfo := deviceinfo.CPUInfo{}
+		cpuInfo.EntityId = uint(i)
+		if cores, exist := cpuToCores[i]; exist {
+			cpuInfo.Cores = []uint{}
+			for _, core := range cores {
+				cpuInfo.Cores = append(cpuInfo.Cores, core)
+
+				mockSystemInfo.EXPECT().IsCoreWatched(core,
+					uint(i)).Return(watchedCores[WatchedEntityKey{uint(i), core}]).AnyTimes()
+			}
+		}
+
+		mockSystemInfo.EXPECT().IsCPUWatched(cpuInfo.EntityId).Return(watchedCPUs[cpuInfo.EntityId]).AnyTimes()
+		mockSystemInfo.EXPECT().CPU(uint(i)).Return(cpuInfo).AnyTimes()
+
+		mockCPUs = append(mockCPUs, cpuInfo)
+	}
+
+	mockSystemInfo.EXPECT().CPUs().Return(mockCPUs).AnyTimes()
+	mockSystemInfo.EXPECT().InfoType().Return(infoType).AnyTimes()
+
+	return mockSystemInfo
+}
+
+// MockSwitchDeviceInfo returns a provider mock that reports switchCount switches with entity IDs
+// 0..switchCount-1 and the NVLinks listed per switch index, each re-parented to its switch. IsSwitchWatched
+// answers from watchedSwitches, IsLinkWatched from watchedLinks keyed by {switch ID, link index}.
+func MockSwitchDeviceInfo(
+	ctrl *gomock.Controller, switchCount int, switchToNvLinks map[int][]dcgm.NvLinkStatus,
+	watchedSwitches map[uint]bool, watchedLinks map[WatchedEntityKey]bool, infoType dcgm.Field_Entity_Group,
+) *mockdeviceinfo.MockProvider {
+	mockSystemInfo := mockdeviceinfo.NewMockProvider(ctrl)
+	mockSwitches := make([]deviceinfo.SwitchInfo, 0)
+
+	for i := range switchCount {
+		switchInfo := deviceinfo.SwitchInfo{}
+		switchInfo.EntityId = uint(i)
+		if nvLinks, exist := switchToNvLinks[i]; exist {
+			switchInfo.NvLinks = []dcgm.NvLinkStatus{}
+			for _, nvLink := range nvLinks {
+				nvLink.ParentId = uint(i)
+				nvLink.ParentType = dcgm.FE_SWITCH
+				switchInfo.NvLinks = append(switchInfo.NvLinks, nvLink)
+
+				mockSystemInfo.EXPECT().IsLinkWatched(nvLink.Index,
+					uint(i)).Return(watchedLinks[WatchedEntityKey{uint(i), nvLink.Index}]).AnyTimes()
+			}
+		}
+
+		mockSystemInfo.EXPECT().IsSwitchWatched(switchInfo.EntityId).Return(watchedSwitches[switchInfo.EntityId]).AnyTimes()
+		mockSystemInfo.EXPECT().Switch(uint(i)).Return(switchInfo).AnyTimes()
+
+		mockSwitches = append(mockSwitches, switchInfo)
+	}
+
+	mockSystemInfo.EXPECT().Switches().Return(mockSwitches).AnyTimes()
+	mockSystemInfo.EXPECT().InfoType().Return(infoType).AnyTimes()
+
+	return mockSystemInfo
+}
+
+// GetStructPrivateFieldValue returns the field fieldName, exported or not, of the
+// struct v (or of the struct v points to), reinterpreted as T; T must be the field's
+// exact type. A non-struct v or an unknown field is reported on t and yields zero T.
+func GetStructPrivateFieldValue[T any](t *testing.T, v any, fieldName string) T {
+	t.Helper()
+	var result T
+	value := reflect.ValueOf(v)
+	if value.Kind() == reflect.Ptr {
+		value = value.Elem()
+	}
+	if value.Kind() != reflect.Struct {
+		t.Errorf("The type %T is not struct", v) // %T also works for nil, where value.Type() panics
+		return result
+	}
+	if !value.CanAddr() {
+		// A struct passed by value is not addressable and UnsafeAddr would panic; use a copy.
+		addressable := reflect.New(value.Type()).Elem()
+		addressable.Set(value)
+		value = addressable
+	}
+	fieldVal := value.FieldByName(fieldName)
+	if !fieldVal.IsValid() {
+		t.Errorf("The field %s is invalid for the %s type", fieldName, value.Type())
+		return result
+	}
+	// Reinterpret the field's memory as T, which bypasses the export check.
+	return *(*T)(unsafe.Pointer(fieldVal.UnsafeAddr()))
+}
+
+// CreateTmpDir creates a "dcgm-exporter*" directory under the default temp
+// location and returns its path with a callback that removes it again.
+func CreateTmpDir(t *testing.T) (string, func()) {
+	t.Helper()
+	path, err := os.MkdirTemp("", "dcgm-exporter")
+	require.NoError(t, err)
+	return path, func() { require.NoError(t, os.RemoveAll(path)) }
+}
+
+// MockPodResourcesServer is a stub pod-resources lister: List reports one pod
+// per configured GPU ID, each owning that single device under resourceName.
+type MockPodResourcesServer struct {
+	resourceName string
+	gpus         []string
+}
+
+// NewMockPodResourcesServer returns a server that advertises gpus under resourceName.
+func NewMockPodResourcesServer(resourceName string, gpus []string) *MockPodResourcesServer {
+	return &MockPodResourcesServer{resourceName: resourceName, gpus: gpus}
+}
+
+// List ignores its arguments and returns one "gpu-pod-<index>" pod in the
+// "default" namespace per GPU ID; it never returns an error.
+func (s *MockPodResourcesServer) List(
+	ctx context.Context, req *v1alpha1.ListPodResourcesRequest,
+) (*v1alpha1.ListPodResourcesResponse, error) {
+	pods := make([]*v1alpha1.PodResources, 0, len(s.gpus))
+
+	for idx, deviceID := range s.gpus {
+		devices := &v1alpha1.ContainerDevices{
+			ResourceName: s.resourceName,
+			DeviceIds:    []string{deviceID},
+		}
+		container := &v1alpha1.ContainerResources{
+			Name:    "default",
+			Devices: []*v1alpha1.ContainerDevices{devices},
+		}
+		pods = append(pods, &v1alpha1.PodResources{
+			Name:       fmt.Sprintf("gpu-pod-%d", idx),
+			Namespace:  "default",
+			Containers: []*v1alpha1.ContainerResources{container},
+		})
+	}
+
+	return &v1alpha1.ListPodResourcesResponse{
+		PodResources: pods,
+	}, nil
+}
+
+// StartMockServer serves server on the unix socket at path socket from a background goroutine. The returned
+// func calls server.Stop and fails the test when Serve has not returned within one second.
+func StartMockServer(t *testing.T, server *grpc.Server, socket string) func() {
+	l, err := net.Listen("unix", socket)
+	require.NoError(t, err)
+	stopped := make(chan interface{})
+	go func() {
+		err := server.Serve(l)
+		assert.NoError(t, err)
+		close(stopped)
+	}()
+
+	return func() {
+		server.Stop()
+		select {
+		case <-stopped:
+			return
+		case <-time.After(1 * time.Second):
+			t.Fatal("Failed waiting for gRPC server to stop.")
+		}
+	}
+}
+
+type FieldType int // selects which struct fields GetFields returns: Fields (non-func), Functions (func-typed) or All
+
+const (
+	Fields FieldType = iota
+	Functions
+	All
+)
+
+// GetFields returns the fields of the struct input (or of the struct input points
+// to) keyed by field name, unexported fields included. fieldType selects non-func
+// fields, func-typed fields, or all of them; a non-struct input yields an empty map.
+func GetFields(input interface{}, fieldType FieldType) map[string]interface{} {
+	result := make(map[string]interface{})
+	val := reflect.ValueOf(input)
+	if val.Kind() == reflect.Ptr {
+		val = val.Elem()
+	}
+	if val.Kind() != reflect.Struct {
+		return result
+	}
+	if !val.CanAddr() {
+		// A struct passed by value is not addressable, so UnsafeAddr below would
+		// panic on its unexported fields; read from an addressable copy instead.
+		addressable := reflect.New(val.Type()).Elem()
+		addressable.Set(val)
+		val = addressable
+	}
+	typ := val.Type()
+	for i := range val.NumField() {
+		field := val.Field(i)
+		// Determine if the field should be included based on the specified field type
+		includeField := false
+		switch fieldType {
+		case Fields:
+			includeField = field.Kind() != reflect.Func
+		case Functions:
+			includeField = field.Kind() == reflect.Func
+		case All:
+			includeField = true
+		}
+		if !includeField {
+			continue
+		}
+
+		// Access unexported fields
+		if !field.CanInterface() {
+			field = reflect.NewAt(field.Type(), unsafe.Pointer(field.UnsafeAddr())).Elem()
+		}
+		result[typ.Field(i).Name] = field.Interface()
+	}
+	return result
+}
+
+func StrToByteArray(str string) [4096]byte { // copies str into a zeroed array; bytes past the 4096th are dropped
+	var byteArray [4096]byte
+	copy(byteArray[:], str)
+	return byteArray
+}
diff --git a/internal/pkg/testutils/testutils.go b/internal/pkg/testutils/testutils.go
deleted file mode 100644
index 8ed485dd..00000000
--- a/internal/pkg/testutils/testutils.go
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package testutils
-
-import (
-	"reflect"
-	"runtime"
-	"testing"
-	"unsafe"
-)
-
-// RequireLinux checks if
-func RequireLinux(t *testing.T) {
-	t.Helper()
-	if runtime.GOOS != "linux" {
-		t.Skipf("Test is not supported on %q", runtime.GOOS)
-	}
-}
-
-// GetStructPrivateFieldValue returns private field value
-func GetStructPrivateFieldValue[T any](t *testing.T, v any, fieldName string) T {
-	t.Helper()
-	var result T
-	value := reflect.ValueOf(v)
-	if value.Kind() == reflect.Ptr {
-		value = value.Elem()
-	}
-
-	if value.Kind() != reflect.Struct {
-		t.Errorf("The type %s is not stuct", value.Type())
-		return result
-	}
-
-	fieldVal := value.FieldByName(fieldName)
-
-	if !fieldVal.IsValid() {
-		t.Errorf("The field %s is invalid for the %s type", fieldName, value.Type())
-		return result
-	}
-
-	fieldPtr := unsafe.Pointer(fieldVal.UnsafeAddr())
-
-	// Cast the field pointer to a pointer of the correct type
-	realPtr := (*T)(fieldPtr)
-
-	return *realPtr
-}
diff --git a/internal/pkg/testutils/types.go b/internal/pkg/testutils/types.go
new file mode 100644
index 00000000..d3c0be1d
--- /dev/null
+++ b/internal/pkg/testutils/types.go
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package testutils
+
+type WatchedEntityKey struct {
+	ParentID uint // NOTE(review): presumably the ID of the parent entity — confirm against callers
+	ChildID  uint // NOTE(review): presumably the ID of the child entity under ParentID — confirm
+}
diff --git a/internal/pkg/testutils/variables.go b/internal/pkg/testutils/variables.go
new file mode 100644
index 00000000..95106fa0
--- /dev/null
+++ b/internal/pkg/testutils/variables.go
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package testutils
+
+import (
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
+)
+
+var os osinterface.OS = osinterface.RealOS{}
+
+var (
+	SampleGPUTempCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_GPU_TEMP,
+		FieldName: "DCGM_FI_DEV_GPU_TEMP",
+		PromType:  "gauge",
+		Help:      "Temperature Help info",
+	}
+
+	SampleGPUTotalEnergyCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION,
+		FieldName: "DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION",
+		PromType:  "gauge",
+		Help:      "Energy help info",
+	}
+
+	SampleGPUPowerUsageCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_POWER_USAGE,
+		FieldName: "DCGM_FI_DEV_POWER_USAGE",
+		PromType:  "gauge",
+		Help:      "Power help info",
+	}
+
+	SampleVGPULicenseStatusCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_VGPU_LICENSE_STATUS,
+		FieldName: "DCGM_FI_DEV_VGPU_LICENSE_STATUS",
+		PromType:  "gauge",
+		Help:      "vgpu license status",
+	}
+
+	SampleDriverVersionCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DRIVER_VERSION,
+		FieldName: "DCGM_FI_DRIVER_VERSION",
+		PromType:  "label",
+		Help:      "Driver version",
+	}
+
+	SampleSwitchCurrentTempCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_NVSWITCH_TEMPERATURE_CURRENT,
+		FieldName: "DCGM_FI_DEV_NVSWITCH_TEMPERATURE_CURRENT",
+		PromType:  "gauge",
+		Help:      "switch temperature",
+	}
+
+	SampleSwitchLinkFlitErrorsCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_NVSWITCH_LINK_FLIT_ERRORS,
+		FieldName: "DCGM_FI_DEV_NVSWITCH_LINK_FLIT_ERRORS",
+		PromType:  "gauge",
+		Help:      "per-link flit errors",
+	}
+
+	SampleCPUUtilTotalCounter = counters.Counter{
+		FieldID:   dcgm.DCGM_FI_DEV_CPU_UTIL_TOTAL,
+		FieldName: "DCGM_FI_DEV_CPU_UTIL_TOTAL",
+		PromType:  "gauge",
+		Help:      "Total CPU utilization",
+	}
+
+	SampleCounters = []counters.Counter{
+		SampleGPUTempCounter,
+		SampleGPUTotalEnergyCounter,
+		SampleGPUPowerUsageCounter,
+		SampleDriverVersionCounter,
+		// Switch and link metrics: used to test that they are filtered out automatically when devices are not detected.
+		SampleSwitchCurrentTempCounter,
+		SampleSwitchLinkFlitErrorsCounter,
+		// vGPU metric: used to test that vGPU metrics are not filtered out.
+		SampleVGPULicenseStatusCounter,
+		// CPU metric: used to test that CPU and CPU core metrics are filtered out automatically when devices are not detected.
+		SampleCPUUtilTotalCounter,
+	}
+
+	SampleAllFieldIDs = []dcgm.Short{
+		SampleGPUTempCounter.FieldID, SampleGPUTotalEnergyCounter.FieldID,
+		SampleGPUPowerUsageCounter.FieldID, SampleVGPULicenseStatusCounter.FieldID,
+		SampleDriverVersionCounter.FieldID, SampleSwitchCurrentTempCounter.FieldID,
+		SampleSwitchLinkFlitErrorsCounter.FieldID, SampleCPUUtilTotalCounter.FieldID,
+	}
+
+	SampleGPUFieldIDs = []dcgm.Short{
+		SampleGPUTempCounter.FieldID, SampleGPUTotalEnergyCounter.FieldID,
+		SampleGPUPowerUsageCounter.FieldID, SampleVGPULicenseStatusCounter.FieldID,
+	}
+
+	SampleFieldIDToFieldMeta = map[dcgm.Short]dcgm.FieldMeta{
+		SampleGPUTempCounter.FieldID: {
+			FieldId:     SampleGPUTempCounter.FieldID,
+			EntityLevel: dcgm.FE_GPU,
+		},
+		SampleGPUTotalEnergyCounter.FieldID: {
+			FieldId:     SampleGPUTotalEnergyCounter.FieldID,
+			EntityLevel: dcgm.FE_GPU,
+		},
+		SampleGPUPowerUsageCounter.FieldID: {
+			FieldId:     SampleGPUPowerUsageCounter.FieldID,
+			EntityLevel: dcgm.FE_GPU_I,
+		},
+		SampleVGPULicenseStatusCounter.FieldID: {
+			FieldId:     SampleVGPULicenseStatusCounter.FieldID,
+			EntityLevel: dcgm.FE_VGPU,
+		},
+		SampleDriverVersionCounter.FieldID: {
+			FieldId:     SampleDriverVersionCounter.FieldID,
+			EntityLevel: dcgm.FE_NONE,
+		},
+		SampleSwitchCurrentTempCounter.FieldID: {
+			FieldId:     SampleSwitchCurrentTempCounter.FieldID,
+			EntityLevel: dcgm.FE_SWITCH,
+		},
+		SampleSwitchLinkFlitErrorsCounter.FieldID: {
+			FieldId:     SampleSwitchLinkFlitErrorsCounter.FieldID,
+			EntityLevel: dcgm.FE_LINK,
+		},
+		SampleCPUUtilTotalCounter.FieldID: {
+			FieldId:     SampleCPUUtilTotalCounter.FieldID,
+			EntityLevel: dcgm.FE_CPU_CORE,
+		},
+	}
+)
diff --git a/internal/pkg/transformation/const.go b/internal/pkg/transformation/const.go
new file mode 100644
index 00000000..3461918d
--- /dev/null
+++ b/internal/pkg/transformation/const.go
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package transformation
+
+const (
+	// Standard resource attributes, used as keys in a metric's Attributes map.
+	podAttribute       = "pod"
+	namespaceAttribute = "namespace"
+	containerAttribute = "container"
+
+	hpcJobAttribute = "hpc_job"
+
+	oldPodAttribute       = "pod_name"
+	oldNamespaceAttribute = "pod_namespace"
+	oldContainerAttribute = "container_name"
+)
diff --git a/pkg/dcgmexporter/hpc.go b/internal/pkg/transformation/hpc.go
similarity index 62%
rename from pkg/dcgmexporter/hpc.go
rename to internal/pkg/transformation/hpc.go
index e360b096..08b6bea7 100644
--- a/pkg/dcgmexporter/hpc.go
+++ b/internal/pkg/transformation/hpc.go
@@ -14,23 +14,29 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package transformation
 
 import (
 	"bufio"
+	"fmt"
+	"log/slog"
 	sysOS "os"
 	"path"
 	"strconv"
 
-	"github.com/sirupsen/logrus"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/logging"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/utils"
 )
 
 type hpcMapper struct {
-	Config *Config
+	Config *appconfig.Config
 }
 
-func newHPCMapper(c *Config) *hpcMapper {
-	logrus.Infof("HPC job mapping is enabled and watch for the %q directory", c.HPCJobMappingDir)
+func newHPCMapper(c *appconfig.Config) *hpcMapper {
+	slog.Info(fmt.Sprintf("HPC job mapping is enabled and watch for the %q directory", c.HPCJobMappingDir))
 	return &hpcMapper{
 		Config: c,
 	}
@@ -40,10 +46,11 @@ func (p *hpcMapper) Name() string {
 	return "hpcMapper"
 }
 
-func (p *hpcMapper) Process(metrics MetricsByCounter, sysInfo SystemInfo) error {
+func (p *hpcMapper) Process(metrics collector.MetricsByCounter, _ deviceinfo.Provider) error {
 	_, err := os.Stat(p.Config.HPCJobMappingDir)
 	if err != nil {
-		logrus.WithError(err).Warnf("Unable to access HPC job mapping file directory '%s' - directory not found. Ignoring.", p.Config.HPCJobMappingDir)
+		slog.Error(fmt.Sprintf("Unable to access HPC job mapping file directory '%s' - directory not found. Ignoring.",
+			p.Config.HPCJobMappingDir), slog.String(logging.ErrorKey, err.Error()))
 		return nil
 	}
 
@@ -54,7 +61,7 @@ func (p *hpcMapper) Process(metrics MetricsByCounter, sysInfo SystemInfo) error
 
 	gpuToJobMap := make(map[string][]string)
 
-	logrus.Debugf("HPC job mapping files: %#v", gpuFiles)
+	slog.Debug(fmt.Sprintf("HPC job mapping files: %#v", gpuFiles))
 
 	for _, gpuFileName := range gpuFiles {
 		jobs, err := readFile(path.Join(p.Config.HPCJobMappingDir, gpuFileName))
@@ -68,17 +75,18 @@ func (p *hpcMapper) Process(metrics MetricsByCounter, sysInfo SystemInfo) error
 		gpuToJobMap[gpuFileName] = append(gpuToJobMap[gpuFileName], jobs...)
 	}
 
-	logrus.Debugf("GPU to job mapping: %+v", gpuToJobMap)
+	slog.Debug(fmt.Sprintf("GPU to job mapping: %+v", gpuToJobMap))
 
 	for counter := range metrics {
-		var modifiedMetrics []Metric
+		var modifiedMetrics []collector.Metric
 		for _, metric := range metrics[counter] {
 			jobs, exists := gpuToJobMap[metric.GPU]
 			if exists {
 				for _, job := range jobs {
-					modifiedMetric, err := deepCopy(metric)
+					modifiedMetric, err := utils.DeepCopy(metric)
 					if err != nil {
-						logrus.WithError(err).Errorf("Can not create deepCopy for the value: %v", metric)
+						slog.Error(fmt.Sprintf("Can not create deepCopy for the value: %v", metric),
+							slog.String(logging.ErrorKey, err.Error()))
 						continue
 					}
 					modifiedMetric.Attributes[hpcJobAttribute] = job
@@ -104,7 +112,8 @@ func readFile(path string) ([]string, error) {
 	defer func(file *sysOS.File) {
 		err := file.Close()
 		if err != nil {
-			logrus.WithError(err).Errorf("Failed for close the file: %s", file.Name())
+			slog.Error(fmt.Sprintf("Failed for close the file: %s", file.Name()),
+				slog.String(logging.ErrorKey, err.Error()))
 		}
 	}(file)
 
@@ -130,25 +139,25 @@ func getGPUFiles(dirPath string) ([]string, error) {
 		return nil, err
 	}
 
-	logrus.Debugf("hpc mapper: %d files in the %q found", len(files), dirPath)
+	slog.Debug(fmt.Sprintf("hpc mapper: %d files in the %q found", len(files), dirPath))
 
 	var mappingFiles []string
 
 	for _, file := range files {
 		finfo, err := file.Info()
 		if err != nil {
-			logrus.Warnf("HPC mapper: can not get file info for the %s file.", file.Name())
+			slog.Warn(fmt.Sprintf("HPC mapper: can not get file info for the %s file.", file.Name()))
 			continue // Skip files that we can't read
 		}
 
 		if finfo.IsDir() {
-			logrus.Debugf("HPC mapper: the %q file is directory", file.Name())
+			slog.Debug(fmt.Sprintf("HPC mapper: the %q file is directory", file.Name()))
 			continue // Skip directories
 		}
 
 		_, err = strconv.Atoi(file.Name())
 		if err != nil {
-			logrus.Debugf("HPC mapper: file %q name doesn't match with GPU ID convention", file.Name())
+			slog.Debug(fmt.Sprintf("HPC mapper: file %q name doesn't match with GPU ID convention", file.Name()))
 			continue
 		}
 		mappingFiles = append(mappingFiles, file.Name())
diff --git a/pkg/dcgmexporter/hpc_test.go b/internal/pkg/transformation/hpc_test.go
similarity index 77%
rename from pkg/dcgmexporter/hpc_test.go
rename to internal/pkg/transformation/hpc_test.go
index 8b834955..0cb3730e 100644
--- a/pkg/dcgmexporter/hpc_test.go
+++ b/internal/pkg/transformation/hpc_test.go
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package dcgmexporter
+package transformation
 
 import (
 	"cmp"
@@ -24,13 +24,15 @@ import (
 	"slices"
 	"testing"
 
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
 	"github.com/google/uuid"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"go.uber.org/mock/gomock"
 
-	osmock "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/os"
+	mockos "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/os"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
 	osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
 )
 
@@ -39,39 +41,39 @@ func TestHPCProcess(t *testing.T) {
 
 	tests := []struct {
 		name      string
-		config    *Config
+		config    *appconfig.Config
 		fsState   func() func()
-		assertion func(*testing.T, MetricsByCounter)
+		assertion func(*testing.T, collector.MetricsByCounter)
 		wantErr   assert.ErrorAssertionFunc
 	}{
 		{
 			name:   "When all GPU have job files",
-			config: &Config{HPCJobMappingDir: "/var/run/nvidia/slurm"},
+			config: &appconfig.Config{HPCJobMappingDir: "/var/run/nvidia/slurm"},
 			fsState: func() func() {
 				ctrl := gomock.NewController(t)
-				mOS := osmock.NewMockOS(ctrl)
-				mFileInfoGPU0 := osmock.NewMockFileInfo(ctrl)
+				mOS := mockos.NewMockOS(ctrl)
+				mFileInfoGPU0 := mockos.NewMockFileInfo(ctrl)
 				mFileInfoGPU0.EXPECT().IsDir().Return(false).AnyTimes()
 
-				mDirEntryGPU0 := osmock.NewMockDirEntry(ctrl)
+				mDirEntryGPU0 := mockos.NewMockDirEntry(ctrl)
 				mDirEntryGPU0.EXPECT().Info().Return(mFileInfoGPU0, nil).AnyTimes()
 				mDirEntryGPU0.EXPECT().Name().Return("0").AnyTimes()
 
-				mFileInfoGPU1 := osmock.NewMockFileInfo(ctrl)
+				mFileInfoGPU1 := mockos.NewMockFileInfo(ctrl)
 				mFileInfoGPU1.EXPECT().IsDir().Return(false).AnyTimes()
 
-				mDirEntryGPU1 := osmock.NewMockDirEntry(ctrl)
+				mDirEntryGPU1 := mockos.NewMockDirEntry(ctrl)
 				mDirEntryGPU1.EXPECT().Info().Return(mFileInfoGPU1, nil).AnyTimes()
 				mDirEntryGPU1.EXPECT().Name().Return("1").AnyTimes()
 
-				mFileInfoDir := osmock.NewMockFileInfo(ctrl)
+				mFileInfoDir := mockos.NewMockFileInfo(ctrl)
 				mFileInfoDir.EXPECT().IsDir().Return(true).AnyTimes()
 
-				mDirEntryDir := osmock.NewMockDirEntry(ctrl)
+				mDirEntryDir := mockos.NewMockDirEntry(ctrl)
 				mDirEntryDir.EXPECT().Info().Return(mFileInfoDir, nil).AnyTimes()
 				mDirEntryDir.EXPECT().Name().Return("iamdir").AnyTimes()
 
-				mDirEntryDamagedFile := osmock.NewMockDirEntry(ctrl)
+				mDirEntryDamagedFile := mockos.NewMockDirEntry(ctrl)
 				mDirEntryDamagedFile.EXPECT().Info().Return(nil, errors.New("boom")).AnyTimes()
 				mDirEntryDamagedFile.EXPECT().Name().Return("iamerror").AnyTimes()
 
@@ -107,13 +109,13 @@ func TestHPCProcess(t *testing.T) {
 					_ = realOS.Remove(slurm1.Name())
 				}
 			},
-			assertion: func(t *testing.T, mbc MetricsByCounter) {
+			assertion: func(t *testing.T, mbc collector.MetricsByCounter) {
 				require.Len(t, mbc, 1, "metrics are expected for a single counter only.")
 				// We get metric value with 0 index
-				metricValues := mbc[reflect.ValueOf(mbc).MapKeys()[0].Interface().(Counter)]
+				metricValues := mbc[reflect.ValueOf(mbc).MapKeys()[0].Interface().(counters.Counter)]
 				require.Len(t, metricValues, 4, "received unexpected number of metric values.")
 				// Sort metrics by GPU ID
-				slices.SortFunc(metricValues, func(a, b Metric) int {
+				slices.SortFunc(metricValues, func(a, b collector.Metric) int {
 					return cmp.Compare(a.GPU, b.GPU)
 				})
 				assert.Equal(t, "0", metricValues[0].GPU)
@@ -141,20 +143,20 @@ func TestHPCProcess(t *testing.T) {
 				defer cleanup()
 			}
 
-			metrics := MetricsByCounter{}
-			counter := Counter{
+			metrics := collector.MetricsByCounter{}
+			counter := counters.Counter{
 				FieldID:   155,
 				FieldName: "DCGM_FI_DEV_POWER_USAGE",
 				PromType:  "gauge",
 			}
 
-			metrics[counter] = append(metrics[counter], Metric{
+			metrics[counter] = append(metrics[counter], collector.Metric{
 				GPU:           "0",
 				GPUUUID:       uuid.New().String(),
 				GPUDevice:     "nvidia0",
 				GPUInstanceID: "",
 				Value:         "42",
-				Counter: Counter{
+				Counter: counters.Counter{
 					FieldID:   155,
 					FieldName: "DCGM_FI_DEV_POWER_USAGE",
 					PromType:  "gauge",
@@ -162,13 +164,13 @@ func TestHPCProcess(t *testing.T) {
 				Attributes: map[string]string{},
 			})
 
-			metrics[counter] = append(metrics[counter], Metric{
+			metrics[counter] = append(metrics[counter], collector.Metric{
 				GPU:           "1",
 				GPUUUID:       uuid.New().String(),
 				GPUDevice:     "nvidia1",
 				GPUInstanceID: "1",
 				Value:         "451",
-				Counter: Counter{
+				Counter: counters.Counter{
 					FieldID:   155,
 					FieldName: "DCGM_FI_DEV_POWER_USAGE",
 					PromType:  "gauge",
@@ -176,13 +178,13 @@ func TestHPCProcess(t *testing.T) {
 				Attributes: map[string]string{},
 			})
 
-			metrics[counter] = append(metrics[counter], Metric{
+			metrics[counter] = append(metrics[counter], collector.Metric{
 				GPU:           "2",
 				GPUUUID:       uuid.New().String(),
 				GPUDevice:     "nvidia3",
 				GPUInstanceID: "2",
 				Value:         "1984",
-				Counter: Counter{
+				Counter: counters.Counter{
 					FieldID:   155,
 					FieldName: "DCGM_FI_DEV_POWER_USAGE",
 					PromType:  "gauge",
@@ -190,26 +192,9 @@ func TestHPCProcess(t *testing.T) {
 				Attributes: map[string]string{},
 			})
 
-			sysInfo := SystemInfo{
-				GPUCount: 2,
-				GPUs: [dcgm.MAX_NUM_DEVICES]GPUInfo{
-					{
-						DeviceInfo: dcgm.Device{
-							UUID: "00000000-0000-0000-0000-000000000000",
-							GPU:  0,
-						},
-					},
-					{
-						DeviceInfo: dcgm.Device{
-							UUID: "00000000-0000-0000-0000-000000000001",
-							GPU:  1,
-						},
-					},
-				},
-			}
 			mapper := newHPCMapper(tt.config)
-			err := mapper.Process(metrics, sysInfo)
-			if tt.wantErr != nil && !tt.wantErr(t, err, fmt.Sprintf("hpcMapper.Process(%v,%v)", metrics, sysInfo)) {
+			err := mapper.Process(metrics, nil)
+			if tt.wantErr != nil && !tt.wantErr(t, err, fmt.Sprintf("hpcMapper.Process(%v,%v)", metrics, nil)) {
 				return
 			}
 			tt.assertion(t, metrics)
@@ -218,5 +203,5 @@ func TestHPCProcess(t *testing.T) {
 }
 
 func TestHPCName(t *testing.T) {
-	assert.Equal(t, "hpcMapper", newHPCMapper(&Config{}).Name())
+	assert.Equal(t, "hpcMapper", newHPCMapper(&appconfig.Config{}).Name())
 }
diff --git a/pkg/dcgmexporter/kubernetes.go b/internal/pkg/transformation/kubernetes.go
similarity index 75%
rename from pkg/dcgmexporter/kubernetes.go
rename to internal/pkg/transformation/kubernetes.go
index 8fb8d7d2..1121023e 100644
--- a/pkg/dcgmexporter/kubernetes.go
+++ b/internal/pkg/transformation/kubernetes.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,22 +14,27 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package transformation
 
 import (
 	"context"
 	"fmt"
+	"log/slog"
 	"net"
 	"regexp"
 	"slices"
 	"strings"
 	"time"
 
-	"github.com/sirupsen/logrus"
+	"google.golang.org/grpc/resolver"
+
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials/insecure"
 	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
 
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
 	"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
 )
 
@@ -38,26 +43,25 @@ var (
 
 	gkeMigDeviceIDRegex            = regexp.MustCompile(`^nvidia([0-9]+)/gi([0-9]+)$`)
 	gkeVirtualGPUDeviceIDSeparator = "/vgpu"
-	nvmlGetMIGDeviceInfoByIDHook   = nvmlprovider.GetMIGDeviceInfoByID
 )
 
-func NewPodMapper(c *Config) (*PodMapper, error) {
-	logrus.Infof("Kubernetes metrics collection enabled!")
+func NewPodMapper(c *appconfig.Config) *PodMapper {
+	slog.Info("Kubernetes metrics collection enabled!")
 
 	return &PodMapper{
 		Config: c,
-	}, nil
+	}
 }
 
 func (p *PodMapper) Name() string {
 	return "podMapper"
 }
 
-func (p *PodMapper) Process(metrics MetricsByCounter, sysInfo SystemInfo) error {
+func (p *PodMapper) Process(metrics collector.MetricsByCounter, deviceInfo deviceinfo.Provider) error {
 	socketPath := p.Config.PodResourcesKubeletSocket
 	_, err := os.Stat(socketPath)
 	if os.IsNotExist(err) {
-		logrus.Info("No Kubelet socket, ignoring")
+		slog.Info("No Kubelet socket, ignoring")
 		return nil
 	}
 
@@ -73,15 +77,17 @@ func (p *PodMapper) Process(metrics MetricsByCounter, sysInfo SystemInfo) error
 		return err
 	}
 
-	deviceToPod := p.toDeviceToPod(pods, sysInfo)
+	slog.Debug(fmt.Sprintf("Podresources API response: %+v", pods))
+
+	deviceToPod := p.toDeviceToPod(pods, deviceInfo)
 
-	logrus.Debugf("Device to pod mapping: %+v", deviceToPod)
+	slog.Debug(fmt.Sprintf("Device to pod mapping: %+v", deviceToPod))
 
 	// Note: for loop are copies the value, if we want to change the value
 	// and not the copy, we need to use the indexes
 	for counter := range metrics {
 		for j, val := range metrics[counter] {
-			deviceID, err := val.getIDOfType(p.Config.KubernetesGPUIdType)
+			deviceID, err := val.GetIDOfType(p.Config.KubernetesGPUIdType)
 			if err != nil {
 				return err
 			}
@@ -105,20 +111,17 @@ func (p *PodMapper) Process(metrics MetricsByCounter, sysInfo SystemInfo) error
 }
 
 func connectToServer(socket string) (*grpc.ClientConn, func(), error) {
-	ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout)
-	defer cancel()
-
-	conn, err := grpc.DialContext(ctx,
+	resolver.SetDefaultScheme("passthrough")
+	conn, err := grpc.NewClient(
 		socket,
 		grpc.WithTransportCredentials(insecure.NewCredentials()),
-		grpc.WithBlock(),
 		grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) {
 			d := net.Dialer{}
 			return d.DialContext(ctx, "unix", addr)
 		}),
 	)
 	if err != nil {
-		return nil, func() {}, fmt.Errorf("failure connecting to '%s'; err: %w", socket, err)
+		return nil, doNothing, fmt.Errorf("failure connecting to '%s'; err: %w", socket, err)
 	}
 
 	return conn, func() { conn.Close() }, nil
@@ -139,7 +142,7 @@ func (p *PodMapper) listPods(conn *grpc.ClientConn) (*podresourcesapi.ListPodRes
 }
 
 func (p *PodMapper) toDeviceToPod(
-	devicePods *podresourcesapi.ListPodResourcesResponse, sysInfo SystemInfo,
+	devicePods *podresourcesapi.ListPodResourcesResponse, deviceInfo deviceinfo.Provider,
 ) map[string]PodInfo {
 	deviceToPodMap := make(map[string]PodInfo)
 
@@ -148,9 +151,9 @@ func (p *PodMapper) toDeviceToPod(
 			for _, device := range container.GetDevices() {
 
 				resourceName := device.GetResourceName()
-				if resourceName != nvidiaResourceName && !slices.Contains(p.Config.NvidiaResourceNames, resourceName) {
+				if resourceName != appconfig.NvidiaResourceName && !slices.Contains(p.Config.NvidiaResourceNames, resourceName) {
 					// Mig resources appear differently than GPU resources
-					if !strings.HasPrefix(resourceName, nvidiaMigResourcePrefix) {
+					if !strings.HasPrefix(resourceName, appconfig.NvidiaMigResourcePrefix) {
 						continue
 					}
 				}
@@ -162,14 +165,14 @@ func (p *PodMapper) toDeviceToPod(
 				}
 
 				for _, deviceID := range device.GetDeviceIds() {
-					if strings.HasPrefix(deviceID, MIG_UUID_PREFIX) {
-						migDevice, err := nvmlGetMIGDeviceInfoByIDHook(deviceID)
+					if strings.HasPrefix(deviceID, appconfig.MIG_UUID_PREFIX) {
+						migDevice, err := nvmlprovider.Client().GetMIGDeviceInfoByID(deviceID)
 						if err == nil {
-							giIdentifier := GetGPUInstanceIdentifier(sysInfo, migDevice.ParentUUID,
+							giIdentifier := deviceinfo.GetGPUInstanceIdentifier(deviceInfo, migDevice.ParentUUID,
 								uint(migDevice.GPUInstanceID))
 							deviceToPodMap[giIdentifier] = podInfo
 						}
-						gpuUUID := deviceID[len(MIG_UUID_PREFIX):]
+						gpuUUID := deviceID[len(appconfig.MIG_UUID_PREFIX):]
 						deviceToPodMap[gpuUUID] = podInfo
 					} else if gkeMigDeviceIDMatches := gkeMigDeviceIDRegex.FindStringSubmatch(deviceID); gkeMigDeviceIDMatches != nil {
 						var gpuIndex string
diff --git a/internal/pkg/transformation/kubernetes_test.go b/internal/pkg/transformation/kubernetes_test.go
new file mode 100644
index 00000000..72f26c99
--- /dev/null
+++ b/internal/pkg/transformation/kubernetes_test.go
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package transformation
+
+import (
+	"fmt"
+	"reflect"
+	"testing"
+
+	"github.com/sirupsen/logrus"
+
+	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/mock/gomock"
+	"google.golang.org/grpc"
+	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
+
+	mockdeviceinfo "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/deviceinfo"
+	mocknvmlprovider "github.com/NVIDIA/dcgm-exporter/internal/mocks/pkg/nvmlprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
+)
+
+func TestProcessPodMapper_WithD_Different_Format_Of_DeviceID(t *testing.T) {
+	testutils.RequireLinux(t)
+	logrus.SetLevel(logrus.DebugLevel)
+	type TestCase struct {
+		KubernetesGPUIDType appconfig.KubernetesGPUIDType
+		GPUInstanceID       uint
+		ResourceName        string
+		MetricGPUID         string
+		MetricGPUDevice     string
+		MetricMigProfile    string
+		PODGPUID            string
+		NvidiaResourceNames []string
+	}
+
+	testCases := []TestCase{
+		{
+			KubernetesGPUIDType: appconfig.GPUUID,
+			ResourceName:        appconfig.NvidiaResourceName,
+			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			PODGPUID:            "b8ea3855-276c-c9cb-b366-c6fa655957c5",
+		},
+		{
+			KubernetesGPUIDType: appconfig.GPUUID,
+			ResourceName:        appconfig.NvidiaResourceName,
+			MetricGPUID:         "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			MetricMigProfile:    "",
+		},
+		{
+			KubernetesGPUIDType: appconfig.GPUUID,
+			ResourceName:        appconfig.NvidiaResourceName,
+			GPUInstanceID:       3,
+			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			MetricMigProfile:    "",
+			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
+		},
+		{
+			KubernetesGPUIDType: appconfig.DeviceName,
+			ResourceName:        appconfig.NvidiaResourceName,
+			GPUInstanceID:       3,
+			MetricMigProfile:    "mig",
+			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
+		},
+		{
+			KubernetesGPUIDType: appconfig.DeviceName,
+			ResourceName:        appconfig.NvidiaResourceName,
+			MetricMigProfile:    "mig",
+			PODGPUID:            "nvidia0/gi0",
+		},
+		{
+			KubernetesGPUIDType: appconfig.DeviceName,
+			ResourceName:        appconfig.NvidiaResourceName,
+			MetricGPUDevice:     "0",
+			PODGPUID:            "0/vgpu",
+		},
+		{
+			KubernetesGPUIDType: appconfig.GPUUID,
+			ResourceName:        appconfig.NvidiaResourceName,
+			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			PODGPUID:            "b8ea3855-276c-c9cb-b366-c6fa655957c5::",
+		},
+		{
+			KubernetesGPUIDType: appconfig.GPUUID,
+			ResourceName:        "nvidia.com/mig-1g.10gb",
+			MetricMigProfile:    "1g.10gb",
+			MetricGPUID:         "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			MetricGPUDevice:     "0",
+			GPUInstanceID:       3,
+		},
+		{
+			KubernetesGPUIDType: appconfig.GPUUID,
+			ResourceName:        "nvidia.com/a100",
+			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			PODGPUID:            "b8ea3855-276c-c9cb-b366-c6fa655957c5",
+			NvidiaResourceNames: []string{"nvidia.com/a100"},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(fmt.Sprintf("when type %s, pod device id %s metric device id %s and gpu device %s",
+			tc.KubernetesGPUIDType,
+			tc.PODGPUID,
+			tc.MetricGPUID,
+			tc.MetricGPUDevice,
+		),
+			func(t *testing.T) {
+				tmpDir, cleanup := testutils.CreateTmpDir(t)
+				defer cleanup()
+				socketPath := tmpDir + "/kubelet.sock"
+				server := grpc.NewServer()
+
+				config := &appconfig.Config{
+					UseRemoteHE: false,
+				}
+
+				dcgmprovider.Initialize(config)
+				defer dcgmprovider.Client().Cleanup()
+
+				gpus := []string{tc.PODGPUID}
+				podresourcesapi.RegisterPodResourcesListerServer(server,
+					testutils.NewMockPodResourcesServer(tc.ResourceName, gpus))
+
+				cleanup = testutils.StartMockServer(t, server, socketPath)
+				defer cleanup()
+
+				migDeviceInfo := &nvmlprovider.MIGDeviceInfo{
+					ParentUUID:        "00000000-0000-0000-0000-000000000000",
+					GPUInstanceID:     3,
+					ComputeInstanceID: 0,
+				}
+
+				ctrl := gomock.NewController(t)
+				mockNVMLProvider := mocknvmlprovider.NewMockNVML(ctrl)
+				mockNVMLProvider.EXPECT().GetMIGDeviceInfoByID(gomock.Any()).Return(migDeviceInfo, nil).AnyTimes()
+				nvmlprovider.SetClient(mockNVMLProvider)
+
+				podMapper := NewPodMapper(&appconfig.Config{
+					KubernetesGPUIdType:       tc.KubernetesGPUIDType,
+					PodResourcesKubeletSocket: socketPath,
+					NvidiaResourceNames:       tc.NvidiaResourceNames,
+				})
+				require.NotNil(t, podMapper)
+				metrics := collector.MetricsByCounter{}
+				counter := counters.Counter{
+					FieldID:   155,
+					FieldName: "DCGM_FI_DEV_POWER_USAGE",
+					PromType:  "gauge",
+				}
+
+				metrics[counter] = append(metrics[counter], collector.Metric{
+					GPU:           "0",
+					GPUUUID:       tc.MetricGPUID,
+					GPUDevice:     tc.MetricGPUDevice,
+					GPUInstanceID: fmt.Sprint(tc.GPUInstanceID),
+					Value:         "42",
+					MigProfile:    tc.MetricMigProfile,
+					Counter: counters.Counter{
+						FieldID:   155,
+						FieldName: "DCGM_FI_DEV_POWER_USAGE",
+						PromType:  "gauge",
+					},
+					Attributes: map[string]string{},
+				})
+
+				mockGPU := deviceinfo.GPUInfo{
+					DeviceInfo: dcgm.Device{
+						UUID: "00000000-0000-0000-0000-000000000000",
+						GPU:  0,
+					},
+					MigEnabled: true,
+				}
+
+				mockSystemInfo := mockdeviceinfo.NewMockProvider(ctrl)
+				mockSystemInfo.EXPECT().GPUCount().Return(uint(1)).AnyTimes()
+				mockSystemInfo.EXPECT().GPU(uint(0)).Return(mockGPU).AnyTimes()
+
+				err := podMapper.Process(metrics, mockSystemInfo)
+				require.NoError(t, err)
+				assert.Len(t, metrics, 1)
+				for _, metric := range metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(counters.Counter)] {
+					require.Contains(t, metric.Attributes, podAttribute)
+					require.Contains(t, metric.Attributes, namespaceAttribute)
+					require.Contains(t, metric.Attributes, containerAttribute)
+
+					// TODO currently we rely on ordering and implicit expectations of the mock implementation
+					// This should be a table comparison
+					require.Equal(t, fmt.Sprintf("gpu-pod-%d", 0), metric.Attributes[podAttribute])
+					require.Equal(t, "default", metric.Attributes[namespaceAttribute])
+					require.Equal(t, "default", metric.Attributes[containerAttribute])
+				}
+			})
+	}
+}
diff --git a/internal/pkg/transformation/transformer.go b/internal/pkg/transformation/transformer.go
new file mode 100644
index 00000000..86f56c84
--- /dev/null
+++ b/internal/pkg/transformation/transformer.go
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package transformation
+
+import (
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+)
+
+// GetTransformations returns the transformations enabled by the given configuration.
+func GetTransformations(c *appconfig.Config) []Transform {
+	var enabled []Transform
+
+	// Kubernetes mode adds the pod mapper built by NewPodMapper.
+	if c.Kubernetes {
+		enabled = append(enabled, NewPodMapper(c))
+	}
+
+	// A non-empty HPC job mapping directory adds the mapper built by newHPCMapper.
+	if c.HPCJobMappingDir != "" {
+		enabled = append(enabled, newHPCMapper(c))
+	}
+	return enabled
+}
diff --git a/internal/pkg/transformation/transformer_test.go b/internal/pkg/transformation/transformer_test.go
new file mode 100644
index 00000000..f2ab6652
--- /dev/null
+++ b/internal/pkg/transformation/transformer_test.go
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package transformation
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+)
+
+// TestGetTransformations checks how many transformations GetTransformations returns
+// for each combination of the Kubernetes and HPCJobMappingDir settings.
+func TestGetTransformations(t *testing.T) {
+	tests := []struct {
+		name    string
+		config  *appconfig.Config
+		wantLen int
+	}{
+		{
+			name:    "The environment is not kubernetes",
+			config:  &appconfig.Config{Kubernetes: false},
+			wantLen: 0,
+		},
+		{
+			name:    "The environment is kubernetes",
+			config:  &appconfig.Config{Kubernetes: true},
+			wantLen: 1,
+		},
+		{
+			name:    "The environment is HPC cluster",
+			config:  &appconfig.Config{HPCJobMappingDir: "/var/run/nvidia/slurm"},
+			wantLen: 1,
+		},
+		{
+			// Both settings at once: each must contribute its own transformation.
+			name: "The environment is kubernetes and HPC cluster",
+			config: &appconfig.Config{
+				Kubernetes:       true,
+				HPCJobMappingDir: "/var/run/nvidia/slurm",
+			},
+			wantLen: 2,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			transformations := GetTransformations(tt.config)
+			assert.Len(t, transformations, tt.wantLen)
+		})
+	}
+}
diff --git a/internal/pkg/transformation/types.go b/internal/pkg/transformation/types.go
new file mode 100644
index 00000000..2bc896fc
--- /dev/null
+++ b/internal/pkg/transformation/types.go
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package transformation
+
+import (
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/deviceinfo"
+)
+
+//go:generate go run -v go.uber.org/mock/mockgen  -destination=../../mocks/pkg/transformations/mock_transformer.go -package=transformation -copyright_file=../../../hack/header.txt . Transform
+
+type Transform interface {
+	Process(metrics collector.MetricsByCounter, deviceInfo deviceinfo.Provider) error
+	Name() string
+}
+
+type PodMapper struct {
+	Config *appconfig.Config
+}
+
+type PodInfo struct {
+	Name      string
+	Namespace string
+	Container string
+}
diff --git a/internal/pkg/transformation/variables.go b/internal/pkg/transformation/variables.go
new file mode 100644
index 00000000..93e7da6c
--- /dev/null
+++ b/internal/pkg/transformation/variables.go
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package transformation
+
+import osinterface "github.com/NVIDIA/dcgm-exporter/internal/pkg/os"
+
+var os osinterface.OS = osinterface.RealOS{}
+
+var doNothing = func() {
+	// This function is intentionally left blank
+}
diff --git a/pkg/dcgmexporter/utils.go b/internal/pkg/utils/utils.go
similarity index 73%
rename from pkg/dcgmexporter/utils.go
rename to internal/pkg/utils/utils.go
index 6f5391bc..b194a4ae 100644
--- a/pkg/dcgmexporter/utils.go
+++ b/internal/pkg/utils/utils.go
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,10 +14,12 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package utils
 
 import (
 	"bytes"
+	"crypto/rand"
+	"encoding/binary"
 	"encoding/gob"
 	"fmt"
 	"sync"
@@ -38,7 +40,17 @@ func WaitWithTimeout(wg *sync.WaitGroup, timeout time.Duration) error {
 	}
 }
 
-func deepCopy[T any](src T) (dst T, err error) {
+// RandUint64 returns a 64-bit number decoded big-endian from crypto/rand's Reader.
+func RandUint64() (uint64, error) {
+	var num uint64
+	if err := binary.Read(rand.Reader, binary.BigEndian, &num); err != nil {
+		return 0, fmt.Errorf("failed to generate random 64-bit number; err: %w", err)
+	}
+
+	return num, nil
+}
+
+func DeepCopy[T any](src T) (dst T, err error) {
 	var buf bytes.Buffer
 
 	defer func() {
@@ -63,3 +75,11 @@ func deepCopy[T any](src T) (dst T, err error) {
 
 	return dst, nil
 }
+
+// CleanupOnError runs every cleanup in slice order and then returns a nil slice.
+func CleanupOnError(cleanups []func()) []func() {
+	for i := range cleanups {
+		cleanups[i]()
+	}
+	return nil
+}
diff --git a/pkg/dcgmexporter/utils_test.go b/internal/pkg/utils/utils_test.go
similarity index 50%
rename from pkg/dcgmexporter/utils_test.go
rename to internal/pkg/utils/utils_test.go
index c36e1e9f..3c9c488b 100644
--- a/pkg/dcgmexporter/utils_test.go
+++ b/internal/pkg/utils/utils_test.go
@@ -14,15 +14,19 @@
  * limitations under the License.
  */
 
-package dcgmexporter
+package utils
 
 import (
+	"crypto/rand"
+	"fmt"
 	"sync"
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
 )
 
 func TestWaitWithTimeout(t *testing.T) {
@@ -46,17 +50,82 @@ func TestWaitWithTimeout(t *testing.T) {
 	})
 }
 
+func TestRandUint64_Success(t *testing.T) {
+	num, err := RandUint64()
+	assert.Nil(t, err, "Unexpected error: %v", err)
+	assert.NotZero(t, num, "Expected a non-zero uint64, but got 0")
+}
+
+// TestRandUint64_Failure checks that RandUint64 reports an error and returns 0
+// when rand.Reader is replaced by a mock reader carrying an error.
+func TestRandUint64_Failure(t *testing.T) {
+	// rand.Reader is package-level state, so put the original back when the test ends.
+	originalReader := rand.Reader
+	rand.Reader = &testutils.MockReader{Err: fmt.Errorf("mock error")}
+	defer func() {
+		rand.Reader = originalReader
+	}()
+
+	num, err := RandUint64()
+	require.Error(t, err, "Expected an error")
+	assert.Zerof(t, num, "Expected 0 on failure, but got %d", num)
+}
+
 func TestDeepCopy(t *testing.T) {
 	t.Run("Return error when pointer value is nil", func(t *testing.T) {
-		got, err := deepCopy[*struct{}](nil)
+		got, err := DeepCopy[*struct{}](nil)
 		assert.Nil(t, got)
 		assert.Error(t, err)
 	})
 
 	t.Run("Return error when src is unsupported type", func(t *testing.T) {
 		ch := make(chan int)
-		got, err := deepCopy(ch)
+		got, err := DeepCopy(ch)
 		assert.Nil(t, got)
 		assert.Error(t, err)
 	})
 }
+
+// TestCleanupOnError checks that CleanupOnError calls every cleanup once, in slice
+// order, and returns a nil slice for nil, empty and populated inputs.
+func TestCleanupOnError(t *testing.T) {
+	tests := []struct {
+		name     string
+		nilInput bool
+		count    int
+	}{
+		{
+			name:     "Nil cleanup functions",
+			nilInput: true,
+		},
+		{
+			name:  "Empty cleanup functions",
+			count: 0,
+		},
+		{
+			name:  "One cleanup function",
+			count: 1,
+		},
+		{
+			name:  "Multiple cleanup functions",
+			count: 3,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var cleanups []func()
+			if !tt.nilInput {
+				cleanups = []func(){}
+			}
+			// Each cleanup records its own index so that calls and their order are observable.
+			var want, called []int
+			for i := 0; i < tt.count; i++ {
+				idx := i
+				want = append(want, idx)
+				cleanups = append(cleanups, func() { called = append(called, idx) })
+			}
+			assert.Nil(t, CleanupOnError(cleanups), "expected output to be nil.")
+			assert.Equal(t, want, called, "expected every cleanup to run once, in order.")
+		})
+	}
+}
diff --git a/packaging/config-files/systemd/nvidia-dcgm-exporter.service b/packaging/config-files/systemd/nvidia-dcgm-exporter.service
new file mode 100644
index 00000000..aae6f167
--- /dev/null
+++ b/packaging/config-files/systemd/nvidia-dcgm-exporter.service
@@ -0,0 +1,33 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+[Unit]
+Description=NVIDIA DCGM-exporter service
+Wants=nvidia-dcgm.service
+After=nvidia-dcgm.service
+
+[Service]
+User=root
+PrivateTmp=false
+
+StandardOutput=append:/var/dcgm-exporter.log
+StandardError=append:/var/dcgm-exporter.log
+
+ExecStart=/usr/bin/dcgm-exporter -f /etc/dcgm-exporter/default-counters.csv
+
+Restart=on-abort
+
+[Install]
+WantedBy=multi-user.target
diff --git a/pkg/cmd/app.go b/pkg/cmd/app.go
index c78f8434..7da26ca3 100644
--- a/pkg/cmd/app.go
+++ b/pkg/cmd/app.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"log/slog"
 	"os"
 	"os/signal"
 	"runtime"
@@ -17,11 +18,22 @@ import (
 	"time"
 
 	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli/v2"
 
-	"github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter"
-	"github.com/NVIDIA/dcgm-exporter/pkg/stdout"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/collector"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatcher"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/hostname"
+	. "github.com/NVIDIA/dcgm-exporter/internal/pkg/logging"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/prerequisites"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/registry"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/server"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/stdout"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/utils"
 )
 
 const (
@@ -150,9 +162,9 @@ func NewApp(buildVersion ...string) *cli.App {
 		},
 		&cli.StringFlag{
 			Name:  CLIKubernetesGPUIDType,
-			Value: string(dcgmexporter.GPUUID),
+			Value: string(appconfig.GPUUID),
 			Usage: fmt.Sprintf("Choose Type of GPU ID to use to map kubernetes resources to pods. Possible values: '%s', '%s'",
-				dcgmexporter.GPUUID, dcgmexporter.DeviceName),
+				appconfig.GPUUID, appconfig.DeviceName),
 			EnvVars: []string{"DCGM_EXPORTER_KUBERNETES_GPU_ID_TYPE"},
 		},
 		&cli.StringFlag{
@@ -185,7 +197,7 @@ func NewApp(buildVersion ...string) *cli.App {
 		&cli.StringFlag{
 			Name:    CLIWebConfigFile,
 			Value:   "",
-			Usage:   "TLS config file following webConfig spec.",
+			Usage:   "Web configuration file following webConfig spec: https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md.",
 			EnvVars: []string{"DCGM_EXPORTER_WEB_CONFIG_FILE"},
 		},
 		&cli.IntFlag{
@@ -222,7 +234,7 @@ func NewApp(buildVersion ...string) *cli.App {
 		},
 		&cli.StringFlag{
 			Name:    CLIDCGMLogLevel,
-			Value:   dcgmexporter.DCGMDbgLvlNone,
+			Value:   DCGMDbgLvlNone,
 			Usage:   "Specify the DCGM log verbosity level. This parameter is effective only when the '--enable-dcgm-log' option is set to 'true'. Possible values: NONE, FATAL, ERROR, WARN, INFO, DEBUG and VERB",
 			EnvVars: []string{"DCGM_EXPORTER_DCGM_LOG_LEVEL"},
 		},
@@ -255,7 +267,8 @@ func NewApp(buildVersion ...string) *cli.App {
 		})
 	} else {
 		err := "dcgm-exporter is only supported on Linux."
-		logrus.Fatal(err)
+		slog.Error(err)
+		fatal()
 		return nil
 	}
 
@@ -266,6 +279,10 @@ func NewApp(buildVersion ...string) *cli.App {
 	return c
 }
 
+func fatal() {
+	os.Exit(1)
+}
+
 func newOSWatcher(sigs ...os.Signal) chan os.Signal {
 	sigChan := make(chan os.Signal, 1)
 	signal.Notify(sigChan, sigs...)
@@ -280,7 +297,7 @@ func action(c *cli.Context) (err error) {
 		// during initialization and return an error.
 		defer func() {
 			if r := recover(); r != nil {
-				logrus.WithField(dcgmexporter.LoggerStackTrace, string(debug.Stack())).Error("Encountered a failure.")
+				slog.Error("Encountered a failure.", slog.String(StackTrace, string(debug.Stack())))
 				err = fmt.Errorf("encountered a failure; err: %v", r)
 			}
 		}()
@@ -291,7 +308,12 @@ func action(c *cli.Context) (err error) {
 func startDCGMExporter(c *cli.Context, cancel context.CancelFunc) error {
 restart:
 
-	logrus.Info("Starting dcgm-exporter")
+	var version string
+	if c != nil && c.App != nil {
+		version = c.App.Version
+	}
+
+	slog.Info("Starting dcgm-exporter", slog.String("Version", version))
 
 	config, err := contextToConfig(c)
 	if err != nil {
@@ -300,41 +322,40 @@ restart:
 
 	enableDebugLogging(config)
 
-	cleanupDCGM := initDCGM(config)
-	defer cleanupDCGM()
+	err = prerequisites.Validate()
+	if err != nil {
+		return err
+	}
+
+	// Initialize DCGM Provider Instance
+	dcgmprovider.Initialize(config)
+	defer dcgmprovider.Client().Cleanup()
+
+	slog.Info("DCGM successfully initialized!")
 
-	logrus.Info("DCGM successfully initialized!")
+	// Initialize NVML Provider Instance
+	nvmlprovider.Initialize()
+	defer nvmlprovider.Client().Cleanup()
 
-	dcgm.FieldsInit()
-	defer dcgm.FieldsTerm()
+	slog.Info("NVML provider successfully initialized!")
 
 	fillConfigMetricGroups(config)
 
 	cs := getCounters(config)
 
-	fieldEntityGroupTypeSystemInfo := getFieldEntityGroupTypeSystemInfo(cs, config)
+	deviceWatchListManager := startDeviceWatchListManager(cs, config)
 
-	hostname, err := dcgmexporter.GetHostname(config)
+	hostname, err := hostname.GetHostname(config)
 	if err != nil {
 		return err
 	}
 
-	pipeline, cleanup, err := dcgmexporter.NewMetricsPipeline(config,
-		cs.DCGMCounters,
-		hostname,
-		dcgmexporter.NewDCGMCollector,
-		fieldEntityGroupTypeSystemInfo,
-	)
-	defer cleanup()
-	if err != nil {
-		logrus.Fatal(err)
-	}
+	cf := collector.InitCollectorFactory(cs, deviceWatchListManager, hostname, config)
 
-	cRegistry := dcgmexporter.NewRegistry()
-
-	enableDCGMExpXIDErrorsCountCollector(cs, fieldEntityGroupTypeSystemInfo, hostname, config, cRegistry)
-
-	enableDCGMExpClockEventsCount(cs, fieldEntityGroupTypeSystemInfo, hostname, config, cRegistry)
+	cRegistry := registry.NewRegistry()
+	for _, entityCollector := range cf.NewCollectors() {
+		cRegistry.Register(entityCollector)
+	}
 
 	defer func() {
 		cRegistry.Cleanup()
@@ -346,11 +367,8 @@ restart:
 	stop := make(chan interface{})
 
 	wg.Add(1)
-	go pipeline.Run(ch, stop, &wg)
 
-	wg.Add(1)
-
-	server, cleanup, err := dcgmexporter.NewMetricsServer(config, ch, cRegistry)
+	server, cleanup, err := server.NewMetricsServer(config, ch, deviceWatchListManager, cRegistry)
 	defer cleanup()
 	if err != nil {
 		return err
@@ -362,9 +380,10 @@ restart:
 	sig := <-sigs
 	close(stop)
 	cancel()
-	err = dcgmexporter.WaitWithTimeout(&wg, time.Second*2)
+	err = utils.WaitWithTimeout(&wg, time.Second*2)
 	if err != nil {
-		logrus.Fatal(err)
+		slog.Error(err.Error())
+		fatal()
 	}
 
 	if sig == syscall.SIGHUP {
@@ -374,69 +393,40 @@ restart:
 	return nil
 }
 
-func enableDCGMExpClockEventsCount(cs *dcgmexporter.CounterSet, fieldEntityGroupTypeSystemInfo *dcgmexporter.FieldEntityGroupTypeSystemInfo, hostname string, config *dcgmexporter.Config, cRegistry *dcgmexporter.Registry) {
-	if dcgmexporter.IsDCGMExpClockEventsCountEnabled(cs.ExporterCounters) {
-		item, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-		if !exists {
-			logrus.Fatalf("%s collector cannot be initialized", dcgmexporter.DCGMClockEventsCount.String())
-		}
-		clocksThrottleReasonsCollector, err := dcgmexporter.NewClockEventsCollector(
-			cs.ExporterCounters, hostname, config, item)
-		if err != nil {
-			logrus.Fatal(err)
-		}
-
-		cRegistry.Register(clocksThrottleReasonsCollector)
-
-		logrus.Infof("%s collector initialized", dcgmexporter.DCGMClockEventsCount.String())
-	}
-}
-
-func enableDCGMExpXIDErrorsCountCollector(cs *dcgmexporter.CounterSet, fieldEntityGroupTypeSystemInfo *dcgmexporter.FieldEntityGroupTypeSystemInfo, hostname string, config *dcgmexporter.Config, cRegistry *dcgmexporter.Registry) {
-	if dcgmexporter.IsDCGMExpXIDErrorsCountEnabled(cs.ExporterCounters) {
-		item, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-		if !exists {
-			logrus.Fatalf("%s collector cannot be initialized", dcgmexporter.DCGMXIDErrorsCount.String())
-		}
-
-		xidCollector, err := dcgmexporter.NewXIDCollector(cs.ExporterCounters, hostname, config, item)
-		if err != nil {
-			logrus.Fatal(err)
-		}
-
-		cRegistry.Register(xidCollector)
-
-		logrus.Infof("%s collector initialized", dcgmexporter.DCGMXIDErrorsCount.String())
-	}
-}
-
-func getFieldEntityGroupTypeSystemInfo(cs *dcgmexporter.CounterSet, config *dcgmexporter.Config) *dcgmexporter.FieldEntityGroupTypeSystemInfo {
-	var allCounters []dcgmexporter.Counter
+func startDeviceWatchListManager(
+	cs *counters.CounterSet, config *appconfig.Config,
+) devicewatchlistmanager.Manager {
+	// Create a list containing DCGM Collector, Exp Collectors and all the label Collectors
+	var allCounters counters.CounterList
+	var deviceWatchListManager devicewatchlistmanager.Manager
 
 	allCounters = append(allCounters, cs.DCGMCounters...)
 
 	allCounters = appendDCGMXIDErrorsCountDependency(allCounters, cs)
 	allCounters = appendDCGMClockEventsCountDependency(cs, allCounters)
 
-	fieldEntityGroupTypeSystemInfo := dcgmexporter.NewEntityGroupTypeSystemInfo(allCounters, config)
+	deviceWatchListManager = devicewatchlistmanager.NewWatchListManager(allCounters, config)
+	deviceWatcher := devicewatcher.NewDeviceWatcher()
 
-	for _, egt := range dcgmexporter.FieldEntityGroupTypeToMonitor {
-		err := fieldEntityGroupTypeSystemInfo.Load(egt)
+	for _, deviceType := range devicewatchlistmanager.DeviceTypesToWatch {
+		err := deviceWatchListManager.CreateEntityWatchList(deviceType, deviceWatcher, int64(config.CollectInterval))
 		if err != nil {
-			logrus.Infof("Not collecting %s metrics; %s", egt.String(), err)
+			slog.Info(fmt.Sprintf("Not collecting %s metrics; %s", deviceType.String(), err))
 		}
 	}
-	return fieldEntityGroupTypeSystemInfo
+	return deviceWatchListManager
 }
 
 // appendDCGMXIDErrorsCountDependency appends DCGM counters required for the DCGM_EXP_CLOCK_EVENTS_COUNT metric
-func appendDCGMClockEventsCountDependency(cs *dcgmexporter.CounterSet, allCounters []dcgmexporter.Counter) []dcgmexporter.Counter {
+func appendDCGMClockEventsCountDependency(
+	cs *counters.CounterSet, allCounters []counters.Counter,
+) []counters.Counter {
 	if len(cs.ExporterCounters) > 0 {
-		if containsField(cs.ExporterCounters, dcgmexporter.DCGMClockEventsCount) &&
-			!containsField(allCounters, dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS) {
+		if containsField(cs.ExporterCounters, counters.DCGMClockEventsCount) &&
+			!containsField(allCounters, dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS) {
 			allCounters = append(allCounters,
-				dcgmexporter.Counter{
-					FieldID: dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
+				counters.Counter{
+					FieldID: dcgm.DCGM_FI_DEV_CLOCKS_EVENT_REASONS,
 				})
 		}
 	}
@@ -444,12 +434,14 @@ func appendDCGMClockEventsCountDependency(cs *dcgmexporter.CounterSet, allCounte
 }
 
 // appendDCGMXIDErrorsCountDependency appends DCGM counters required for the DCGM_EXP_XID_ERRORS_COUNT metric
-func appendDCGMXIDErrorsCountDependency(allCounters []dcgmexporter.Counter, cs *dcgmexporter.CounterSet) []dcgmexporter.Counter {
+func appendDCGMXIDErrorsCountDependency(
+	allCounters []counters.Counter, cs *counters.CounterSet,
+) []counters.Counter {
 	if len(cs.ExporterCounters) > 0 {
-		if containsField(cs.ExporterCounters, dcgmexporter.DCGMXIDErrorsCount) &&
+		if containsField(cs.ExporterCounters, counters.DCGMXIDErrorsCount) &&
 			!containsField(allCounters, dcgm.DCGM_FI_DEV_XID_ERRORS) {
 			allCounters = append(allCounters,
-				dcgmexporter.Counter{
+				counters.Counter{
 					FieldID: dcgm.DCGM_FI_DEV_XID_ERRORS,
 				})
 		}
@@ -457,16 +449,17 @@ func appendDCGMXIDErrorsCountDependency(allCounters []dcgmexporter.Counter, cs *
 	return allCounters
 }
 
-func containsField(slice []dcgmexporter.Counter, fieldID dcgmexporter.ExporterCounter) bool {
-	return slices.ContainsFunc(slice, func(counter dcgmexporter.Counter) bool {
+func containsField(slice []counters.Counter, fieldID counters.ExporterCounter) bool {
+	return slices.ContainsFunc(slice, func(counter counters.Counter) bool {
 		return counter.FieldID == dcgm.Short(fieldID)
 	})
 }
 
-func getCounters(config *dcgmexporter.Config) *dcgmexporter.CounterSet {
-	cs, err := dcgmexporter.GetCounterSet(config)
+func getCounters(config *appconfig.Config) *counters.CounterSet {
+	cs, err := counters.GetCounterSet(config)
 	if err != nil {
-		logrus.Fatal(err)
+		slog.Error(err.Error())
+		os.Exit(1)
 	}
 
 	// Copy labels from DCGM Counters to ExporterCounters
@@ -478,58 +471,32 @@ func getCounters(config *dcgmexporter.Config) *dcgmexporter.CounterSet {
 	return cs
 }
 
-func fillConfigMetricGroups(config *dcgmexporter.Config) {
+func fillConfigMetricGroups(config *appconfig.Config) {
 	var groups []dcgm.MetricGroup
-	groups, err := dcgm.GetSupportedMetricGroups(0)
+	groups, err := dcgmprovider.Client().GetSupportedMetricGroups(0)
 	if err != nil {
 		config.CollectDCP = false
-		logrus.Info("Not collecting DCP metrics: ", err)
+		slog.Info("Not collecting DCP metrics: " + err.Error())
 	} else {
-		logrus.Info("Collecting DCP Metrics")
+		slog.Info("Collecting DCP Metrics")
 		config.MetricGroups = groups
 	}
 }
 
-func enableDebugLogging(config *dcgmexporter.Config) {
+func enableDebugLogging(config *appconfig.Config) {
 	if config.Debug {
 		// enable debug logging
-		logrus.SetLevel(logrus.DebugLevel)
-		logrus.Debug("Debug output is enabled")
+		slog.SetLogLoggerLevel(slog.LevelDebug)
+		slog.Debug("Debug output is enabled")
 	}
 
-	logrus.Debugf("Command line: %s", strings.Join(os.Args, " "))
+	slog.Debug(fmt.Sprintf("Command line: %s", strings.Join(os.Args, " ")))
 
-	logrus.WithField(dcgmexporter.LoggerDumpKey, fmt.Sprintf("%+v", config)).Debug("Loaded configuration")
-}
-
-func initDCGM(config *dcgmexporter.Config) func() {
-	if config.UseRemoteHE {
-		logrus.Info("Attemping to connect to remote hostengine at ", config.RemoteHEInfo)
-		cleanup, err := dcgm.Init(dcgm.Standalone, config.RemoteHEInfo, "0")
-		if err != nil {
-			cleanup()
-			logrus.Fatal(err)
-		}
-		return cleanup
-	} else {
-
-		if config.EnableDCGMLog {
-			os.Setenv("__DCGM_DBG_FILE", "-")
-			os.Setenv("__DCGM_DBG_LVL", config.DCGMLogLevel)
-		}
-
-		cleanup, err := dcgm.Init(dcgm.Embedded)
-		if err != nil {
-			cleanup()
-			logrus.Fatal(err)
-		}
-
-		return cleanup
-	}
+	slog.Debug("Loaded configuration", slog.String(DumpKey, fmt.Sprintf("%+v", config)))
 }
 
-func parseDeviceOptions(devices string) (dcgmexporter.DeviceOptions, error) {
-	var dOpt dcgmexporter.DeviceOptions
+func parseDeviceOptions(devices string) (appconfig.DeviceOptions, error) {
+	var dOpt appconfig.DeviceOptions
 
 	letterAndRange := strings.Split(devices, ":")
 	count := len(letterAndRange)
@@ -591,7 +558,7 @@ func parseDeviceOptions(devices string) (dcgmexporter.DeviceOptions, error) {
 	return dOpt, nil
 }
 
-func contextToConfig(c *cli.Context) (*dcgmexporter.Config, error) {
+func contextToConfig(c *cli.Context) (*appconfig.Config, error) {
 	gOpt, err := parseDeviceOptions(c.String(CLIGPUDevices))
 	if err != nil {
 		return nil, err
@@ -608,23 +575,23 @@ func contextToConfig(c *cli.Context) (*dcgmexporter.Config, error) {
 	}
 
 	dcgmLogLevel := c.String(CLIDCGMLogLevel)
-	if !slices.Contains(dcgmexporter.DCGMDbgLvlValues, dcgmLogLevel) {
+	if !slices.Contains(DCGMDbgLvlValues, dcgmLogLevel) {
 		return nil, fmt.Errorf("invalid %s parameter value: %s", CLIDCGMLogLevel, dcgmLogLevel)
 	}
 
-	return &dcgmexporter.Config{
+	return &appconfig.Config{
 		CollectorsFile:             c.String(CLIFieldsFile),
 		Address:                    c.String(CLIAddress),
 		CollectInterval:            c.Int(CLICollectInterval),
 		Kubernetes:                 c.Bool(CLIKubernetes),
-		KubernetesGPUIdType:        dcgmexporter.KubernetesGPUIDType(c.String(CLIKubernetesGPUIDType)),
+		KubernetesGPUIdType:        appconfig.KubernetesGPUIDType(c.String(CLIKubernetesGPUIDType)),
 		CollectDCP:                 true,
 		UseOldNamespace:            c.Bool(CLIUseOldNamespace),
 		UseRemoteHE:                c.IsSet(CLIRemoteHEInfo),
 		RemoteHEInfo:               c.String(CLIRemoteHEInfo),
-		GPUDevices:                 gOpt,
-		SwitchDevices:              sOpt,
-		CPUDevices:                 cOpt,
+		GPUDeviceOptions:           gOpt,
+		SwitchDeviceOptions:        sOpt,
+		CPUDeviceOptions:           cOpt,
 		NoHostname:                 c.Bool(CLINoHostname),
 		UseFakeGPUs:                c.Bool(CLIUseFakeGPUs),
 		ConfigMapData:              c.String(CLIConfigMapData),
diff --git a/pkg/cmd/app_test.go b/pkg/cmd/app_test.go
index 9035c6bd..0bcd238f 100644
--- a/pkg/cmd/app_test.go
+++ b/pkg/cmd/app_test.go
@@ -23,27 +23,30 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/appconfig"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/counters"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/dcgmprovider"
+	"github.com/NVIDIA/dcgm-exporter/internal/pkg/devicewatchlistmanager"
 	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
-	"github.com/NVIDIA/dcgm-exporter/pkg/dcgmexporter"
 )
 
-func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
-	config := &dcgmexporter.Config{
-		GPUDevices:    dcgmexporter.DeviceOptions{},
-		SwitchDevices: dcgmexporter.DeviceOptions{},
-		CPUDevices:    dcgmexporter.DeviceOptions{},
-		UseFakeGPUs:   true,
+func Test_getDeviceWatchListManager(t *testing.T) {
+	config := &appconfig.Config{
+		GPUDeviceOptions:    appconfig.DeviceOptions{},
+		SwitchDeviceOptions: appconfig.DeviceOptions{},
+		CPUDeviceOptions:    appconfig.DeviceOptions{},
+		UseFakeGPUs:         true,
 	}
 
 	tests := []struct {
 		name       string
-		counterSet *dcgmexporter.CounterSet
-		assertion  func(*testing.T, *dcgmexporter.FieldEntityGroupTypeSystemInfo)
+		counterSet *counters.CounterSet
+		assertion  func(*testing.T, devicewatchlistmanager.Manager)
 	}{
 		{
 			name: "When DCGM_FI_DEV_XID_ERRORS and DCGM_EXP_XID_ERRORS_COUNT enabled",
-			counterSet: &dcgmexporter.CounterSet{
-				DCGMCounters: []dcgmexporter.Counter{
+			counterSet: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{
 					{
 						FieldID:   230,
 						FieldName: "DCGM_FI_DEV_XID_ERRORS",
@@ -51,7 +54,7 @@ func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
 						Help:      "Value of the last XID error encountered.",
 					},
 				},
-				ExporterCounters: []dcgmexporter.Counter{
+				ExporterCounters: []counters.Counter{
 					{
 						FieldID:   9001,
 						FieldName: "DCGM_EXP_XID_ERRORS_COUNT",
@@ -60,17 +63,17 @@ func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
 					},
 				},
 			},
-			assertion: func(t *testing.T, got *dcgmexporter.FieldEntityGroupTypeSystemInfo) {
+			assertion: func(t *testing.T, got devicewatchlistmanager.Manager) {
 				require.NotNil(t, got)
-				values := testutils.GetStructPrivateFieldValue[[]dcgmexporter.Counter](t, got, "counters")
+				values := testutils.GetStructPrivateFieldValue[[]counters.Counter](t, got, "counters")
 				require.Len(t, values, 1)
 				assert.Equal(t, dcgm.Short(230), values[0].FieldID)
 			},
 		},
 		{
 			name: "When DCGM_FI_DEV_XID_ERRORS enabled",
-			counterSet: &dcgmexporter.CounterSet{
-				DCGMCounters: []dcgmexporter.Counter{
+			counterSet: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{
 					{
 						FieldID:   230,
 						FieldName: "DCGM_FI_DEV_XID_ERRORS",
@@ -79,17 +82,17 @@ func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
 					},
 				},
 			},
-			assertion: func(t *testing.T, got *dcgmexporter.FieldEntityGroupTypeSystemInfo) {
+			assertion: func(t *testing.T, got devicewatchlistmanager.Manager) {
 				require.NotNil(t, got)
-				values := testutils.GetStructPrivateFieldValue[[]dcgmexporter.Counter](t, got, "counters")
+				values := testutils.GetStructPrivateFieldValue[[]counters.Counter](t, got, "counters")
 				require.Len(t, values, 1)
 				assert.Equal(t, dcgm.Short(230), values[0].FieldID)
 			},
 		},
 		{
 			name: "When DCGM_EXP_XID_ERRORS_COUNT enabled",
-			counterSet: &dcgmexporter.CounterSet{
-				ExporterCounters: []dcgmexporter.Counter{
+			counterSet: &counters.CounterSet{
+				ExporterCounters: []counters.Counter{
 					{
 						FieldID:   9001,
 						FieldName: "DCGM_EXP_XID_ERRORS_COUNT",
@@ -98,33 +101,33 @@ func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
 					},
 				},
 			},
-			assertion: func(t *testing.T, got *dcgmexporter.FieldEntityGroupTypeSystemInfo) {
+			assertion: func(t *testing.T, got devicewatchlistmanager.Manager) {
 				require.NotNil(t, got)
-				values := testutils.GetStructPrivateFieldValue[[]dcgmexporter.Counter](t, got, "counters")
+				values := testutils.GetStructPrivateFieldValue[[]counters.Counter](t, got, "counters")
 				require.Len(t, values, 1)
 				assert.Equal(t, dcgm.Short(230), values[0].FieldID)
 			},
 		},
 		{
 			name:       "When no counters",
-			counterSet: &dcgmexporter.CounterSet{},
-			assertion: func(t *testing.T, got *dcgmexporter.FieldEntityGroupTypeSystemInfo) {
+			counterSet: &counters.CounterSet{},
+			assertion: func(t *testing.T, got devicewatchlistmanager.Manager) {
 				require.NotNil(t, got)
-				values := testutils.GetStructPrivateFieldValue[[]dcgmexporter.Counter](t, got, "counters")
+				values := testutils.GetStructPrivateFieldValue[[]counters.Counter](t, got, "counters")
 				require.Len(t, values, 0)
 			},
 		},
 		{
 			name: "When DCGM_FI_DEV_CLOCK_THROTTLE_REASON and DCGM_EXP_CLOCK_EVENTS_COUNT enabled",
-			counterSet: &dcgmexporter.CounterSet{
-				DCGMCounters: []dcgmexporter.Counter{
+			counterSet: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{
 					{
 						FieldID:   112,
 						FieldName: "DCGM_FI_DEV_CLOCK_THROTTLE_REASON",
 						PromType:  "gauge",
 					},
 				},
-				ExporterCounters: []dcgmexporter.Counter{
+				ExporterCounters: []counters.Counter{
 					{
 						FieldID:   9002,
 						FieldName: "DCGM_EXP_CLOCK_EVENTS_COUNT",
@@ -133,18 +136,18 @@ func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
 					},
 				},
 			},
-			assertion: func(t *testing.T, got *dcgmexporter.FieldEntityGroupTypeSystemInfo) {
+			assertion: func(t *testing.T, got devicewatchlistmanager.Manager) {
 				require.NotNil(t, got)
 				require.NotNil(t, got)
-				values := testutils.GetStructPrivateFieldValue[[]dcgmexporter.Counter](t, got, "counters")
+				values := testutils.GetStructPrivateFieldValue[[]counters.Counter](t, got, "counters")
 				require.Len(t, values, 1)
 				assert.Equal(t, dcgm.Short(112), values[0].FieldID)
 			},
 		},
 		{
 			name: "When DCGM_FI_DEV_CLOCK_THROTTLE_REASON enabled",
-			counterSet: &dcgmexporter.CounterSet{
-				DCGMCounters: []dcgmexporter.Counter{
+			counterSet: &counters.CounterSet{
+				DCGMCounters: []counters.Counter{
 					{
 						FieldID:   112,
 						FieldName: "DCGM_FI_DEV_CLOCK_THROTTLE_REASON",
@@ -152,17 +155,17 @@ func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
 					},
 				},
 			},
-			assertion: func(t *testing.T, got *dcgmexporter.FieldEntityGroupTypeSystemInfo) {
+			assertion: func(t *testing.T, got devicewatchlistmanager.Manager) {
 				require.NotNil(t, got)
-				values := testutils.GetStructPrivateFieldValue[[]dcgmexporter.Counter](t, got, "counters")
+				values := testutils.GetStructPrivateFieldValue[[]counters.Counter](t, got, "counters")
 				require.Len(t, values, 1)
 				assert.Equal(t, dcgm.Short(112), values[0].FieldID)
 			},
 		},
 		{
 			name: "When DCGM_EXP_CLOCK_EVENTS_COUNT enabled",
-			counterSet: &dcgmexporter.CounterSet{
-				ExporterCounters: []dcgmexporter.Counter{
+			counterSet: &counters.CounterSet{
+				ExporterCounters: []counters.Counter{
 					{
 						FieldID:   9002,
 						FieldName: "DCGM_EXP_CLOCK_EVENTS_COUNT",
@@ -171,21 +174,21 @@ func Test_getFieldEntityGroupTypeSystemInfo(t *testing.T) {
 					},
 				},
 			},
-			assertion: func(t *testing.T, got *dcgmexporter.FieldEntityGroupTypeSystemInfo) {
+			assertion: func(t *testing.T, got devicewatchlistmanager.Manager) {
 				require.NotNil(t, got)
-				values := testutils.GetStructPrivateFieldValue[[]dcgmexporter.Counter](t, got, "counters")
+				values := testutils.GetStructPrivateFieldValue[[]counters.Counter](t, got, "counters")
 				require.Len(t, values, 1)
 				assert.Equal(t, dcgm.Short(112), values[0].FieldID)
 			},
 		},
 	}
 
-	cleanupDCGM := initDCGM(config)
-	defer cleanupDCGM()
+	dcgmprovider.Initialize(config)
+	defer dcgmprovider.Client().Cleanup()
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got := getFieldEntityGroupTypeSystemInfo(tt.counterSet, config)
+			got := startDeviceWatchListManager(tt.counterSet, config)
 			if tt.assertion == nil {
 				t.Skip(tt.name)
 			}
diff --git a/pkg/dcgmexporter/const.go b/pkg/cmd/const.go
similarity index 73%
rename from pkg/dcgmexporter/const.go
rename to pkg/cmd/const.go
index 594356cc..49b4795c 100644
--- a/pkg/dcgmexporter/const.go
+++ b/pkg/cmd/const.go
@@ -14,23 +14,7 @@
  * limitations under the License.
  */
 
-package dcgmexporter
-
-// Constants for logging fields
-const (
-	LoggerGroupIDKey = "groupID"
-	LoggerDumpKey    = "dump"
-	LoggerStackTrace = "stacktrace"
-)
-
-const (
-	PARENT_ID_IGNORED      = 0
-	DCGM_ST_NOT_CONFIGURED = "Setting not configured"
-)
-
-const (
-	windowSizeInMSLabel = "window_size_in_ms"
-)
+package cmd
 
 // DCGMDbgLvl is a DCGM library debug level.
 const (
@@ -43,7 +27,8 @@ const (
 	DCGMDbgLvlVerb  = "VERB"
 )
 
-var DCGMDbgLvlValues = []string{DCGMDbgLvlNone,
+var DCGMDbgLvlValues = []string{
+	DCGMDbgLvlNone,
 	DCGMDbgLvlFatal,
 	DCGMDbgLvlError,
 	DCGMDbgLvlWarn,
diff --git a/pkg/dcgmexporter/clock_events_collector_test.go b/pkg/dcgmexporter/clock_events_collector_test.go
deleted file mode 100644
index 380715c5..00000000
--- a/pkg/dcgmexporter/clock_events_collector_test.go
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"reflect"
-	"slices"
-	"strconv"
-	"testing"
-	"time"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/stretchr/testify/require"
-	"google.golang.org/grpc"
-	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
-
-	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
-)
-
-func TestClockEventsCollector_Gather(t *testing.T) {
-	teardownTest := setupTest(t)
-	defer teardownTest(t)
-	runOnlyWithLiveGPUs(t)
-	testutils.RequireLinux(t)
-
-	hostname := "local-test"
-	config := &Config{
-		GPUDevices: DeviceOptions{
-			Flex:       true,
-			MajorRange: []int{-1},
-			MinorRange: []int{-1},
-		},
-		ClockEventsCountWindowSize: int(time.Duration(5) * time.Minute),
-	}
-
-	records := [][]string{
-		{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
-		{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
-	}
-
-	cc, err := extractCounters(records, config)
-	require.NoError(t, err)
-	require.Len(t, cc.ExporterCounters, 1)
-	require.Len(t, cc.DCGMCounters, 1)
-
-	for i := range cc.DCGMCounters {
-		if cc.DCGMCounters[i].PromType == "label" {
-			cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
-		}
-	}
-
-	// Create fake GPU
-	numGPUs, err := dcgm.GetAllDeviceCount()
-	require.NoError(t, err)
-
-	if numGPUs+1 > dcgm.MAX_NUM_DEVICES {
-		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
-	}
-
-	entityList := []dcgm.MigHierarchyInfo{
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-	}
-
-	gpuIDs, err := dcgm.CreateFakeEntities(entityList)
-	require.NoError(t, err)
-	require.NotEmpty(t, gpuIDs)
-
-	type clockEventsCountExpectation map[string]string
-	expectations := map[string]clockEventsCountExpectation{}
-
-	for i, gpuID := range gpuIDs {
-		err = dcgm.InjectFieldValue(gpuID,
-			dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-			dcgm.DCGM_FT_INT64,
-			0,
-			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
-			int64(DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL),
-		)
-		require.NoError(t, err)
-
-		err = dcgm.InjectFieldValue(gpuID,
-			dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-			dcgm.DCGM_FT_INT64,
-			0,
-			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
-			int64(DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL),
-		)
-		require.NoError(t, err)
-
-		err = dcgm.InjectFieldValue(gpuID,
-			dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-			dcgm.DCGM_FT_INT64,
-			0,
-			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
-			int64(DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE),
-		)
-		require.NoError(t, err)
-
-		expectations[fmt.Sprint(gpuID)] = clockEventsCountExpectation{
-			DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL.String(): "2",
-			DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL.String(): "2",
-			DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE.String():   "1",
-		}
-	}
-
-	// Create a fake K8S to emulate work on K8S environment
-	tmpDir, cleanup := CreateTmpDir(t)
-	defer cleanup()
-	socketPath := tmpDir + "/kubelet.sock"
-	server := grpc.NewServer()
-
-	gpuIDsAsString := make([]string, len(gpuIDs))
-
-	for i, g := range gpuIDs {
-		gpuIDsAsString[i] = fmt.Sprint(g)
-	}
-
-	podresourcesapi.RegisterPodResourcesListerServer(server, NewPodResourcesMockServer(nvidiaResourceName, gpuIDsAsString))
-	// Tell that the app is running on K8S
-	config.Kubernetes = true
-	config.PodResourcesKubeletSocket = socketPath
-
-	allCounters := []Counter{
-		{
-			FieldID: dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		},
-	}
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(allCounters, config)
-	err = fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-
-	item, _ := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-
-	collector, err := NewClockEventsCollector(cc.ExporterCounters, hostname, config, item)
-	require.NoError(t, err)
-
-	defer func() {
-		collector.Cleanup()
-	}()
-
-	metrics, err := collector.GetMetrics()
-	require.NoError(t, err)
-	require.NotEmpty(t, metrics)
-	// We expect 1 metric: DCGM_EXP_CLOCK_EVENTS_COUNT
-	require.Len(t, metrics, 1)
-	// We get metric value with 0 index
-	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)]
-
-	for i := 0; i < len(metricValues); i++ {
-		gpuID, err := strconv.ParseUint(metricValues[i].GPU, 10, 64)
-		if err == nil {
-			if !slices.Contains(gpuIDs, uint(gpuID)) {
-				metricValues = append(metricValues[:i], metricValues[i+1:]...)
-			}
-		}
-	}
-
-	// We expect 9 records, because we have 3 fake GPU and each GPU experienced 3 CLOCK_EVENTS
-	require.Len(t, metricValues, 9)
-	for _, val := range metricValues {
-		require.Contains(t, val.Labels, "window_size_in_ms")
-		require.Equal(t, fmt.Sprint(config.ClockEventsCountWindowSize), val.Labels["window_size_in_ms"])
-		expected, exists := expectations[val.GPU]
-		require.True(t, exists)
-		actualReason, exists := val.Labels["clock_event"]
-		require.True(t, exists)
-		expectedVal, exists := expected[actualReason]
-		require.True(t, exists)
-		require.Equal(t, expectedVal, val.Value)
-	}
-}
-
-func TestClockEventsCollector_NewClocksThrottleReasonsCollector(t *testing.T) {
-	config := &Config{
-		GPUDevices: DeviceOptions{
-			Flex:       true,
-			MajorRange: []int{-1},
-			MinorRange: []int{-1},
-		},
-	}
-
-	teardownTest := setupTest(t)
-	defer teardownTest(t)
-
-	allCounters := []Counter{
-		{
-			FieldID: dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		},
-	}
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(allCounters, config)
-	err := fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-	item, _ := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-
-	t.Run("Should Return Error When DCGM_EXP_CLOCK_EVENTS_COUNT is not present", func(t *testing.T) {
-		records := [][]string{
-			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
-		}
-		cc, err := extractCounters(records, config)
-		require.NoError(t, err)
-		require.Len(t, cc.ExporterCounters, 0)
-		require.Len(t, cc.DCGMCounters, 1)
-		collector, err := NewClockEventsCollector(cc.DCGMCounters, "", config, item)
-		require.Error(t, err)
-		require.Nil(t, collector)
-	})
-
-	t.Run("Should Return Error When Counter Param Is Empty", func(t *testing.T) {
-		counters := make([]Counter, 0)
-		collector, err := NewClockEventsCollector(counters, "", config, item)
-		require.Error(t, err)
-		require.Nil(t, collector)
-	})
-
-	t.Run("Should Not Return Error When DCGM_EXP_CLOCK_EVENTS_COUNT Present More Than Once", func(t *testing.T) {
-		records := [][]string{
-			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
-			{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
-			{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
-			{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
-		}
-		cc, err := extractCounters(records, config)
-		require.NoError(t, err)
-		for i := range cc.DCGMCounters {
-			if cc.DCGMCounters[i].PromType == "label" {
-				cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
-			}
-		}
-		collector, err := NewClockEventsCollector(cc.ExporterCounters, "", config, item)
-		require.NoError(t, err)
-		require.NotNil(t, collector)
-	})
-}
-
-func TestClockEventsCollector_Gather_AllTheThings(t *testing.T) {
-	teardownTest := setupTest(t)
-	defer teardownTest(t)
-	runOnlyWithLiveGPUs(t)
-
-	hostname := "local-test"
-	config := &Config{
-		GPUDevices: DeviceOptions{
-			Flex:       true,
-			MajorRange: []int{-1},
-			MinorRange: []int{-1},
-		},
-		ClockEventsCountWindowSize: int(time.Duration(5) * time.Minute),
-	}
-
-	records := [][]string{
-		{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
-		{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
-	}
-
-	cc, err := extractCounters(records, config)
-	require.NoError(t, err)
-	require.Len(t, cc.ExporterCounters, 1)
-	require.Len(t, cc.DCGMCounters, 1)
-
-	for i := range cc.DCGMCounters {
-		if cc.DCGMCounters[i].PromType == "label" {
-			cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
-		}
-	}
-
-	// Create fake GPU
-	numGPUs, err := dcgm.GetAllDeviceCount()
-	require.NoError(t, err)
-
-	if numGPUs+1 > dcgm.MAX_NUM_DEVICES {
-		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
-	}
-
-	entityList := []dcgm.MigHierarchyInfo{
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-	}
-
-	gpuIDs, err := dcgm.CreateFakeEntities(entityList)
-	require.NoError(t, err)
-	require.NotEmpty(t, gpuIDs)
-
-	type clockThrottleReasonExpectation map[string]string
-	expectations := map[string]clockThrottleReasonExpectation{}
-
-	require.Len(t, gpuIDs, 1)
-	gpuID := gpuIDs[0]
-	err = dcgm.InjectFieldValue(gpuID,
-		dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		dcgm.DCGM_FT_INT64,
-		0,
-		time.Now().Add(-time.Duration(1)*time.Second).UnixMicro(),
-		int64(DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE|
-			DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING|
-			DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP|
-			DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN|
-			DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST|
-			DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|
-			DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL|
-			DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE|
-			DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS),
-	)
-
-	require.NoError(t, err)
-
-	expectations[fmt.Sprint(gpuID)] = clockThrottleReasonExpectation{
-		DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE.String():       "1",
-		DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING.String(): "1",
-		DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP.String():   "1",
-		DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN.String():    "1",
-		DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST.String():     "1",
-		DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL.String():     "1",
-		DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL.String():     "1",
-		DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE.String(): "1",
-		DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS.String(): "1",
-	}
-
-	allCounters := []Counter{
-		{
-			FieldID: dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		},
-	}
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(allCounters, config)
-
-	err = fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-
-	item, _ := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-
-	collector, err := NewClockEventsCollector(cc.ExporterCounters, hostname, config, item)
-	require.NoError(t, err)
-
-	defer func() {
-		collector.Cleanup()
-	}()
-
-	metrics, err := collector.GetMetrics()
-	require.NoError(t, err)
-	require.NotEmpty(t, metrics)
-	// We expect 1 metric: DCGM_EXP_CLOCK_EVENTS_COUNT
-	require.Len(t, metrics, 1)
-	// We get metric value with 0 index
-	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)]
-
-	metricValues = getFakeGPUMetrics(metricValues, gpuIDs)
-
-	// Expected 9 metric values, because we injected 9 reasons
-	require.Len(t, metricValues, 9)
-	for _, val := range metricValues {
-		require.Contains(t, val.Labels, "window_size_in_ms")
-		require.Equal(t, fmt.Sprint(config.ClockEventsCountWindowSize), val.Labels["window_size_in_ms"])
-		expected, exists := expectations[val.GPU]
-		require.True(t, exists)
-		actualReason, exists := val.Labels["clock_event"]
-		require.True(t, exists)
-		expectedVal, exists := expected[actualReason]
-		require.True(t, exists)
-		require.Equal(t, expectedVal, val.Value)
-	}
-}
-
-func TestClockEventsCollector_Gather_AllTheThings_WhenNoLabels(t *testing.T) {
-	teardownTest := setupTest(t)
-	defer teardownTest(t)
-	runOnlyWithLiveGPUs(t)
-
-	hostname := "local-test"
-	config := &Config{
-		GPUDevices: DeviceOptions{
-			Flex:       true,
-			MajorRange: []int{-1},
-			MinorRange: []int{-1},
-		},
-		ClockEventsCountWindowSize: int(time.Duration(5) * time.Minute),
-	}
-
-	records := [][]string{
-		{"DCGM_EXP_CLOCK_EVENTS_COUNT", "gauge", ""},
-	}
-
-	cc, err := extractCounters(records, config)
-	require.NoError(t, err)
-	require.Len(t, cc.ExporterCounters, 1)
-	require.Len(t, cc.DCGMCounters, 0)
-
-	// Create fake GPU
-	numGPUs, err := dcgm.GetAllDeviceCount()
-	require.NoError(t, err)
-
-	if numGPUs+1 > dcgm.MAX_NUM_DEVICES {
-		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
-	}
-
-	entityList := []dcgm.MigHierarchyInfo{
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-	}
-
-	gpuIDs, err := dcgm.CreateFakeEntities(entityList)
-	require.NoError(t, err)
-	require.NotEmpty(t, gpuIDs)
-
-	gpuID := gpuIDs[0]
-	err = dcgm.InjectFieldValue(gpuID,
-		dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		dcgm.DCGM_FT_INT64,
-		0,
-		time.Now().Add(-time.Duration(1)*time.Second).UnixMicro(),
-		int64(DCGM_CLOCKS_THROTTLE_REASON_GPU_IDLE|
-			DCGM_CLOCKS_THROTTLE_REASON_CLOCKS_SETTING|
-			DCGM_CLOCKS_THROTTLE_REASON_SW_POWER_CAP|
-			DCGM_CLOCKS_THROTTLE_REASON_HW_SLOWDOWN|
-			DCGM_CLOCKS_THROTTLE_REASON_SYNC_BOOST|
-			DCGM_CLOCKS_THROTTLE_REASON_SW_THERMAL|
-			DCGM_CLOCKS_THROTTLE_REASON_HW_THERMAL|
-			DCGM_CLOCKS_THROTTLE_REASON_HW_POWER_BRAKE|
-			DCGM_CLOCKS_THROTTLE_REASON_DISPLAY_CLOCKS),
-	)
-
-	require.NoError(t, err)
-
-	allCounters := []Counter{
-		{
-			FieldID: dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		},
-	}
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(allCounters, config)
-
-	err = fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-
-	item, _ := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-
-	collector, err := NewClockEventsCollector(cc.ExporterCounters, hostname, config, item)
-	require.NoError(t, err)
-
-	defer func() {
-		collector.Cleanup()
-	}()
-
-	metrics, err := collector.GetMetrics()
-	require.NoError(t, err)
-	require.NotEmpty(t, metrics)
-	// We expect 1 metric: DCGM_EXP_CLOCK_EVENTS_COUNT
-	require.Len(t, metrics, 1)
-	// We get metric value with 0 index
-	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)]
-	// Exclude the real GPU from the test
-	metricValues = getFakeGPUMetrics(metricValues, gpuIDs)
-	// Expected 9 metric values, because we injected 9 reasons
-	require.Len(t, metricValues, 9)
-}
-
-func getFakeGPUMetrics(metricValues []Metric, gpuIDs []uint) []Metric {
-	for i := 0; i < len(metricValues); i++ {
-		gpuID, err := strconv.ParseUint(metricValues[i].GPU, 10, 64)
-		if err == nil {
-			if !slices.Contains(gpuIDs, uint(gpuID)) {
-				metricValues = append(metricValues[:i], metricValues[i+1:]...)
-			}
-		}
-	}
-	return metricValues
-}
diff --git a/pkg/dcgmexporter/dcgm.go b/pkg/dcgmexporter/dcgm.go
deleted file mode 100644
index e348bf96..00000000
--- a/pkg/dcgmexporter/dcgm.go
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"math/rand"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
-)
-
-func NewGroup() (dcgm.GroupHandle, func(), error) {
-	group, err := dcgm.NewDefaultGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
-	if err != nil {
-		return dcgm.GroupHandle{}, func() {}, err
-	}
-
-	return group, func() {
-		err := dcgm.DestroyGroup(group)
-		if err != nil {
-			logrus.WithError(err).Warn("Cannot destroy field group.")
-		}
-	}, nil
-}
-
-func NewDeviceFields(counters []Counter, entityType dcgm.Field_Entity_Group) []dcgm.Short {
-	var deviceFields []dcgm.Short
-	for _, f := range counters {
-		meta := dcgm.FieldGetById(f.FieldID)
-
-		if meta.EntityLevel == entityType || meta.EntityLevel == dcgm.FE_NONE {
-			deviceFields = append(deviceFields, f.FieldID)
-		} else if entityType == dcgm.FE_GPU && (meta.EntityLevel == dcgm.FE_GPU_CI || meta.EntityLevel == dcgm.FE_GPU_I || meta.EntityLevel == dcgm.FE_VGPU) {
-			deviceFields = append(deviceFields, f.FieldID)
-		} else if entityType == dcgm.FE_CPU && (meta.EntityLevel == dcgm.FE_CPU || meta.EntityLevel == dcgm.FE_CPU_CORE) {
-			deviceFields = append(deviceFields, f.FieldID)
-		}
-	}
-
-	return deviceFields
-}
-
-func NewFieldGroup(deviceFields []dcgm.Short) (dcgm.FieldHandle, func(), error) {
-	name := fmt.Sprintf("gpu-collector-fieldgroup-%d", rand.Uint64())
-	fieldGroup, err := dcgm.FieldGroupCreate(name, deviceFields)
-	if err != nil {
-		return dcgm.FieldHandle{}, func() {}, err
-	}
-
-	return fieldGroup, func() {
-		err := dcgm.FieldGroupDestroy(fieldGroup)
-		if err != nil {
-			logrus.WithError(err).Warn("Cannot destroy field group.")
-		}
-	}, nil
-}
-
-func WatchFieldGroup(
-	group dcgm.GroupHandle, field dcgm.FieldHandle, updateFreq int64, maxKeepAge float64, maxKeepSamples int32,
-) error {
-	err := dcgm.WatchFieldsWithGroupEx(field, group, updateFreq, maxKeepAge, maxKeepSamples)
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func SetupDcgmFieldsWatch(deviceFields []dcgm.Short, sysInfo SystemInfo, collectIntervalUsec int64) ([]dcgm.GroupHandle, dcgm.FieldHandle, []func(), error) {
-	var err error
-	var cleanups []func()
-	var cleanup func()
-	var groups []dcgm.GroupHandle
-	var fieldGroup dcgm.FieldHandle
-
-	if sysInfo.InfoType == dcgm.FE_LINK {
-		/* one group per-nvswitch is created for nvlinks */
-		groups, cleanups, err = CreateLinkGroupsFromSystemInfo(sysInfo)
-	} else if sysInfo.InfoType == dcgm.FE_CPU_CORE {
-		/* one group per-CPU is created for cpu cores */
-		groups, cleanups, err = CreateCoreGroupsFromSystemInfo(sysInfo)
-	} else {
-		group, cleanup, err := CreateGroupFromSystemInfo(sysInfo)
-		if err == nil {
-			groups = append(groups, group)
-			cleanups = append(cleanups, cleanup)
-		}
-	}
-
-	if err != nil {
-		goto fail
-	}
-
-	for _, gr := range groups {
-		fieldGroup, cleanup, err = NewFieldGroup(deviceFields)
-		if err != nil {
-			goto fail
-		}
-
-		cleanups = append(cleanups, cleanup)
-
-		err = WatchFieldGroup(gr, fieldGroup, collectIntervalUsec, 0.0, 1)
-		if err != nil {
-			goto fail
-		}
-	}
-
-	return groups, fieldGroup, cleanups, nil
-
-fail:
-	for _, f := range cleanups {
-		f()
-	}
-
-	return nil, dcgm.FieldHandle{}, nil, err
-}
diff --git a/pkg/dcgmexporter/expcollector.go b/pkg/dcgmexporter/expcollector.go
deleted file mode 100644
index 68778db8..00000000
--- a/pkg/dcgmexporter/expcollector.go
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"io"
-	"maps"
-	"sync"
-	"sync/atomic"
-	"text/template"
-	"time"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
-)
-
-var expMetricsFormat = `
-
-{{- range $counter, $metrics := . -}}
-# HELP {{ $counter.FieldName }} {{ $counter.Help }}
-# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
-{{- range $metric := $metrics }}
-{{ $counter.FieldName }}{gpu="{{ $metric.GPU }}",{{ $metric.UUID }}="{{ $metric.GPUUUID }}",pci_bus_id="{{ $metric.GPUPCIBusID }}",device="{{ $metric.GPUDevice }}",modelName="{{ $metric.GPUModelName }}"{{if $metric.MigProfile}},GPU_I_PROFILE="{{ $metric.MigProfile }}",GPU_I_ID="{{ $metric.GPUInstanceID }}"{{end}}{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
-
-{{- range $k, $v := $metric.Labels -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-{{- range $k, $v := $metric.Attributes -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-
-} {{ $metric.Value -}}
-{{- end }}
-{{ end }}`
-
-// Collector interface
-type Collector interface {
-	GetMetrics() (MetricsByCounter, error)
-	Cleanup()
-}
-
-var getExpMetricTemplate = sync.OnceValue(func() *template.Template {
-	return template.Must(template.New("expMetrics").Parse(expMetricsFormat))
-})
-
-func encodeExpMetrics(w io.Writer, metrics MetricsByCounter) error {
-	tmpl := getExpMetricTemplate()
-	return tmpl.Execute(w, metrics)
-}
-
-var expCollectorFieldGroupIdx atomic.Uint32
-
-type expCollector struct {
-	sysInfo             SystemInfo                     // Hardware system info
-	counter             Counter                        // Counter that collector
-	hostname            string                         // Hostname
-	config              *Config                        // Configuration settings
-	labelDeviceFields   []dcgm.Short                   // Fields used for labels
-	counterDeviceFields []dcgm.Short                   // Fields used for the counter
-	labelsCounters      []Counter                      // Counters used for labels
-	cleanups            []func()                       // Cleanup functions
-	fieldValueParser    func(val int64) []int64        // Function to parse the field value
-	labelFiller         func(map[string]string, int64) // Function to fill labels
-	windowSize          int                            // Window size
-	transformations     []Transform                    // Transformers for metric postprocessing
-	deviceGroups        []dcgm.GroupHandle
-	deviceFieldGroup    dcgm.FieldHandle
-}
-
-func (c *expCollector) getMetrics() (MetricsByCounter, error) {
-	err := dcgm.UpdateAllFields()
-	if err != nil {
-		return nil, err
-	}
-
-	mapEntityIDToValues := map[uint]map[int64]int{}
-
-	window := time.Now().Add(-time.Duration(c.windowSize) * time.Millisecond)
-
-	for _, group := range c.deviceGroups {
-		values, _, err := dcgm.GetValuesSince(group, c.deviceFieldGroup, window)
-		if err != nil {
-			return nil, err
-		}
-		for _, val := range values {
-			if val.Status == 0 {
-				if _, exists := mapEntityIDToValues[val.EntityId]; !exists {
-					mapEntityIDToValues[val.EntityId] = map[int64]int{}
-				}
-				for _, v := range c.fieldValueParser(val.Int64()) {
-					mapEntityIDToValues[val.EntityId][v] += 1
-				}
-			}
-		}
-	}
-
-	labels := map[string]string{}
-	labels[windowSizeInMSLabel] = fmt.Sprint(c.windowSize)
-
-	monitoringInfo := GetMonitoredEntities(c.sysInfo)
-	metrics := make(MetricsByCounter)
-	useOld := c.config.UseOldNamespace
-	uuid := "UUID"
-	if useOld {
-		uuid = "uuid"
-	}
-	for _, mi := range monitoringInfo {
-		if len(c.labelsCounters) > 0 {
-			err := c.getLabelsFromCounters(mi, labels)
-			if err != nil {
-				return nil, err
-			}
-		}
-		entityValues, exists := mapEntityIDToValues[mi.DeviceInfo.GPU]
-		if exists {
-			for entityValue, val := range entityValues {
-
-				metricValueLabels := maps.Clone(labels)
-				c.labelFiller(metricValueLabels, entityValue)
-
-				m := c.createMetric(metricValueLabels, mi, uuid, val)
-
-				metrics[c.counter] = append(metrics[c.counter], m)
-			}
-		} else {
-			// Create metric with Zero value if group (mapEntityIDToValues) is empty
-			m := c.createMetric(labels, mi, uuid, 0)
-			metrics[c.counter] = append(metrics[c.counter], m)
-		}
-	}
-
-	for _, transform := range c.transformations {
-		err := transform.Process(metrics, c.sysInfo)
-		if err != nil {
-			return nil, fmt.Errorf("failed to transform metrics for transform '%s'; err: %v", transform.Name(), err)
-		}
-	}
-
-	return metrics, nil
-}
-
-func (c *expCollector) createMetric(labels map[string]string, mi MonitoringInfo, uuid string, val int) Metric {
-	gpuModel := getGPUModel(mi.DeviceInfo, c.config.ReplaceBlanksInModelName)
-
-	m := Metric{
-		Counter:      c.counter,
-		Value:        fmt.Sprint(val),
-		UUID:         uuid,
-		GPU:          fmt.Sprintf("%d", mi.DeviceInfo.GPU),
-		GPUUUID:      mi.DeviceInfo.UUID,
-		GPUDevice:    fmt.Sprintf("nvidia%d", mi.DeviceInfo.GPU),
-		GPUModelName: gpuModel,
-		GPUPCIBusID:  mi.DeviceInfo.PCI.BusID,
-		Hostname:     c.hostname,
-
-		Labels:     labels,
-		Attributes: map[string]string{},
-	}
-	if mi.InstanceInfo != nil {
-		m.MigProfile = mi.InstanceInfo.ProfileName
-		m.GPUInstanceID = fmt.Sprintf("%d", mi.InstanceInfo.Info.NvmlInstanceId)
-	} else {
-		m.MigProfile = ""
-		m.GPUInstanceID = ""
-	}
-	return m
-}
-
-func (c *expCollector) getLabelsFromCounters(mi MonitoringInfo, labels map[string]string) error {
-	latestValues, err := dcgm.EntityGetLatestValues(mi.Entity.EntityGroupId, mi.Entity.EntityId, c.labelDeviceFields)
-	if err != nil {
-		return err
-	}
-	// Extract Labels
-	for _, val := range latestValues {
-		v := ToString(val)
-		// Filter out counters with no value and ignored fields for this entity
-		if v == SkipDCGMValue {
-			continue
-		}
-
-		counter, err := FindCounterField(c.labelsCounters, val.FieldId)
-		if err != nil {
-			continue
-		}
-
-		if counter.PromType == "label" {
-			labels[counter.FieldName] = v
-			continue
-		}
-	}
-	return nil
-}
-
-func (c *expCollector) Cleanup() {
-	for _, cleanup := range c.cleanups {
-		cleanup()
-	}
-}
-
-// newExpCollector is a constructor for the expCollector
-func newExpCollector(
-	counters []Counter,
-	hostname string,
-	counterDeviceFields []dcgm.Short,
-	config *Config,
-	fieldEntityGroupTypeSystemInfo FieldEntityGroupTypeSystemInfoItem,
-) expCollector {
-	var labelsCounters []Counter
-	for i := 0; i < len(counters); i++ {
-		if counters[i].PromType == "label" {
-			labelsCounters = append(labelsCounters, counters[i])
-		}
-	}
-
-	labelDeviceFields := NewDeviceFields(labelsCounters, dcgm.FE_GPU)
-
-	transformations := getTransformations(config)
-
-	collector := expCollector{
-		hostname:            hostname,
-		config:              config,
-		labelDeviceFields:   labelDeviceFields,
-		labelsCounters:      labelsCounters,
-		counterDeviceFields: counterDeviceFields,
-		fieldValueParser: func(val int64) []int64 {
-			return []int64{val}
-		},
-		labelFiller:     func(metricValueLabels map[string]string, entityValue int64) {},
-		transformations: transformations,
-	}
-
-	collector.sysInfo = fieldEntityGroupTypeSystemInfo.SystemInfo
-
-	var err error
-
-	collector.deviceGroups, collector.deviceFieldGroup, collector.cleanups, err = SetupDcgmFieldsWatch(collector.counterDeviceFields,
-		collector.sysInfo,
-		int64(config.CollectInterval)*1000)
-	if err != nil {
-		logrus.Fatal("Failed to watch metrics: ", err)
-	}
-
-	return collector
-}
diff --git a/pkg/dcgmexporter/field_entity_group_system_info.go b/pkg/dcgmexporter/field_entity_group_system_info.go
deleted file mode 100644
index e6ce4b53..00000000
--- a/pkg/dcgmexporter/field_entity_group_system_info.go
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-)
-
-// FieldEntityGroupTypeToMonitor supported entity group types
-var FieldEntityGroupTypeToMonitor = []dcgm.Field_Entity_Group{
-	dcgm.FE_GPU,
-	dcgm.FE_SWITCH,
-	dcgm.FE_LINK,
-	dcgm.FE_CPU,
-	dcgm.FE_CPU_CORE,
-}
-
-type FieldEntityGroupTypeSystemInfoItem struct {
-	SystemInfo   SystemInfo
-	DeviceFields []dcgm.Short
-}
-
-func (f FieldEntityGroupTypeSystemInfoItem) isEmpty() bool {
-	return len(f.DeviceFields) == 0
-}
-
-// FieldEntityGroupTypeSystemInfo represents a mapping between FieldEntityGroupType and SystemInfo
-type FieldEntityGroupTypeSystemInfo struct {
-	items         map[dcgm.Field_Entity_Group]FieldEntityGroupTypeSystemInfoItem
-	counters      []Counter
-	gpuDevices    DeviceOptions
-	switchDevices DeviceOptions
-	cpuDevices    DeviceOptions
-	useFakeGPUs   bool
-}
-
-// NewEntityGroupTypeSystemInfo creates a new instance of the FieldEntityGroupTypeSystemInfo
-func NewEntityGroupTypeSystemInfo(c []Counter, config *Config) *FieldEntityGroupTypeSystemInfo {
-	return &FieldEntityGroupTypeSystemInfo{
-		items:         make(map[dcgm.Field_Entity_Group]FieldEntityGroupTypeSystemInfoItem),
-		counters:      c,
-		gpuDevices:    config.GPUDevices,
-		switchDevices: config.SwitchDevices,
-		cpuDevices:    config.CPUDevices,
-		useFakeGPUs:   config.UseFakeGPUs,
-	}
-}
-
-// Load loads SystemInfo for a provided Field_Entity_Group
-func (e *FieldEntityGroupTypeSystemInfo) Load(entityType dcgm.Field_Entity_Group) error {
-	var deviceFields = NewDeviceFields(e.counters, entityType)
-
-	if !ShouldMonitorDeviceType(deviceFields, entityType) {
-		return fmt.Errorf("no fields to watch for device type: %d", entityType)
-	}
-
-	sysInfo, err := GetSystemInfo(&Config{
-		GPUDevices:    e.gpuDevices,
-		SwitchDevices: e.switchDevices,
-		CPUDevices:    e.cpuDevices,
-		UseFakeGPUs:   e.useFakeGPUs,
-	}, entityType)
-	if err != nil {
-		return err
-	}
-
-	e.items[entityType] = FieldEntityGroupTypeSystemInfoItem{
-		SystemInfo:   *sysInfo,
-		DeviceFields: deviceFields,
-	}
-
-	return err
-}
-
-// Get returns FieldEntityGroupTypeSystemInfoItem, bool by dcgm.Field_Entity_Group
-func (e *FieldEntityGroupTypeSystemInfo) Get(key dcgm.Field_Entity_Group) (FieldEntityGroupTypeSystemInfoItem, bool) {
-	val, exists := e.items[key]
-	return val, exists
-}
diff --git a/pkg/dcgmexporter/gpu_collector_test.go b/pkg/dcgmexporter/gpu_collector_test.go
deleted file mode 100644
index 2f38d442..00000000
--- a/pkg/dcgmexporter/gpu_collector_test.go
+++ /dev/null
@@ -1,486 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"reflect"
-	"testing"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-var sampleCounters = []Counter{
-	{dcgm.DCGM_FI_DEV_GPU_TEMP, "DCGM_FI_DEV_GPU_TEMP", "gauge", "Temperature Help info"},
-	{dcgm.DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION, "DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION", "gauge", "Energy help info"},
-	{dcgm.DCGM_FI_DEV_POWER_USAGE, "DCGM_FI_DEV_POWER_USAGE", "gauge", "Power help info"},
-	{dcgm.DCGM_FI_DRIVER_VERSION, "DCGM_FI_DRIVER_VERSION", "label", "Driver version"},
-	/* test that switch and link metrics are filtered out automatically when devices are not detected */
-	{
-		dcgm.DCGM_FI_DEV_NVSWITCH_TEMPERATURE_CURRENT,
-		"DCGM_FI_DEV_NVSWITCH_TEMPERATURE_CURRENT",
-		"gauge",
-		"switch temperature",
-	},
-	{
-		dcgm.DCGM_FI_DEV_NVSWITCH_LINK_FLIT_ERRORS,
-		"DCGM_FI_DEV_NVSWITCH_LINK_FLIT_ERRORS",
-		"gauge",
-		"per-link flit errors",
-	},
-	/* test that vgpu metrics are not filtered out */
-	{dcgm.DCGM_FI_DEV_VGPU_LICENSE_STATUS, "DCGM_FI_DEV_VGPU_LICENSE_STATUS", "gauge", "vgpu license status"},
-	/* test that cpu and cpu core metrics are filtered out automatically when devices are not detected */
-	{dcgm.DCGM_FI_DEV_CPU_UTIL_TOTAL, "DCGM_FI_DEV_CPU_UTIL_TOTAL", "gauge", "Total CPU utilization"},
-}
-
-var expectedMetrics = map[string]bool{
-	"DCGM_FI_DEV_GPU_TEMP":                 true,
-	"DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION": true,
-	"DCGM_FI_DEV_POWER_USAGE":              true,
-	"DCGM_FI_DEV_VGPU_LICENSE_STATUS":      true,
-}
-
-var expectedCPUMetrics = map[string]bool{
-	"DCGM_FI_DEV_CPU_UTIL_TOTAL": true,
-}
-
-func TestDCGMCollector(t *testing.T) {
-	cleanup, err := dcgm.Init(dcgm.Embedded)
-	require.NoError(t, err)
-	defer cleanup()
-
-	_, cleanup = testDCGMGPUCollector(t, sampleCounters)
-	cleanup()
-
-	_, cleanup = testDCGMCPUCollector(t, sampleCounters)
-	cleanup()
-}
-
-func testDCGMGPUCollector(t *testing.T, counters []Counter) (*DCGMCollector, func()) {
-	dOpt := DeviceOptions{
-		Flex:       true,
-		MajorRange: []int{-1},
-		MinorRange: []int{-1},
-	}
-	config := Config{
-		GPUDevices:      dOpt,
-		NoHostname:      false,
-		UseOldNamespace: false,
-		UseFakeGPUs:     false,
-		CollectInterval: 1,
-	}
-
-	dcgmGetAllDeviceCount = func() (uint, error) {
-		return 1, nil
-	}
-
-	dcgmGetDeviceInfo = func(gpuId uint) (dcgm.Device, error) {
-		dev := dcgm.Device{
-			GPU:  0,
-			UUID: fmt.Sprintf("fake%d", gpuId),
-			PCI: dcgm.PCIInfo{
-				BusID: "00000000:0000:0000.0",
-			},
-		}
-
-		return dev, nil
-	}
-
-	dcgmGetGpuInstanceHierarchy = func() (dcgm.MigHierarchy_v2, error) {
-		hierarchy := dcgm.MigHierarchy_v2{
-			Count: 0,
-		}
-		return hierarchy, nil
-	}
-
-	dcgmAddEntityToGroup = func(
-		groupId dcgm.GroupHandle, entityGroupId dcgm.Field_Entity_Group, entityId uint,
-	) (err error) {
-		return nil
-	}
-
-	dcgmGetCpuHierarchy = func() (dcgm.CpuHierarchy_v1, error) {
-		CPU := dcgm.CpuHierarchyCpu_v1{
-			CpuId:      0,
-			OwnedCores: []uint64{0},
-		}
-		hierarchy := dcgm.CpuHierarchy_v1{
-			Version: 0,
-			NumCpus: 1,
-			Cpus:    [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{CPU},
-		}
-
-		return hierarchy, nil
-	}
-
-	defer func() {
-		dcgmGetAllDeviceCount = dcgm.GetAllDeviceCount
-		dcgmGetDeviceInfo = dcgm.GetDeviceInfo
-		dcgmGetGpuInstanceHierarchy = dcgm.GetGpuInstanceHierarchy
-		dcgmAddEntityToGroup = dcgm.AddEntityToGroup
-	}()
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(counters, &config)
-
-	err := fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-
-	gpuItem, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-	require.True(t, exists)
-
-	g, cleanup, err := NewDCGMCollector(counters, "", &config, gpuItem)
-	require.NoError(t, err)
-
-	/* Test for error when no switches are available to monitor. */
-	switchItem, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_SWITCH)
-	assert.False(t, exists, "dcgm.FE_SWITCH should not be available")
-
-	_, _, err = NewDCGMCollector(counters, "", &config, switchItem)
-	require.Error(t, err, "NewDCGMCollector should return error")
-
-	/* Test for error when no cpus are available to monitor. */
-	cpuItem, exist := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_CPU)
-	require.False(t, exist, "dcgm.FE_CPU should not be available")
-
-	_, _, err = NewDCGMCollector(counters, "", &config, cpuItem)
-	require.Error(t, err, "NewDCGMCollector should return error")
-
-	out, err := g.GetMetrics()
-	require.NoError(t, err)
-	require.Greater(t, len(out), 0, "Check that you have a GPU on this node")
-	require.Len(t, out, len(expectedMetrics))
-
-	seenMetrics := map[string]bool{}
-	for _, metrics := range out {
-		for _, metric := range metrics {
-			seenMetrics[metric.Counter.FieldName] = true
-			require.NotEmpty(t, metric.GPU)
-			require.NotEmpty(t, metric.GPUUUID)
-			require.NotEmpty(t, metric.GPUPCIBusID)
-			require.NotEmpty(t, metric.Value)
-			require.NotEqual(t, metric.Value, FailedToConvert)
-		}
-	}
-	require.Equal(t, seenMetrics, expectedMetrics)
-
-	return g, cleanup
-}
-
-func testDCGMCPUCollector(t *testing.T, counters []Counter) (*DCGMCollector, func()) {
-	dOpt := DeviceOptions{true, []int{-1}, []int{-1}}
-	config := Config{
-		CPUDevices:      dOpt,
-		NoHostname:      false,
-		UseOldNamespace: false,
-		UseFakeGPUs:     false,
-	}
-
-	dcgmGetAllDeviceCount = func() (uint, error) {
-		return 0, nil
-	}
-
-	dcgmGetDeviceInfo = func(gpuId uint) (dcgm.Device, error) {
-		dev := dcgm.Device{
-			GPU:           0,
-			DCGMSupported: "No",
-			UUID:          fmt.Sprintf("fake%d", gpuId),
-			PCI: dcgm.PCIInfo{
-				BusID: "00000000:0000:0000.0",
-			},
-		}
-
-		return dev, nil
-	}
-
-	dcgmGetGpuInstanceHierarchy = func() (dcgm.MigHierarchy_v2, error) {
-		hierarchy := dcgm.MigHierarchy_v2{
-			Count: 0,
-		}
-		return hierarchy, nil
-	}
-
-	dcgmAddEntityToGroup = func(
-		groupId dcgm.GroupHandle, entityGroupId dcgm.Field_Entity_Group, entityId uint,
-	) (err error) {
-		return nil
-	}
-
-	dcgmGetCpuHierarchy = func() (dcgm.CpuHierarchy_v1, error) {
-		CPU := dcgm.CpuHierarchyCpu_v1{
-			CpuId:      0,
-			OwnedCores: []uint64{0, 18446744073709551360, 65535},
-		}
-		hierarchy := dcgm.CpuHierarchy_v1{
-			Version: 0,
-			NumCpus: 1,
-			Cpus:    [dcgm.MAX_NUM_CPUS]dcgm.CpuHierarchyCpu_v1{CPU},
-		}
-
-		return hierarchy, nil
-	}
-
-	defer func() {
-		dcgmGetAllDeviceCount = dcgm.GetAllDeviceCount
-		dcgmGetDeviceInfo = dcgm.GetDeviceInfo
-		dcgmGetGpuInstanceHierarchy = dcgm.GetGpuInstanceHierarchy
-		dcgmAddEntityToGroup = dcgm.AddEntityToGroup
-	}()
-
-	/* Test that only cpu metrics are collected for cpu entities. */
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(counters, &config)
-	err := fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_CPU)
-	require.NoError(t, err)
-
-	err = fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_CPU)
-	require.NoError(t, err)
-
-	cpuItem, cpuItemExist := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_CPU)
-	require.True(t, cpuItemExist)
-
-	c, cleanup, err := NewDCGMCollector(counters, "", &config, cpuItem)
-	require.NoError(t, err)
-
-	out, err := c.GetMetrics()
-	require.NoError(t, err)
-	require.Greater(t, len(out), 0, "Check that the fake CPU has been registered")
-
-	for _, dev := range out {
-		seenMetrics := map[string]bool{}
-		for _, metric := range dev {
-			seenMetrics[metric.Counter.FieldName] = true
-			require.NotEmpty(t, metric.GPU)
-			require.Empty(t, metric.GPUUUID)
-			require.Empty(t, metric.GPUPCIBusID)
-			require.NotEmpty(t, metric.Value)
-			require.NotEqual(t, metric.Value, FailedToConvert)
-		}
-		require.Equal(t, seenMetrics, expectedCPUMetrics)
-	}
-
-	return c, cleanup
-}
-
-func TestToMetric(t *testing.T) {
-	fieldValue := [4096]byte{}
-	fieldValue[0] = 42
-	values := []dcgm.FieldValue_v1{
-		{
-			FieldId:   150,
-			FieldType: dcgm.DCGM_FT_INT64,
-			Value:     fieldValue,
-		},
-	}
-
-	c := []Counter{
-		{
-			FieldID:   150,
-			FieldName: "DCGM_FI_DEV_GPU_TEMP",
-			PromType:  "gauge",
-			Help:      "Temperature Help info",
-		},
-	}
-
-	d := dcgm.Device{
-		UUID: "fake0",
-		Identifiers: dcgm.DeviceIdentifiers{
-			Model: "NVIDIA T400 4GB",
-		},
-		PCI: dcgm.PCIInfo{
-			BusID: "00000000:0000:0000.0",
-		},
-	}
-
-	var instanceInfo *GPUInstanceInfo = nil
-
-	type testCase struct {
-		replaceBlanksInModelName bool
-		expectedGPUModelName     string
-	}
-
-	testCases := []testCase{
-		{
-			replaceBlanksInModelName: true,
-			expectedGPUModelName:     "NVIDIA-T400-4GB",
-		},
-		{
-			replaceBlanksInModelName: false,
-			expectedGPUModelName:     "NVIDIA T400 4GB",
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("When replaceBlanksInModelName is %t", tc.replaceBlanksInModelName), func(t *testing.T) {
-			metrics := make(map[Counter][]Metric)
-			ToMetric(metrics, values, c, d, instanceInfo, false, "", tc.replaceBlanksInModelName)
-			assert.Len(t, metrics, 1)
-			// We get metric value with 0 index
-			metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)]
-			assert.Equal(t, "42", metricValues[0].Value)
-			assert.Equal(t, tc.expectedGPUModelName, metricValues[0].GPUModelName)
-
-			assert.Equal(t, d.UUID, metricValues[0].GPUUUID)
-			assert.Equal(t, d.PCI.BusID, metricValues[0].GPUPCIBusID)
-		})
-	}
-}
-
-func TestToMetricWhenDCGM_FI_DEV_XID_ERRORSField(t *testing.T) {
-	c := []Counter{
-		{
-			FieldID:   dcgm.DCGM_FI_DEV_XID_ERRORS,
-			FieldName: "DCGM_FI_DEV_GPU_TEMP",
-			PromType:  "gauge",
-			Help:      "Temperature Help info",
-		},
-	}
-
-	d := dcgm.Device{
-		UUID: "fake0",
-		Identifiers: dcgm.DeviceIdentifiers{
-			Model: "NVIDIA T400 4GB",
-		},
-		PCI: dcgm.PCIInfo{
-			BusID: "00000000:0000:0000.0",
-		},
-	}
-
-	var instanceInfo *GPUInstanceInfo = nil
-
-	type testCase struct {
-		name        string
-		fieldValue  byte
-		expectedErr string
-	}
-
-	testCases := []testCase{
-		{
-			name:        "when DCGM_FI_DEV_XID_ERRORS has no error",
-			fieldValue:  0,
-			expectedErr: xidErrCodeToText[0],
-		},
-		{
-			name:        "when DCGM_FI_DEV_XID_ERRORS has known value",
-			fieldValue:  42,
-			expectedErr: xidErrCodeToText[42],
-		},
-		{
-			name:        "when DCGM_FI_DEV_XID_ERRORS has unknown value",
-			fieldValue:  255,
-			expectedErr: unknownErr,
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			fieldValue := [4096]byte{}
-			fieldValue[0] = tc.fieldValue
-			values := []dcgm.FieldValue_v1{
-				{
-					FieldId:   dcgm.DCGM_FI_DEV_XID_ERRORS,
-					FieldType: dcgm.DCGM_FT_INT64,
-					Value:     fieldValue,
-				},
-			}
-
-			metrics := make(map[Counter][]Metric)
-			ToMetric(metrics, values, c, d, instanceInfo, false, "", false)
-			assert.Len(t, metrics, 1)
-			// We get metric value with 0 index
-			metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)]
-			assert.Equal(t, fmt.Sprint(tc.fieldValue), metricValues[0].Value)
-			assert.Contains(t, metricValues[0].Attributes, "err_code")
-			assert.Equal(t, fmt.Sprint(tc.fieldValue), metricValues[0].Attributes["err_code"])
-			assert.Contains(t, metricValues[0].Attributes, "err_msg")
-			assert.Equal(t, tc.expectedErr, metricValues[0].Attributes["err_msg"])
-
-			assert.Equal(t, d.UUID, metricValues[0].GPUUUID)
-			assert.Equal(t, d.PCI.BusID, metricValues[0].GPUPCIBusID)
-		})
-	}
-}
-
-func TestGPUCollector_GetMetrics(t *testing.T) {
-	teardownTest := setupTest(t)
-	defer teardownTest(t)
-
-	runOnlyWithLiveGPUs(t)
-	// Create fake GPU
-	numGPUs, err := dcgm.GetAllDeviceCount()
-	require.NoError(t, err)
-
-	if numGPUs+1 > dcgm.MAX_NUM_DEVICES {
-		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
-	}
-
-	entityList := []dcgm.MigHierarchyInfo{
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-	}
-
-	gpuIDs, err := dcgm.CreateFakeEntities(entityList)
-	require.NoError(t, err)
-	require.NotEmpty(t, gpuIDs)
-
-	numGPUs, err = dcgm.GetAllDeviceCount()
-	require.NoError(t, err)
-
-	counters := []Counter{
-		{
-			FieldID:   100,
-			FieldName: "DCGM_FI_DEV_SM_CLOCK",
-			PromType:  "gauge",
-			Help:      "SM clock frequency (in MHz).",
-		},
-	}
-
-	dOpt := DeviceOptions{
-		Flex:       true,
-		MajorRange: []int{-1},
-		MinorRange: []int{-1},
-	}
-	config := Config{
-		GPUDevices:      dOpt,
-		NoHostname:      false,
-		UseOldNamespace: false,
-		UseFakeGPUs:     false,
-	}
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(counters, &config)
-	err = fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-
-	gpuItem, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-	require.True(t, exists)
-
-	c, cleanup, err := NewDCGMCollector(counters, "", &config, gpuItem)
-	require.NoError(t, err)
-
-	defer cleanup()
-
-	out, err := c.GetMetrics()
-	require.NoError(t, err)
-	require.Len(t, out, 1)
-
-	values := out[counters[0]]
-
-	require.Equal(t, numGPUs, uint(len(values)))
-}
diff --git a/pkg/dcgmexporter/kubernetes_test.go b/pkg/dcgmexporter/kubernetes_test.go
deleted file mode 100644
index 3b48efe2..00000000
--- a/pkg/dcgmexporter/kubernetes_test.go
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"context"
-	"fmt"
-	"net"
-	"reflect"
-	"testing"
-	"time"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"google.golang.org/grpc"
-	podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
-
-	"github.com/NVIDIA/dcgm-exporter/internal/pkg/nvmlprovider"
-	"github.com/NVIDIA/dcgm-exporter/internal/pkg/testutils"
-)
-
-func TestProcessPodMapper(t *testing.T) {
-	testutils.RequireLinux(t)
-
-	tmpDir, cleanup := CreateTmpDir(t)
-	defer cleanup()
-
-	cleanup, err := dcgm.Init(dcgm.Embedded)
-	require.NoError(t, err)
-	defer cleanup()
-
-	c, cleanup := testDCGMGPUCollector(t, sampleCounters)
-	defer cleanup()
-
-	out, err := c.GetMetrics()
-	require.NoError(t, err)
-
-	original := out
-
-	arbirtaryMetric := out[reflect.ValueOf(out).MapKeys()[0].Interface().(Counter)]
-
-	socketPath := tmpDir + "/kubelet.sock"
-	server := grpc.NewServer()
-	gpus := GetGPUUUIDs(arbirtaryMetric)
-	podresourcesapi.RegisterPodResourcesListerServer(server, NewPodResourcesMockServer(nvidiaResourceName, gpus))
-
-	cleanup = StartMockServer(t, server, socketPath)
-	defer cleanup()
-
-	podMapper, err := NewPodMapper(&Config{KubernetesGPUIdType: GPUUID, PodResourcesKubeletSocket: socketPath})
-	require.NoError(t, err)
-	var sysInfo SystemInfo
-	err = podMapper.Process(out, sysInfo)
-	require.NoError(t, err)
-
-	require.Len(t, out, len(original))
-	for _, metrics := range out {
-		for _, metric := range metrics {
-			require.Contains(t, metric.Attributes, podAttribute)
-			require.Contains(t, metric.Attributes, namespaceAttribute)
-			require.Contains(t, metric.Attributes, containerAttribute)
-			require.Equal(t, metric.Attributes[podAttribute], fmt.Sprintf("gpu-pod-%s", metric.GPU))
-			require.Equal(t, metric.Attributes[namespaceAttribute], "default")
-			require.Equal(t, metric.Attributes[containerAttribute], "default")
-		}
-	}
-}
-
-func GetGPUUUIDs(metrics []Metric) []string {
-	gpus := make([]string, len(metrics))
-	for i, dev := range metrics {
-		gpus[i] = dev.GPUUUID
-	}
-
-	return gpus
-}
-
-func StartMockServer(t *testing.T, server *grpc.Server, socket string) func() {
-	l, err := net.Listen("unix", socket)
-	require.NoError(t, err)
-
-	stopped := make(chan interface{})
-
-	go func() {
-		err := server.Serve(l)
-		assert.NoError(t, err)
-		close(stopped)
-	}()
-
-	return func() {
-		server.Stop()
-		select {
-		case <-stopped:
-			return
-		case <-time.After(1 * time.Second):
-			t.Fatal("Failed waiting for gRPC server to stop.")
-		}
-	}
-}
-
-func CreateTmpDir(t *testing.T) (string, func()) {
-	path, err := os.MkdirTemp("", "dcgm-exporter")
-	require.NoError(t, err)
-
-	return path, func() {
-		require.NoError(t, os.RemoveAll(path))
-	}
-}
-
-// Contains a list of UUIDs
-type PodResourcesMockServer struct {
-	resourceName string
-	gpus         []string
-}
-
-func NewPodResourcesMockServer(resourceName string, gpus []string) *PodResourcesMockServer {
-	return &PodResourcesMockServer{
-		resourceName: resourceName,
-		gpus:         gpus,
-	}
-}
-
-func (s *PodResourcesMockServer) List(
-	ctx context.Context, req *podresourcesapi.ListPodResourcesRequest,
-) (*podresourcesapi.ListPodResourcesResponse, error) {
-	podResources := make([]*podresourcesapi.PodResources, len(s.gpus))
-
-	for i, gpu := range s.gpus {
-		podResources[i] = &podresourcesapi.PodResources{
-			Name:      fmt.Sprintf("gpu-pod-%d", i),
-			Namespace: "default",
-			Containers: []*podresourcesapi.ContainerResources{
-				{
-					Name: "default",
-					Devices: []*podresourcesapi.ContainerDevices{
-						{
-							ResourceName: s.resourceName,
-							DeviceIds:    []string{gpu},
-						},
-					},
-				},
-			},
-		}
-	}
-
-	return &podresourcesapi.ListPodResourcesResponse{
-		PodResources: podResources,
-	}, nil
-}
-
-func TestProcessPodMapper_WithD_Different_Format_Of_DeviceID(t *testing.T) {
-	testutils.RequireLinux(t)
-
-	type TestCase struct {
-		KubernetesGPUIDType KubernetesGPUIDType
-		GPUInstanceID       uint
-		ResourceName        string
-		MetricGPUID         string
-		MetricGPUDevice     string
-		MetricMigProfile    string
-		PODGPUID            string
-		NvidiaResourceNames []string
-	}
-
-	testCases := []TestCase{
-		{
-			KubernetesGPUIDType: GPUUID,
-			ResourceName:        nvidiaResourceName,
-			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			PODGPUID:            "b8ea3855-276c-c9cb-b366-c6fa655957c5",
-		},
-		{
-			KubernetesGPUIDType: GPUUID,
-			ResourceName:        nvidiaResourceName,
-			MetricGPUID:         "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			MetricMigProfile:    "",
-		},
-		{
-			KubernetesGPUIDType: GPUUID,
-			ResourceName:        nvidiaResourceName,
-			GPUInstanceID:       3,
-			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			MetricMigProfile:    "",
-			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
-		},
-		{
-			KubernetesGPUIDType: DeviceName,
-			ResourceName:        nvidiaResourceName,
-			GPUInstanceID:       3,
-			MetricMigProfile:    "mig",
-			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
-		},
-		{
-			KubernetesGPUIDType: DeviceName,
-			ResourceName:        nvidiaResourceName,
-			MetricMigProfile:    "mig",
-			PODGPUID:            "nvidia0/gi0",
-		},
-		{
-			KubernetesGPUIDType: DeviceName,
-			ResourceName:        nvidiaResourceName,
-			MetricGPUDevice:     "0",
-			PODGPUID:            "0/vgpu",
-		},
-		{
-			KubernetesGPUIDType: GPUUID,
-			ResourceName:        nvidiaResourceName,
-			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			PODGPUID:            "b8ea3855-276c-c9cb-b366-c6fa655957c5::",
-		},
-		{
-			KubernetesGPUIDType: GPUUID,
-			ResourceName:        "nvidia.com/mig-1g.10gb",
-			MetricMigProfile:    "1g.10gb",
-			MetricGPUID:         "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			PODGPUID:            "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			MetricGPUDevice:     "0",
-			GPUInstanceID:       3,
-		},
-		{
-			KubernetesGPUIDType: GPUUID,
-			ResourceName:        "nvidia.com/a100",
-			MetricGPUID:         "b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			PODGPUID:            "b8ea3855-276c-c9cb-b366-c6fa655957c5",
-			NvidiaResourceNames: []string{"nvidia.com/a100"},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("when type %s, pod device id %s metric device id %s and gpu device %s",
-			tc.KubernetesGPUIDType,
-			tc.PODGPUID,
-			tc.MetricGPUID,
-			tc.MetricGPUDevice,
-		),
-			func(t *testing.T) {
-				tmpDir, cleanup := CreateTmpDir(t)
-				defer cleanup()
-				socketPath := tmpDir + "/kubelet.sock"
-				server := grpc.NewServer()
-
-				cleanup, err := dcgm.Init(dcgm.Embedded)
-				require.NoError(t, err)
-				defer cleanup()
-
-				gpus := []string{tc.PODGPUID}
-				podresourcesapi.RegisterPodResourcesListerServer(server, NewPodResourcesMockServer(tc.ResourceName, gpus))
-
-				cleanup = StartMockServer(t, server, socketPath)
-				defer cleanup()
-
-				nvmlGetMIGDeviceInfoByIDHook = func(uuid string) (*nvmlprovider.MIGDeviceInfo, error) {
-					return &nvmlprovider.MIGDeviceInfo{
-						ParentUUID:        "00000000-0000-0000-0000-000000000000",
-						GPUInstanceID:     3,
-						ComputeInstanceID: 0,
-					}, nil
-				}
-
-				defer func() {
-					nvmlGetMIGDeviceInfoByIDHook = nvmlprovider.GetMIGDeviceInfoByID
-				}()
-
-				podMapper, err := NewPodMapper(&Config{
-					KubernetesGPUIdType:       tc.KubernetesGPUIDType,
-					PodResourcesKubeletSocket: socketPath,
-					NvidiaResourceNames:       tc.NvidiaResourceNames,
-				})
-				require.NoError(t, err)
-				require.NotNil(t, podMapper)
-				metrics := MetricsByCounter{}
-				counter := Counter{
-					FieldID:   155,
-					FieldName: "DCGM_FI_DEV_POWER_USAGE",
-					PromType:  "gauge",
-				}
-
-				metrics[counter] = append(metrics[counter], Metric{
-					GPU:           "0",
-					GPUUUID:       tc.MetricGPUID,
-					GPUDevice:     tc.MetricGPUDevice,
-					GPUInstanceID: fmt.Sprint(tc.GPUInstanceID),
-					Value:         "42",
-					MigProfile:    tc.MetricMigProfile,
-					Counter: Counter{
-						FieldID:   155,
-						FieldName: "DCGM_FI_DEV_POWER_USAGE",
-						PromType:  "gauge",
-					},
-					Attributes: map[string]string{},
-				})
-
-				sysInfo := SystemInfo{
-					GPUCount: 1,
-					GPUs: [dcgm.MAX_NUM_DEVICES]GPUInfo{
-						{
-							DeviceInfo: dcgm.Device{
-								UUID: "00000000-0000-0000-0000-000000000000",
-								GPU:  0,
-							},
-							MigEnabled: true,
-						},
-					},
-				}
-				err = podMapper.Process(metrics, sysInfo)
-				require.NoError(t, err)
-				assert.Len(t, metrics, 1)
-				for _, metric := range metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)] {
-					require.Contains(t, metric.Attributes, podAttribute)
-					require.Contains(t, metric.Attributes, namespaceAttribute)
-					require.Contains(t, metric.Attributes, containerAttribute)
-
-					// TODO currently we rely on ordering and implicit expectations of the mock implementation
-					// This should be a table comparison
-					require.Equal(t, fmt.Sprintf("gpu-pod-%d", 0), metric.Attributes[podAttribute])
-					require.Equal(t, "default", metric.Attributes[namespaceAttribute])
-					require.Equal(t, "default", metric.Attributes[containerAttribute])
-				}
-			})
-	}
-}
diff --git a/pkg/dcgmexporter/pipeline.go b/pkg/dcgmexporter/pipeline.go
deleted file mode 100644
index fd4b25c0..00000000
--- a/pkg/dcgmexporter/pipeline.go
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"bytes"
-	"fmt"
-	"sync"
-	"text/template"
-	"time"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
-)
-
-// NewMetricsPipeline builds a MetricsPipeline with one DCGM collector for each
-// entity group (GPU, switch, link, CPU, CPU core) that has an entry in
-// fieldEntityGroupTypeSystemInfo.
-//
-// A collector that cannot be constructed is reported as a warning and does not
-// abort pipeline creation. The returned function invokes every non-nil cleanup
-// handed back by newDCGMCollector, in creation order. The returned error is
-// always nil.
-func NewMetricsPipeline(config *Config,
-	counters []Counter,
-	hostname string,
-	newDCGMCollector DCGMCollectorConstructor,
-	fieldEntityGroupTypeSystemInfo *FieldEntityGroupTypeSystemInfo,
-) (*MetricsPipeline, func(), error) {
-	logrus.WithField(LoggerDumpKey, fmt.Sprintf("%+v", counters)).Debug("Counters are initialized")
-
-	var cleanups []func()
-
-	// buildCollector creates the collector for a single entity group. It returns
-	// nil when the group has no system info entry.
-	buildCollector := func(group dcgm.Field_Entity_Group, groupName string) *DCGMCollector {
-		item, exists := fieldEntityGroupTypeSystemInfo.Get(group)
-		if !exists {
-			return nil
-		}
-
-		collector, cleanup, err := newDCGMCollector(counters, hostname, config, item)
-		if err != nil {
-			// Attach the cause; the bare message gave no hint why creation failed.
-			logrus.WithError(err).Warnf("Cannot create DCGMCollector for %s", groupName)
-		}
-
-		// A constructor may hand back a nil cleanup (typically on failure).
-		// Storing it would panic once the pipeline cleanup runs.
-		if cleanup != nil {
-			cleanups = append(cleanups, cleanup)
-		}
-
-		return collector
-	}
-
-	gpuCollector := buildCollector(dcgm.FE_GPU, "dcgm.FE_GPU")
-	switchCollector := buildCollector(dcgm.FE_SWITCH, "dcgm.FE_SWITCH")
-	linkCollector := buildCollector(dcgm.FE_LINK, "dcgm.FE_LINK")
-	cpuCollector := buildCollector(dcgm.FE_CPU, "dcgm.FE_CPU")
-	coreCollector := buildCollector(dcgm.FE_CPU_CORE, "dcgm.FE_CPU_CORE")
-
-	transformations := getTransformations(config)
-
-	return &MetricsPipeline{
-			config: config,
-
-			// Each template gets its own name so that template errors identify
-			// the format that actually failed.
-			migMetricsFormat:     template.Must(template.New("migMetrics").Parse(migMetricsFormat)),
-			switchMetricsFormat:  template.Must(template.New("switchMetrics").Parse(switchMetricsFormat)),
-			linkMetricsFormat:    template.Must(template.New("linkMetrics").Parse(linkMetricsFormat)),
-			cpuMetricsFormat:     template.Must(template.New("cpuMetrics").Parse(cpuMetricsFormat)),
-			cpuCoreMetricsFormat: template.Must(template.New("cpuCoreMetrics").Parse(cpuCoreMetricsFormat)),
-
-			counters:        counters,
-			gpuCollector:    gpuCollector,
-			switchCollector: switchCollector,
-			linkCollector:   linkCollector,
-			transformations: transformations,
-			cpuCollector:    cpuCollector,
-			coreCollector:   coreCollector,
-		}, func() {
-			for _, cleanup := range cleanups {
-				cleanup()
-			}
-		}, nil
-}
-
-// getTransformations assembles the metric transforms enabled by the
-// configuration: a pod mapper when c.Kubernetes is set (skipped with a warning
-// if it cannot be created) and an HPC mapper when c.HPCJobMappingDir is
-// non-empty. The result is never nil.
-func getTransformations(c *Config) []Transform {
-	enabled := []Transform{}
-
-	if c.Kubernetes {
-		if mapper, err := NewPodMapper(c); err == nil {
-			enabled = append(enabled, mapper)
-		} else {
-			logrus.Warnf("Could not enable kubernetes metric collection: %v", err)
-		}
-	}
-
-	if c.HPCJobMappingDir != "" {
-		enabled = append(enabled, newHPCMapper(c))
-	}
-
-	return enabled
-}
-
-// NewMetricsPipelineWithGPUCollector builds a pipeline around an existing GPU
-// collector, with no other collectors and no transformations. It is primarily
-// for testing; the caller is expected to clean up the collector, so the
-// returned cleanup function is a no-op.
-//
-// A nil collector yields an error instead of a nil-pointer panic.
-func NewMetricsPipelineWithGPUCollector(c *Config, collector *DCGMCollector) (*MetricsPipeline, func(), error) {
-	if collector == nil {
-		// Still return a callable cleanup so `defer cleanup()` stays safe.
-		return nil, func() {}, fmt.Errorf("collector must not be nil")
-	}
-
-	return &MetricsPipeline{
-		config: c,
-
-		// Each template gets its own name so that template errors identify
-		// the format that actually failed.
-		migMetricsFormat:     template.Must(template.New("migMetrics").Parse(migMetricsFormat)),
-		switchMetricsFormat:  template.Must(template.New("switchMetrics").Parse(switchMetricsFormat)),
-		linkMetricsFormat:    template.Must(template.New("linkMetrics").Parse(linkMetricsFormat)),
-		cpuMetricsFormat:     template.Must(template.New("cpuMetrics").Parse(cpuMetricsFormat)),
-		cpuCoreMetricsFormat: template.Must(template.New("cpuCoreMetrics").Parse(cpuCoreMetricsFormat)),
-
-		counters:     collector.Counters,
-		gpuCollector: collector,
-	}, func() {}, nil
-}
-
-// Run collects metrics every m.config.CollectInterval milliseconds and
-// publishes the formatted result on out until stop becomes readable. It calls
-// wg.Done when it returns.
-//
-// On a collection error an empty string is published so the consumer drops its
-// previous data. A successful result is dropped, with an error log, when out
-// cannot accept it immediately.
-func (m *MetricsPipeline) Run(out chan string, stop chan interface{}, wg *sync.WaitGroup) {
-	defer wg.Done()
-
-	logrus.Info("Pipeline starting")
-
-	// Note we are using a ticker so that we can stick as close as possible to the collect interval.
-	// e.g: The CollectInterval is 10s and the transformation pipeline takes 5s, the ticker will
-	// ensure we really collect metrics every 10s by firing an event 5s after the run function completes.
-	t := time.NewTicker(time.Millisecond * time.Duration(m.config.CollectInterval))
-	defer t.Stop()
-
-	for {
-		select {
-		case <-stop:
-			return
-		case <-t.C:
-			o, err := m.run()
-			if err != nil {
-				logrus.Errorf("Failed to collect metrics; err: %v", err)
-				/* flush output rather than output stale data */
-				// Keep watching stop while waiting, so a stalled consumer
-				// cannot prevent shutdown.
-				select {
-				case out <- "":
-				case <-stop:
-					return
-				}
-				continue
-			}
-
-			// Non-blocking send. Comparing len(out) with cap(out) is racy and
-			// never sends at all on an unbuffered channel (both are 0).
-			select {
-			case out <- o:
-			default:
-				logrus.Errorf("Channel is full skipping.")
-			}
-		}
-	}
-}
-
-// run performs one collection pass and returns the formatted output of every
-// configured collector, concatenated in the order GPU, switch, link, CPU,
-// CPU core.
-//
-// Any collection failure, a failing transform, or a GPU formatting failure
-// aborts the pass with an error. Formatting failures of the non-GPU collectors
-// are only logged, and that collector contributes nothing to the output.
-func (m *MetricsPipeline) run() (string, error) {
-	var (
-		metrics map[Counter][]Metric
-		output  string
-		err     error
-	)
-
-	// GPU metrics are the only ones that go through the transformations.
-	if m.gpuCollector != nil {
-		if metrics, err = m.gpuCollector.GetMetrics(); err != nil {
-			return "", fmt.Errorf("failed to collect gpu metrics; err: %w", err)
-		}
-
-		for _, transform := range m.transformations {
-			if procErr := transform.Process(metrics, m.gpuCollector.SysInfo); procErr != nil {
-				return "", fmt.Errorf("failed to transform metrics for transform '%s'; err: %w", transform.Name(), procErr)
-			}
-		}
-
-		if output, err = FormatMetrics(m.migMetricsFormat, metrics); err != nil {
-			return "", fmt.Errorf("failed to format metrics; err: %w", err)
-		}
-	}
-
-	// The remaining collectors share one shape: collect, then format only when
-	// something was collected, tolerating formatting errors.
-	stages := []struct {
-		collector  *DCGMCollector
-		format     *template.Template
-		kind       string // used in the collection error message
-		warnFormat string // logged when formatting fails
-	}{
-		{m.switchCollector, m.switchMetricsFormat, "switch", "Failed to format switch metrics with error: %v"},
-		{m.linkCollector, m.linkMetricsFormat, "link", "failed to format link metrics; err: %v"},
-		{m.cpuCollector, m.cpuMetricsFormat, "CPU", "Failed to format cpu metrics with error: %v"},
-		{m.coreCollector, m.cpuCoreMetricsFormat, "CPU core", "failed to format cpu core metrics; err: %v"},
-	}
-
-	for _, stage := range stages {
-		if stage.collector == nil {
-			continue
-		}
-
-		if metrics, err = stage.collector.GetMetrics(); err != nil {
-			return "", fmt.Errorf("failed to collect %s metrics; err: %w", stage.kind, err)
-		}
-
-		if len(metrics) == 0 {
-			continue
-		}
-
-		section, formatErr := FormatMetrics(stage.format, metrics)
-		if formatErr != nil {
-			logrus.Warnf(stage.warnFormat, formatErr)
-		}
-
-		output = output + section
-	}
-
-	return output, nil
-}
-
-/*
-* The goal here is to get to the following format:
-* ```
-* # HELP FIELD_ID HELP_MSG
-* # TYPE FIELD_ID PROM_TYPE
-* FIELD_ID{gpu="GPU_INDEX_0",uuid="GPU_UUID", attr...} VALUE
-* FIELD_ID{gpu="GPU_INDEX_N",uuid="GPU_UUID", attr...} VALUE
-* ...
-* ```
- */
-
-// migMetricsFormat is the text/template source used for GPU metrics. It ranges
-// over a map of counter to metrics, emitting a HELP and TYPE line per counter
-// and one sample line per metric. Each sample carries the gpu, UUID, pci_bus_id,
-// device and modelName labels; GPU_I_PROFILE/GPU_I_ID are added only when
-// MigProfile is set and Hostname only when it is set, followed by every entry of
-// the metric's Labels and Attributes. Values are inserted as-is: text/template
-// does not escape them.
-var migMetricsFormat = `
-{{- range $counter, $metrics := . -}}
-# HELP {{ $counter.FieldName }} {{ $counter.Help }}
-# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
-{{- range $metric := $metrics }}
-{{ $counter.FieldName }}{gpu="{{ $metric.GPU }}",{{ $metric.UUID }}="{{ $metric.GPUUUID }}",pci_bus_id="{{ $metric.GPUPCIBusID }}",device="{{ $metric.GPUDevice }}",modelName="{{ $metric.GPUModelName }}"{{if $metric.MigProfile}},GPU_I_PROFILE="{{ $metric.MigProfile }}",GPU_I_ID="{{ $metric.GPUInstanceID }}"{{end}}{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
-
-{{- range $k, $v := $metric.Labels -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-{{- range $k, $v := $metric.Attributes -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-
-} {{ $metric.Value -}}
-{{- end }}
-{{ end }}`
-
-// switchMetricsFormat is the text/template source used for switch metrics. Per
-// counter it emits a HELP and TYPE line, then one sample per metric labelled
-// with nvswitch (taken from the metric's GPU field), an optional Hostname and
-// every entry of the metric's Labels. Attributes are not rendered.
-var switchMetricsFormat = `
-{{- range $counter, $metrics := . -}}
-# HELP {{ $counter.FieldName }} {{ $counter.Help }}
-# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
-{{- range $metric := $metrics }}
-{{ $counter.FieldName }}{nvswitch="{{ $metric.GPU }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
-
-{{- range $k, $v := $metric.Labels -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-} {{ $metric.Value -}}
-{{- end }}
-{{ end }}`
-
-// linkMetricsFormat is the text/template source used for link metrics. Per
-// counter it emits a HELP and TYPE line, then one sample per metric labelled
-// with nvlink (taken from the metric's GPU field), nvswitch (taken from the
-// GPUDevice field), an optional Hostname and every entry of the metric's Labels.
-// Attributes are not rendered.
-var linkMetricsFormat = `
-{{- range $counter, $metrics := . -}}
-# HELP {{ $counter.FieldName }} {{ $counter.Help }}
-# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
-{{- range $metric := $metrics }}
-{{ $counter.FieldName }}{nvlink="{{ $metric.GPU }}",nvswitch="{{ $metric.GPUDevice }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
-
-{{- range $k, $v := $metric.Labels -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-} {{ $metric.Value -}}
-{{- end }}
-{{ end }}`
-
-// cpuMetricsFormat is the text/template source used for CPU metrics. Per
-// counter it emits a HELP and TYPE line, then one sample per metric labelled
-// with cpu (taken from the metric's GPU field), an optional Hostname and every
-// entry of the metric's Labels. Attributes are not rendered.
-var cpuMetricsFormat = `
-{{- range $counter, $metrics := . -}}
-# HELP {{ $counter.FieldName }} {{ $counter.Help }}
-# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
-{{- range $metric := $metrics }}
-{{ $counter.FieldName }}{cpu="{{ $metric.GPU }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
-
-{{- range $k, $v := $metric.Labels -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-} {{ $metric.Value -}}
-{{- end }}
-{{ end }}`
-
-// cpuCoreMetricsFormat is the text/template source used for CPU core metrics.
-// Per counter it emits a HELP and TYPE line, then one sample per metric
-// labelled with cpucore (taken from the metric's GPU field), cpu (taken from
-// the GPUDevice field), an optional Hostname and every entry of the metric's
-// Labels. Attributes are not rendered.
-var cpuCoreMetricsFormat = `
-{{- range $counter, $metrics := . -}}
-# HELP {{ $counter.FieldName }} {{ $counter.Help }}
-# TYPE {{ $counter.FieldName }} {{ $counter.PromType }}
-{{- range $metric := $metrics }}
-{{ $counter.FieldName }}{cpucore="{{ $metric.GPU }}",cpu="{{ $metric.GPUDevice }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}}
-
-{{- range $k, $v := $metric.Labels -}}
-	,{{ $k }}="{{ $v }}"
-{{- end -}}
-} {{ $metric.Value -}}
-{{- end }}
-{{ end }}`
-
-// FormatMetrics Template is passed here so that it isn't recompiled at each iteration
-func FormatMetrics(t *template.Template, groupedMetrics MetricsByCounter) (string, error) {
-	// Format metrics
-	var res bytes.Buffer
-	if err := t.Execute(&res, groupedMetrics); err != nil {
-		return "", err
-	}
-
-	return res.String(), nil
-}
diff --git a/pkg/dcgmexporter/pipeline_test.go b/pkg/dcgmexporter/pipeline_test.go
deleted file mode 100644
index f9385eda..00000000
--- a/pkg/dcgmexporter/pipeline_test.go
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"errors"
-	"os"
-	"testing"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-// TestRun drives a single pipeline pass against an embedded DCGM instance and
-// checks that it produces some output.
-func TestRun(t *testing.T) {
-	dcgmCleanup, err := dcgm.Init(dcgm.Embedded)
-	require.NoError(t, err)
-	defer dcgmCleanup()
-
-	collector, collectorCleanup := testDCGMGPUCollector(t, sampleCounters)
-	defer collectorCleanup()
-
-	pipeline, pipelineCleanup, err := NewMetricsPipelineWithGPUCollector(&Config{}, collector)
-	require.NoError(t, err)
-	defer pipelineCleanup()
-
-	result, err := pipeline.run()
-	require.NoError(t, err)
-	require.NotEmpty(t, result)
-
-	// Meaningful assertions on the text would need a full parser, so the
-	// output is logged for inspection instead. It is checked more thoroughly
-	// by the e2e tests (e.g. by running prometheus).
-	t.Logf("Pipeline result is:\n%v", result)
-}
-
-// testNewDCGMCollector returns a collector constructor for tests. The
-// constructor reports a test error (and returns a nil collector) when asked for
-// a non-GPU entity group that is not in enabledCollector; GPU collectors are
-// always allowed. Every collector it creates increments *counter once when it
-// is cleaned up.
-func testNewDCGMCollector(t *testing.T,
-	counter *int, enabledCollector map[dcgm.Field_Entity_Group]struct{},
-) DCGMCollectorConstructor {
-	t.Helper()
-
-	return func(_ []Counter,
-		_ string,
-		_ *Config,
-		item FieldEntityGroupTypeSystemInfoItem,
-	) (*DCGMCollector, func(), error) {
-		group := item.SystemInfo.InfoType
-		_, enabled := enabledCollector[group]
-
-		// The GPU collector is always expected; anything else must be enabled.
-		if group != dcgm.FE_GPU && !enabled {
-			t.Errorf("collector '%s' should not be created", group)
-			return nil, func() {}, nil
-		}
-
-		collector := &DCGMCollector{
-			Cleanups: []func(){
-				func() { *counter++ },
-			},
-		}
-
-		return collector, func() { collector.Cleanup() }, nil
-	}
-}
-
-func TestCountPipelineCleanup(t *testing.T) {
-	f, err := os.CreateTemp("", "empty.*.csv")
-	require.NoError(t, err)
-	defer os.Remove(f.Name())
-	defer f.Close()
-
-	for _, c := range []struct {
-		name             string
-		enabledCollector map[dcgm.Field_Entity_Group]struct{}
-	}{{
-		name: "only_gpu",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_GPU: {},
-		},
-	}, {
-		name: "gpu_switch",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_SWITCH: {},
-		},
-	}, {
-		name: "gpu_link",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_LINK: {},
-		},
-	}, {
-		name: "gpu_cpu",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_CPU: {},
-		},
-	}, {
-		name: "gpu_core",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_CPU_CORE: {},
-		},
-	}, {
-		name: "gpu_switch_link",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_SWITCH: {},
-			dcgm.FE_LINK:   {},
-		},
-	}, {
-		name: "gpu_cpu_core",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_CPU:      {},
-			dcgm.FE_CPU_CORE: {},
-		},
-	}, {
-		name: "all",
-		enabledCollector: map[dcgm.Field_Entity_Group]struct{}{
-			dcgm.FE_SWITCH:   {},
-			dcgm.FE_LINK:     {},
-			dcgm.FE_CPU:      {},
-			dcgm.FE_CPU_CORE: {},
-		},
-	}} {
-		t.Run(c.name, func(t *testing.T) {
-			cleanupCounter := 0
-
-			config := &Config{
-				Kubernetes:     false,
-				ConfigMapData:  undefinedConfigMapData,
-				CollectorsFile: f.Name(),
-			}
-
-			cc, err := GetCounterSet(config)
-			if err != nil {
-				logrus.Fatal(err)
-			}
-
-			fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(cc.DCGMCounters, config)
-
-			for egt := range c.enabledCollector {
-				// We inject system info for unit test purpose
-				fieldEntityGroupTypeSystemInfo.items[egt] = FieldEntityGroupTypeSystemInfoItem{
-					SystemInfo: SystemInfo{
-						InfoType: egt,
-					},
-				}
-			}
-
-			_, cleanup, err := NewMetricsPipeline(config,
-				cc.DCGMCounters,
-				"",
-				testNewDCGMCollector(t, &cleanupCounter, c.enabledCollector),
-				fieldEntityGroupTypeSystemInfo)
-			require.NoError(t, err, "case: %s failed", c.name)
-
-			cleanup()
-			require.Equal(t, len(c.enabledCollector), cleanupCounter, "case: %s failed", c.name)
-		})
-	}
-}
-
-// TestNewMetricsPipelineWhenFieldEntityGroupTypeSystemInfoItemIsEmpty checks
-// that a pipeline whose collector constructor fails for every entity group is
-// still created and produces empty output.
-func TestNewMetricsPipelineWhenFieldEntityGroupTypeSystemInfoItemIsEmpty(t *testing.T) {
-	dcgmCleanup, err := dcgm.Init(dcgm.Embedded)
-	require.NoError(t, err)
-	defer dcgmCleanup()
-
-	emptyItems := map[dcgm.Field_Entity_Group]FieldEntityGroupTypeSystemInfoItem{
-		dcgm.FE_GPU:      {},
-		dcgm.FE_SWITCH:   {},
-		dcgm.FE_LINK:     {},
-		dcgm.FE_CPU:      {},
-		dcgm.FE_CPU_CORE: {},
-	}
-	sysInfo := &FieldEntityGroupTypeSystemInfo{items: emptyItems}
-
-	// Every constructor call must see an empty item and is made to fail.
-	failingConstructor := func(_ []Counter, _ string, _ *Config, item FieldEntityGroupTypeSystemInfoItem) (*DCGMCollector, func(), error) {
-		assert.True(t, item.isEmpty())
-		return nil, func() {}, errors.New("empty")
-	}
-
-	pipeline, pipelineCleanup, err := NewMetricsPipeline(&Config{},
-		sampleCounters,
-		"",
-		failingConstructor,
-		sysInfo,
-	)
-	require.NoError(t, err)
-	defer pipelineCleanup()
-
-	result, err := pipeline.run()
-	require.NoError(t, err)
-	require.Empty(t, result)
-}
diff --git a/pkg/dcgmexporter/registry.go b/pkg/dcgmexporter/registry.go
deleted file mode 100644
index 3b62df4c..00000000
--- a/pkg/dcgmexporter/registry.go
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"sync"
-
-	"golang.org/x/sync/errgroup"
-)
-
-// Registry holds a list of collectors and gathers their metrics into a single
-// MetricsByCounter.
-type Registry struct {
-	// collectors are the registered collectors, in registration order.
-	collectors []Collector
-	// mtx is held by Gather for the duration of a gathering pass.
-	mtx sync.RWMutex
-}
-
-// NewRegistry returns a Registry with an empty, non-nil collector list.
-func NewRegistry() *Registry {
-	registry := new(Registry)
-	registry.collectors = []Collector{}
-
-	return registry
-}
-
-// Register registers a collector with the registry.
-//
-// The collector list is modified under the registry mutex, so Register cannot
-// race with a Gather that is iterating over the same slice.
-func (r *Registry) Register(c Collector) {
-	r.mtx.Lock()
-	defer r.mtx.Unlock()
-
-	r.collectors = append(r.collectors, c)
-}
-
-// Gather gathers metrics from all registered collectors.
-//
-// Collectors are queried concurrently and their results are merged per counter.
-// If any collector fails, the first error is returned together with a nil
-// result. The registry mutex is held for the whole pass, so concurrent Gather
-// calls are serialized.
-func (r *Registry) Gather() (MetricsByCounter, error) {
-	r.mtx.Lock()
-	defer r.mtx.Unlock()
-
-	var (
-		g        errgroup.Group
-		outputMu sync.Mutex // guards output while collectors run concurrently
-	)
-
-	output := MetricsByCounter{}
-
-	for _, c := range r.collectors {
-		c := c //creates new c, see https://golang.org/doc/faq#closures_and_goroutines
-		g.Go(func() error {
-			metrics, err := c.GetMetrics()
-			if err != nil {
-				return err
-			}
-
-			// Merge under one lock. A separate load followed by a store would
-			// let two collectors reporting the same counter overwrite each
-			// other's samples.
-			outputMu.Lock()
-			defer outputMu.Unlock()
-
-			for counter, metricVals := range metrics {
-				output[counter] = append(output[counter], metricVals...)
-			}
-
-			return nil
-		})
-	}
-
-	if err := g.Wait(); err != nil {
-		return nil, err
-	}
-
-	return output, nil
-}
-
-// Cleanup releases the resources of every registered collector, in
-// registration order.
-//
-// It takes the registry mutex, so it waits for an in-flight Gather to finish
-// instead of cleaning up collectors that are still being queried.
-func (r *Registry) Cleanup() {
-	r.mtx.Lock()
-	defer r.mtx.Unlock()
-
-	for _, c := range r.collectors {
-		c.Cleanup()
-	}
-}
diff --git a/pkg/dcgmexporter/registry_test.go b/pkg/dcgmexporter/registry_test.go
deleted file mode 100644
index f7da1ccf..00000000
--- a/pkg/dcgmexporter/registry_test.go
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"errors"
-	"testing"
-
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-)
-
-// mockCollector is a testify-based test double; its methods record their calls
-// on the embedded mock.Mock and return whatever the test configured.
-type mockCollector struct {
-	mock.Mock
-}
-
-// GetMetrics records the call and returns the values configured on the mock.
-// The first configured return value must be a MetricsByCounter.
-func (m *mockCollector) GetMetrics() (MetricsByCounter, error) {
-	configured := m.Called()
-	metrics := configured.Get(0).(MetricsByCounter)
-
-	return metrics, configured.Error(1)
-}
-
-// Cleanup only records the call on the mock.
-func (m *mockCollector) Cleanup() {
-	m.Called()
-}
-
-// TestRegistry_Gather checks that Registry.Gather returns the collector's
-// metrics on success and an error with no metrics when the collector fails.
-func TestRegistry_Gather(t *testing.T) {
-	collector := new(mockCollector)
-	reg := NewRegistry()
-
-	metrics := MetricsByCounter{}
-	counterA := Counter{
-		FieldID:   155,
-		FieldName: "DCGM_FI_DEV_POWER_USAGE",
-		PromType:  "gauge",
-	}
-	metrics[counterA] = append(metrics[counterA], Metric{
-		GPU:        "0",
-		Counter:    counterA,
-		Attributes: map[string]string{},
-	})
-
-	counterB := Counter{
-		FieldName: "DCGM_FI_EXP_CLOCK_THROTTLE_REASONS_COUNT",
-		PromType:  "gauge",
-	}
-
-	metrics[counterB] = append(metrics[counterB], Metric{
-		GPU:        "0",
-		Counter:    counterB,
-		Value:      "42",
-		Attributes: map[string]string{},
-	})
-
-	type test struct {
-		name           string
-		collectorState func() *mock.Call
-		// assert receives the subtest's *testing.T so that failures are
-		// attributed to, and stop, the subtest rather than the parent test.
-		assert func(*testing.T, MetricsByCounter, error)
-	}
-
-	tests := []test{
-		{
-			name: "When collector return no errors",
-			collectorState: func() *mock.Call {
-				return collector.On("GetMetrics").Return(metrics, nil)
-			},
-			assert: func(t *testing.T, mbc MetricsByCounter, err error) {
-				require.NoError(t, err)
-				require.Len(t, mbc, 2)
-			},
-		},
-		{
-			name: "When collector return errors",
-			collectorState: func() *mock.Call {
-				return collector.On("GetMetrics").Return(MetricsByCounter{}, errors.New("Boom!"))
-			},
-			assert: func(t *testing.T, mbc MetricsByCounter, err error) {
-				require.Error(t, err)
-				require.Len(t, mbc, 0)
-			},
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			reg.collectors = nil
-			reg.Register(collector)
-			mockCall := tc.collectorState()
-			// Deferred so the expectation is removed even when an assertion
-			// aborts the subtest; otherwise it would leak into the next case.
-			defer mockCall.Unset()
-
-			got, err := reg.Gather()
-			tc.assert(t, got, err)
-		})
-	}
-}
diff --git a/pkg/dcgmexporter/server.go b/pkg/dcgmexporter/server.go
deleted file mode 100644
index 5910094d..00000000
--- a/pkg/dcgmexporter/server.go
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"context"
-	"net/http"
-	"sync"
-	"time"
-
-	"github.com/gorilla/mux"
-	"github.com/prometheus/exporter-toolkit/web"
-	"github.com/sirupsen/logrus"
-
-	"github.com/NVIDIA/dcgm-exporter/internal/pkg/logging"
-)
-
-// NewMetricsServer creates the HTTP server that exposes the exporter's
-// endpoints on c.Address:
-//
-//   - "/"        a small HTML landing page linking to the metrics,
-//   - "/health"  served by MetricsServer.Health,
-//   - "/metrics" served by MetricsServer.Metrics.
-//
-// metrics is the channel the server later reads formatted metrics from, and
-// registry supplies additional metrics at scrape time. The returned cleanup
-// function is a no-op and the returned error is always nil.
-func NewMetricsServer(c *Config, metrics chan string, registry *Registry) (*MetricsServer, func(), error) {
-	router := mux.NewRouter()
-	serverv1 := &MetricsServer{
-		server: &http.Server{
-			Addr:         c.Address,
-			Handler:      router,
-			ReadTimeout:  10 * time.Second,
-			WriteTimeout: 10 * time.Second,
-		},
-		webConfig: &web.FlagConfig{
-			WebListenAddresses: &[]string{c.Address},
-			WebSystemdSocket:   &c.WebSystemdSocket,
-			WebConfigFile:      &c.WebConfigFile,
-		},
-		metricsChan: metrics,
-		metrics:     "",
-		registry:    registry,
-	}
-
-	router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("X-Content-Type-Options", "nosniff")
-		w.WriteHeader(http.StatusOK)
-		_, err := w.Write([]byte(`<html>
-			<head><title>GPU Exporter</title></head>
-			<body>
-			<h1>GPU Exporter</h1>
-			<p><a href="./metrics">Metrics</a></p>
-			</body>
-			</html>`))
-		if err != nil {
-			// The status line is already sent, so the failure can only be
-			// logged; calling http.Error here would merely trigger a
-			// superfluous WriteHeader.
-			logrus.WithError(err).Error("Failed to write response.")
-			return
-		}
-	})
-
-	router.HandleFunc("/health", serverv1.Health)
-	router.HandleFunc("/metrics", serverv1.Metrics)
-
-	return serverv1, func() {}, nil
-}
-
-// Run starts the web server and a goroutine that stores every value received
-// on s.metricsChan, then blocks until stop is readable and shuts the HTTP
-// server down. It calls wg.Done when it returns.
-//
-// NOTE(review): stop is received from both here and in the metrics goroutine,
-// so a single send would wake only one of them; presumably callers close the
-// channel. Verify against the caller.
-//
-// Every failure path (listen, shutdown, waiting for the goroutines) ends in
-// logrus Fatal.
-func (s *MetricsServer) Run(stop chan interface{}, wg *sync.WaitGroup) {
-	defer wg.Done()
-	// Wrap the logrus logger with the LogrusAdapter
-	logger := logging.NewLogrusAdapter(logrus.StandardLogger())
-
-	// httpwg tracks the two helper goroutines started below.
-	var httpwg sync.WaitGroup
-	httpwg.Add(1)
-	go func() {
-		defer httpwg.Done()
-		logrus.Info("Starting webserver")
-		// http.ErrServerClosed is the expected result of the Shutdown call below.
-		if err := web.ListenAndServe(s.server, s.webConfig, logger); err != nil && err != http.ErrServerClosed {
-			logrus.WithError(err).Fatal("Failed to Listen and Server HTTP server.")
-		}
-	}()
-
-	httpwg.Add(1)
-	go func() {
-		defer httpwg.Done()
-		// Keep the cached metrics current until asked to stop.
-		for {
-			select {
-			case <-stop:
-				return
-			case m := <-s.metricsChan:
-				s.updateMetrics(m)
-			}
-		}
-	}()
-
-	<-stop
-	// Shutdown is given a context without a deadline.
-	if err := s.server.Shutdown(context.Background()); err != nil {
-		logrus.WithError(err).Fatal("Failed to shutdown HTTP server.")
-	}
-
-	// Give the helper goroutines up to three seconds to finish.
-	if err := WaitWithTimeout(&httpwg, 3*time.Second); err != nil {
-		logrus.WithError(err).Fatal("Failed waiting for HTTP server to shutdown.")
-	}
-}
-
-// Metrics handles the metrics endpoint. It responds with the most recently
-// stored pipeline output followed by the metrics gathered from the registry.
-//
-// The registry is queried before anything is written, so a gathering failure
-// is reported as a real 500 response. Failures that happen after the status
-// line has been sent can only be logged.
-func (s *MetricsServer) Metrics(w http.ResponseWriter, r *http.Request) {
-	metrics, err := s.registry.Gather()
-	if err != nil {
-		logrus.WithError(err).Error("Failed to gather metrics.")
-		http.Error(w, "failed to write response", http.StatusInternalServerError)
-		return
-	}
-
-	w.Header().Set("X-Content-Type-Options", "nosniff")
-	w.WriteHeader(http.StatusOK)
-
-	if _, err := w.Write([]byte(s.getMetrics())); err != nil {
-		logrus.WithError(err).Error("Failed to write response.")
-		return
-	}
-
-	if err := encodeExpMetrics(w, metrics); err != nil {
-		logrus.WithError(err).Error("Failed to write response.")
-		return
-	}
-}
-
-// Health handles the health endpoint. It answers 503 with body "KO" while no
-// metrics are stored and 200 with body "OK" otherwise.
-func (s *MetricsServer) Health(w http.ResponseWriter, r *http.Request) {
-	status, body := http.StatusOK, "OK"
-	if s.getMetrics() == "" {
-		status, body = http.StatusServiceUnavailable, "KO"
-	}
-
-	w.Header().Set("X-Content-Type-Options", "nosniff")
-	w.WriteHeader(status)
-
-	if _, err := w.Write([]byte(body)); err != nil {
-		// The status line is already sent, so the failure can only be logged;
-		// calling http.Error here would merely trigger a superfluous
-		// WriteHeader.
-		logrus.WithError(err).Error("Failed to write response.")
-	}
-}
-
-// updateMetrics replaces the stored metrics text with m while holding the
-// server's lock.
-func (s *MetricsServer) updateMetrics(m string) {
-	s.Lock()
-	defer s.Unlock()
-
-	s.metrics = m
-}
-
-// getMetrics returns the stored metrics text, read while holding the server's
-// lock.
-func (s *MetricsServer) getMetrics() string {
-	s.Lock()
-	current := s.metrics
-	s.Unlock()
-
-	return current
-}
diff --git a/pkg/dcgmexporter/system_info.go b/pkg/dcgmexporter/system_info.go
deleted file mode 100644
index 6d448828..00000000
--- a/pkg/dcgmexporter/system_info.go
+++ /dev/null
@@ -1,890 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"math/rand"
-	"slices"
-	"strings"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/bits-and-blooms/bitset"
-	"github.com/sirupsen/logrus"
-)
-
-var (
-	dcgmGetAllDeviceCount       = dcgm.GetAllDeviceCount
-	dcgmGetDeviceInfo           = dcgm.GetDeviceInfo
-	dcgmGetGpuInstanceHierarchy = dcgm.GetGpuInstanceHierarchy
-	dcgmAddEntityToGroup        = dcgm.AddEntityToGroup
-	dcgmCreateGroup             = dcgm.CreateGroup
-	dcgmGetCpuHierarchy         = dcgm.GetCpuHierarchy
-)
-
-type ComputeInstanceInfo struct {
-	InstanceInfo dcgm.MigEntityInfo
-	ProfileName  string
-	EntityId     uint
-}
-
-type GPUInstanceInfo struct {
-	Info             dcgm.MigEntityInfo
-	ProfileName      string
-	EntityId         uint
-	ComputeInstances []ComputeInstanceInfo
-}
-
-type GPUInfo struct {
-	DeviceInfo   dcgm.Device
-	GPUInstances []GPUInstanceInfo
-	MigEnabled   bool
-}
-
-type SwitchInfo struct {
-	EntityId uint
-	NvLinks  []dcgm.NvLinkStatus
-}
-
-type CPUInfo struct {
-	EntityId uint
-	Cores    []uint
-}
-
-type SystemInfo struct {
-	GPUCount uint
-	GPUs     [dcgm.MAX_NUM_DEVICES]GPUInfo
-	gOpt     DeviceOptions
-	sOpt     DeviceOptions
-	cOpt     DeviceOptions
-	InfoType dcgm.Field_Entity_Group
-	Switches []SwitchInfo
-	CPUs     []CPUInfo
-}
-
-type MonitoringInfo struct {
-	Entity       dcgm.GroupEntityPair
-	DeviceInfo   dcgm.Device
-	InstanceInfo *GPUInstanceInfo
-	ParentId     uint
-}
-
-func SetGPUInstanceProfileName(sysInfo *SystemInfo, entityId uint, profileName string) bool {
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		for j := range sysInfo.GPUs[i].GPUInstances {
-			if sysInfo.GPUs[i].GPUInstances[j].EntityId == entityId {
-				sysInfo.GPUs[i].GPUInstances[j].ProfileName = profileName
-				return true
-			}
-		}
-	}
-
-	return false
-}
-
-func SetMigProfileNames(sysInfo *SystemInfo, values []dcgm.FieldValue_v2) error {
-	var err error
-	var errFound bool
-	errStr := "cannot find match for entities:"
-
-	for _, v := range values {
-		if !SetGPUInstanceProfileName(sysInfo, v.EntityId, dcgm.Fv2_String(v)) {
-			errStr = fmt.Sprintf("%s group %d, id %d", errStr, v.EntityGroupId, v.EntityId)
-			errFound = true
-		}
-	}
-
-	if errFound {
-		err = fmt.Errorf("%s", errStr)
-	}
-
-	return err
-}
-
-func PopulateMigProfileNames(sysInfo *SystemInfo, entities []dcgm.GroupEntityPair) error {
-	if len(entities) == 0 {
-		// There are no entities to populate
-		return nil
-	}
-
-	var fields []dcgm.Short
-	fields = append(fields, dcgm.DCGM_FI_DEV_NAME)
-	flags := dcgm.DCGM_FV_FLAG_LIVE_DATA
-	values, err := dcgm.EntitiesGetLatestValues(entities, fields, flags)
-
-	if err != nil {
-		return err
-	}
-
-	return SetMigProfileNames(sysInfo, values)
-}
-
-func GPUIdExists(sysInfo *SystemInfo, gpuId int) bool {
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		if sysInfo.GPUs[i].DeviceInfo.GPU == uint(gpuId) {
-			return true
-		}
-	}
-	return false
-}
-
-func SwitchIdExists(sysInfo *SystemInfo, switchId int) bool {
-	for _, sw := range sysInfo.Switches {
-		if sw.EntityId == uint(switchId) {
-			return true
-		}
-	}
-	return false
-}
-
-func CPUIdExists(sysInfo *SystemInfo, cpuId int) bool {
-	for _, cpu := range sysInfo.CPUs {
-		if cpu.EntityId == uint(cpuId) {
-			return true
-		}
-	}
-	return false
-}
-
-func GPUInstanceIdExists(sysInfo *SystemInfo, gpuInstanceId int) bool {
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		for _, instance := range sysInfo.GPUs[i].GPUInstances {
-			if instance.EntityId == uint(gpuInstanceId) {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func LinkIdExists(sysInfo *SystemInfo, linkId int) bool {
-	for _, sw := range sysInfo.Switches {
-		for _, link := range sw.NvLinks {
-			if link.Index == uint(linkId) {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func CPUCoreIdExists(sysInfo *SystemInfo, coreId int) bool {
-	for _, cpu := range sysInfo.CPUs {
-		for _, core := range cpu.Cores {
-			if core == uint(coreId) {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func VerifyCPUDevicePresence(sysInfo *SystemInfo, sOpt DeviceOptions) error {
-	if sOpt.Flex {
-		return nil
-	}
-
-	if len(sOpt.MajorRange) > 0 && sOpt.MajorRange[0] != -1 {
-		// Verify we can find all the specified Switches
-		for _, cpuID := range sOpt.MajorRange {
-			if !SwitchIdExists(sysInfo, cpuID) {
-				return fmt.Errorf("couldn't find requested CPU ID '%d'", cpuID)
-			}
-		}
-	}
-
-	if len(sOpt.MinorRange) > 0 && sOpt.MinorRange[0] != -1 {
-		for _, coreID := range sOpt.MinorRange {
-			if !CPUCoreIdExists(sysInfo, coreID) {
-				return fmt.Errorf("couldn't find requested CPU core '%d'", coreID)
-			}
-		}
-	}
-
-	return nil
-}
-
-func VerifySwitchDevicePresence(sysInfo *SystemInfo, sOpt DeviceOptions) error {
-	if sOpt.Flex {
-		return nil
-	}
-
-	if len(sOpt.MajorRange) > 0 && sOpt.MajorRange[0] != -1 {
-		// Verify we can find all the specified Switches
-		for _, swID := range sOpt.MajorRange {
-			if !SwitchIdExists(sysInfo, swID) {
-				return fmt.Errorf("couldn't find requested NvSwitch ID '%d'", swID)
-			}
-		}
-	}
-
-	if len(sOpt.MinorRange) > 0 && sOpt.MinorRange[0] != -1 {
-		for _, linkID := range sOpt.MinorRange {
-			if !LinkIdExists(sysInfo, linkID) {
-				return fmt.Errorf("couldn't find requested NvLink '%d'", linkID)
-			}
-		}
-	}
-
-	return nil
-}
-
-func VerifyDevicePresence(sysInfo *SystemInfo, gOpt DeviceOptions) error {
-	if gOpt.Flex {
-		return nil
-	}
-
-	if len(gOpt.MajorRange) > 0 && gOpt.MajorRange[0] != -1 {
-		// Verify we can find all the specified GPUs
-		for _, gpuID := range gOpt.MajorRange {
-			if !GPUIdExists(sysInfo, gpuID) {
-				return fmt.Errorf("couldn't find requested GPU ID '%d'", gpuID)
-			}
-		}
-	}
-
-	if len(gOpt.MinorRange) > 0 && gOpt.MinorRange[0] != -1 {
-		for _, gpuInstanceID := range gOpt.MinorRange {
-			if !GPUInstanceIdExists(sysInfo, gpuInstanceID) {
-				return fmt.Errorf("couldn't find requested GPU instance ID '%d'", gpuInstanceID)
-			}
-		}
-	}
-
-	return nil
-}
-
-func getCoreArray(bitmask []uint64) []uint {
-
-	var cores []uint
-	bits := make([]uint64, dcgm.MAX_CPU_CORE_BITMASK_COUNT)
-
-	for i := 0; i < len(bitmask); i++ {
-		bits[i] = uint64(bitmask[i])
-	}
-
-	b := bitset.From(bits)
-
-	for i := uint(0); i < dcgm.MAX_NUM_CPU_CORES; i++ {
-		if b.Test(i) {
-			cores = append(cores, uint(i))
-		}
-	}
-
-	return cores
-}
-
-func InitializeCPUInfo(sysInfo SystemInfo, sOpt DeviceOptions) (SystemInfo, error) {
-	hierarchy, err := dcgmGetCpuHierarchy()
-	if err != nil {
-		return sysInfo, err
-	}
-
-	if hierarchy.NumCpus <= 0 {
-		return sysInfo, fmt.Errorf("no CPUs to monitor")
-	}
-
-	for i := 0; i < int(hierarchy.NumCpus); i++ {
-		cores := getCoreArray([]uint64(hierarchy.Cpus[i].OwnedCores))
-
-		cpu := CPUInfo{
-			hierarchy.Cpus[i].CpuId,
-			cores,
-		}
-
-		sysInfo.CPUs = append(sysInfo.CPUs, cpu)
-	}
-
-	sysInfo.cOpt = sOpt
-
-	err = VerifyCPUDevicePresence(&sysInfo, sOpt)
-	if err != nil {
-		return sysInfo, err
-	}
-	logrus.Debugf("System entities of type %s initialized", sysInfo.InfoType)
-	return sysInfo, nil
-}
-
-func InitializeNvSwitchInfo(sysInfo SystemInfo, sOpt DeviceOptions) (SystemInfo, error) {
-	switches, err := dcgm.GetEntityGroupEntities(dcgm.FE_SWITCH)
-	if err != nil {
-		return sysInfo, err
-	}
-
-	if len(switches) <= 0 {
-		return sysInfo, fmt.Errorf("no switches to monitor")
-	}
-
-	links, err := dcgm.GetNvLinkLinkStatus()
-	if err != nil {
-		return sysInfo, err
-	}
-
-	for i := 0; i < len(switches); i++ {
-		var matchingLinks []dcgm.NvLinkStatus
-		for _, link := range links {
-			if link.ParentType == dcgm.FE_SWITCH && link.ParentId == uint(switches[i]) {
-				matchingLinks = append(matchingLinks, link)
-			}
-		}
-
-		sw := SwitchInfo{
-			switches[i],
-			matchingLinks,
-		}
-
-		sysInfo.Switches = append(sysInfo.Switches, sw)
-	}
-
-	sysInfo.sOpt = sOpt
-	err = VerifySwitchDevicePresence(&sysInfo, sOpt)
-	if err == nil {
-		logrus.Debugf("System entities of type %s initialized", sysInfo.InfoType)
-	}
-
-	return sysInfo, err
-}
-
-func InitializeGPUInfo(sysInfo SystemInfo, gOpt DeviceOptions, useFakeGPUs bool) (SystemInfo, error) {
-	gpuCount, err := dcgmGetAllDeviceCount()
-	if err != nil {
-		return sysInfo, err
-	}
-	sysInfo.GPUCount = gpuCount
-
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		// Default mig enabled to false
-		sysInfo.GPUs[i].MigEnabled = false
-		sysInfo.GPUs[i].DeviceInfo, err = dcgmGetDeviceInfo(i)
-		if err != nil {
-			if useFakeGPUs {
-				sysInfo.GPUs[i].DeviceInfo.GPU = i
-				sysInfo.GPUs[i].DeviceInfo.UUID = fmt.Sprintf("fake%d", i)
-			} else {
-				return sysInfo, err
-			}
-		}
-	}
-
-	hierarchy, err := dcgmGetGpuInstanceHierarchy()
-	if err != nil {
-		return sysInfo, err
-	}
-
-	if hierarchy.Count > 0 {
-		var entities []dcgm.GroupEntityPair
-
-		gpuID := uint(0)
-		instanceIndex := 0
-		for i := uint(0); i < hierarchy.Count; i++ {
-			if hierarchy.EntityList[i].Parent.EntityGroupId == dcgm.FE_GPU {
-				// We are adding a GPU instance
-				gpuID = hierarchy.EntityList[i].Parent.EntityId
-				entityID := hierarchy.EntityList[i].Entity.EntityId
-				instanceInfo := GPUInstanceInfo{
-					Info:        hierarchy.EntityList[i].Info,
-					ProfileName: "",
-					EntityId:    entityID,
-				}
-				sysInfo.GPUs[gpuID].MigEnabled = true
-				sysInfo.GPUs[gpuID].GPUInstances = append(sysInfo.GPUs[gpuID].GPUInstances, instanceInfo)
-				entities = append(entities, dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: entityID})
-				instanceIndex = len(sysInfo.GPUs[gpuID].GPUInstances) - 1
-			} else if hierarchy.EntityList[i].Parent.EntityGroupId == dcgm.FE_GPU_I {
-				// Add the compute instance, gpuId is recorded previously
-				entityID := hierarchy.EntityList[i].Entity.EntityId
-				ciInfo := ComputeInstanceInfo{hierarchy.EntityList[i].Info, "", entityID}
-				sysInfo.GPUs[gpuID].GPUInstances[instanceIndex].ComputeInstances = append(sysInfo.GPUs[gpuID].GPUInstances[instanceIndex].ComputeInstances,
-					ciInfo)
-			}
-		}
-
-		err = PopulateMigProfileNames(&sysInfo, entities)
-		if err != nil {
-			return sysInfo, err
-		}
-	}
-
-	sysInfo.gOpt = gOpt
-	err = VerifyDevicePresence(&sysInfo, gOpt)
-	if err == nil {
-		logrus.Debugf("System entities of type %s initialized", sysInfo.InfoType)
-	}
-	return sysInfo, err
-}
-
-func InitializeSystemInfo(
-	gOpt DeviceOptions, sOpt DeviceOptions, cOpt DeviceOptions, useFakeGPUs bool, entityType dcgm.Field_Entity_Group,
-) (SystemInfo, error) {
-	sysInfo := SystemInfo{}
-
-	logrus.Info("Initializing system entities of type: ", entityType)
-	switch entityType {
-	case dcgm.FE_LINK:
-		sysInfo.InfoType = dcgm.FE_LINK
-		return InitializeNvSwitchInfo(sysInfo, sOpt)
-	case dcgm.FE_SWITCH:
-		sysInfo.InfoType = dcgm.FE_SWITCH
-		return InitializeNvSwitchInfo(sysInfo, sOpt)
-	case dcgm.FE_GPU:
-		sysInfo.InfoType = dcgm.FE_GPU
-		return InitializeGPUInfo(sysInfo, gOpt, useFakeGPUs)
-	case dcgm.FE_CPU:
-		sysInfo.InfoType = dcgm.FE_CPU
-		return InitializeCPUInfo(sysInfo, cOpt)
-	case dcgm.FE_CPU_CORE:
-		sysInfo.InfoType = dcgm.FE_CPU_CORE
-		return InitializeCPUInfo(sysInfo, cOpt)
-	}
-
-	return sysInfo, fmt.Errorf("unhandled entity type '%d'", entityType)
-}
-
-func CreateCoreGroupsFromSystemInfo(sysInfo SystemInfo) ([]dcgm.GroupHandle, []func(), error) {
-	var groups []dcgm.GroupHandle
-	var cleanups []func()
-	var groupID dcgm.GroupHandle
-	var err error
-
-	/* Create per-cpu core groups */
-	for _, cpu := range sysInfo.CPUs {
-		if !IsCPUWatched(cpu.EntityId, sysInfo) {
-			continue
-		}
-
-		for i, core := range cpu.Cores {
-
-			if i == 0 || i%dcgm.DCGM_GROUP_MAX_ENTITIES == 0 {
-				groupID, err = dcgm.CreateGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
-				if err != nil {
-					return nil, cleanups, err
-				}
-
-				groups = append(groups, groupID)
-			}
-
-			if !IsCoreWatched(core, cpu.EntityId, sysInfo) {
-				continue
-			}
-
-			err = dcgm.AddEntityToGroup(groupID, dcgm.FE_CPU_CORE, core)
-
-			if err != nil {
-				return groups, cleanups, err
-			}
-
-			cleanups = append(cleanups, func() {
-				err := dcgm.DestroyGroup(groupID)
-				if err != nil && !strings.Contains(err.Error(), DCGM_ST_NOT_CONFIGURED) {
-					logrus.WithFields(logrus.Fields{
-						LoggerGroupIDKey: groupID,
-						logrus.ErrorKey:  err,
-					}).Warn("can not destroy group")
-				}
-			})
-		}
-	}
-
-	return groups, cleanups, nil
-}
-
-func CreateLinkGroupsFromSystemInfo(sysInfo SystemInfo) ([]dcgm.GroupHandle, []func(), error) {
-	var groups []dcgm.GroupHandle
-	var cleanups []func()
-
-	/* Create per-switch link groups */
-	for _, sw := range sysInfo.Switches {
-		if !IsSwitchWatched(sw.EntityId, sysInfo) {
-			continue
-		}
-
-		groupID, err := dcgmCreateGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
-		if err != nil {
-			return nil, cleanups, err
-		}
-
-		groups = append(groups, groupID)
-
-		for _, link := range sw.NvLinks {
-			if link.State != dcgm.LS_UP {
-				continue
-			}
-
-			if !IsLinkWatched(link.Index, sw.EntityId, sysInfo) {
-				continue
-			}
-
-			err = dcgm.AddLinkEntityToGroup(groupID, link.Index, link.ParentId)
-
-			if err != nil {
-				return groups, cleanups, err
-			}
-
-			cleanups = append(cleanups, func() {
-				err := dcgm.DestroyGroup(groupID)
-				if err != nil && !strings.Contains(err.Error(), DCGM_ST_NOT_CONFIGURED) {
-					logrus.WithFields(logrus.Fields{
-						LoggerGroupIDKey: groupID,
-						logrus.ErrorKey:  err,
-					}).Warn("can not destroy group")
-				}
-			})
-		}
-	}
-
-	return groups, cleanups, nil
-}
-
-func CreateGroupFromSystemInfo(sysInfo SystemInfo) (dcgm.GroupHandle, func(), error) {
-	monitoringInfo := GetMonitoredEntities(sysInfo)
-	groupID, err := dcgmCreateGroup(fmt.Sprintf("gpu-collector-group-%d", rand.Uint64()))
-	if err != nil {
-		return dcgm.GroupHandle{}, func() {}, err
-	}
-
-	for _, mi := range monitoringInfo {
-		err := dcgmAddEntityToGroup(groupID, mi.Entity.EntityGroupId, mi.Entity.EntityId)
-		if err != nil {
-			return groupID, func() {
-				err := dcgm.DestroyGroup(groupID)
-				if err != nil && !strings.Contains(err.Error(), DCGM_ST_NOT_CONFIGURED) {
-					logrus.WithFields(logrus.Fields{
-						LoggerGroupIDKey: groupID,
-						logrus.ErrorKey:  err,
-					}).Warn("can not destroy group")
-				}
-			}, err
-		}
-	}
-
-	return groupID, func() {
-		err := dcgm.DestroyGroup(groupID)
-		if err != nil && !strings.Contains(err.Error(), DCGM_ST_NOT_CONFIGURED) {
-			logrus.WithFields(logrus.Fields{
-				LoggerGroupIDKey: groupID,
-				logrus.ErrorKey:  err,
-			}).Warn("can not destroy group")
-		}
-	}, nil
-}
-
-func AddAllGPUs(sysInfo SystemInfo) []MonitoringInfo {
-	var monitoring []MonitoringInfo
-
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		mi := MonitoringInfo{
-			dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: sysInfo.GPUs[i].DeviceInfo.GPU},
-			sysInfo.GPUs[i].DeviceInfo,
-			nil,
-			PARENT_ID_IGNORED,
-		}
-		monitoring = append(monitoring, mi)
-	}
-
-	return monitoring
-}
-
-func AddAllSwitches(sysInfo SystemInfo) []MonitoringInfo {
-	var monitoring []MonitoringInfo
-
-	for _, sw := range sysInfo.Switches {
-		if !IsSwitchWatched(sw.EntityId, sysInfo) {
-			continue
-		}
-
-		mi := MonitoringInfo{
-			dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_SWITCH, EntityId: sw.EntityId},
-			dcgm.Device{},
-			nil,
-			PARENT_ID_IGNORED,
-		}
-		monitoring = append(monitoring, mi)
-	}
-
-	return monitoring
-}
-
-func AddAllLinks(sysInfo SystemInfo) []MonitoringInfo {
-	var monitoring []MonitoringInfo
-
-	for _, sw := range sysInfo.Switches {
-		for _, link := range sw.NvLinks {
-			if link.State != dcgm.LS_UP {
-				continue
-			}
-
-			if !IsSwitchWatched(sw.EntityId, sysInfo) {
-				continue
-			}
-
-			if !IsLinkWatched(link.Index, sw.EntityId, sysInfo) {
-				continue
-			}
-
-			mi := MonitoringInfo{
-				dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_LINK, EntityId: link.Index},
-				dcgm.Device{},
-				nil,
-				link.ParentId,
-			}
-			monitoring = append(monitoring, mi)
-		}
-	}
-
-	return monitoring
-}
-
-func IsSwitchWatched(switchID uint, sysInfo SystemInfo) bool {
-	if sysInfo.sOpt.Flex {
-		return true
-	}
-
-	// When MajorRange contains -1 value, we do monitorig of all switches
-	if len(sysInfo.sOpt.MajorRange) > 0 && sysInfo.sOpt.MajorRange[0] == -1 {
-		return true
-	}
-
-	return slices.Contains(sysInfo.sOpt.MajorRange, int(switchID))
-}
-
-func IsLinkWatched(linkIndex uint, switchID uint, sysInfo SystemInfo) bool {
-	if sysInfo.sOpt.Flex {
-		return true
-	}
-
-	// Find a switch
-	switchIdx := slices.IndexFunc(sysInfo.Switches, func(si SwitchInfo) bool {
-		return si.EntityId == switchID && IsSwitchWatched(si.EntityId, sysInfo)
-	})
-
-	if switchIdx > -1 {
-		// Switch exists and is watched
-		sw := sysInfo.Switches[switchIdx]
-
-		if len(sysInfo.sOpt.MinorRange) > 0 && sysInfo.sOpt.MinorRange[0] == -1 {
-			return true
-		}
-
-		// The Link exists
-		if slices.ContainsFunc(sw.NvLinks, func(nls dcgm.NvLinkStatus) bool {
-			return nls.Index == linkIndex
-		}) {
-			// and the link index in the Minor range
-			return slices.Contains(sysInfo.sOpt.MinorRange, int(linkIndex))
-		}
-	}
-
-	return false
-}
-
-func IsCPUWatched(cpuID uint, sysInfo SystemInfo) bool {
-
-	if !slices.ContainsFunc(sysInfo.CPUs, func(cpu CPUInfo) bool {
-		return cpu.EntityId == cpuID
-	}) {
-		return false
-	}
-
-	if sysInfo.cOpt.Flex {
-		return true
-	}
-
-	if len(sysInfo.cOpt.MajorRange) > 0 && sysInfo.cOpt.MajorRange[0] == -1 {
-		return true
-	}
-
-	return slices.ContainsFunc(sysInfo.cOpt.MajorRange, func(cpu int) bool {
-		return uint(cpu) == cpuID
-	})
-}
-
-func IsCoreWatched(coreID uint, cpuID uint, sysInfo SystemInfo) bool {
-	if sysInfo.cOpt.Flex {
-		return true
-	}
-
-	// Find a CPU
-	cpuIdx := slices.IndexFunc(sysInfo.CPUs, func(cpu CPUInfo) bool {
-		return IsCPUWatched(cpu.EntityId, sysInfo) && cpu.EntityId == cpuID
-	})
-
-	if cpuIdx > -1 {
-		if len(sysInfo.cOpt.MinorRange) > 0 && sysInfo.cOpt.MinorRange[0] == -1 {
-			return true
-		}
-
-		return slices.Contains(sysInfo.cOpt.MinorRange, int(coreID))
-	}
-
-	return false
-}
-
-func AddAllCPUs(sysInfo SystemInfo) []MonitoringInfo {
-	var monitoring []MonitoringInfo
-
-	for _, cpu := range sysInfo.CPUs {
-		if !IsCPUWatched(cpu.EntityId, sysInfo) {
-			continue
-		}
-
-		mi := MonitoringInfo{
-			dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU, EntityId: cpu.EntityId},
-			dcgm.Device{},
-			nil,
-			PARENT_ID_IGNORED,
-		}
-		monitoring = append(monitoring, mi)
-	}
-
-	return monitoring
-}
-
-func AddAllCPUCores(sysInfo SystemInfo) []MonitoringInfo {
-	var monitoring []MonitoringInfo
-
-	for _, cpu := range sysInfo.CPUs {
-		for _, core := range cpu.Cores {
-			if !IsCPUWatched(cpu.EntityId, sysInfo) {
-				continue
-			}
-
-			if !IsCoreWatched(core, cpu.EntityId, sysInfo) {
-				continue
-			}
-
-			mi := MonitoringInfo{
-				dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_CPU_CORE, EntityId: core},
-				dcgm.Device{},
-				nil,
-				cpu.EntityId,
-			}
-			monitoring = append(monitoring, mi)
-		}
-	}
-
-	return monitoring
-}
-
-func AddAllGPUInstances(sysInfo SystemInfo, addFlexibly bool) []MonitoringInfo {
-	var monitoring []MonitoringInfo
-
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		if addFlexibly && len(sysInfo.GPUs[i].GPUInstances) == 0 {
-			mi := MonitoringInfo{
-				dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: sysInfo.GPUs[i].DeviceInfo.GPU},
-				sysInfo.GPUs[i].DeviceInfo,
-				nil,
-				PARENT_ID_IGNORED,
-			}
-			monitoring = append(monitoring, mi)
-		} else {
-			for j := 0; j < len(sysInfo.GPUs[i].GPUInstances); j++ {
-				mi := MonitoringInfo{
-					dcgm.GroupEntityPair{
-						EntityGroupId: dcgm.FE_GPU_I,
-						EntityId:      sysInfo.GPUs[i].GPUInstances[j].EntityId,
-					},
-					sysInfo.GPUs[i].DeviceInfo,
-					&sysInfo.GPUs[i].GPUInstances[j],
-					PARENT_ID_IGNORED,
-				}
-				monitoring = append(monitoring, mi)
-			}
-		}
-	}
-
-	return monitoring
-}
-
-func GetMonitoringInfoForGPU(sysInfo SystemInfo, gpuID int) *MonitoringInfo {
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		if sysInfo.GPUs[i].DeviceInfo.GPU == uint(gpuID) {
-			return &MonitoringInfo{
-				dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU, EntityId: sysInfo.GPUs[i].DeviceInfo.GPU},
-				sysInfo.GPUs[i].DeviceInfo,
-				nil,
-				PARENT_ID_IGNORED,
-			}
-		}
-	}
-
-	return nil
-}
-
-func GetMonitoringInfoForGPUInstance(sysInfo SystemInfo, gpuInstanceID int) *MonitoringInfo {
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		for _, instance := range sysInfo.GPUs[i].GPUInstances {
-			if instance.EntityId == uint(gpuInstanceID) {
-				return &MonitoringInfo{
-					dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU_I, EntityId: uint(gpuInstanceID)},
-					sysInfo.GPUs[i].DeviceInfo,
-					&instance,
-					PARENT_ID_IGNORED,
-				}
-			}
-		}
-	}
-
-	return nil
-}
-
-func GetMonitoredEntities(sysInfo SystemInfo) []MonitoringInfo {
-	var monitoring []MonitoringInfo
-
-	if sysInfo.InfoType == dcgm.FE_SWITCH {
-		monitoring = AddAllSwitches(sysInfo)
-	} else if sysInfo.InfoType == dcgm.FE_LINK {
-		monitoring = AddAllLinks(sysInfo)
-	} else if sysInfo.InfoType == dcgm.FE_CPU {
-		monitoring = AddAllCPUs(sysInfo)
-	} else if sysInfo.InfoType == dcgm.FE_CPU_CORE {
-		monitoring = AddAllCPUCores(sysInfo)
-	} else if sysInfo.gOpt.Flex {
-		monitoring = AddAllGPUInstances(sysInfo, true)
-	} else {
-		if len(sysInfo.gOpt.MajorRange) > 0 && sysInfo.gOpt.MajorRange[0] == -1 {
-			monitoring = AddAllGPUs(sysInfo)
-		} else {
-			for _, gpuID := range sysInfo.gOpt.MajorRange {
-				// We've already verified that everything in the options list exists
-				monitoring = append(monitoring, *GetMonitoringInfoForGPU(sysInfo, gpuID))
-			}
-		}
-
-		if len(sysInfo.gOpt.MinorRange) > 0 && sysInfo.gOpt.MinorRange[0] == -1 {
-			monitoring = AddAllGPUInstances(sysInfo, false)
-		} else {
-			for _, gpuInstanceID := range sysInfo.gOpt.MinorRange {
-				// We've already verified that everything in the options list exists
-				monitoring = append(monitoring, *GetMonitoringInfoForGPUInstance(sysInfo, gpuInstanceID))
-			}
-		}
-	}
-
-	return monitoring
-}
-
-func GetGPUInstanceIdentifier(sysInfo SystemInfo, gpuuuid string, gpuInstanceID uint) string {
-	for i := uint(0); i < sysInfo.GPUCount; i++ {
-		if sysInfo.GPUs[i].DeviceInfo.UUID == gpuuuid {
-			identifier := fmt.Sprintf("%d-%d", sysInfo.GPUs[i].DeviceInfo.GPU, gpuInstanceID)
-			return identifier
-		}
-	}
-
-	return ""
-}
diff --git a/pkg/dcgmexporter/system_info_test.go b/pkg/dcgmexporter/system_info_test.go
deleted file mode 100644
index 6f2e45cc..00000000
--- a/pkg/dcgmexporter/system_info_test.go
+++ /dev/null
@@ -1,671 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"testing"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-var fakeProfileName = "2fake.4gb"
-
-func SpoofSwitchSystemInfo() SystemInfo {
-	var sysInfo SystemInfo
-	sysInfo.InfoType = dcgm.FE_SWITCH
-	sw1 := SwitchInfo{
-		EntityId: 0,
-	}
-	sw2 := SwitchInfo{
-		EntityId: 1,
-	}
-
-	l1 := dcgm.NvLinkStatus{
-		ParentId:   0,
-		ParentType: dcgm.FE_SWITCH,
-		State:      2,
-		Index:      0,
-	}
-
-	l2 := dcgm.NvLinkStatus{
-		ParentId:   0,
-		ParentType: dcgm.FE_SWITCH,
-		State:      3,
-		Index:      1,
-	}
-
-	l3 := dcgm.NvLinkStatus{
-		ParentId:   1,
-		ParentType: dcgm.FE_SWITCH,
-		State:      2,
-		Index:      0,
-	}
-
-	l4 := dcgm.NvLinkStatus{
-		ParentId:   1,
-		ParentType: dcgm.FE_SWITCH,
-		State:      3,
-		Index:      1,
-	}
-
-	sw1.NvLinks = append(sw1.NvLinks, l1)
-	sw1.NvLinks = append(sw1.NvLinks, l2)
-	sw2.NvLinks = append(sw2.NvLinks, l3)
-	sw2.NvLinks = append(sw2.NvLinks, l4)
-
-	sysInfo.Switches = append(sysInfo.Switches, sw1)
-	sysInfo.Switches = append(sysInfo.Switches, sw2)
-
-	sysInfo.sOpt.MajorRange = []int{-1}
-	sysInfo.sOpt.MinorRange = []int{-1}
-
-	return sysInfo
-}
-
-func SpoofSystemInfo() SystemInfo {
-	var sysInfo SystemInfo
-	sysInfo.GPUCount = 2
-	sysInfo.GPUs[0].DeviceInfo.GPU = 0
-	gi := GPUInstanceInfo{
-		Info:        dcgm.MigEntityInfo{GpuUuid: "fake", NvmlProfileSlices: 3},
-		ProfileName: fakeProfileName,
-		EntityId:    0,
-	}
-	sysInfo.GPUs[0].GPUInstances = append(sysInfo.GPUs[0].GPUInstances, gi)
-	gi2 := GPUInstanceInfo{
-		Info:        dcgm.MigEntityInfo{GpuUuid: "fake", NvmlInstanceId: 1, NvmlProfileSlices: 3},
-		ProfileName: fakeProfileName,
-		EntityId:    14,
-	}
-	sysInfo.GPUs[1].GPUInstances = append(sysInfo.GPUs[1].GPUInstances, gi2)
-	sysInfo.GPUs[1].DeviceInfo.GPU = 1
-
-	return sysInfo
-}
-
-func TestMonitoredEntities(t *testing.T) {
-	sysInfo := SpoofSystemInfo()
-	sysInfo.gOpt.Flex = true
-
-	monitoring := GetMonitoredEntities(sysInfo)
-	require.Equal(t, len(monitoring), 2, fmt.Sprintf("Should have 2 monitored entities but found %d", len(monitoring)))
-	instanceCount := 0
-	gpuCount := 0
-	for _, mi := range monitoring {
-		if mi.Entity.EntityGroupId == dcgm.FE_GPU_I {
-			instanceCount = instanceCount + 1
-			require.NotEqual(t, mi.InstanceInfo, nil, "Expected InstanceInfo to be populated but it wasn't")
-			require.Equal(t, mi.InstanceInfo.ProfileName, fakeProfileName, "Expected profile named '%s' but found '%s'",
-				fakeProfileName, mi.InstanceInfo.ProfileName)
-			if mi.Entity.EntityId != uint(0) {
-				// One of these should be 0, the other should be 14
-				require.Equal(t, mi.Entity.EntityId, uint(14), "Expected 14 as EntityId but found %s",
-					monitoring[1].Entity.EntityId)
-			}
-		} else {
-			gpuCount = gpuCount + 1
-			require.Equal(t, mi.InstanceInfo, (*GPUInstanceInfo)(nil), "Expected InstanceInfo to be nil but it wasn't")
-		}
-	}
-	require.Equal(t, instanceCount, 2, "Expected 2 GPU instances but found %d", instanceCount)
-	require.Equal(t, gpuCount, 0, "Expected 0 GPUs but found %d", gpuCount)
-
-	sysInfo.GPUs[0].GPUInstances = sysInfo.GPUs[0].GPUInstances[:0]
-	sysInfo.GPUs[1].GPUInstances = sysInfo.GPUs[1].GPUInstances[:0]
-	monitoring = GetMonitoredEntities(sysInfo)
-	require.Equal(t, 2, len(monitoring), fmt.Sprintf("Should have 2 monitored entities but found %d", len(monitoring)))
-	for i, mi := range monitoring {
-		require.Equal(t, mi.Entity.EntityGroupId, dcgm.FE_GPU, "Expected FE_GPU but found %d", mi.Entity.EntityGroupId)
-		require.Equal(t, uint(i), mi.DeviceInfo.GPU, "Expected GPU %d but found %d", i, mi.DeviceInfo.GPU)
-		require.Equal(t, (*GPUInstanceInfo)(nil), mi.InstanceInfo,
-			"Expected InstanceInfo not to be populated but it was")
-	}
-}
-
-func TestVerifyDevicePresence(t *testing.T) {
-	sysInfo := SpoofSystemInfo()
-	var dOpt DeviceOptions
-	dOpt.Flex = true
-	err := VerifyDevicePresence(&sysInfo, dOpt)
-	require.Equal(t, err, nil, "Expected to have no error, but found %s", err)
-
-	dOpt.Flex = false
-	dOpt.MajorRange = append(dOpt.MajorRange, -1)
-	dOpt.MinorRange = append(dOpt.MinorRange, -1)
-	err = VerifyDevicePresence(&sysInfo, dOpt)
-	require.Equal(t, err, nil, "Expected to have no error, but found %s", err)
-
-	dOpt.MinorRange[0] = 10 // this GPU instance doesn't exist
-	err = VerifyDevicePresence(&sysInfo, dOpt)
-	require.NotEqual(t, err, nil, "Expected to have an error for a non-existent GPU instance, but none found")
-
-	dOpt.MajorRange[0] = 10 // this GPU doesn't exist
-	dOpt.MinorRange[0] = -1
-	err = VerifyDevicePresence(&sysInfo, dOpt)
-	require.NotEqual(t, err, nil, "Expected to have an error for a non-existent GPU, but none found")
-
-	// Add GPUs and instances that exist
-	dOpt.MajorRange[0] = 0
-	dOpt.MajorRange = append(dOpt.MajorRange, 1)
-	dOpt.MinorRange[0] = 0
-	dOpt.MinorRange = append(dOpt.MinorRange, 14)
-	err = VerifyDevicePresence(&sysInfo, dOpt)
-	require.Equal(t, err, nil, "Expected to have no error, but found %s", err)
-}
-
-func TestMonitoredSwitches(t *testing.T) {
-	sysInfo := SpoofSwitchSystemInfo()
-
-	/* test that only switches are returned */
-	monitoring := GetMonitoredEntities(sysInfo)
-	require.Equal(t, len(monitoring), 2, fmt.Sprintf("Should have 2 monitored switches but found %d", len(monitoring)))
-	for _, mi := range monitoring {
-		require.Equal(t, mi.Entity.EntityGroupId, dcgm.FE_SWITCH,
-			fmt.Sprintf("Should have only returned switches but returned %d", mi.Entity.EntityGroupId))
-	}
-
-	/* test that only "up" links are monitored and 1 from each switch */
-	sysInfo.InfoType = dcgm.FE_LINK
-	monitoring = GetMonitoredEntities(sysInfo)
-	require.Equal(t, len(monitoring), 2, fmt.Sprintf("Should have 2 monitored links but found %d", len(monitoring)))
-	for i, mi := range monitoring {
-		require.Equal(t, mi.Entity.EntityGroupId, dcgm.FE_LINK,
-			fmt.Sprintf("Should have only returned links but returned %d", mi.Entity.EntityGroupId))
-		require.Equal(t, mi.ParentId, uint(i), "Link should reference switch parent")
-	}
-}
-
-func TestIsSwitchWatched(t *testing.T) {
-	tests := []struct {
-		name     string
-		switchID uint
-		sysInfo  SystemInfo
-		want     bool
-	}{
-		{
-			name:     "Monitor all devices",
-			switchID: 1,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					Flex: true,
-				},
-			},
-			want: true,
-		},
-		{
-			name:     "MajorRange empty",
-			switchID: 2,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					MajorRange: []int{},
-				},
-			},
-			want: false,
-		},
-		{
-			name:     "MajorRange contains -1 to watch all devices",
-			switchID: 3,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					MajorRange: []int{-1},
-				},
-			},
-			want: true,
-		},
-		{
-			name:     "SwitchID in MajorRange",
-			switchID: 4,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					MajorRange: []int{3, 4, 5},
-				},
-			},
-			want: true,
-		},
-		{
-			name:     "SwitchID not in MajorRange",
-			switchID: 5,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					MajorRange: []int{3, 4, 6},
-				},
-			},
-			want: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := IsSwitchWatched(tt.switchID, tt.sysInfo)
-			assert.Equal(t, tt.want, got)
-		})
-	}
-}
-
-func TestIsLinkWatched(t *testing.T) {
-	tests := []struct {
-		name      string
-		linkIndex uint
-		switchID  uint
-		sysInfo   SystemInfo
-		want      bool
-	}{
-		{
-			name:      "Monitor all devices",
-			linkIndex: 1,
-			sysInfo:   SystemInfo{sOpt: DeviceOptions{Flex: true}},
-			want:      true,
-		},
-		{
-			name:      "No watched devices",
-			linkIndex: 1,
-			sysInfo:   SystemInfo{},
-			want:      false,
-		},
-		{
-			name:      "Watched link with empty MinorRange",
-			linkIndex: 2,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					MajorRange: []int{-1},
-				},
-				Switches: []SwitchInfo{
-					{
-						EntityId: 1,
-						NvLinks: []dcgm.NvLinkStatus{
-							{Index: 2},
-						},
-					},
-				},
-			},
-			want: false,
-		},
-		{
-			name:      "MinorRange contains -1 to watch all links",
-			switchID:  1,
-			linkIndex: 3,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					MajorRange: []int{-1},
-					MinorRange: []int{-1},
-				},
-				Switches: []SwitchInfo{
-					{
-						EntityId: 1,
-						NvLinks: []dcgm.NvLinkStatus{
-							{Index: 3},
-						},
-					},
-				},
-			},
-			want: true,
-		},
-		{
-			name:      "The link not in the watched switch",
-			switchID:  1,
-			linkIndex: 4,
-			sysInfo: SystemInfo{
-				sOpt: DeviceOptions{
-					MajorRange: []int{-1},
-					MinorRange: []int{1, 2, 3},
-				},
-				Switches: []SwitchInfo{
-					{
-						EntityId: 1,
-						NvLinks: []dcgm.NvLinkStatus{
-							{Index: 4},
-						},
-					},
-				},
-			},
-			want: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := IsLinkWatched(tt.linkIndex, tt.switchID, tt.sysInfo)
-			assert.Equal(t, tt.want, got)
-		})
-	}
-}
-
-func TestIsCPUWatched(t *testing.T) {
-	tests := []struct {
-		name    string
-		cpuID   uint
-		sysInfo SystemInfo
-		want    bool
-	}{
-		{
-			name:  "Monitor all devices",
-			cpuID: 1,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{Flex: true},
-				CPUs: []CPUInfo{
-					{
-						EntityId: 1,
-					},
-				},
-			},
-			want: true,
-		},
-		{
-			name:  "MajorRange Contains -1",
-			cpuID: 2,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{MajorRange: []int{-1}},
-				CPUs: []CPUInfo{
-					{
-						EntityId: 2,
-					},
-				},
-			},
-			want: true,
-		},
-		{
-			name:  "CPU ID in MajorRange",
-			cpuID: 3,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{MajorRange: []int{1, 2, 3}},
-				CPUs: []CPUInfo{
-					{
-						EntityId: 3,
-					},
-				},
-			},
-			want: true,
-		},
-		{
-			name:  "CPU ID Not in MajorRange",
-			cpuID: 4,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{MajorRange: []int{1, 2, 3}},
-				CPUs: []CPUInfo{
-					{
-						EntityId: 4,
-					},
-				},
-			},
-			want: false,
-		},
-		{
-			name:  "MajorRange Empty",
-			cpuID: 5,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{MajorRange: []int{}},
-				CPUs: []CPUInfo{
-					{
-						EntityId: 5,
-					},
-				},
-			},
-			want: false,
-		},
-		{
-			name:  "CPU not found",
-			cpuID: 6,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{MajorRange: []int{}},
-				CPUs: []CPUInfo{
-					{
-						EntityId: 5,
-					},
-				},
-			},
-			want: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			assert.Equal(t, tt.want, IsCPUWatched(tt.cpuID, tt.sysInfo))
-		})
-	}
-}
-
-func TestIsCoreWatched(t *testing.T) {
-	tests := []struct {
-		name    string
-		coreID  uint
-		cpuID   uint
-		sysInfo SystemInfo
-		want    bool
-	}{
-		{
-			name:   "Monitor all devices",
-			coreID: 1,
-			cpuID:  1,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{Flex: true},
-			},
-			want: true,
-		},
-		{
-			name:   "Core in MinorRange",
-			coreID: 2,
-			cpuID:  1,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{
-					MinorRange: []int{1, 2, 3},
-					MajorRange: []int{-1},
-				},
-				CPUs: []CPUInfo{{EntityId: 1}},
-			},
-			want: true,
-		},
-		{
-			name:   "Core Not in MinorRange",
-			coreID: 4,
-			cpuID:  1,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{
-					MinorRange: []int{1, 2, 3},
-					MajorRange: []int{-1},
-				},
-				CPUs: []CPUInfo{{EntityId: 1}},
-			},
-			want: false,
-		},
-		{
-			name:   "MinorRange Contains -1",
-			coreID: 5,
-			cpuID:  1,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{
-					MinorRange: []int{-1},
-					MajorRange: []int{-1},
-				},
-				CPUs: []CPUInfo{{EntityId: 1}},
-			},
-			want: true,
-		},
-		{
-			name:   "CPU Not Found",
-			coreID: 1,
-			cpuID:  2,
-			sysInfo: SystemInfo{
-				cOpt: DeviceOptions{
-					MinorRange: []int{1, 2, 3},
-					MajorRange: []int{-1},
-				},
-				CPUs: []CPUInfo{{EntityId: 1}},
-			},
-			want: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			assert.Equal(t, tt.want, IsCoreWatched(tt.coreID, tt.cpuID, tt.sysInfo))
-		})
-	}
-}
-
-func TestSetMigProfileNames(t *testing.T) {
-	tests := []struct {
-		name    string
-		sysInfo SystemInfo
-		values  []dcgm.FieldValue_v2
-		valid   bool
-	}{
-		{
-			name: "MIG profile found",
-			sysInfo: SystemInfo{
-				GPUCount: 1,
-				GPUs: [dcgm.MAX_NUM_DEVICES]GPUInfo{
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 1},
-						},
-					},
-				},
-			},
-			values: []dcgm.FieldValue_v2{
-				{
-					EntityId:    1,
-					FieldType:   dcgm.DCGM_FT_STRING,
-					StringValue: &fakeProfileName,
-				},
-			},
-			valid: true,
-		},
-		{
-			name: "Multiple MIG GPUs",
-			sysInfo: SystemInfo{
-				GPUCount: 3,
-				GPUs: [dcgm.MAX_NUM_DEVICES]GPUInfo{
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 1},
-						},
-					},
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 2},
-						},
-					},
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 3},
-						},
-					},
-				},
-			},
-			values: []dcgm.FieldValue_v2{
-				{
-					EntityId:    2,
-					FieldType:   dcgm.DCGM_FT_STRING,
-					StringValue: &fakeProfileName,
-				},
-			},
-			valid: true,
-		},
-		{
-			name: "Multiple MIG GPUs and Values",
-			sysInfo: SystemInfo{
-				GPUCount: 3,
-				GPUs: [dcgm.MAX_NUM_DEVICES]GPUInfo{
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 1},
-						},
-					},
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 2},
-						},
-					},
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 3},
-						},
-					},
-				},
-			},
-			values: []dcgm.FieldValue_v2{
-				{
-					EntityId:    2,
-					FieldType:   dcgm.DCGM_FT_STRING,
-					StringValue: &fakeProfileName,
-				},
-				{
-					EntityId:    3,
-					FieldType:   dcgm.DCGM_FT_STRING,
-					StringValue: &fakeProfileName,
-				},
-			},
-			valid: true,
-		},
-		{
-			name: "MIG profile not found",
-			sysInfo: SystemInfo{
-				GPUCount: 1,
-				GPUs: [dcgm.MAX_NUM_DEVICES]GPUInfo{
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 1},
-						},
-					},
-				},
-			},
-			values: []dcgm.FieldValue_v2{
-				{
-					EntityId:    2,
-					FieldType:   dcgm.DCGM_FT_STRING,
-					StringValue: &fakeProfileName,
-				},
-			},
-			valid: false,
-		},
-		{
-			name: "MIG profile not string type",
-			sysInfo: SystemInfo{
-				GPUCount: 1,
-				GPUs: [dcgm.MAX_NUM_DEVICES]GPUInfo{
-					{
-						GPUInstances: []GPUInstanceInfo{
-							{EntityId: 1},
-						},
-					},
-				},
-			},
-			values: []dcgm.FieldValue_v2{
-				{
-					EntityId:    1,
-					FieldType:   dcgm.DCGM_FT_BINARY,
-					StringValue: &fakeProfileName,
-					Value:       [4096]byte{'1', '2', '3'},
-				},
-			},
-			valid: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if tt.valid {
-				assert.NoError(t, SetMigProfileNames(&tt.sysInfo, tt.values), "Expected no error.")
-			} else {
-				assert.Error(t, SetMigProfileNames(&tt.sysInfo, tt.values), "Expected an error.")
-			}
-		})
-	}
-}
diff --git a/pkg/dcgmexporter/types.go b/pkg/dcgmexporter/types.go
deleted file mode 100644
index 246afe02..00000000
--- a/pkg/dcgmexporter/types.go
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"net/http"
-	"sync"
-	"text/template"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/prometheus/exporter-toolkit/web"
-)
-
-var (
-	SkipDCGMValue   = "SKIPPING DCGM VALUE"
-	FailedToConvert = "ERROR - FAILED TO CONVERT TO STRING"
-
-	nvidiaResourceName      = "nvidia.com/gpu"
-	nvidiaMigResourcePrefix = "nvidia.com/mig-"
-	MIG_UUID_PREFIX         = "MIG-"
-
-	// Note standard resource attributes
-	podAttribute       = "pod"
-	namespaceAttribute = "namespace"
-	containerAttribute = "container"
-
-	hpcJobAttribute = "hpc_job"
-
-	oldPodAttribute       = "pod_name"
-	oldNamespaceAttribute = "pod_namespace"
-	oldContainerAttribute = "container_name"
-
-	undefinedConfigMapData = "none"
-)
-
-type Transform interface {
-	Process(metrics MetricsByCounter, sysInfo SystemInfo) error
-	Name() string
-}
-
-type MetricsPipeline struct {
-	config *Config
-
-	transformations      []Transform
-	migMetricsFormat     *template.Template
-	switchMetricsFormat  *template.Template
-	linkMetricsFormat    *template.Template
-	cpuMetricsFormat     *template.Template
-	cpuCoreMetricsFormat *template.Template
-
-	counters        []Counter
-	gpuCollector    *DCGMCollector
-	switchCollector *DCGMCollector
-	linkCollector   *DCGMCollector
-	cpuCollector    *DCGMCollector
-	coreCollector   *DCGMCollector
-}
-
-type DCGMCollector struct {
-	Counters                 []Counter
-	DeviceFields             []dcgm.Short
-	Cleanups                 []func()
-	UseOldNamespace          bool
-	SysInfo                  SystemInfo
-	Hostname                 string
-	ReplaceBlanksInModelName bool
-}
-
-type Counter struct {
-	FieldID   dcgm.Short
-	FieldName string
-	PromType  string
-	Help      string
-}
-
-type Metric struct {
-	Counter Counter
-	Value   string
-
-	GPU          string
-	GPUUUID      string
-	GPUDevice    string
-	GPUModelName string
-	GPUPCIBusID  string
-
-	UUID string
-
-	MigProfile    string
-	GPUInstanceID string
-	Hostname      string
-
-	Labels     map[string]string
-	Attributes map[string]string
-}
-
-func (m Metric) getIDOfType(idType KubernetesGPUIDType) (string, error) {
-	// For MIG devices, return the MIG profile instead of
-	if m.MigProfile != "" {
-		return fmt.Sprintf("%s-%s", m.GPU, m.GPUInstanceID), nil
-	}
-	switch idType {
-	case GPUUID:
-		return m.GPUUUID, nil
-	case DeviceName:
-		return m.GPUDevice, nil
-	}
-	return "", fmt.Errorf("unsupported KubernetesGPUIDType for MetricID '%s'", idType)
-}
-
-var promMetricType = map[string]bool{
-	"gauge":     true,
-	"counter":   true,
-	"histogram": true,
-	"summary":   true,
-	"label":     true,
-}
-
-type MetricsServer struct {
-	sync.Mutex
-
-	server      *http.Server
-	webConfig   *web.FlagConfig
-	metrics     string
-	metricsChan chan string
-	registry    *Registry
-}
-
-type PodMapper struct {
-	Config *Config
-}
-
-type PodInfo struct {
-	Name      string
-	Namespace string
-	Container string
-}
-
-// MetricsByCounter represents a map where each Counter is associated with a slice of Metric objects
-type MetricsByCounter map[Counter][]Metric
-
-// CounterSet return
-type CounterSet struct {
-	DCGMCounters     []Counter
-	ExporterCounters []Counter
-}
diff --git a/pkg/dcgmexporter/xid_collector.go b/pkg/dcgmexporter/xid_collector.go
deleted file mode 100644
index 88ca020c..00000000
--- a/pkg/dcgmexporter/xid_collector.go
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"fmt"
-	"slices"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	"github.com/sirupsen/logrus"
-)
-
-type xidCollector struct {
-	expCollector
-}
-
-func (c *xidCollector) GetMetrics() (MetricsByCounter, error) {
-	return c.expCollector.getMetrics()
-}
-
-func NewXIDCollector(counters []Counter,
-	hostname string,
-	config *Config,
-	fieldEntityGroupTypeSystemInfo FieldEntityGroupTypeSystemInfoItem) (Collector, error) {
-	if !IsDCGMExpXIDErrorsCountEnabled(counters) {
-		logrus.Error(dcgmExpXIDErrorsCount + " collector is disabled")
-		return nil, fmt.Errorf(dcgmExpXIDErrorsCount + " collector is disabled")
-	}
-
-	collector := xidCollector{}
-	collector.expCollector = newExpCollector(counters,
-		hostname,
-		[]dcgm.Short{dcgm.DCGM_FI_DEV_XID_ERRORS},
-		config,
-		fieldEntityGroupTypeSystemInfo)
-
-	collector.counter = counters[slices.IndexFunc(counters, func(c Counter) bool {
-		return c.FieldName == dcgmExpXIDErrorsCount
-	})]
-
-	collector.labelFiller = func(metricValueLabels map[string]string, entityValue int64) {
-		metricValueLabels["xid"] = fmt.Sprint(entityValue)
-	}
-
-	collector.windowSize = config.XIDCountWindowSize
-
-	return &collector, nil
-}
-
-func IsDCGMExpXIDErrorsCountEnabled(counters []Counter) bool {
-	return slices.ContainsFunc(counters, func(c Counter) bool {
-		return c.FieldName == dcgmExpXIDErrorsCount
-	})
-}
diff --git a/pkg/dcgmexporter/xid_collector_test.go b/pkg/dcgmexporter/xid_collector_test.go
deleted file mode 100644
index ceaf02d1..00000000
--- a/pkg/dcgmexporter/xid_collector_test.go
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dcgmexporter
-
-import (
-	"bytes"
-	"fmt"
-	"reflect"
-	"slices"
-	"testing"
-	"time"
-
-	"github.com/NVIDIA/go-dcgm/pkg/dcgm"
-	io_prometheus_client "github.com/prometheus/client_model/go"
-	"github.com/prometheus/common/expfmt"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"k8s.io/utils/ptr"
-)
-
-func TestXIDCollector_Gather_Encode(t *testing.T) {
-	teardownTest := setupTest(t)
-	defer teardownTest(t)
-	runOnlyWithLiveGPUs(t)
-
-	hostname := "local-test"
-	config := &Config{
-		GPUDevices: DeviceOptions{
-			Flex:       true,
-			MajorRange: []int{-1},
-			MinorRange: []int{-1},
-		},
-		XIDCountWindowSize: int(time.Duration(5) * time.Minute),
-	}
-
-	records := [][]string{
-		{"DCGM_EXP_XID_ERRORS_COUNT", "gauge", "Count of XID Errors within user-specified time window (see xid-count-window-size param)."},
-		{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
-	}
-
-	cc, err := extractCounters(records, config)
-	require.NoError(t, err)
-	require.Len(t, cc.ExporterCounters, 1)
-	require.Len(t, cc.DCGMCounters, 1)
-
-	for i := range cc.DCGMCounters {
-		if cc.DCGMCounters[i].PromType == "label" {
-			cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
-		}
-	}
-
-	// Get a number of hardware GPUs
-	hardwareGPUs, err := dcgm.GetAllDeviceCount()
-	require.NoError(t, err)
-
-	if hardwareGPUs+1 > dcgm.MAX_NUM_DEVICES {
-		t.Skipf("Unable to add fake GPU with more than %d gpus", dcgm.MAX_NUM_DEVICES)
-	}
-
-	entityList := []dcgm.MigHierarchyInfo{
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-		{Entity: dcgm.GroupEntityPair{EntityGroupId: dcgm.FE_GPU}},
-	}
-
-	// Create fake GPU
-	fakeGPUIDs, err := dcgm.CreateFakeEntities(entityList)
-	require.NoError(t, err)
-	require.NotEmpty(t, fakeGPUIDs)
-
-	for i, gpuID := range fakeGPUIDs {
-		err = dcgm.InjectFieldValue(gpuID,
-			dcgm.DCGM_FI_DEV_XID_ERRORS,
-			dcgm.DCGM_FT_INT64,
-			0,
-			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
-			int64(42),
-		)
-		require.NoError(t, err)
-
-		err = dcgm.InjectFieldValue(gpuID,
-			dcgm.DCGM_FI_DEV_XID_ERRORS,
-			dcgm.DCGM_FT_INT64,
-			0,
-			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
-			int64(42),
-		)
-		require.NoError(t, err)
-
-		err = dcgm.InjectFieldValue(gpuID,
-			dcgm.DCGM_FI_DEV_XID_ERRORS,
-			dcgm.DCGM_FT_INT64,
-			0,
-			time.Now().Add(-time.Duration(i)*time.Second).UnixMicro(),
-			int64(46),
-		)
-		require.NoError(t, err)
-
-	}
-
-	allCounters := []Counter{
-		{
-			FieldID: dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		},
-	}
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(allCounters, config)
-	err = fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-
-	item, exists := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-	require.True(t, exists)
-
-	xidCollector, err := NewXIDCollector(cc.ExporterCounters, hostname, config, item)
-	require.NoError(t, err)
-
-	defer func() {
-		xidCollector.Cleanup()
-	}()
-
-	metrics, err := xidCollector.GetMetrics()
-	require.NoError(t, err)
-	require.NotEmpty(t, metrics)
-	// We expect 1 metric: DCGM_EXP_XID_ERRORS_COUNT
-	require.Len(t, metrics, 1)
-	// We get metric value with 0 index
-	metricValues := metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)]
-
-	fakeGPUIDMap := map[string]struct{}{}
-	for _, fakeGPUID := range fakeGPUIDs {
-		fakeGPUIDMap[fmt.Sprint(fakeGPUID)] = struct{}{}
-	}
-
-	conditionFakeGPUOnly := func(m Metric) bool {
-		_, exists := fakeGPUIDMap[m.GPU]
-		return exists
-	}
-
-	// We want to filter out physical GPU and keep fake only
-	metricValues = filterMetrics(metricValues, conditionFakeGPUOnly)
-
-	require.Len(t, metricValues, len(fakeGPUIDs)*2)
-	for _, val := range metricValues {
-		require.Contains(t, val.Labels, "window_size_in_ms")
-		require.Equal(t, fmt.Sprint(config.XIDCountWindowSize), val.Labels["window_size_in_ms"])
-	}
-
-	// We inject new error
-	err = dcgm.InjectFieldValue(fakeGPUIDs[0],
-		dcgm.DCGM_FI_DEV_XID_ERRORS,
-		dcgm.DCGM_FT_INT64,
-		0,
-		time.Now().UnixMicro(),
-		int64(19),
-	)
-	require.NoError(t, err)
-
-	// Wait for 1 second
-	time.Sleep(1 * time.Second)
-
-	metrics, err = xidCollector.GetMetrics()
-	require.NoError(t, err)
-	require.NotEmpty(t, metrics)
-
-	// We expect 1 metric: DCGM_EXP_XID_ERRORS_COUNT
-	require.Len(t, metrics, 1)
-
-	// We get metric value with the last index
-	metricValues = metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)]
-	// We want to filter out physical GPU and keep fake only
-	metricValues = filterMetrics(metricValues, conditionFakeGPUOnly)
-	// We update metrics with slice, that doesn't contain physical GPU
-	metrics[reflect.ValueOf(metrics).MapKeys()[0].Interface().(Counter)] = metricValues
-
-	// We have 3 fake GPU and each GPU experienced 3 XID errors: 42, 46, 19 to GPU0
-	require.Len(t, metricValues, 1+(len(fakeGPUIDs)*2))
-	for _, val := range metricValues {
-		require.Contains(t, val.Labels, "window_size_in_ms")
-		require.Equal(t, fmt.Sprint(config.XIDCountWindowSize), val.Labels["window_size_in_ms"])
-	}
-
-	// Now we check the metric rendering
-	var b bytes.Buffer
-	err = encodeExpMetrics(&b, metrics)
-	require.NoError(t, err)
-	require.NotEmpty(t, b)
-
-	var parser expfmt.TextParser
-	mf, err := parser.TextToMetricFamilies(&b)
-	require.NoError(t, err)
-	require.NotEmpty(t, mf)
-	require.Len(t, mf, 1)
-	metricFamily := mf[reflect.ValueOf(mf).MapKeys()[0].Interface().(string)]
-	require.NotNil(t, metricFamily.Name)
-	assert.Equal(t, "DCGM_EXP_XID_ERRORS_COUNT", *metricFamily.Name)
-	assert.Equal(t, "Count of XID Errors within user-specified time window (see xid-count-window-size param).", *metricFamily.Help)
-	assert.Equal(t, io_prometheus_client.MetricType_GAUGE, *metricFamily.Type)
-	// We have 3 fake GPU and each GPU, except the one experienced XID errors: 42, 46, 19
-	require.Len(t, metricFamily.Metric, 1+(len(fakeGPUIDs)*2))
-	for _, mv := range metricFamily.Metric {
-		require.NotNil(t, mv.Gauge.Value)
-		if *(mv.Gauge.Value) == 0 {
-			// We don't inject XID errors into the hardware GPU, so we do not expect XID label
-			assert.Len(t, mv.Label, 7)
-			assert.False(t, slices.ContainsFunc(mv.Label, func(lp *io_prometheus_client.LabelPair) bool {
-				return ptr.Deref(lp.Name, "") == "xid"
-			}))
-			continue
-		}
-		assert.Len(t, mv.Label, 9)
-		assert.Equal(t, "gpu", *mv.Label[0].Name)
-		assert.Equal(t, "UUID", *mv.Label[1].Name)
-		assert.Equal(t, "pci_bus_id", *mv.Label[2].Name)
-		assert.NotEmpty(t, *mv.Label[2].Value)
-		assert.Equal(t, "device", *mv.Label[3].Name)
-		assert.Equal(t, "modelName", *mv.Label[4].Name)
-		assert.Equal(t, "Hostname", *mv.Label[5].Name)
-		assert.Equal(t, "DCGM_FI_DRIVER_VERSION", *mv.Label[6].Name)
-		assert.Equal(t, "window_size_in_ms", *mv.Label[7].Name)
-		assert.Equal(t, "xid", *mv.Label[8].Name)
-		assert.NotEmpty(t, *mv.Label[8].Value)
-	}
-}
-
-func filterMetrics(metricValues []Metric, condition func(Metric) bool) []Metric {
-	var result []Metric
-	for _, metricValue := range metricValues {
-		if condition(metricValue) {
-			result = append(result, metricValue)
-		}
-	}
-	return result
-}
-
-func TestXIDCollector_NewXIDCollector(t *testing.T) {
-	config := &Config{
-		GPUDevices: DeviceOptions{
-			Flex:       true,
-			MajorRange: []int{-1},
-			MinorRange: []int{-1},
-		},
-	}
-
-	teardownTest := setupTest(t)
-	defer teardownTest(t)
-
-	allCounters := []Counter{
-		{
-			FieldID: dcgm.DCGM_FI_DEV_CLOCK_THROTTLE_REASONS,
-		},
-	}
-
-	fieldEntityGroupTypeSystemInfo := NewEntityGroupTypeSystemInfo(allCounters, config)
-	err := fieldEntityGroupTypeSystemInfo.Load(dcgm.FE_GPU)
-	require.NoError(t, err)
-
-	item, _ := fieldEntityGroupTypeSystemInfo.Get(dcgm.FE_GPU)
-
-	t.Run("Should Return Error When DCGM_EXP_XID_ERRORS_COUNT is not present", func(t *testing.T) {
-		records := [][]string{
-			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
-		}
-		cc, err := extractCounters(records, config)
-		require.NoError(t, err)
-		require.Len(t, cc.ExporterCounters, 0)
-		require.Len(t, cc.DCGMCounters, 1)
-
-		xidCollector, err := NewXIDCollector(cc.DCGMCounters, "", config, item)
-		require.Error(t, err)
-		require.Nil(t, xidCollector)
-	})
-
-	t.Run("Should Return Error When Counters Param Is Empty", func(t *testing.T) {
-		counters := make([]Counter, 0)
-		xidCollector, err := NewXIDCollector(counters, "", config, item)
-		require.Error(t, err)
-		require.Nil(t, xidCollector)
-	})
-
-	t.Run("Should Not Return Error When DCGM_EXP_XID_ERRORS_COUNT Present More Than Once", func(t *testing.T) {
-		records := [][]string{
-			{"DCGM_FI_DRIVER_VERSION", "label", "Driver Version"},
-			{"DCGM_EXP_XID_ERRORS_COUNT", "gauge", "Count of XID Errors within user-specified time window (see xid-count-window-size param)."},
-			{"DCGM_EXP_XID_ERRORS_COUNT", "gauge", "Count of XID Errors within user-specified time window (see xid-count-window-size param)."},
-			{"DCGM_EXP_XID_ERRORS_COUNT", "gauge", "Count of XID Errors within user-specified time window (see xid-count-window-size param)."},
-		}
-		cc, err := extractCounters(records, config)
-		require.NoError(t, err)
-		for i := range cc.DCGMCounters {
-			if cc.DCGMCounters[i].PromType == "label" {
-				cc.ExporterCounters = append(cc.ExporterCounters, cc.DCGMCounters[i])
-			}
-		}
-		xidCollector, err := NewXIDCollector(cc.ExporterCounters, "", config, item)
-		require.NoError(t, err)
-		require.NotNil(t, xidCollector)
-	})
-}
diff --git a/scripts/test_coverage.sh b/scripts/test_coverage.sh
index db49bd43..e7859be5 100644
--- a/scripts/test_coverage.sh
+++ b/scripts/test_coverage.sh
@@ -24,11 +24,30 @@ go test $(go list ./... | grep -v "/tests/e2e/") \
   -coverprofile=unit_coverage.out \
   -json > test_results.json
 
+if [ $? -ne 0 ]; then
+  echo "Unit tests failed."
+  exit 1
+fi
+
+echo "Running integration tests..."
+go test ./internal/pkg/integration_test/... \
+  -count=1 \
+  -timeout 5m \
+  -covermode=count \
+  -coverpkg=./internal/pkg/... \
+  -coverprofile=integration_coverage.out \
+  -json >> test_results.json
+
+if [ $? -ne 0 ]; then
+  echo "Integration tests failed."
+  exit 1
+fi
+
 echo "Merging coverage profiles..."
-gocovmerge unit_coverage.out > combined_coverage.out.tmp
+gocovmerge unit_coverage.out integration_coverage.out > combined_coverage.out.tmp
 
 # Remove mocks from coverage
 cat combined_coverage.out.tmp | grep -v "mock_" > tests.cov
 
 # Cleanup
-rm combined_coverage.out.tmp unit_coverage.out
+rm combined_coverage.out.tmp integration_coverage.out unit_coverage.out
diff --git a/service-monitor.yaml b/service-monitor.yaml
index 57aaa800..e70dc90a 100644
--- a/service-monitor.yaml
+++ b/service-monitor.yaml
@@ -18,12 +18,12 @@ metadata:
   name: "dcgm-exporter"
   labels:
     app.kubernetes.io/name: "dcgm-exporter"
-    app.kubernetes.io/version: "3.6.1"
+    app.kubernetes.io/version: "4.0.0"
 spec:
   selector:
     matchLabels:
       app.kubernetes.io/name: "dcgm-exporter"
-      app.kubernetes.io/version: "3.6.1"
+      app.kubernetes.io/version: "4.0.0"
   endpoints:
   - port: "metrics"
     path: "/metrics"
diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile
index ae9a6411..6607eb2a 100644
--- a/tests/e2e/Makefile
+++ b/tests/e2e/Makefile
@@ -16,8 +16,10 @@ GO_CMD ?= go
 NAMESPACE ?= "dcgm-exporter"
 CHART ?= "./../../deployment/"
 IMAGE_REPOSITORY ?= "nvcr.io/nvidia/k8s/dcgm-exporter"
-IMAGE_TAG ?= "3.3.9-3.6.1-ubuntu22.04"
+IMAGE_TAG ?= "4.0.0-4.0.0-ubuntu22.04"
 KUBECONFIG ?= "~/.kube/config"
+RUNTIME_CLASS ?= ""
+NO_CLEANUP ?= "false"
 
 define TEST_CMD
 	@if [ -z ${KUBECONFIG} ]; then \
@@ -32,7 +34,9 @@ define TEST_CMD
 		-chart="$(CHART)" \
 		-namespace=$(NAMESPACE) \
 		-image-repository=$(IMAGE_REPOSITORY) \
-		-image-tag=$(IMAGE_TAG)
+		-image-tag=$(IMAGE_TAG) \
+		-runtime-class=$(RUNTIME_CLASS) \
+		-no-cleanup=$(NO_CLEANUP)
 endef
 
 .PHONY: e2e-test
@@ -45,5 +49,23 @@ e2e-test-no-profiling:
 	@$(TEST_CMD) \
 		-arguments="{-f=/etc/dcgm-exporter/default-counters.csv}"
 
+.PHONY: e2e-basic-auth
+e2e-basic-auth:
+	@$(TEST_CMD) \
+		-arguments="{-f=/etc/dcgm-exporter/default-counters.csv}" \
+		--ginkgo.label-filter=basicAuth
+
+.PHONY: e2e-tls
+e2e-tls:
+	@$(TEST_CMD) \
+		-arguments="{-f=/etc/dcgm-exporter/default-counters.csv}" \
+		--ginkgo.label-filter=tls
+
+.PHONY: e2e-default
+e2e-default:
+	@$(TEST_CMD) \
+		-arguments="{-f=/etc/dcgm-exporter/default-counters.csv}" \
+		--ginkgo.label-filter=default
+
 binary:
-	go test -c --tags="e2e" .
\ No newline at end of file
+	go test -c --tags="e2e" .
diff --git a/tests/e2e/e2e_actions_test.go b/tests/e2e/e2e_actions_test.go
index 40e6b15c..2c879fcb 100644
--- a/tests/e2e/e2e_actions_test.go
+++ b/tests/e2e/e2e_actions_test.go
@@ -21,13 +21,16 @@ import (
 	"context"
 	"fmt"
 	"os"
+	"time"
 
-	"github.com/NVIDIA/dcgm-exporter/tests/e2e/internal/framework"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
 	"k8s.io/client-go/rest"
 	restclient "k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/clientcmd"
+
+	"github.com/NVIDIA/dcgm-exporter/tests/e2e/internal/framework"
 )
 
 func shouldCreateK8SConfig() *restclient.Config {
@@ -45,10 +48,10 @@ func shouldResolvePath() {
 }
 
 func shouldCreateNamespace(ctx context.Context, kubeClient *framework.KubeClient, labels map[string]string) {
-	_, _ = fmt.Fprintf(GinkgoWriter, "Creating namespace: %q started.\n", testContext.namespace)
+	By(fmt.Sprintf("Creating namespace: %q started.", testContext.namespace))
 	_, err := kubeClient.CreateNamespace(ctx, testContext.namespace, labels)
 	Expect(err).ShouldNot(HaveOccurred(), "Creating namespace: failed")
-	_, _ = fmt.Fprintf(GinkgoWriter, "Creating namespace: %q completed\n", testContext.namespace)
+	By(fmt.Sprintf("Creating namespace: %q completed", testContext.namespace))
 }
 
 func shouldCreateKubeClient(config *rest.Config) *framework.KubeClient {
@@ -77,17 +80,17 @@ func shouldCreateHelmClient(config *rest.Config) *framework.HelmClient {
 
 func shouldUninstallHelmChart(helmClient *framework.HelmClient, helmReleaseName string) {
 	if helmClient != nil && helmReleaseName != "" {
-		_, _ = fmt.Fprintf(GinkgoWriter, "Helm chart uninstall: release %q of the helm chart: %q started.\n",
+		By(fmt.Sprintf("Helm chart uninstall: release %q of the helm chart: %q started.",
 			helmReleaseName,
-			testContext.chart)
+			testContext.chart))
 
 		err := helmClient.Uninstall(helmReleaseName)
 		if err != nil {
 			Fail(fmt.Sprintf("Helm chart uninstall: release: %s uninstall failed with error: %v", helmReleaseName, err))
 		} else {
-			_, _ = fmt.Fprintf(GinkgoWriter, "Helm chart uninstall: release %q of the helm chart: %q completed.\n",
+			By(fmt.Sprintf("Helm chart uninstall: release %q of the helm chart: %q completed.",
 				helmReleaseName,
-				testContext.chart)
+				testContext.chart))
 		}
 	}
 }
@@ -102,13 +105,98 @@ func shouldCleanupHelmClient(helmClient *framework.HelmClient) {
 }
 
 func shouldDeleteNamespace(ctx context.Context, kubeClient *framework.KubeClient) {
-	_, _ = fmt.Fprintf(GinkgoWriter, "Namespace deletion: %q namespace started.\n", testContext.namespace)
+	By(fmt.Sprintf("Namespace deletion: %q namespace started.", testContext.namespace))
 	if kubeClient != nil {
 		err := kubeClient.DeleteNamespace(ctx, testContext.namespace)
 		if err != nil {
-			Fail(fmt.Sprintf("Namespace deletion: Failed to delete namespace %q with error: %v", testContext.namespace, err))
+			Fail(fmt.Sprintf("Namespace deletion: Failed to delete namespace %q with error: %v", testContext.namespace,
+				err))
 		} else {
-			_, _ = fmt.Fprintf(GinkgoWriter, "Namespace deletion: %q namespace completed.\n", testContext.namespace)
+			By(fmt.Sprintf("Namespace deletion: %q namespace completed.", testContext.namespace))
 		}
 	}
 }
+
+// shouldCheckIfPodCreated polls GetPodsByLabel once per second, for up to 15
+// minutes, until exactly one pod in the test namespace matches labels, then
+// returns that pod. A lookup error fails the spec.
+func shouldCheckIfPodCreated(
+	ctx context.Context, kubeClient *framework.KubeClient, labels map[string]string,
+) *corev1.Pod {
+	By("Pod creation verification: started")
+
+	var found *corev1.Pod
+	singlePodExists := func(ctx context.Context) bool {
+		candidates, err := kubeClient.GetPodsByLabel(ctx, testContext.namespace, labels)
+		if err != nil {
+			Fail(fmt.Sprintf("Pod creation: Failed with error: %v", err))
+			return false
+		}
+		if len(candidates) != 1 {
+			return false
+		}
+		found = &candidates[0]
+		return true
+	}
+	Eventually(singlePodExists).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue())
+
+	By("Pod creation verification: completed")
+	return found
+}
+
+// getDefaultHelmValues returns Helm "key=value" overrides built from
+// testContext: serviceMonitor.enabled=false is always present, and arguments,
+// image repository, image tag and runtime class are added only when non-empty.
+func getDefaultHelmValues() []string {
+	// Optional overrides, in the order they are appended to the result.
+	optional := []struct {
+		format string
+		value  string
+	}{
+		{"arguments=%s", testContext.arguments},
+		{"image.repository=%s", testContext.imageRepository},
+		{"image.tag=%s", testContext.imageTag},
+		{"runtimeClassName=%s", testContext.runtimeClass},
+	}
+
+	result := []string{fmt.Sprintf("serviceMonitor.enabled=%v", false)}
+	for _, opt := range optional {
+		if opt.value != "" {
+			result = append(result, fmt.Sprintf(opt.format, opt.value))
+		}
+	}
+	return result
+}
+
+// shouldCheckIfPodIsReady polls CheckPodStatus once per second, for up to 15
+// minutes, until the pod reports a true PodReady condition. The spec fails
+// when the check returns an error, e.g. a container in CrashLoopBackOff.
+func shouldCheckIfPodIsReady(ctx context.Context, kubeClient *framework.KubeClient, namespace, podName string) {
+	// readyOrCrashed reports readiness, or an error for a crash-looping container.
+	readyOrCrashed := func(namespace, podName string, status corev1.PodStatus) (bool, error) {
+		for _, cond := range status.Conditions {
+			if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
+				return true, nil
+			}
+		}
+
+		for _, cs := range status.ContainerStatuses {
+			if waiting := cs.State.Waiting; waiting != nil && waiting.Reason == "CrashLoopBackOff" {
+				return false, fmt.Errorf("pod %s in namespace %s is in CrashLoopBackOff", podName, namespace)
+			}
+		}
+
+		return false, nil
+	}
+
+	By("Checking pod status: started")
+	Eventually(func(ctx context.Context) bool {
+		ready, err := kubeClient.CheckPodStatus(ctx, namespace, podName, readyOrCrashed)
+		if err != nil {
+			Fail(fmt.Sprintf("Checking pod status: Failed with error: %v", err))
+		}
+
+		return ready
+	}).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue())
+	By("Checking pod status: completed")
+}
diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go
index 6e2c056c..a5831ed1 100644
--- a/tests/e2e/e2e_suite_test.go
+++ b/tests/e2e/e2e_suite_test.go
@@ -18,25 +18,19 @@
 package e2e
 
 import (
-	"bytes"
 	"context"
 	"fmt"
-	"slices"
-	"time"
 
-	corev1 "k8s.io/api/core/v1"
-	"k8s.io/utils/ptr"
+	. "github.com/onsi/ginkgo/v2"
 
 	"github.com/NVIDIA/dcgm-exporter/tests/e2e/internal/framework"
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-	"github.com/prometheus/common/expfmt"
 )
 
 const (
 	podLabel       = "pod"
 	namespaceLabel = "namespace"
 	containerLabel = "container"
+	e2eRunIDLabel  = "e2eRunID"
 
 	dcgmExporterPort = 9400
 
@@ -48,7 +42,10 @@ const (
 	workloadImage         = "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda11.7.1-ubuntu20.04"
 )
 
-var expectedLabels = []string{podLabel, namespaceLabel, containerLabel}
+var (
+	expectedLabels        = []string{podLabel, namespaceLabel, containerLabel}
+	dcgmExporterPodLabels = map[string]string{dcgmExporterPodNameLabel: dcgmExporterPodNameLabelValue}
+)
 
 type testContextType struct {
 	kubeconfig      string
@@ -57,240 +54,62 @@ type testContextType struct {
 	imageTag        string
 	arguments       string
 	namespace       string
+	runtimeClass    string
+	noCleanup       bool
 }
 
 var _ = Describe("dcgm-exporter-e2e-suite", func() {
-	When("DCGM exporter is deployed on kubernetes", Ordered, func() {
+	Context("DCGM exporter is deployed on kubernetes", Ordered, func() {
 		// Init global suite vars
 		var (
-			kubeClient *framework.KubeClient
-			helmClient *framework.HelmClient
-
-			labels = map[string]string{
-				"e2eRunID": runID.String(),
+			kubeClient    *framework.KubeClient
+			helmClient    *framework.HelmClient
+			testRunLabels = map[string]string{
+				e2eRunIDLabel: runID.String(),
 			}
-
-			helmReleaseName string
-			dcgmExpPod      *corev1.Pod
-			workloadPod     *corev1.Pod
 		)
 
-		BeforeAll(func(ctx context.Context) {
-			if testContext.kubeconfig == "" {
-				_, _ = fmt.Fprintln(GinkgoWriter, "kubeconfig parameter is empty. Defaulting to ~/.kube/config")
-			}
-
-			if len(testContext.chart) == 0 {
-				Fail("chart parameter is empty")
-			}
-
-			shouldResolvePath()
-
-			kubeConfigShouldExists()
+		if testContext.kubeconfig == "" {
+			_, _ = fmt.Fprintln(GinkgoWriter, "kubeconfig parameter is empty. Defaulting to ~/.kube/config")
+		}
 
-			k8sConfig := shouldCreateK8SConfig()
+		if len(testContext.chart) == 0 {
+			Fail("chart parameter is empty")
+		}
 
-			kubeClient = shouldCreateKubeClient(k8sConfig)
+		shouldResolvePath()
 
-			helmClient = shouldCreateHelmClient(k8sConfig)
-		})
-
-		AfterAll(func(ctx context.Context) {
-			_, _ = fmt.Fprintln(GinkgoWriter, "Clean up: starting")
+		kubeConfigShouldExists()
 
-			shouldUninstallHelmChart(helmClient, helmReleaseName)
-			shouldCleanupHelmClient(helmClient)
+		k8sConfig := shouldCreateK8SConfig()
 
-			shouldDeleteNamespace(ctx, kubeClient)
+		kubeClient = shouldCreateKubeClient(k8sConfig)
 
-			_, _ = fmt.Fprintln(GinkgoWriter, "Clean up: completed")
-		})
+		helmClient = shouldCreateHelmClient(k8sConfig)
 
-		It("should create namespace", func(ctx context.Context) {
-			shouldCreateNamespace(ctx, kubeClient, labels)
+		BeforeAll(func(ctx context.Context) {
+			shouldCreateNamespace(ctx, kubeClient, testRunLabels)
 		})
 
-		It("should install dcgm-exporter helm chart", func(ctx context.Context) {
-			_, _ = fmt.Fprintf(GinkgoWriter, "Helm chart installation: %q chart started.\n",
-				testContext.chart)
-
-			values := []string{
-				fmt.Sprintf("serviceMonitor.enabled=%v", false),
-			}
-
-			if testContext.arguments != "" {
-				values = append(values, fmt.Sprintf("arguments=%s", testContext.arguments))
-			}
-
-			if testContext.imageRepository != "" {
-				values = append(values, fmt.Sprintf("image.repository=%s", testContext.imageRepository))
-			}
-			if testContext.imageTag != "" {
-				values = append(values, fmt.Sprintf("image.tag=%s", testContext.imageTag))
+		AfterAll(func(ctx context.Context) {
+			if testContext.noCleanup {
+				_, _ = fmt.Fprintln(GinkgoWriter, "Clean up: skipped")
+				Skip("Clean up skipped, by user request")
 			}
 
-			var err error
-
-			helmReleaseName, err = helmClient.Install(ctx, values, framework.HelmChartOptions{
-				CleanupOnFail: true,
-				GenerateName:  true,
-				Timeout:       5 * time.Minute,
-				Wait:          true,
-				DryRun:        false,
-			})
-			Expect(err).ShouldNot(HaveOccurred(), "Helm chart installation: %q chart failed with error err: %v", testContext.chart, err)
-
-			_, _ = fmt.Fprintf(GinkgoWriter, "Helm chart installation: %q completed.\n",
-				testContext.chart)
-			_, _ = fmt.Fprintf(GinkgoWriter, "Helm chart installation: new %q release name.\n",
-				helmReleaseName)
-		})
-
-		labelMap := map[string]string{dcgmExporterPodNameLabel: dcgmExporterPodNameLabelValue}
-
-		It("should create dcgm-exporter pod", func(ctx context.Context) {
-			_, _ = fmt.Fprintln(GinkgoWriter, "Pod creation verification: started")
-
-			Eventually(func(ctx context.Context) bool {
-				pods, err := kubeClient.GetPodsByLabel(ctx, testContext.namespace, labelMap)
-				if err != nil {
-					Fail(fmt.Sprintf("Pod creation: Failed with error: %v", err))
-					return false
-				}
-
-				if len(pods) == 1 {
-					dcgmExpPod = &pods[0]
-					return true
-				}
-
-				return false
-			}).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue())
-
-			_, _ = fmt.Fprintln(GinkgoWriter, "Pod creation verification: completed")
-		})
-
-		It("should ensure that the dcgm-exporter pod is ready", func(ctx context.Context) {
-			_, _ = fmt.Fprintln(GinkgoWriter, "Checking pod status: started")
-			Eventually(func(ctx context.Context) bool {
-				isReady, err := kubeClient.CheckPodStatus(ctx,
-					testContext.namespace,
-					dcgmExpPod.Name,
-					func(namespace, podName string, status corev1.PodStatus) (bool, error) {
-						for _, c := range status.Conditions {
-							if c.Type != corev1.PodReady {
-								continue
-							}
-							if c.Status == corev1.ConditionTrue {
-								return true, nil
-							}
-						}
-
-						for _, c := range status.ContainerStatuses {
-							if c.State.Waiting != nil && c.State.Waiting.Reason == "CrashLoopBackOff" {
-								return false, fmt.Errorf("pod %s in namespace %s is in CrashLoopBackOff", podName, namespace)
-							}
-						}
-
-						return false, nil
-					})
-				if err != nil {
-					Fail(fmt.Sprintf("Checking pod status: Failed with error: %v", err))
-				}
-
-				return isReady
-			}).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue())
-			_, _ = fmt.Fprintln(GinkgoWriter, "Checking pod status: completed")
-		})
-
-		It("should create a workload pod", func(ctx context.Context) {
-			_, _ = fmt.Fprintln(GinkgoWriter, "Workload pod creation: started")
-
-			var err error
+			By("Clean up: starting")
 
-			workloadPod, err = kubeClient.CreatePod(ctx,
-				testContext.namespace,
-				labels,
-				workloadPodName,
-				workloadContainerName,
-				workloadImage,
-			)
-
-			Expect(err).ShouldNot(HaveOccurred(),
-				"Workload pod creation: Failed create workload pod with err: %v", err)
-			Eventually(func(ctx context.Context) bool {
-				isReady, err := kubeClient.CheckPodStatus(ctx,
-					testContext.namespace,
-					workloadPod.Name, func(namespace, podName string, status corev1.PodStatus) (bool, error) {
-						return status.Phase == corev1.PodSucceeded, nil
-					})
-				if err != nil {
-					Fail(fmt.Sprintf("Workload pod creation: Checking pod status: Failed with error: %v", err))
-				}
-
-				return isReady
-			}).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue())
-
-			_, _ = fmt.Fprintln(GinkgoWriter, "Workload pod creation: completed")
-		})
-
-		It("should wait for 30 seconds, to read metrics", func() {
-			time.Sleep(30 * time.Second)
-		})
-
-		var metricsResponse []byte
-
-		It("should read metrics", func(ctx context.Context) {
-			_, _ = fmt.Fprintln(GinkgoWriter, "Read metrics: started")
-
-			Eventually(func(ctx context.Context) bool {
-				var err error
+			shouldCleanupHelmClient(helmClient)
 
-				metricsResponse, err = kubeClient.DoHttpRequest(ctx,
-					testContext.namespace,
-					dcgmExpPod.Name,
-					dcgmExporterPort,
-					"metrics")
-				if err != nil {
-					Fail(fmt.Sprintf("Read metrics: Failed with error: %v", err))
-				}
+			shouldDeleteNamespace(ctx, kubeClient)
 
-				return len(metricsResponse) > 0
-			}).WithPolling(time.Second).Within(time.Minute).WithContext(ctx).Should(BeTrue())
-			_, _ = fmt.Fprintln(GinkgoWriter, "Read metrics: completed")
+			By("Clean up: completed")
 		})
 
-		It("should verify metrics", func(ctx context.Context) {
-			Expect(metricsResponse).ShouldNot(BeEmpty())
+		VerifyDefaultHelmConfiguration(kubeClient, helmClient, testRunLabels)
 
-			var parser expfmt.TextParser
-			metricFamilies, err := parser.TextToMetricFamilies(bytes.NewReader(metricsResponse))
-			Expect(err).ShouldNot(HaveOccurred())
-			Expect(len(metricFamilies)).Should(BeNumerically(">", 0))
+		VerifyHelmConfigurationWhenTLSEnabled(kubeClient, helmClient, testRunLabels)
 
-			for _, metricFamily := range metricFamilies {
-				Expect(metricFamily).ShouldNot(BeNil())
-				metrics := metricFamily.GetMetric()
-				Expect(metrics).ShouldNot(BeNil())
-
-				// Each metric must have namespace, pod and container labels
-				for _, metric := range metrics {
-					var actualLabels []string
-					for _, label := range metric.Label {
-						labelName := ptr.Deref(label.Name, "")
-						if slices.Contains(expectedLabels, labelName) {
-							actualLabels = append(actualLabels, labelName)
-							Expect(label.Value).ShouldNot(BeNil())
-							Expect(ptr.Deref(label.Value, "")).ShouldNot(BeEmpty(), "The %s metric contains a label named %q label with empty value.",
-								ptr.Deref(metricFamily.Name, ""),
-								labelName,
-							)
-						}
-					}
-					Expect(len(actualLabels)).Should(Equal(len(expectedLabels)),
-						"Metric %s doesn't contains expected labels: %v, actual labels: %v",
-						ptr.Deref(metricFamily.Name, ""), expectedLabels, metric.Label)
-				}
-			}
-		})
+		VerifyHelmConfigurationWhenHttpBasicAuthEnabled(kubeClient, helmClient, testRunLabels)
 	})
 })
diff --git a/tests/e2e/e2e_verify_default_configuration_test.go b/tests/e2e/e2e_verify_default_configuration_test.go
new file mode 100644
index 00000000..c96b8816
--- /dev/null
+++ b/tests/e2e/e2e_verify_default_configuration_test.go
@@ -0,0 +1,178 @@
+//go:build e2e
+
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package e2e
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"slices"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"github.com/prometheus/common/expfmt"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/ptr"
+
+	"github.com/NVIDIA/dcgm-exporter/tests/e2e/internal/framework"
+)
+
+// VerifyDefaultHelmConfiguration registers the specs for the default scenario: install the chart
+// with the values from getDefaultHelmValues, wait for the exporter pod, run a workload pod to
+// completion, then scrape /metrics and require the pod, namespace and container labels on every metric.
+var VerifyDefaultHelmConfiguration = func(
+	kubeClient *framework.KubeClient,
+	helmClient *framework.HelmClient,
+	testRunLabels map[string]string,
+) bool {
+	return Context("and uses a default helm configuration", Label("default"), func() {
+		// State shared by the specs below.
+		var (
+			helmReleaseName string
+			dcgmExpPod      *corev1.Pod
+			workloadPod     *corev1.Pod
+			metricsResponse []byte
+		)
+
+		// Remove the release installed by this scenario once its specs have finished.
+		AfterAll(func(ctx context.Context) {
+			shouldUninstallHelmChart(helmClient, helmReleaseName)
+		})
+
+		It("should install dcgm-exporter helm chart", func(ctx context.Context) {
+			By(fmt.Sprintf("Helm chart installation: %q chart started.", testContext.chart))
+
+			// GenerateName is set, so the release name is taken from the value Install returns.
+			installOpts := framework.HelmChartOptions{
+				CleanupOnFail: true,
+				GenerateName:  true,
+				Timeout:       5 * time.Minute,
+				Wait:          true,
+				DryRun:        false,
+			}
+
+			releaseName, err := helmClient.Install(ctx, installOpts, framework.WithValues(getDefaultHelmValues()...))
+			helmReleaseName = releaseName
+			Expect(err).ShouldNot(HaveOccurred(), "Helm chart installation: %q chart failed with error err: %v",
+				testContext.chart, err)
+
+			By(fmt.Sprintf("Helm chart installation: %q completed.", testContext.chart))
+			By(fmt.Sprintf("Helm chart installation: new %q release name.", helmReleaseName))
+		})
+
+		It("should create dcgm-exporter pod", func(ctx context.Context) {
+			dcgmExpPod = shouldCheckIfPodCreated(ctx, kubeClient, dcgmExporterPodLabels)
+		})
+
+		It("should ensure that the dcgm-exporter pod is ready", func(ctx context.Context) {
+			shouldCheckIfPodIsReady(ctx, kubeClient, dcgmExpPod.Namespace, dcgmExpPod.Name)
+		})
+
+		It("should create a workload pod", func(ctx context.Context) {
+			_, _ = fmt.Fprintln(GinkgoWriter, "Workload pod creation: started")
+
+			var err error
+			workloadPod, err = kubeClient.CreatePod(ctx,
+				testContext.namespace, testRunLabels,
+				workloadPodName, workloadContainerName, workloadImage,
+				testContext.runtimeClass,
+			)
+			Expect(err).ShouldNot(HaveOccurred(),
+				"Workload pod creation: Failed create workload pod with err: %v", err)
+
+			// The workload counts as done once the pod reaches the Succeeded phase.
+			hasSucceeded := func(_, _ string, status corev1.PodStatus) (bool, error) {
+				return status.Phase == corev1.PodSucceeded, nil
+			}
+
+			Eventually(func(ctx context.Context) bool {
+				done, err := kubeClient.CheckPodStatus(ctx, testContext.namespace, workloadPod.Name, hasSucceeded)
+				if err != nil {
+					Fail(fmt.Sprintf("Workload pod creation: Checking pod status: Failed with error: %v", err))
+				}
+
+				return done
+			}).WithPolling(time.Second).Within(15 * time.Minute).WithContext(ctx).Should(BeTrue())
+
+			By("Workload pod creation: completed")
+		})
+
+		It("should wait for 30 seconds, to read metrics", func() {
+			// Unconditional pause before the first scrape.
+			time.Sleep(30 * time.Second)
+		})
+
+		It("should read metrics", func(ctx context.Context) {
+			_, _ = fmt.Fprintln(GinkgoWriter, "Read metrics: started")
+
+			// A request error fails the spec immediately; an empty body is retried until the timeout.
+			fetchMetrics := func(ctx context.Context) bool {
+				var err error
+				metricsResponse, err = kubeClient.DoHTTPRequest(ctx,
+					testContext.namespace, dcgmExpPod.Name, dcgmExporterPort, "metrics")
+				if err != nil {
+					Fail(fmt.Sprintf("Read metrics: Failed with error: %v", err))
+				}
+
+				return len(metricsResponse) > 0
+			}
+
+			Eventually(fetchMetrics).WithPolling(time.Second).Within(time.Minute).WithContext(ctx).Should(BeTrue())
+			_, _ = fmt.Fprintln(GinkgoWriter, "Read metrics: completed")
+		})
+
+		It("should verify metrics", func(ctx context.Context) {
+			Expect(metricsResponse).ShouldNot(BeEmpty())
+
+			// Parse the scraped payload with the Prometheus text-format parser.
+			var parser expfmt.TextParser
+			families, err := parser.TextToMetricFamilies(bytes.NewReader(metricsResponse))
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(len(families)).Should(BeNumerically(">", 0))
+
+			for _, family := range families {
+				Expect(family).ShouldNot(BeNil())
+				metrics := family.GetMetric()
+				Expect(metrics).ShouldNot(BeNil())
+				familyName := ptr.Deref(family.Name, "")
+
+				// Each metric must have namespace, pod and container labels
+				for _, metric := range metrics {
+					var actualLabels []string
+					for _, label := range metric.Label {
+						labelName := ptr.Deref(label.Name, "")
+						if !slices.Contains(expectedLabels, labelName) {
+							continue
+						}
+
+						actualLabels = append(actualLabels, labelName)
+						Expect(label.Value).ShouldNot(BeNil())
+						Expect(ptr.Deref(label.Value, "")).ShouldNot(BeEmpty(),
+							"The %s metric contains a label named %q label with empty value.",
+							familyName, labelName)
+					}
+					// The number of matched labels must equal the number of expected labels.
+					Expect(len(actualLabels)).Should(Equal(len(expectedLabels)),
+						"Metric %s doesn't contains expected labels: %v, actual labels: %v",
+						familyName, expectedLabels, metric.Label)
+				}
+			}
+		})
+	})
+}
diff --git a/tests/e2e/e2e_verify_http_basic_auth_test.go b/tests/e2e/e2e_verify_http_basic_auth_test.go
new file mode 100644
index 00000000..4dcedc9f
--- /dev/null
+++ b/tests/e2e/e2e_verify_http_basic_auth_test.go
@@ -0,0 +1,134 @@
+//go:build e2e
+
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package e2e
+
+import (
+	"context"
+	"crypto/tls"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/NVIDIA/dcgm-exporter/tests/e2e/internal/framework"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+)
+
+// VerifyHelmConfigurationWhenHttpBasicAuthEnabled registers the specs for the basic-auth scenario:
+// install the chart with a basicAuth user, wait for the exporter pod, then check through a port-forward
+// that /metrics answers 401 without credentials and 200 with them. testRunLabels is unused here.
+var VerifyHelmConfigurationWhenHttpBasicAuthEnabled = func(kubeClient *framework.KubeClient,
+	helmClient *framework.HelmClient,
+	testRunLabels map[string]string,
+) bool {
+	return Context("and HTTP basic auth is enabled", Label("basicAuth"), func() {
+		var (
+			helmReleaseName string
+			dcgmExpPod      *corev1.Pod
+		)
+
+		AfterAll(func(ctx context.Context) {
+			shouldUninstallHelmChart(helmClient, helmReleaseName)
+		})
+
+		// Credentials passed to the chart and reused for the authenticated request.
+		userName := "alice"
+		userPassword := "Pa$$w0rd"
+
+		It("should install dcgm-exporter helm chart", func(ctx context.Context) {
+			By(fmt.Sprintf("Helm chart installation: %q chart started.", testContext.chart))
+
+			values := getDefaultHelmValues()
+
+			type basicAuth struct {
+				Users map[string]string `json:"users"`
+			}
+
+			basicAuthValue := basicAuth{
+				Users: map[string]string{
+					userName: userPassword,
+				},
+			}
+			basicAuthValueJSON, err := json.Marshal(basicAuthValue)
+			Expect(err).ShouldNot(HaveOccurred())
+
+			jsonValues := []string{fmt.Sprintf("basicAuth=%s", string(basicAuthValueJSON))}
+
+			helmReleaseName, err = helmClient.Install(ctx, framework.HelmChartOptions{
+				CleanupOnFail: true,
+				GenerateName:  true,
+				Timeout:       5 * time.Minute,
+				Wait:          true,
+				DryRun:        false,
+			}, framework.WithValues(values...), framework.WithJSONValues(jsonValues...))
+			Expect(err).ShouldNot(HaveOccurred(), "Helm chart installation: %q chart failed with error err: %v", testContext.chart, err)
+
+			By(fmt.Sprintf("Helm chart installation: %q completed.", testContext.chart))
+			By(fmt.Sprintf("Helm chart installation: new %q release name.", helmReleaseName))
+		})
+
+		It("should create dcgm-exporter pod", func(ctx context.Context) {
+			dcgmExpPod = shouldCheckIfPodCreated(ctx, kubeClient, dcgmExporterPodLabels)
+		})
+
+		It("should ensure that the dcgm-exporter pod is ready", func(ctx context.Context) {
+			shouldCheckIfPodIsReady(ctx, kubeClient, dcgmExpPod.Namespace, dcgmExpPod.Name)
+		})
+
+		It("should check that the metric endpoint protected by basic HTTP auth", func(ctx context.Context) {
+			ctx, cancel := context.WithCancel(ctx)
+			defer cancel()
+			kubeClient.ErrWriter = GinkgoWriter
+			kubeClient.OutWriter = GinkgoWriter
+			localPort, err := kubeClient.PortForward(ctx, dcgmExpPod.Namespace, dcgmExpPod.Name, dcgmExporterPort)
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(localPort).Should(BeNumerically(">", 0))
+			httpClient := &http.Client{
+				Timeout: 5 * time.Second,
+				Transport: &http.Transport{
+					TLSClientConfig: &tls.Config{
+						InsecureSkipVerify: true,
+					},
+				},
+			}
+
+			By("Ensure that HTTP request returns 401 error when no credentials provided")
+			resp, err := httpClient.Get(fmt.Sprintf("http://localhost:%d/metrics", localPort))
+			Expect(err).ShouldNot(HaveOccurred())
+			defer resp.Body.Close() // release the connection held by the 401 response
+			Expect(resp.StatusCode).To(Equal(http.StatusUnauthorized))
+			body, err := io.ReadAll(resp.Body)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(string(body)).To(ContainSubstring("Unauthorized"))
+
+			By("Ensure that HTTP request returns 200 when valid credentials are provided")
+			req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("http://localhost:%d/metrics", localPort), http.NoBody)
+			Expect(err).ShouldNot(HaveOccurred())
+			req.SetBasicAuth(userName, userPassword)
+			resp, err = httpClient.Do(req)
+			Expect(err).ShouldNot(HaveOccurred())
+			defer resp.Body.Close() // resp was reassigned; this closes the second response body
+			Expect(resp.StatusCode).To(Equal(http.StatusOK))
+			_, err = io.ReadAll(resp.Body)
+			Expect(err).NotTo(HaveOccurred())
+		})
+	})
+}
diff --git a/tests/e2e/e2e_verify_tls_test.go b/tests/e2e/e2e_verify_tls_test.go
new file mode 100644
index 00000000..41f4ed7b
--- /dev/null
+++ b/tests/e2e/e2e_verify_tls_test.go
@@ -0,0 +1,118 @@
+//go:build e2e
+
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package e2e
+
+import (
+	"context"
+	"crypto/tls"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+
+	"github.com/NVIDIA/dcgm-exporter/tests/e2e/internal/framework"
+)
+
+// VerifyHelmConfigurationWhenTLSEnabled registers the specs for the TLS scenario: install the chart
+// with tlsServerConfig.enabled=true, wait for the exporter pod, then check through a port-forward that
+// plain HTTP is rejected with 400 and HTTPS answers 200. testRunLabels is unused in this scenario.
+var VerifyHelmConfigurationWhenTLSEnabled = func(
+	kubeClient *framework.KubeClient,
+	helmClient *framework.HelmClient,
+	testRunLabels map[string]string,
+) bool {
+	return Context("and TLS is enabled", Label("tls"), func() {
+		var (
+			helmReleaseName string
+			dcgmExpPod      *corev1.Pod
+		)
+
+		AfterAll(func(ctx context.Context) {
+			shouldUninstallHelmChart(helmClient, helmReleaseName)
+		})
+
+		It("should install dcgm-exporter helm chart", func(ctx context.Context) {
+			By(fmt.Sprintf("Helm chart installation: %q chart started.", testContext.chart))
+
+			values := getDefaultHelmValues()
+			values = append(values, "tlsServerConfig.enabled=true")
+
+			var err error
+
+			helmReleaseName, err = helmClient.Install(ctx, framework.HelmChartOptions{
+				CleanupOnFail: true,
+				GenerateName:  true,
+				Timeout:       5 * time.Minute,
+				Wait:          true,
+				DryRun:        false,
+			}, framework.WithValues(values...))
+			Expect(err).ShouldNot(HaveOccurred(), "Helm chart installation: %q chart failed with error err: %v",
+				testContext.chart, err)
+
+			By(fmt.Sprintf("Helm chart installation: %q completed.", testContext.chart))
+			By(fmt.Sprintf("Helm chart installation: new %q release name.", helmReleaseName))
+		})
+
+		It("should create dcgm-exporter pod", func(ctx context.Context) {
+			dcgmExpPod = shouldCheckIfPodCreated(ctx, kubeClient, dcgmExporterPodLabels)
+		})
+
+		It("should ensure that the dcgm-exporter pod is ready", func(ctx context.Context) {
+			shouldCheckIfPodIsReady(ctx, kubeClient, dcgmExpPod.Namespace, dcgmExpPod.Name)
+		})
+
+		It("should check that the port accepts TLS", func(ctx context.Context) {
+			ctx, cancel := context.WithCancel(ctx)
+			defer cancel()
+			kubeClient.ErrWriter = GinkgoWriter
+			kubeClient.OutWriter = GinkgoWriter
+			localPort, err := kubeClient.PortForward(ctx, dcgmExpPod.Namespace, dcgmExpPod.Name, dcgmExporterPort)
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(localPort).Should(BeNumerically(">", 0))
+			httpClient := &http.Client{
+				Timeout: 5 * time.Second,
+				Transport: &http.Transport{
+					TLSClientConfig: &tls.Config{
+						InsecureSkipVerify: true, // test-only: server certificate verification is skipped
+					},
+				},
+			}
+
+			By("Ensure that HTTP request returns 400 error")
+			resp, err := httpClient.Get(fmt.Sprintf("http://localhost:%d/metrics", localPort))
+			Expect(err).ShouldNot(HaveOccurred())
+			defer resp.Body.Close() // release the connection held by the 400 response
+			Expect(resp.StatusCode).To(Equal(http.StatusBadRequest))
+			body, err := io.ReadAll(resp.Body)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(string(body)).To(ContainSubstring("Client sent an HTTP request to an HTTPS server"))
+
+			By("Ensure that HTTPS request returns 200")
+			resp, err = httpClient.Get(fmt.Sprintf("https://localhost:%d/metrics", localPort))
+			Expect(err).ShouldNot(HaveOccurred())
+			defer resp.Body.Close() // resp was reassigned; this closes the second response body
+			Expect(resp.StatusCode).To(Equal(http.StatusOK))
+			_, err = io.ReadAll(resp.Body)
+			Expect(err).NotTo(HaveOccurred())
+		})
+	})
+}
diff --git a/tests/e2e/internal/framework/helm.go b/tests/e2e/internal/framework/helm.go
index 727771f1..49e8842f 100644
--- a/tests/e2e/internal/framework/helm.go
+++ b/tests/e2e/internal/framework/helm.go
@@ -111,10 +111,26 @@ type HelmChartOptions struct {
 	DryRun        bool
 }
 
+// HelmChartValueOption customizes the helmValues.Options that Install assembles for the chart.
+type HelmChartValueOption func(*helmValues.Options)
+
+// WithValues adds "key=value" overrides to Options.Values. Entries are appended, so the
+// option can be passed to Install more than once without discarding earlier entries.
+func WithValues(values ...string) HelmChartValueOption {
+	return func(o *helmValues.Options) { o.Values = append(o.Values, values...) }
+}
+
+// WithJSONValues adds "key=<json>" overrides to Options.JSONValues; entries are appended.
+func WithJSONValues(values ...string) HelmChartValueOption {
+	return func(o *helmValues.Options) { o.JSONValues = append(o.JSONValues, values...) }
+}
+
 // Install deploys the helm chart
-func (c *HelmClient) Install(ctx context.Context, params []string, chartOpts HelmChartOptions) (string, error) {
-	values := helmValues.Options{
-		Values: params,
+func (c *HelmClient) Install(ctx context.Context, chartOpts HelmChartOptions, valuesOptions ...HelmChartValueOption) (string, error) {
+	values := helmValues.Options{}
+
+	for _, valueOption := range valuesOptions {
+		valueOption(&values)
 	}
 
 	chartSpec := helm.ChartSpec{
@@ -136,7 +152,6 @@ func (c *HelmClient) Install(ctx context.Context, params []string, chartOpts Hel
 	}
 
 	res, err := c.client.InstallChart(ctx, &chartSpec, nil)
-
 	if err != nil {
 		return "", fmt.Errorf("error installing the chart; err: %w", err)
 	}
diff --git a/tests/e2e/internal/framework/kube.go b/tests/e2e/internal/framework/kube.go
index a9448c53..728a8f5d 100644
--- a/tests/e2e/internal/framework/kube.go
+++ b/tests/e2e/internal/framework/kube.go
@@ -19,6 +19,14 @@ package framework
 import (
 	"context"
 	"fmt"
+	"io"
+	"net"
+	"net/http"
+
+	"github.com/pkg/errors"
+	"k8s.io/client-go/transport/spdy"
+
+	"k8s.io/client-go/tools/portforward"
 
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -32,16 +40,25 @@ const nvidiaResourceName = "nvidia.com/gpu"
 
 // KubeClient is a kubernetes client
 type KubeClient struct {
-	client *kubernetes.Clientset
+	client     *kubernetes.Clientset
+	restConfig *rest.Config
+	OutWriter  io.Writer
+	ErrWriter  io.Writer
 }
 
 // NewKubeClient creates a new KubeClient instance
-func NewKubeClient(k8sConfig *rest.Config) (*KubeClient, error) {
-	client, err := kubernetes.NewForConfig(k8sConfig)
+func NewKubeClient(restConfig *rest.Config) (*KubeClient, error) {
+	client, err := kubernetes.NewForConfig(restConfig)
 	if err != nil {
 		return nil, err
 	}
-	return &KubeClient{client: client}, nil
+
+	return &KubeClient{
+		client:     client,
+		restConfig: restConfig,
+		OutWriter:  io.Discard,
+		ErrWriter:  io.Discard,
+	}, nil
 }
 
 // CreateNamespace creates a new namespace
@@ -70,7 +87,9 @@ func (c *KubeClient) DeleteNamespace(
 }
 
 // GetPodsByLabel returns a list of pods that matches with the label selector
-func (c *KubeClient) GetPodsByLabel(ctx context.Context, namespace string, labelMap map[string]string) ([]corev1.Pod, error) {
+func (c *KubeClient) GetPodsByLabel(ctx context.Context, namespace string, labelMap map[string]string) ([]corev1.Pod,
+	error,
+) {
 	podList, err := c.client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
 		LabelSelector: labels.SelectorFromSet(labelMap).String(),
 	})
@@ -80,7 +99,9 @@ func (c *KubeClient) GetPodsByLabel(ctx context.Context, namespace string, label
 	return podList.Items, nil
 }
 
-func (c *KubeClient) CheckPodStatus(ctx context.Context,
+// CheckPodStatus check pod status
+func (c *KubeClient) CheckPodStatus(
+	ctx context.Context,
 	namespace, podName string,
 	condition func(namespace, podName string, status corev1.PodStatus) (bool, error),
 ) (bool, error) {
@@ -103,14 +124,23 @@ func (c *KubeClient) CheckPodStatus(ctx context.Context,
 }
 
 // CreatePod creates a new pod in the defined namespace
-func (c *KubeClient) CreatePod(ctx context.Context,
+func (c *KubeClient) CreatePod(
+	ctx context.Context,
 	namespace string,
 	labels map[string]string,
 	name string,
 	containerName string,
 	image string,
+	runtimeClassName string,
 ) (*corev1.Pod, error) {
+	// RuntimeClassName does not accept a reference to empty string, however nil is acceptable.
+	var runtimeClassNameRef *string
+	if runtimeClassName != "" {
+		runtimeClassNameRef = &runtimeClassName
+	}
+
 	quantity, _ := resource.ParseQuantity("1")
+
 	pod := &corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      name,
@@ -118,7 +148,8 @@ func (c *KubeClient) CreatePod(ctx context.Context,
 			Labels:    labels,
 		},
 		Spec: corev1.PodSpec{
-			RestartPolicy: corev1.RestartPolicyNever,
+			RuntimeClassName: runtimeClassNameRef,
+			RestartPolicy:    corev1.RestartPolicyNever,
 			Containers: []corev1.Container{
 				{
 					Name:  containerName,
@@ -132,19 +163,22 @@ func (c *KubeClient) CreatePod(ctx context.Context,
 			},
 		},
 	}
+
 	return c.client.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
 }
 
 // DeletePod deletes a pod in the defined namespace
-func (c *KubeClient) DeletePod(ctx context.Context,
+func (c *KubeClient) DeletePod(
+	ctx context.Context,
 	namespace string,
 	name string,
 ) error {
 	return c.client.CoreV1().Pods(namespace).Delete(ctx, name, metav1.DeleteOptions{})
 }
 
-// DoHttpRequest makes http request to path on the pod
-func (c *KubeClient) DoHttpRequest(ctx context.Context,
+// DoHTTPRequest makes http request to path on the pod
+func (c *KubeClient) DoHTTPRequest(
+	ctx context.Context,
 	namespace string,
 	name string,
 	port uint,
@@ -169,3 +203,53 @@ func (c *KubeClient) DoHttpRequest(ctx context.Context,
 
 	return rawResponse, nil
 }
+
+// PortForward forwards an OS-selected free local port to targetPort of the pod and returns the
+// local port once the forwarder signals readiness, or -1 and an error on failure. ctx.Done() is
+// the forwarder's stop channel; its output is written to c.OutWriter and c.ErrWriter.
+func (c *KubeClient) PortForward(
+	ctx context.Context, namespace string,
+	podName string,
+	targetPort int,
+) (int, error) {
+	transport, upgrader, err := spdy.RoundTripperFor(c.restConfig)
+	if err != nil {
+		return -1, errors.Wrap(err, "creating SPDY round tripper")
+	}
+
+	req := c.client.CoreV1().RESTClient().Post().
+		Resource("pods").
+		Namespace(namespace).
+		Name(podName).
+		SubResource("portforward")
+	dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", req.URL())
+
+	// Port 0 lets the OS pick an unused port; the probe listener is closed so the forwarder can bind it.
+	ln, err := net.Listen("tcp", "localhost:0")
+	if err != nil {
+		return -1, errors.Wrap(err, "selecting a free local port")
+	}
+	localPort := ln.Addr().(*net.TCPAddr).Port
+	if err = ln.Close(); err != nil {
+		return -1, errors.Wrap(err, "releasing the probed local port")
+	}
+
+	ports := []string{fmt.Sprintf("%d:%d", localPort, targetPort)}
+	fw, err := portforward.New(dialer, ports, ctx.Done(), make(chan struct{}), c.OutWriter, c.ErrWriter)
+	if err != nil {
+		return -1, errors.Wrap(err, "creating port forwarder")
+	}
+
+	errCh := make(chan error, 1) // buffered so the goroutine can finish after this function returns
+	go func() { errCh <- fw.ForwardPorts() }()
+
+	select {
+	case err = <-errCh:
+		if err == nil { // errors.Wrap(nil, ...) is nil and would report success with port -1
+			err = errors.New("forwarder stopped before becoming ready")
+		}
+		return -1, errors.Wrap(err, "port forwarding failed")
+	case <-fw.Ready:
+		return localPort, nil
+	}
+}
diff --git a/tests/e2e/main_test.go b/tests/e2e/main_test.go
index e0850d63..37f482e3 100644
--- a/tests/e2e/main_test.go
+++ b/tests/e2e/main_test.go
@@ -63,6 +63,16 @@ func TestMain(m *testing.M) {
 		"",
 		`DCGM-exporter command line arguments. Example: -arguments="{-f=/etc/dcgm-exporter/default-counters.csv}"`)
 
+	flag.BoolVar(&testContext.noCleanup,
+		"no-cleanup",
+		false,
+		`Skip clean up after tests execution`)
+
+	flag.StringVar(&testContext.runtimeClass,
+		"runtime-class",
+		"",
+		"Runtime Class to use for the DCGM-exporter deployment and workload pods")
+
 	flag.Parse()
 
 	os.Exit(m.Run())
diff --git a/tests/integration/start_read_test.go b/tests/integration/start_read_test.go
index ca97b3ac..a398af6b 100644
--- a/tests/integration/start_read_test.go
+++ b/tests/integration/start_read_test.go
@@ -38,7 +38,7 @@ func TestStartAndReadMetrics(t *testing.T) {
 	}
 	app := cmd.NewApp()
 	args := os.Args[0:1]
-	args = append(args, "-f=../../etc/default-counters.csv") // Append a file with default counters
+	args = append(args, "-f=./testdata/default-counters.csv") // Append a file with default counters
 	port := getRandomAvailablePort(t)
 	args = append(args, fmt.Sprintf("-a=:%d", port))
 	ctx, cancel := context.WithCancel(context.Background())
@@ -47,9 +47,6 @@ func TestStartAndReadMetrics(t *testing.T) {
 		require.NoError(t, err)
 	}(ctx)
 
-	t.Log("The dcgm-exporter is running, we wait for 30 seconds to read metrics")
-	<-time.After(30 * time.Second)
-
 	t.Logf("Read metrics from http://localhost:%d/metrics", port)
 
 	metricsResp, _ := retry.DoWithData(
diff --git a/tests/integration/start_with_tls_test.go b/tests/integration/start_with_tls_test.go
index 77af70ec..532cff5e 100644
--- a/tests/integration/start_with_tls_test.go
+++ b/tests/integration/start_with_tls_test.go
@@ -21,7 +21,7 @@ func TestStartWithTLSEnabledAndBasicAuth(t *testing.T) {
 	}
 	app := cmd.NewApp()
 	args := os.Args[0:1]
-	args = append(args, "-f=../../etc/default-counters.csv") // Append a file with default counters
+	args = append(args, "-f=./testdata/default-counters.csv") // Append a file with default counters
 	port := getRandomAvailablePort(t)
 	args = append(args, fmt.Sprintf("-a=:%d", port))
 	args = append(args, "--web-config-file=./testdata/web-config.yml")
@@ -59,7 +59,8 @@ func TestStartWithTLSEnabledAndBasicAuth(t *testing.T) {
 		}
 		status, err := retry.DoWithData(
 			func() (int, error) {
-				req := newRequestWithBasicAuth(t, "alice", "password", http.MethodGet, fmt.Sprintf("https://localhost:%d/metrics", port), nil)
+				req := newRequestWithBasicAuth(t, "alice", "password", http.MethodGet,
+					fmt.Sprintf("https://localhost:%d/metrics", port), nil)
 				resp, err := client.Do(req)
 				if err != nil {
 					return -1, err
@@ -84,7 +85,8 @@ func TestStartWithTLSEnabledAndBasicAuth(t *testing.T) {
 		}
 		status, err := retry.DoWithData(
 			func() (int, error) {
-				req := newRequestWithBasicAuth(t, "alice", "bad password", http.MethodGet, fmt.Sprintf("https://localhost:%d/metrics", port), nil)
+				req := newRequestWithBasicAuth(t, "alice", "bad password", http.MethodGet,
+					fmt.Sprintf("https://localhost:%d/metrics", port), nil)
 				resp, err := client.Do(req)
 				if err != nil {
 					return -1, err
diff --git a/tests/integration/testdata/default-counters.csv b/tests/integration/testdata/default-counters.csv
new file mode 100644
index 00000000..ab5e545d
--- /dev/null
+++ b/tests/integration/testdata/default-counters.csv
@@ -0,0 +1,77 @@
+# Format
+# If a line starts with a '#', it is considered a comment
+# DCGM FIELD, Prometheus metric type, help message
+
+# Clocks
+DCGM_FI_DEV_SM_CLOCK,  gauge, SM clock frequency (in MHz).
+DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).
+# DCGM_EXP_CLOCK_EVENTS_COUNT, gauge, Count of clock events within the user-specified time window (see clock-events-count-window-size param).
+
+# Temperature
+DCGM_FI_DEV_MEMORY_TEMP, gauge, Memory temperature (in C).
+DCGM_FI_DEV_GPU_TEMP,    gauge, GPU temperature (in C).
+
+# Power
+DCGM_FI_DEV_POWER_USAGE,              gauge, Power draw (in W).
+DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION, counter, Total energy consumption since boot (in mJ).
+
+# PCIE
+# DCGM_FI_PROF_PCIE_TX_BYTES,  counter, Total number of bytes transmitted through PCIe TX via NVML.
+# DCGM_FI_PROF_PCIE_RX_BYTES,  counter, Total number of bytes received through PCIe RX via NVML.
+# DCGM_FI_DEV_PCIE_REPLAY_COUNTER, counter, Total number of PCIe retries.
+
+# Utilization (the sample period varies depending on the product)
+DCGM_FI_DEV_GPU_UTIL,      gauge, GPU utilization (in %).
+DCGM_FI_DEV_MEM_COPY_UTIL, gauge, Memory utilization (in %).
+DCGM_FI_DEV_ENC_UTIL,      gauge, Encoder utilization (in %).
+DCGM_FI_DEV_DEC_UTIL ,     gauge, Decoder utilization (in %).
+
+# Errors and violations
+DCGM_FI_DEV_XID_ERRORS,              gauge,   Value of the last XID error encountered.
+# DCGM_FI_DEV_POWER_VIOLATION,       counter, Throttling duration due to power constraints (in us).
+# DCGM_FI_DEV_THERMAL_VIOLATION,     counter, Throttling duration due to thermal constraints (in us).
+# DCGM_FI_DEV_SYNC_BOOST_VIOLATION,  counter, Throttling duration due to sync-boost constraints (in us).
+# DCGM_FI_DEV_BOARD_LIMIT_VIOLATION, counter, Throttling duration due to board limit constraints (in us).
+# DCGM_FI_DEV_LOW_UTIL_VIOLATION,    counter, Throttling duration due to low utilization (in us).
+# DCGM_FI_DEV_RELIABILITY_VIOLATION, counter, Throttling duration due to reliability constraints (in us).
+# DCGM_EXP_XID_ERRORS_COUNT,         gauge,   Count of XID Errors within user-specified time window (see xid-count-window-size param).
+# Memory usage
+DCGM_FI_DEV_FB_FREE, gauge, Frame buffer memory free (in MB).
+DCGM_FI_DEV_FB_USED, gauge, Frame buffer memory used (in MB).
+
+# ECC
+# DCGM_FI_DEV_ECC_SBE_VOL_TOTAL, counter, Total number of single-bit volatile ECC errors.
+# DCGM_FI_DEV_ECC_DBE_VOL_TOTAL, counter, Total number of double-bit volatile ECC errors.
+# DCGM_FI_DEV_ECC_SBE_AGG_TOTAL, counter, Total number of single-bit persistent ECC errors.
+# DCGM_FI_DEV_ECC_DBE_AGG_TOTAL, counter, Total number of double-bit persistent ECC errors.
+
+# Retired pages
+# DCGM_FI_DEV_RETIRED_SBE,     counter, Total number of retired pages due to single-bit errors.
+# DCGM_FI_DEV_RETIRED_DBE,     counter, Total number of retired pages due to double-bit errors.
+# DCGM_FI_DEV_RETIRED_PENDING, counter, Total number of pages pending retirement.
+
+# NVLink
+# DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL, counter, Total number of NVLink flow-control CRC errors.
+# DCGM_FI_DEV_NVLINK_CRC_DATA_ERROR_COUNT_TOTAL, counter, Total number of NVLink data CRC errors.
+# DCGM_FI_DEV_NVLINK_REPLAY_ERROR_COUNT_TOTAL,   counter, Total number of NVLink retries.
+# DCGM_FI_DEV_NVLINK_RECOVERY_ERROR_COUNT_TOTAL, counter, Total number of NVLink recovery errors.
+DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL,            counter, Total number of NVLink bandwidth counters for all lanes
+
+# VGPU License status
+DCGM_FI_DEV_VGPU_LICENSE_STATUS, gauge, vGPU License status
+
+# Remapped rows
+DCGM_FI_DEV_UNCORRECTABLE_REMAPPED_ROWS, counter, Number of remapped rows for uncorrectable errors
+DCGM_FI_DEV_CORRECTABLE_REMAPPED_ROWS,   counter, Number of remapped rows for correctable errors
+DCGM_FI_DEV_ROW_REMAP_FAILURE,           gauge,   Whether remapping of rows has failed
+
+# Static configuration information. These appear as labels on the other metrics
+DCGM_FI_DRIVER_VERSION,        label, Driver Version
+# DCGM_FI_NVML_VERSION,          label, NVML Version
+# DCGM_FI_DEV_BRAND,             label, Device Brand
+# DCGM_FI_DEV_SERIAL,            label, Device Serial Number
+# DCGM_FI_DEV_OEM_INFOROM_VER,   label, OEM inforom version
+# DCGM_FI_DEV_ECC_INFOROM_VER,   label, ECC inforom version
+# DCGM_FI_DEV_POWER_INFOROM_VER, label, Power management object inforom version
+# DCGM_FI_DEV_INFOROM_IMAGE_VER, label, Inforom image version
+# DCGM_FI_DEV_VBIOS_VERSION,     label, VBIOS version of the device