RUN-20245 Support NodePool GPU Topology #86

Merged · 8 commits · Jul 16, 2024

Changes from 7 commits
70 changes: 21 additions & 49 deletions README.md
@@ -2,40 +2,44 @@

The purpose of the _fake GPU Operator_ or GPU Operator Simulator is to simulate the NVIDIA GPU Operator without a GPU. The software has been created by Run:ai in order to save money on actual machines in situations that do not require the GPU itself. This simulator:

* Allows you to take a CPU-only node and externalize it as if it has 1 or more GPUs.
* Simulates all aspects of the NVIDIA GPU Operator including feature discovery, NVIDIA MIG and more.
* Emits metrics to Prometheus simulating actual GPUs
* Allows a CPU-only node to be represented as if it has one or more GPUs.
* Simulates all features of the NVIDIA GPU Operator, including feature discovery and NVIDIA MIG.
* Emits metrics to Prometheus, simulating actual GPU behavior.

You can configure the simulator to have any NVIDIA GPU topology, including type and amount of GPU memory.
You can configure the simulator to have any NVIDIA GPU topology, including the type and amount of GPU memory.



## Prerequisites

The real Nvidia GPU Operator should not exist in the Kubernetes cluster
Ensure that the real Nvidia GPU Operator is not present in the Kubernetes cluster.

## Installation

Label the nodes you wish to have fake GPUs on, with the following labels:
Assign the nodes you want to simulate GPUs on to a node pool by labeling them with the `run.ai/simulated-gpu-node-pool` label. For example:

```
kubectl label node <node-name> nvidia.com/gpu.deploy.device-plugin=true nvidia.com/gpu.deploy.dcgm-exporter=true --overwrite
```sh
kubectl label node <node-name> run.ai/simulated-gpu-node-pool=default
```

By default, the operator creates a GPU topology of 2 Tesla K80 GPUs for each node in the cluster. To create a different GPU topology, see the __customization__ section below.
NodePools are used to group nodes that should have the same GPU topology.
These are defined in the `topology.nodePools` section of the Helm `values.yaml` file.
By default, a node pool with 2 Tesla K80 GPUs will be created for all nodes labeled with `run.ai/simulated-gpu-node-pool=default`.
To create a different GPU topology, refer to the __customization__ section below.


Install the operator:
To install the operator:

```

```sh
helm repo add fake-gpu-operator https://fake-gpu-operator.storage.googleapis.com
helm repo update
helm upgrade -i gpu-operator fake-gpu-operator/fake-gpu-operator --namespace gpu-operator --create-namespace
```

## Usage

Submit any workload with a request for NVIDIA GPU:
Submit any workload with a request for an NVIDIA GPU:

```
resources:
@@ -57,43 +61,11 @@ kubectl label ns gpu-operator pod-security.kubernetes.io/enforce=privileged

## Customization

The base GPU topology is defined using a Kubernetes configmap named `topology`.

To customize the GPU topology, edit the Kubernetes configmap by running:

```
kubectl edit cm topology -n gpu-operator
```

The configmap should look like this:

```
apiVersion: v1
data:
topology.yml: |
config:
node-autofill:
gpu-count: 16
gpu-memory: 11441
gpu-product: Tesla-K80
mig-strategy: mixed
```

The configmap defines the GPU topology for all nodes.

* __gpu-count__ - number of GPUs per node.
* __gpu-memory__ - amount of GPU memory per GPU.
* __gpu-product__ - GPU type. For example: `Tesla-K80`, `Tesla-V100`, etc.
* __mig-strategy__ - MIG strategy. Can be `none`, `mixed` or `single`.

### Node specific customization

Each node can have a different GPU topology. To customize a specific node, edit the configmap named `<node-name>-topology` in the `gpu-operator` namespace.

The GPU topology can be customized by editing the `values.yaml` file on the `topology` section before installing/upgrading the helm chart.

### GPU metrics
## GPU metrics

By default, dcgm exporter will export maximum GPU utilization for every pod that requests GPUs.
By default, the DCGM exporter will report maximum GPU utilization for every pod requesting GPUs.

If you want to customize the GPU utilization, add a `run.ai/simulated-gpu-utilization` annotation to the pod with a value that represents the range of the GPU utilization that should be simulated.
For example, add `run.ai/simulated-gpu-utilization: 10-30` annotation to simulate a pod that utilizes the GPU between 10% to 30%.
To customize GPU utilization, add a `run.ai/simulated-gpu-utilization` annotation to the pod with a value representing the desired range of GPU utilization.
For example, add `run.ai/simulated-gpu-utilization: 10-30` to simulate a pod that utilizes between 10% and 30% of the GPU.
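
A minimal sketch of how the new `topology.nodePools` schema described above parses, assuming `gopkg.in/yaml.v3` as the YAML library (the repository may use a different one); the `a100` pool is illustrative, only `default` ships with the chart:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Mirrors the ClusterTopology/NodePoolTopology types introduced in this PR.
type NodePoolTopology struct {
	GpuCount   int    `yaml:"gpuCount"`
	GpuMemory  int    `yaml:"gpuMemory"`
	GpuProduct string `yaml:"gpuProduct"`
}

type ClusterTopology struct {
	NodePools        map[string]NodePoolTopology `yaml:"nodePools"`
	NodePoolLabelKey string                      `yaml:"nodePoolLabelKey"`
	MigStrategy      string                      `yaml:"migStrategy"`
}

// The "a100" pool and its product name are illustrative assumptions.
const sample = `
nodePools:
  default:
    gpuProduct: Tesla-K80
    gpuCount: 2
    gpuMemory: 11441
  a100:
    gpuProduct: A100-SXM4-40GB
    gpuCount: 8
    gpuMemory: 40960
nodePoolLabelKey: run.ai/simulated-gpu-node-pool
`

func main() {
	var ct ClusterTopology
	if err := yaml.Unmarshal([]byte(sample), &ct); err != nil {
		panic(err)
	}
	for name, pool := range ct.NodePools {
		fmt.Printf("pool %q: %d x %s, %d MiB each\n",
			name, pool.GpuCount, pool.GpuProduct, pool.GpuMemory)
	}
}
```

Nodes labeled `run.ai/simulated-gpu-node-pool=a100` would then pick up the second topology.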
8 changes: 4 additions & 4 deletions cmd/topology-server/main.go
@@ -18,23 +18,23 @@ func main() {
http.HandleFunc("/topology", func(w http.ResponseWriter, r *http.Request) {

w.Header().Set("Content-Type", "application/json")
baseTopology, err := topology.GetBaseTopologyFromCM(kubeclient.ClientSet)
clusterTopology, err := topology.GetClusterTopologyFromCM(kubeclient.ClientSet)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte(err.Error()))
return
}

baseTopologyJSON, err := json.Marshal(baseTopology)
clusterTopologyJSON, err := json.Marshal(clusterTopology)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte(err.Error()))
return
}

log.Printf("Returning cluster topology: %s", baseTopologyJSON)
log.Printf("Returning cluster topology: %s", clusterTopologyJSON)

_, err = w.Write(baseTopologyJSON)
_, err = w.Write(clusterTopologyJSON)
if err != nil {
panic(err)
}
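
A hedged sketch of a client for this handler; the address and port are assumptions (in-cluster, the topology-server Service would be used). One observable detail of the code above: `ClusterTopology` declares only `yaml` struct tags, so `json.Marshal` emits the Go field names (e.g. `NodePools`, `NodePoolLabelKey`) rather than tag-derived keys.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Address is an assumption; in-cluster this would be the topology-server Service.
	resp, err := http.Get("http://localhost:8080/topology")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode generically. The concrete type is ClusterTopology from this PR;
	// with only yaml tags declared, the JSON keys are the Go field names.
	var topo map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&topo); err != nil {
		panic(err)
	}
	fmt.Printf("%v\n", topo)
}
```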
@@ -18,7 +18,7 @@ matchLabels:

{{- define "fake-gpu-operator.device-plugin.common.podTemplate.metadata" }}
annotations:
checksum/initialTopology: {{ include (print $.Template.BasePath "/topology-cm.yml") . | sha256sum }}
checksum/topology: {{ include (print $.Template.BasePath "/topology-cm.yml") . | sha256sum }}
labels:
app: device-plugin
component: device-plugin
2 changes: 1 addition & 1 deletion deploy/fake-gpu-operator/templates/mig-faker/daemonset.yml
@@ -12,7 +12,7 @@ spec:
template:
metadata:
annotations:
checksum/initialTopology: {{ include (print $.Template.BasePath "/topology-cm.yml") . | sha256sum }}
checksum/topology: {{ include (print $.Template.BasePath "/topology-cm.yml") . | sha256sum }}
labels:
app: mig-faker
component: mig-faker
@@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: status-updater
annotations:
checksum/initialTopology: {{ include (print $.Template.BasePath "/topology-cm.yml") . | sha256sum }}
checksum/topology: {{ include (print $.Template.BasePath "/topology-cm.yml") . | sha256sum }}
labels:
app: status-updater
spec:
2 changes: 1 addition & 1 deletion deploy/fake-gpu-operator/templates/topology-cm.yml
@@ -1,7 +1,7 @@
apiVersion: v1
data:
topology.yml: |-
{{ toYaml .Values.initialTopology | indent 4 }}
{{ toYaml .Values.topology | indent 4 }}
kind: ConfigMap
metadata:
name: topology
21 changes: 13 additions & 8 deletions deploy/fake-gpu-operator/values.yaml
@@ -60,11 +60,16 @@ migFaker:
repository: gcr.io/run-ai-staging/fake-gpu-operator/mig-faker
tag: 0.0.1

initialTopology:
config:
node-autofill:
gpu-count: 2
gpu-product: Tesla-K80
gpu-memory: 11441
mig-strategy: mixed
nodes: {}
topology:
# nodePools is a map of node pool name to node pool configuration.
# Nodes are assigned to node pools based on the node pool label's value (key is configurable via nodePoolLabelKey).
#
# For example, nodes that have the label "run.ai/simulated-gpu-node-pool: default"
# will be assigned to the "default" node pool.
nodePools:
default:
gpuProduct: Tesla-K80
gpuCount: 2
gpuMemory: 11441
nodePoolLabelKey: run.ai/simulated-gpu-node-pool

mig-strategy: mixed

Review comment: why is it mig-strategy vs migStrategy?

Contributor Author: Oh thanks, my mistake
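
To make the `nodePools` comment in `values.yaml` concrete, a hypothetical helper (an assumption, not code from this PR) showing the lookup it describes: the node's label value under `nodePoolLabelKey` selects the entry in `nodePools`.

```go
package main

import "fmt"

type NodePoolTopology struct {
	GpuCount   int
	GpuMemory  int
	GpuProduct string
}

type ClusterTopology struct {
	NodePools        map[string]NodePoolTopology
	NodePoolLabelKey string
}

// poolForNode is hypothetical; node labels would come from the Node object.
func poolForNode(ct ClusterTopology, nodeLabels map[string]string) (NodePoolTopology, bool) {
	pool, ok := ct.NodePools[nodeLabels[ct.NodePoolLabelKey]]
	return pool, ok
}

func main() {
	ct := ClusterTopology{
		NodePools: map[string]NodePoolTopology{
			"default": {GpuCount: 2, GpuMemory: 11441, GpuProduct: "Tesla-K80"},
		},
		NodePoolLabelKey: "run.ai/simulated-gpu-node-pool",
	}
	labels := map[string]string{"run.ai/simulated-gpu-node-pool": "default"}
	if pool, ok := poolForNode(ct, labels); ok {
		fmt.Printf("node is assigned %d x %s\n", pool.GpuCount, pool.GpuProduct)
	}
}
```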
2 changes: 1 addition & 1 deletion internal/common/topology/const.go
@@ -1,5 +1,5 @@
package topology

const (
CmTopologyKey = "topology.yml"
cmTopologyKey = "topology.yml"
)
23 changes: 11 additions & 12 deletions internal/common/topology/kubernetes.go
@@ -48,49 +48,48 @@ func UpdateNodeTopologyCM(kubeclient kubernetes.Interface, nodeTopology *NodeTop
}

func DeleteNodeTopologyCM(kubeclient kubernetes.Interface, nodeName string) error {

err := kubeclient.CoreV1().ConfigMaps(
viper.GetString(constants.EnvTopologyCmNamespace)).Delete(context.TODO(), GetNodeTopologyCMName(nodeName), metav1.DeleteOptions{})
return err
}

func GetBaseTopologyFromCM(kubeclient kubernetes.Interface) (*BaseTopology, error) {
func GetClusterTopologyFromCM(kubeclient kubernetes.Interface) (*ClusterTopology, error) {
topologyCm, err := kubeclient.CoreV1().ConfigMaps(
viper.GetString(constants.EnvTopologyCmNamespace)).Get(
context.TODO(), viper.GetString(constants.EnvTopologyCmName), metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("failed to get topology configmap: %v", err)
}

cluster, err := FromBaseTopologyCM(topologyCm)
cluster, err := FromClusterTopologyCM(topologyCm)
if err != nil {
return nil, fmt.Errorf("failed to parse topology configmap: %v", err)
}

return cluster, nil
}

func FromBaseTopologyCM(cm *corev1.ConfigMap) (*BaseTopology, error) {
var baseTopology BaseTopology
err := yaml.Unmarshal([]byte(cm.Data[CmTopologyKey]), &baseTopology)
func FromClusterTopologyCM(cm *corev1.ConfigMap) (*ClusterTopology, error) {
var clusterTopology ClusterTopology
err := yaml.Unmarshal([]byte(cm.Data[cmTopologyKey]), &clusterTopology)
if err != nil {
return nil, err
}

return &baseTopology, nil
return &clusterTopology, nil
}

func FromNodeTopologyCM(cm *corev1.ConfigMap) (*NodeTopology, error) {
var nodeTopology NodeTopology
err := yaml.Unmarshal([]byte(cm.Data[CmTopologyKey]), &nodeTopology)
err := yaml.Unmarshal([]byte(cm.Data[cmTopologyKey]), &nodeTopology)
if err != nil {
return nil, err
}

return &nodeTopology, nil
}

func ToBaseTopologyCM(baseTopology *BaseTopology) (*corev1.ConfigMap, error) {
func ToClusterTopologyCM(clusterTopology *ClusterTopology) (*corev1.ConfigMap, error) {
cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: viper.GetString(constants.EnvTopologyCmName),
@@ -99,12 +98,12 @@ func ToBaseTopologyCM(baseTopology *BaseTopology) (*corev1.ConfigMap, error) {
Data: make(map[string]string),
}

topologyData, err := yaml.Marshal(baseTopology)
topologyData, err := yaml.Marshal(clusterTopology)
if err != nil {
return nil, err
}

cm.Data[CmTopologyKey] = string(topologyData)
cm.Data[cmTopologyKey] = string(topologyData)

return cm, nil
}
@@ -127,7 +126,7 @@ func ToNodeTopologyCM(nodeTopology *NodeTopology, nodeName string) (*corev1.Conf
return nil, err
}

cm.Data[CmTopologyKey] = string(topologyData)
cm.Data[cmTopologyKey] = string(topologyData)

return cm, nil
}
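
A test-style sketch (not part of this PR) of the ConfigMap round trip these helpers implement: the topology is serialized as YAML under the `topology.yml` key (the now-unexported `cmTopologyKey`) and parsed back. Types are repeated for self-containment, and `gopkg.in/yaml.v3` is assumed as the YAML library.

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const cmTopologyKey = "topology.yml" // mirrors internal/common/topology/const.go

type NodePoolTopology struct {
	GpuCount   int    `yaml:"gpuCount"`
	GpuMemory  int    `yaml:"gpuMemory"`
	GpuProduct string `yaml:"gpuProduct"`
}

type ClusterTopology struct {
	NodePools        map[string]NodePoolTopology `yaml:"nodePools"`
	NodePoolLabelKey string                      `yaml:"nodePoolLabelKey"`
}

func main() {
	in := ClusterTopology{
		NodePools: map[string]NodePoolTopology{
			"default": {GpuCount: 2, GpuMemory: 11441, GpuProduct: "Tesla-K80"},
		},
		NodePoolLabelKey: "run.ai/simulated-gpu-node-pool",
	}

	// Serialize into the ConfigMap, as ToClusterTopologyCM does.
	data, err := yaml.Marshal(in)
	if err != nil {
		panic(err)
	}
	cm := &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{Name: "topology", Namespace: "gpu-operator"},
		Data:       map[string]string{cmTopologyKey: string(data)},
	}

	// Parse back, as FromClusterTopologyCM does.
	var out ClusterTopology
	if err := yaml.Unmarshal([]byte(cm.Data[cmTopologyKey]), &out); err != nil {
		panic(err)
	}
	fmt.Printf("round trip: %+v\n", out)
}
```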
42 changes: 20 additions & 22 deletions internal/common/topology/types.go
@@ -6,28 +6,37 @@ import (
"k8s.io/apimachinery/pkg/types"
)

type BaseTopology struct {
Config Config `json:"config"`
type ClusterTopology struct {
NodePools map[string]NodePoolTopology `yaml:"nodePools"`
NodePoolLabelKey string `yaml:"nodePoolLabelKey"`

MigStrategy string `yaml:"migStrategy"`
}

type NodePoolTopology struct {
GpuCount int `yaml:"gpuCount"`
GpuMemory int `yaml:"gpuMemory"`
GpuProduct string `yaml:"gpuProduct"`
}

type NodeTopology struct {
GpuMemory int `yaml:"gpu-memory"`
GpuProduct string `yaml:"gpu-product"`
GpuMemory int `yaml:"gpuMemory"`
GpuProduct string `yaml:"gpuProduct"`
Gpus []GpuDetails `yaml:"gpus"`
MigStrategy string `yaml:"mig-strategy"`
MigStrategy string `yaml:"migStrategy"`
}

type GpuDetails struct {
ID string `json:"id"`
Status GpuStatus `json:"status"`
ID string `yaml:"id"`
Status GpuStatus `yaml:"status"`
}

type PodGpuUsageStatusMap map[types.UID]GpuUsageStatus

type GpuStatus struct {
AllocatedBy ContainerDetails `yaml:"allocated-by"`
AllocatedBy ContainerDetails `yaml:"allocatedBy"`
// Maps PodUID to its GPU usage status
PodGpuUsageStatus PodGpuUsageStatusMap `yaml:"pod-gpu-usage-status"`
PodGpuUsageStatus PodGpuUsageStatusMap `yaml:"podGpuUsageStatus"`
}

type ContainerDetails struct {
@@ -38,26 +47,15 @@ type ContainerDetails struct {

type GpuUsageStatus struct {
Utilization Range `yaml:"utilization"`
FbUsed int `yaml:"fb-used"`
UseKnativeUtilization bool `yaml:"use-knative-utilization"`
FbUsed int `yaml:"fbUsed"`
UseKnativeUtilization bool `yaml:"useKnativeUtilization"`
}

type Range struct {
Min int `yaml:"min"`
Max int `yaml:"max"`
}

type Config struct {
NodeAutofill NodeAutofillSettings `yaml:"node-autofill"`
}

type NodeAutofillSettings struct {
GpuCount int `yaml:"gpu-count"`
GpuMemory int `yaml:"gpu-memory"`
GpuProduct string `yaml:"gpu-product"`
MigStrategy string `yaml:"mig-strategy"`
}

// Errors
var ErrNoNodes = fmt.Errorf("no nodes found")
var ErrNoNode = fmt.Errorf("node not found")
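
Note that the kebab-case to camelCase tag changes above are behavioral, not cosmetic: YAML previously written with the old keys (`fb-used`, `mig-strategy`, ...) would no longer populate these fields. A minimal sketch of the new serialization, again assuming `gopkg.in/yaml.v3`:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

type Range struct {
	Min int `yaml:"min"`
	Max int `yaml:"max"`
}

type GpuUsageStatus struct {
	Utilization           Range `yaml:"utilization"`
	FbUsed                int   `yaml:"fbUsed"`
	UseKnativeUtilization bool  `yaml:"useKnativeUtilization"`
}

func main() {
	out, err := yaml.Marshal(GpuUsageStatus{
		Utilization: Range{Min: 10, Max: 30},
		FbUsed:      1024,
	})
	if err != nil {
		panic(err)
	}
	fmt.Print(string(out))
	// Keys are now camelCase:
	// utilization:
	//     min: 10
	//     max: 30
	// fbUsed: 1024
	// useKnativeUtilization: false
}
```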