diff --git a/VERSION b/VERSION index c4475d3..24ff855 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.26 +0.0.27 diff --git a/api/v1/labelgroup_types.go b/api/v1/labelgroup_types.go index 62d93bd..01ba08d 100644 --- a/api/v1/labelgroup_types.go +++ b/api/v1/labelgroup_types.go @@ -56,7 +56,10 @@ type LabelGroupStatus struct { TotalCarbon string `json:"totalCarbon,omitempty"` // Prometheus query to get the total energy for this LabelGroup - SusQLPrometheusQuery string `json:"susqlPrometheusQuery,omitempty"` + SusQLPrometheusEnergyQuery string `json:"susqlPrometheusEnergyQuery,omitempty"` + + // Prometheus query to get the total CO2 for this LabelGroup + SusQLPrometheusCarbonQuery string `json:"susqlPrometheusCarbonQuery,omitempty"` // Active containers associated with these set of labels ActiveContainerIds map[string]float64 `json:"activeContainerIds,omitempty"` diff --git a/bundle/manifests/susql-operator.clusterserviceversion.yaml b/bundle/manifests/susql-operator.clusterserviceversion.yaml index be727d4..1c70a5e 100644 --- a/bundle/manifests/susql-operator.clusterserviceversion.yaml +++ b/bundle/manifests/susql-operator.clusterserviceversion.yaml @@ -27,15 +27,15 @@ metadata: ] capabilities: Basic Install categories: Monitoring - containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.26 - createdAt: "2024-09-02T06:52:46Z" + containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.27 + createdAt: "2024-09-04T07:50:22Z" description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' operators.operatorframework.io/builder: operator-sdk-v1.36.1 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 repository: https://github.com/sustainable-computing-io/susql-operator support: https://github.com/sustainable-computing-io/susql-operator/issues - name: susql-operator.v0.0.26 + name: susql-operator.v0.0.27 namespace: placeholder spec: apiservicedefinitions: {} @@ -59,6 +59,8 @@ spec: 1. Deployment of [Kepler](https://sustainable-computing.io/) on the cluster 2. Ensure that [User Project Monitoring](https://docs.openshift.com/container-platform/latest/monitoring/enabling-monitoring-for-user-defined-projects.html) is enabled to monitor energy consumed in user projects. + 3. Creation of a SusQL Service Monitor: + `oc apply -n -f https://raw.githubusercontent.com/sustainable-computing-io/susql-operator/main/hack/susql-servicemonitor.yaml` ### API Backward Compatibility @@ -295,7 +297,7 @@ spec: key: CARBON-QUERY-CONV-2J name: susql-config optional: true - image: quay.io/sustainable_computing_io/susql_operator:0.0.26 + image: quay.io/sustainable_computing_io/susql_operator:0.0.27 imagePullPolicy: IfNotPresent livenessProbe: httpGet: @@ -398,4 +400,4 @@ spec: provider: name: SusQL Operator Contributors url: https://github.com/sustainable-computing-io/susql-operator - version: 0.0.26 + version: 0.0.27 diff --git a/bundle/manifests/susql.ibm.com_labelgroups.yaml b/bundle/manifests/susql.ibm.com_labelgroups.yaml index ec2a4e4..3ef68ce 100644 --- a/bundle/manifests/susql.ibm.com_labelgroups.yaml +++ b/bundle/manifests/susql.ibm.com_labelgroups.yaml @@ -70,7 +70,10 @@ spec: type: string description: SusQL Prometheus labels constructed from the spec type: object - susqlPrometheusQuery: + susqlPrometheusCarbonQuery: + description: Prometheus query to get the total CO2 for this LabelGroup + type: string + susqlPrometheusEnergyQuery: description: Prometheus query to get the total energy for this LabelGroup type: string totalCarbon: diff --git a/config/crd/bases/susql.ibm.com_labelgroups.yaml b/config/crd/bases/susql.ibm.com_labelgroups.yaml index b21c78f..24a76c3 100644 --- a/config/crd/bases/susql.ibm.com_labelgroups.yaml +++ b/config/crd/bases/susql.ibm.com_labelgroups.yaml @@ -70,7 +70,10 @@ spec: type: string description: SusQL Prometheus labels constructed from the spec type: object - susqlPrometheusQuery: + susqlPrometheusCarbonQuery: + description: Prometheus query to get the total CO2 for this LabelGroup + type: string + susqlPrometheusEnergyQuery: description: Prometheus query to get the total energy for this LabelGroup type: string totalCarbon: diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index f98e5bd..42906cd 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: quay.io/sustainable_computing_io/susql_operator - newTag: 0.0.26 + newTag: 0.0.27 diff --git a/internal/controller/labelgroup_controller.go b/internal/controller/labelgroup_controller.go index 3358441..183e8c1 100644 --- a/internal/controller/labelgroup_controller.go +++ b/internal/controller/labelgroup_controller.go @@ -54,10 +54,11 @@ type LabelGroupReconciler struct { } const ( - susqlMetricName = "susql_total_energy_joules" // SusQL metric to query - fixingDelay = 15 * time.Second // Time to wait in the event the LabelGroup was badly constructed - nopodDelay = 15 * time.Second // Time to wait in the event no pods are found - errorDelay = 1 * time.Second // Time to wait when an error happens due to network connectivity issues + susqlEnergyMetricName = "susql_total_energy_joules" // SusQL energy metric to query + susqlCarbonMetricName = "susql_total_carbon_dioxide_grams" // SusQL carbon metric to query + fixingDelay = 15 * time.Second // Time to wait in the event the LabelGroup was badly constructed + nopodDelay = 15 * time.Second // Time to wait in the event no pods are found + errorDelay = 1 * time.Second // Time to wait when an error happens due to network connectivity issues ) var ( @@ -81,12 +82,6 @@ var ( func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = log.FromContext(ctx) - r.Logger.V(5).Info("[Reconcile] Entered Reconcile().") - - var m coreruntime.MemStats - coreruntime.ReadMemStats(&m) - r.Logger.V(5).Info(fmt.Sprintf("Memory: Alloc=%.2f MB TotalAlloc=%.2f MB Sys= %.2f MB NumGC=%v", float32(m.Alloc)/1024.0/1024.0, float32(m.TotalAlloc)/1024.0/1024.0, float32(m.Sys)/1024.0/1024.0, m.NumGC)) - // Get LabelGroup object to process if it exists labelGroup := &susqlv1.LabelGroup{} @@ -97,6 +92,12 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } + r.Logger.V(1).Info(fmt.Sprintf("[Reconcile] Entered Reconcile() for LabelGroup '%s' in namespace '%s'.", labelGroup.Name, labelGroup.Namespace)) + + var m coreruntime.MemStats + coreruntime.ReadMemStats(&m) + r.Logger.V(5).Info(fmt.Sprintf("Memory: Alloc=%.2f MB TotalAlloc=%.2f MB Sys= %.2f MB NumGC=%v", float32(m.Alloc)/1024.0/1024.0, float32(m.TotalAlloc)/1024.0/1024.0, float32(m.Sys)/1024.0/1024.0, m.NumGC)) + // Check that the susql prometheus labels are created if len(labelGroup.Status.PrometheusLabels) == 0 && labelGroup.Status.Phase != susqlv1.Initializing { r.Logger.V(1).Info(fmt.Sprintf("[Reconcile] The SusQL prometheus labels for LabelGroup '%s' in namespace '%s' have not been created. Reinitializing this LabelGroup.", labelGroup.Name, labelGroup.Namespace)) @@ -119,10 +120,9 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) if err == nil { r.CarbonIntensity = newCarbonIntensity r.CarbonIntensityTimeStamp = currentEpoch - r.Logger.V(5).Info("[Reconcile] Entered initializing case.") - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) + r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) } else { - r.Logger.V(0).Error(err, "[Reconcile] Unable to query carbon intensity.") + r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.") } } } @@ -152,24 +152,42 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } - var susqlPrometheusQuery string - susqlPrometheusQuery = susqlMetricName - susqlPrometheusQuery += "{" + // Create energy query string + var susqlPrometheusEnergyQuery string + susqlPrometheusEnergyQuery = susqlEnergyMetricName + susqlPrometheusEnergyQuery += "{" + for ldx := 0; ldx < len(susqlKubernetesLabelNames); ldx++ { + if ldx < len(labelGroup.Spec.Labels) { + susqlPrometheusEnergyQuery += fmt.Sprintf("%s=\"%s\"", susqlPrometheusLabelNames[ldx], labelGroup.Spec.Labels[ldx]) + } else { + susqlPrometheusEnergyQuery += fmt.Sprintf("%s=\"\"", susqlPrometheusLabelNames[ldx]) + } + if ldx < len(susqlKubernetesLabelNames)-1 { + susqlPrometheusEnergyQuery += "," + } + } + susqlPrometheusEnergyQuery += "}" + + // Create carbon query string + var susqlPrometheusCarbonQuery string + susqlPrometheusCarbonQuery = susqlCarbonMetricName + susqlPrometheusCarbonQuery += "{" for ldx := 0; ldx < len(susqlKubernetesLabelNames); ldx++ { if ldx < len(labelGroup.Spec.Labels) { - susqlPrometheusQuery += fmt.Sprintf("%s=\"%s\"", susqlPrometheusLabelNames[ldx], labelGroup.Spec.Labels[ldx]) + susqlPrometheusCarbonQuery += fmt.Sprintf("%s=\"%s\"", susqlPrometheusLabelNames[ldx], labelGroup.Spec.Labels[ldx]) } else { - susqlPrometheusQuery += fmt.Sprintf("%s=\"\"", susqlPrometheusLabelNames[ldx]) + susqlPrometheusCarbonQuery += fmt.Sprintf("%s=\"\"", susqlPrometheusLabelNames[ldx]) } if ldx < len(susqlKubernetesLabelNames)-1 { - susqlPrometheusQuery += "," + susqlPrometheusCarbonQuery += "," } } - susqlPrometheusQuery += "}" + susqlPrometheusCarbonQuery += "}" labelGroup.Status.KubernetesLabels = susqlKubernetesLabels labelGroup.Status.PrometheusLabels = susqlPrometheusLabels - labelGroup.Status.SusQLPrometheusQuery = susqlPrometheusQuery + labelGroup.Status.SusQLPrometheusEnergyQuery = susqlPrometheusEnergyQuery + labelGroup.Status.SusQLPrometheusCarbonQuery = susqlPrometheusCarbonQuery labelGroup.Status.Phase = susqlv1.Reloading if err := r.Status().Update(ctx, labelGroup); err != nil { @@ -184,16 +202,23 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) r.Logger.V(5).Info("[Reconcile-Reloading] Entered reloading case.") // Reload data from existing database if !labelGroup.Spec.DisableUsingMostRecentValue { - totalEnergy, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusQuery) + totalEnergy, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusEnergyQuery) if err != nil { - r.Logger.V(0).Error(err, "[Reconcile-Reloading] Couldn't retrieve most recent value.") + r.Logger.V(0).Error(err, "[Reconcile-Reloading] Couldn't retrieve most recent energy value.") return ctrl.Result{RequeueAfter: fixingDelay}, nil } labelGroup.Status.TotalEnergy = fmt.Sprintf("%f", totalEnergy) - labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", float64(totalEnergy)*r.CarbonIntensity) + totalCarbon, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusCarbonQuery) + + if err != nil { + r.Logger.V(0).Error(err, "[Reconcile-Reloading] Couldn't retrieve most recent carbon value.") + return ctrl.Result{RequeueAfter: fixingDelay}, nil + } + + labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", float64(totalCarbon)) } labelGroup.Status.Phase = susqlv1.Aggregating @@ -213,14 +238,10 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) podsInNamespace, err := r.filterPodsInNamespace(ctx, labelGroup.Namespace, labelGroup.Status.KubernetesLabels) if err != nil || len(podsInNamespace) == 0 { - r.Logger.V(5).Info("[Reconcile-Aggregating] Unable to get podlist.") // trace - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] LabelName: %s", labelGroup.Name)) // trace - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] Namespace: %s", labelGroup.Namespace)) // trace - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] KubernetesLabels: %#v", labelGroup.Status.KubernetesLabels)) // trace - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] podNamesinNamespace: %s", podsInNamespace)) // trace - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile] ctx: %#v", ctx)) // trace + r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] Unable to get podlist: Namespace: %s LabelName: %s", labelGroup.Namespace, labelGroup.Name)) + r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] KubernetesLabels: %#v", labelGroup.Status.KubernetesLabels)) if err != nil { - r.Logger.V(0).Error(err, "[Reconcile-Aggregating] ERROR: Couldn't get pods for the labels provided.") + r.Logger.V(0).Error(err, "[Reconcile-Aggregating] ERROR: Unable to get pods for the labels provided due to this error.") } return ctrl.Result{RequeueAfter: nopodDelay}, nil @@ -244,6 +265,8 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) totalEnergy = 0.0 } + var originalTotalEnergy float64 = totalEnergy + if labelGroup.Status.ActiveContainerIds == nil { // First pass with this pod group labelGroup.Status.ActiveContainerIds = make(map[string]float64) @@ -273,7 +296,16 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) // 4) Update ETCD with the values labelGroup.Status.TotalEnergy = fmt.Sprintf("%.2f", totalEnergy) - labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", float64(totalEnergy)*r.CarbonIntensity) + var totalCarbon float64 + + if value, err := strconv.ParseFloat(labelGroup.Status.TotalCarbon, 64); err == nil { + totalCarbon = value + } else { + totalCarbon = 0.0 + } + + totalCarbon = totalCarbon + (totalEnergy-originalTotalEnergy)*r.CarbonIntensity + labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", totalCarbon) if err := r.Status().Update(ctx, labelGroup); err != nil { return ctrl.Result{}, err @@ -281,7 +313,7 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) // 5) Add energy aggregation to Prometheus table r.SetAggregatedEnergyForLabels(totalEnergy, labelGroup.Status.PrometheusLabels) - r.SetAggregatedCarbonForLabels(float64(totalEnergy)*r.CarbonIntensity, labelGroup.Status.PrometheusLabels) + r.SetAggregatedCarbonForLabels(totalCarbon, labelGroup.Status.PrometheusLabels) // Requeue return ctrl.Result{RequeueAfter: r.SamplingRate}, nil diff --git a/internal/controller/resource_manager.go b/internal/controller/resource_manager.go index 809fd98..a6a4d5e 100644 --- a/internal/controller/resource_manager.go +++ b/internal/controller/resource_manager.go @@ -39,7 +39,6 @@ func (r *LabelGroupReconciler) filterPodsInNamespace(ctx context.Context, namesp var podList v1.PodList if err := r.Client.List(ctx, &podList, listOptions); err != nil { r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] labelSelector: %#v", labelSelector)) - r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] ctx: %#v", ctx)) r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] podList: %#v", podList)) r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] listOptions: %#v", listOptions)) r.Logger.V(0).Error(err, "[filterPodsInNamespace] List Error:")