Skip to content

Commit

Permalink
improve dynamic carbon delta calculation
Browse files Browse the repository at this point in the history
Signed-off-by: Scott Trent <[email protected]>
  • Loading branch information
trent-s committed Sep 4, 2024
1 parent e6b6b9c commit 5877ba5
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 44 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.26
0.0.27
5 changes: 4 additions & 1 deletion api/v1/labelgroup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ type LabelGroupStatus struct {
TotalCarbon string `json:"totalCarbon,omitempty"`

// Prometheus query to get the total energy for this LabelGroup
SusQLPrometheusQuery string `json:"susqlPrometheusQuery,omitempty"`
SusQLPrometheusEnergyQuery string `json:"susqlPrometheusEnergyQuery,omitempty"`

// Prometheus query to get the total CO2 for this LabelGroup
SusQLPrometheusCarbonQuery string `json:"susqlPrometheusCarbonQuery,omitempty"`

// Active containers associated with these set of labels
ActiveContainerIds map[string]float64 `json:"activeContainerIds,omitempty"`
Expand Down
12 changes: 7 additions & 5 deletions bundle/manifests/susql-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ metadata:
]
capabilities: Basic Install
categories: Monitoring
containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.26
createdAt: "2024-09-02T06:52:46Z"
containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.27
createdAt: "2024-09-04T07:50:22Z"
description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL
labels '
operators.operatorframework.io/builder: operator-sdk-v1.36.1
operators.operatorframework.io/project_layout: go.kubebuilder.io/v4
repository: https://github.com/sustainable-computing-io/susql-operator
support: https://github.com/sustainable-computing-io/susql-operator/issues
name: susql-operator.v0.0.26
name: susql-operator.v0.0.27
namespace: placeholder
spec:
apiservicedefinitions: {}
Expand All @@ -59,6 +59,8 @@ spec:
1. Deployment of [Kepler](https://sustainable-computing.io/) on the cluster
2. Ensure that [User Project Monitoring](https://docs.openshift.com/container-platform/latest/monitoring/enabling-monitoring-for-user-defined-projects.html)
is enabled to monitor energy consumed in user projects.
3. Creation of a SusQL Service Monitor:
`oc apply -n <NAMESPACE> -f https://raw.githubusercontent.com/sustainable-computing-io/susql-operator/main/hack/susql-servicemonitor.yaml`
### API Backward Compatibility
Expand Down Expand Up @@ -295,7 +297,7 @@ spec:
key: CARBON-QUERY-CONV-2J
name: susql-config
optional: true
image: quay.io/sustainable_computing_io/susql_operator:0.0.26
image: quay.io/sustainable_computing_io/susql_operator:0.0.27
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:
Expand Down Expand Up @@ -398,4 +400,4 @@ spec:
provider:
name: SusQL Operator Contributors
url: https://github.com/sustainable-computing-io/susql-operator
version: 0.0.26
version: 0.0.27
5 changes: 4 additions & 1 deletion bundle/manifests/susql.ibm.com_labelgroups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ spec:
type: string
description: SusQL Prometheus labels constructed from the spec
type: object
susqlPrometheusQuery:
susqlPrometheusCarbonQuery:
description: Prometheus query to get the total CO2 for this LabelGroup
type: string
susqlPrometheusEnergyQuery:
description: Prometheus query to get the total energy for this LabelGroup
type: string
totalCarbon:
Expand Down
5 changes: 4 additions & 1 deletion config/crd/bases/susql.ibm.com_labelgroups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ spec:
type: string
description: SusQL Prometheus labels constructed from the spec
type: object
susqlPrometheusQuery:
susqlPrometheusCarbonQuery:
description: Prometheus query to get the total CO2 for this LabelGroup
type: string
susqlPrometheusEnergyQuery:
description: Prometheus query to get the total energy for this LabelGroup
type: string
totalCarbon:
Expand Down
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ kind: Kustomization
images:
- name: controller
newName: quay.io/sustainable_computing_io/susql_operator
newTag: 0.0.26
newTag: 0.0.27
98 changes: 65 additions & 33 deletions internal/controller/labelgroup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ type LabelGroupReconciler struct {
}

const (
susqlMetricName = "susql_total_energy_joules" // SusQL metric to query
fixingDelay = 15 * time.Second // Time to wait in the event the LabelGroup was badly constructed
nopodDelay = 15 * time.Second // Time to wait in the event no pods are found
errorDelay = 1 * time.Second // Time to wait when an error happens due to network connectivity issues
susqlEnergyMetricName = "susql_total_energy_joules" // SusQL energy metric to query
susqlCarbonMetricName = "susql_total_carbon_dioxide_grams" // SusQL carbon metric to query
fixingDelay = 15 * time.Second // Time to wait in the event the LabelGroup was badly constructed
nopodDelay = 15 * time.Second // Time to wait in the event no pods are found
errorDelay = 1 * time.Second // Time to wait when an error happens due to network connectivity issues
)

var (
Expand All @@ -81,12 +82,6 @@ var (
func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
_ = log.FromContext(ctx)

r.Logger.V(5).Info("[Reconcile] Entered Reconcile().")

var m coreruntime.MemStats
coreruntime.ReadMemStats(&m)
r.Logger.V(5).Info(fmt.Sprintf("Memory: Alloc=%.2f MB TotalAlloc=%.2f MB Sys= %.2f MB NumGC=%v", float32(m.Alloc)/1024.0/1024.0, float32(m.TotalAlloc)/1024.0/1024.0, float32(m.Sys)/1024.0/1024.0, m.NumGC))

// Get LabelGroup object to process if it exists
labelGroup := &susqlv1.LabelGroup{}

Expand All @@ -97,6 +92,12 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}

r.Logger.V(1).Info(fmt.Sprintf("[Reconcile] Entered Reconcile() for LabelGroup '%s' in namespace '%s'.", labelGroup.Name, labelGroup.Namespace))

var m coreruntime.MemStats
coreruntime.ReadMemStats(&m)
r.Logger.V(5).Info(fmt.Sprintf("Memory: Alloc=%.2f MB TotalAlloc=%.2f MB Sys= %.2f MB NumGC=%v", float32(m.Alloc)/1024.0/1024.0, float32(m.TotalAlloc)/1024.0/1024.0, float32(m.Sys)/1024.0/1024.0, m.NumGC))

// Check that the susql prometheus labels are created
if len(labelGroup.Status.PrometheusLabels) == 0 && labelGroup.Status.Phase != susqlv1.Initializing {
r.Logger.V(1).Info(fmt.Sprintf("[Reconcile] The SusQL prometheus labels for LabelGroup '%s' in namespace '%s' have not been created. Reinitializing this LabelGroup.", labelGroup.Name, labelGroup.Namespace))
Expand All @@ -119,10 +120,9 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
if err == nil {
r.CarbonIntensity = newCarbonIntensity
r.CarbonIntensityTimeStamp = currentEpoch
r.Logger.V(5).Info("[Reconcile] Entered initializing case.")
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity))
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity))
} else {
r.Logger.V(0).Error(err, "[Reconcile] Unable to query carbon intensity.")
r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.")
}
}
}
Expand Down Expand Up @@ -152,24 +152,42 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}
}

var susqlPrometheusQuery string
susqlPrometheusQuery = susqlMetricName
susqlPrometheusQuery += "{"
// Create energy query string
var susqlPrometheusEnergyQuery string
susqlPrometheusEnergyQuery = susqlEnergyMetricName
susqlPrometheusEnergyQuery += "{"
for ldx := 0; ldx < len(susqlKubernetesLabelNames); ldx++ {
if ldx < len(labelGroup.Spec.Labels) {
susqlPrometheusEnergyQuery += fmt.Sprintf("%s=\"%s\"", susqlPrometheusLabelNames[ldx], labelGroup.Spec.Labels[ldx])
} else {
susqlPrometheusEnergyQuery += fmt.Sprintf("%s=\"\"", susqlPrometheusLabelNames[ldx])
}
if ldx < len(susqlKubernetesLabelNames)-1 {
susqlPrometheusEnergyQuery += ","
}
}
susqlPrometheusEnergyQuery += "}"

// Create carbon query string
var susqlPrometheusCarbonQuery string
susqlPrometheusCarbonQuery = susqlCarbonMetricName
susqlPrometheusCarbonQuery += "{"
for ldx := 0; ldx < len(susqlKubernetesLabelNames); ldx++ {
if ldx < len(labelGroup.Spec.Labels) {
susqlPrometheusQuery += fmt.Sprintf("%s=\"%s\"", susqlPrometheusLabelNames[ldx], labelGroup.Spec.Labels[ldx])
susqlPrometheusCarbonQuery += fmt.Sprintf("%s=\"%s\"", susqlPrometheusLabelNames[ldx], labelGroup.Spec.Labels[ldx])
} else {
susqlPrometheusQuery += fmt.Sprintf("%s=\"\"", susqlPrometheusLabelNames[ldx])
susqlPrometheusCarbonQuery += fmt.Sprintf("%s=\"\"", susqlPrometheusLabelNames[ldx])
}
if ldx < len(susqlKubernetesLabelNames)-1 {
susqlPrometheusQuery += ","
susqlPrometheusCarbonQuery += ","
}
}
susqlPrometheusQuery += "}"
susqlPrometheusCarbonQuery += "}"

labelGroup.Status.KubernetesLabels = susqlKubernetesLabels
labelGroup.Status.PrometheusLabels = susqlPrometheusLabels
labelGroup.Status.SusQLPrometheusQuery = susqlPrometheusQuery
labelGroup.Status.SusQLPrometheusEnergyQuery = susqlPrometheusEnergyQuery
labelGroup.Status.SusQLPrometheusCarbonQuery = susqlPrometheusCarbonQuery
labelGroup.Status.Phase = susqlv1.Reloading

if err := r.Status().Update(ctx, labelGroup); err != nil {
Expand All @@ -184,16 +202,23 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
r.Logger.V(5).Info("[Reconcile-Reloading] Entered reloading case.")
// Reload data from existing database
if !labelGroup.Spec.DisableUsingMostRecentValue {
totalEnergy, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusQuery)
totalEnergy, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusEnergyQuery)

if err != nil {
r.Logger.V(0).Error(err, "[Reconcile-Reloading] Couldn't retrieve most recent value.")
r.Logger.V(0).Error(err, "[Reconcile-Reloading] Couldn't retrieve most recent energy value.")
return ctrl.Result{RequeueAfter: fixingDelay}, nil
}

labelGroup.Status.TotalEnergy = fmt.Sprintf("%f", totalEnergy)

labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", float64(totalEnergy)*r.CarbonIntensity)
totalCarbon, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusCarbonQuery)

if err != nil {
r.Logger.V(0).Error(err, "[Reconcile-Reloading] Couldn't retrieve most recent carbon value.")
return ctrl.Result{RequeueAfter: fixingDelay}, nil
}

labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", float64(totalCarbon))
}

labelGroup.Status.Phase = susqlv1.Aggregating
Expand All @@ -213,14 +238,10 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
podsInNamespace, err := r.filterPodsInNamespace(ctx, labelGroup.Namespace, labelGroup.Status.KubernetesLabels)

if err != nil || len(podsInNamespace) == 0 {
r.Logger.V(5).Info("[Reconcile-Aggregating] Unable to get podlist.") // trace
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] LabelName: %s", labelGroup.Name)) // trace
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] Namespace: %s", labelGroup.Namespace)) // trace
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] KubernetesLabels: %#v", labelGroup.Status.KubernetesLabels)) // trace
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] podNamesinNamespace: %s", podsInNamespace)) // trace
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile] ctx: %#v", ctx)) // trace
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] Unable to get podlist: Namespace: %s LabelName: %s", labelGroup.Namespace, labelGroup.Name))
r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-Aggregating] KubernetesLabels: %#v", labelGroup.Status.KubernetesLabels))
if err != nil {
r.Logger.V(0).Error(err, "[Reconcile-Aggregating] ERROR: Couldn't get pods for the labels provided.")
r.Logger.V(0).Error(err, "[Reconcile-Aggregating] ERROR: Unable to get pods for the labels provided due to this error.")
}

return ctrl.Result{RequeueAfter: nopodDelay}, nil
Expand All @@ -244,6 +265,8 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
totalEnergy = 0.0
}

var originalTotalEnergy float64 = totalEnergy

if labelGroup.Status.ActiveContainerIds == nil {
// First pass with this pod group
labelGroup.Status.ActiveContainerIds = make(map[string]float64)
Expand Down Expand Up @@ -273,15 +296,24 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// 4) Update ETCD with the values
labelGroup.Status.TotalEnergy = fmt.Sprintf("%.2f", totalEnergy)

labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", float64(totalEnergy)*r.CarbonIntensity)
var totalCarbon float64

if value, err := strconv.ParseFloat(labelGroup.Status.TotalCarbon, 64); err == nil {
totalCarbon = value
} else {
totalCarbon = 0.0
}

totalCarbon = totalCarbon + (totalEnergy-originalTotalEnergy)*r.CarbonIntensity
labelGroup.Status.TotalCarbon = fmt.Sprintf("%.10f", totalCarbon)

if err := r.Status().Update(ctx, labelGroup); err != nil {
return ctrl.Result{}, err
}

// 5) Add energy aggregation to Prometheus table
r.SetAggregatedEnergyForLabels(totalEnergy, labelGroup.Status.PrometheusLabels)
r.SetAggregatedCarbonForLabels(float64(totalEnergy)*r.CarbonIntensity, labelGroup.Status.PrometheusLabels)
r.SetAggregatedCarbonForLabels(totalCarbon, labelGroup.Status.PrometheusLabels)

// Requeue
return ctrl.Result{RequeueAfter: r.SamplingRate}, nil
Expand Down
1 change: 0 additions & 1 deletion internal/controller/resource_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ func (r *LabelGroupReconciler) filterPodsInNamespace(ctx context.Context, namesp
var podList v1.PodList
if err := r.Client.List(ctx, &podList, listOptions); err != nil {
r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] labelSelector: %#v", labelSelector))
r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] ctx: %#v", ctx))
r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] podList: %#v", podList))
r.Logger.V(5).Info(fmt.Sprintf("[filterPodsInNamespace] listOptions: %#v", listOptions))
r.Logger.V(0).Error(err, "[filterPodsInNamespace] List Error:")
Expand Down

0 comments on commit 5877ba5

Please sign in to comment.