From c46f561dd479d30f9b6a5f2bca4161368a44c271 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Sun, 29 Sep 2024 18:29:16 +0900 Subject: [PATCH] carbon3: improve docs & error output and handling Signed-off-by: Scott Trent --- README.md | 13 +++--- VERSION | 2 +- .../susql-operator.clusterserviceversion.yaml | 12 ++--- cmd/main.go | 33 +++++++------- config/manager/kustomization.yaml | 2 +- .../susql-operator.clusterserviceversion.yaml | 2 +- doc/c3-susql-config.yaml | 21 +++++++++ doc/carbon.md | 17 ++++--- doc/helm-installation.md | 2 +- doc/openshift-installation.md | 2 +- doc/operatorhub-installation.md | 2 +- internal/controller/carbon_query.go | 18 ++++---- internal/controller/labelgroup_controller.go | 44 +++++++++++-------- {doc => samples}/susql-config.yaml | 0 14 files changed, 102 insertions(+), 68 deletions(-) create mode 100644 doc/c3-susql-config.yaml rename {doc => samples}/susql-config.yaml (100%) diff --git a/README.md b/README.md index a6532f6..426cf69 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SusQL Operator -SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are taken from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a video with a demonstration by clicking the following link. +SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a demonstration video by clicking the following link. 
https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-09.mp4 @@ -9,21 +9,24 @@ https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo ## Getting Started -SusQL is an operator that can be deployed in a Kubernetes/OpenShift cluster. You can use [kind](https://sigs.k8s.io/kind) or [minikube](https://minikube.sigs.k8s.io/) to get a local cluster for testing, or run against a remote cluster. +SusQL is an operator that can be deployed in a Kubernetes/OpenShift cluster. You can also use [kind](https://sigs.k8s.io/kind) or [minikube](https://minikube.sigs.k8s.io/) as a local cluster for testing, or run against a remote cluster. ## Carbon Dioxide Emission Calculation By default SusQL calculates carbon dioxide emission in grams of CO2 using a carbon intensity value from [US EPA](https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references). +SusQL can be configured to use other static carbon intensity values or query carbon intensity values for a +given location from web API's such as those provided by +the Green Software Foundation's [Carbon Aware SDK](https://github.com/Green-Software-Foundation/carbon-aware-sdk). Detailed information on configuration of CO2 emission calculation in SusQL is available in the [SusQL carbon calculation documentation.](doc/carbon.md) -### Prerequisites +## Prerequisites Kepler is assumed to be installed in the cluster. -### Installation +## Installation - Follow these instructions for easy SusQL installation from the Red Hat Community Operator catalog on an OpenShift cluster. - [Installation on OpenShift](doc/openshift-installation.md) @@ -31,7 +34,7 @@ Kepler is assumed to be installed in the cluster. - Follow these instructions to install the SusQL Operator from [OperatorHub.io](https://operatorhub.io) on a Kubernetes cluster including OpenShift. 
- [Installation from OperatorHub.io](doc/operatorhub-installation.md) -- Follow these instructions to install the SusQL Operator with Helm on a Kubernetes cluster including OpenShift. +- Follow these instructions to install the SusQL Operator from a Helm chart on a Kubernetes cluster, including OpenShift. - [Installation with Helm](doc/helm-installation.md) diff --git a/VERSION b/VERSION index d788d43..78bae5b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.31 +0.0.32 diff --git a/bundle/manifests/susql-operator.clusterserviceversion.yaml b/bundle/manifests/susql-operator.clusterserviceversion.yaml index d956a76..9a65c77 100644 --- a/bundle/manifests/susql-operator.clusterserviceversion.yaml +++ b/bundle/manifests/susql-operator.clusterserviceversion.yaml @@ -27,15 +27,15 @@ metadata: ] capabilities: Basic Install categories: Monitoring - containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.31 - createdAt: "2024-09-28T12:08:54Z" + containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.32 + createdAt: "2024-09-29T09:27:06Z" description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' operators.operatorframework.io/builder: operator-sdk-v1.36.1 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 repository: https://github.com/sustainable-computing-io/susql-operator support: https://github.com/sustainable-computing-io/susql-operator/issues - name: susql-operator.v0.0.31 + name: susql-operator.v0.0.32 namespace: placeholder spec: apiservicedefinitions: {} @@ -301,7 +301,7 @@ spec: key: CARBON-QUERY-CONV-2J name: susql-config optional: true - image: quay.io/sustainable_computing_io/susql_operator:0.0.31 + image: quay.io/sustainable_computing_io/susql_operator:0.0.32 imagePullPolicy: Always livenessProbe: httpGet: @@ -408,5 +408,5 @@ spec: provider: name: SusQL Operator Contributors url: https://github.com/sustainable-computing-io/susql-operator - replaces: susql-operator.v0.0.29 - version: 
0.0.31 + replaces: susql-operator.v0.0.30 + version: 0.0.32 diff --git a/cmd/main.go b/cmd/main.go index 28f13fc..801984f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -230,22 +230,23 @@ func main() { susqlLog.Info("Setting up labelGroupReconciler.") if err = (&controller.LabelGroupReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - KeplerPrometheusUrl: keplerPrometheusUrl, - KeplerMetricName: keplerMetricName, - SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl, - SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl, - SamplingRate: time.Duration(samplingRateInteger) * time.Second, - CarbonMethod: carbonMethod, - CarbonIntensity: carbonIntensityFloat, - CarbonIntensityUrl: carbonIntensityUrl, - CarbonIntensityTimeStamp: 0, - CarbonLocation: carbonLocation, - CarbonQueryRate: carbonQueryRateInteger, - CarbonQueryFilter: carbonQueryFilter, - CarbonQueryConv2J: carbonQueryConv2JFloat, - Logger: susqlLog, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + KeplerPrometheusUrl: keplerPrometheusUrl, + KeplerMetricName: keplerMetricName, + SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl, + SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl, + SamplingRate: time.Duration(samplingRateInteger) * time.Second, + CarbonMethod: carbonMethod, + CarbonIntensity: carbonIntensityFloat, + CarbonIntensityUrl: carbonIntensityUrl, + CarbonIntensityTimeStamp: 0, + CarbonIntensityErrorTimeStamp: 0, + CarbonLocation: carbonLocation, + CarbonQueryRate: carbonQueryRateInteger, + CarbonQueryFilter: carbonQueryFilter, + CarbonQueryConv2J: carbonQueryConv2JFloat, + Logger: susqlLog, }).SetupWithManager(mgr); err != nil { susqlLog.Error(err, "unable to create controller", "controller", "LabelGroup") os.Exit(1) diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 851a8cd..c86c163 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller 
newName: quay.io/sustainable_computing_io/susql_operator - newTag: 0.0.31 + newTag: 0.0.32 diff --git a/config/manifests/bases/susql-operator.clusterserviceversion.yaml b/config/manifests/bases/susql-operator.clusterserviceversion.yaml index a3d74f8..91ad44d 100644 --- a/config/manifests/bases/susql-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/susql-operator.clusterserviceversion.yaml @@ -116,5 +116,5 @@ spec: provider: name: SusQL Operator Contributors url: https://github.com/sustainable-computing-io/susql-operator - replaces: susql-operator.v0.0.29 + replaces: susql-operator.v0.0.30 version: 0.0.0 diff --git a/doc/c3-susql-config.yaml b/doc/c3-susql-config.yaml new file mode 100644 index 0000000..3383f9b --- /dev/null +++ b/doc/c3-susql-config.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: susql-config +data: + KEPLER-PROMETHEUS-URL: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091" + KEPLER-METRIC-NAME: "kepler_container_joules_total" + SUSQL-PROMETHEUS-DATABASE-URL: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091" + SUSQL-PROMETHEUS-METRICS-URL: "http://0.0.0.0:8082" + SAMPLING-RATE: "2" + LEADER-ELECT: "false" + HEALTH-PROBE-BIND-ADDRESS: ":8081" + SUSQL-LOG-LEVEL: "-5" + CARBON-METHOD: "casdk" + CARBON-INTENSITY: "0.0001158333333333" + CARBON-INTENSITY-URL: "https://api.electricitymap.org/v3/carbon-intensity/latest?zone=%s" + CARBON-INTENSITY-URL: "http://webapi-green-software-foundation.apps.multi-nic-v2.llmdev.res.ibm.com/emissions/bylocation?location=%s" + CARBON-LOCATION: "japanwestERROR" + CARBON-QUERY-RATE: "7200" + CARBON-QUERY-FILTER: "rating" + CARBON-QUERY-CONV-2J: "0.0000002777777778" diff --git a/doc/carbon.md b/doc/carbon.md index 6335e5f..c981f6f 100644 --- a/doc/carbon.md +++ b/doc/carbon.md @@ -4,11 +4,14 @@ There are three primary CO2 emission calculation methods. 
"Out-of-the-box" SusQL reports an estimated CO2 emission value for all measured workloads using the `static` method: +The behavior of SusQL carbon calculation can be tuned by modifying the `susql-config` `ConfigMap` in the same namespace that the SusQL operator is running in. +A sample file is provided in `samples/susql-config.yaml`. + ## `static` Method - This `static` method uses a static "carbon intensity value" as a coefficient to calculate grams of CO2 emitted. This calculation method is used when the `CARBON-METHOD` `ConfigMap` value is set to `static`. -#### `static` Method `ConfigMap` Configurable items +#### `static` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `static` method is enabled when this is set to `static`. - `CARBON-INTENSITY` - Carbon intensity value. A coefficient used to convert Joules to grams of CO2 per Joule. The unit definition is grams of CO2 per Joule. The default carbon intensity value is based on [US EPA](https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references) data. @@ -34,7 +37,7 @@ There are three primary CO2 emission calculation methods. #### Configuring and installing Carbon Aware SDK - Following guidance in https://github.com/Green-Software-Foundation/carbon-aware-sdk/blob/dev/casdk-docs/docs/overview/enablement.md, the Carbon Aware SDK can be easily installed on a Kubernetes cluster such as OpenShift. -- Preparation: clone the repository and edit `helm-chart/values.yaml` as needed to reflect private password, configuration, etc. +- Preparation: clone the Carbon Aware SDK repository and edit `helm-chart/values.yaml` as needed to reflect private password, configuration, etc. 
(Useful configuration tips available at https://github.com/Green-Software-Foundation/carbon-aware-sdk/blob/dev/casdk-docs/docs/tutorial-extras/configuration.md ) ``` @@ -43,8 +46,8 @@ vi helm-chart/values.yaml ``` - Preparation: required software and permission - Ensure that `helm`, and `kubectl` (or `oc`) are installed - - Ensure that CLI user is logged in to cluster with sufficient permissions -- Perform installation + - Ensure that CLI user is logged in to the cluster with sufficient permissions +- Perform installation: (The example installs into namespace `gsf`. However, other namespaces may be used.) ``` cd carbon-aware-sdk helm upgrade --install --wait carbon-aware-sdk helm-chart --create-namespace gsf @@ -56,11 +59,11 @@ Note the value reported for "HOST/PORT". This will be used in the next configura Update the following items in the `susql-config.yaml` file: ``` CARBON-METHOD: "casdk" - CARBON-INTENSITY-URL: "http:///emissions/bylocation?location=%s" + CARBON-INTENSITY-URL: "http:///emissions/bylocation?location=%s" CARBON-LOCATION: "" CARBON-QUERY-FILTER: "rating" ``` -Tip: try this command to verify sdk container functionality and also view available locations: `curl -s "http:/locations"` +Tip: try this command to verify sdk container functionality and also view available locations: `curl -s "http:/locations"` Apply the updated `susql-config.yaml` file: @@ -68,7 +71,7 @@ Apply the updated `susql-config.yaml` file: oc apply -f susql-config.yaml -n ``` You are now ready to install and use the SusQL operator. -If the SusQL Operator is alreay installed, then restart the control pod. +If the SusQL Operator is already installed, then restart the control pod. #### `casdk` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `casdk` method is enabled when this is set to `casdk`. 
diff --git a/doc/helm-installation.md b/doc/helm-installation.md index 56129e8..8005b74 100644 --- a/doc/helm-installation.md +++ b/doc/helm-installation.md @@ -65,7 +65,7 @@ The following environment variables will influence the way that the SusQL Operat | SUSQL_REGISTRY | quay.io/sustainable_computing_io | Container registry that SusQL is stored in | | SUSQL_IMAGE_NAME | susql_operator | Image name used on SusQL container registry | | SUSQL_IMAGE_TAG | latest | Tag for SusQL container | -| CARBON_METHOD | static | "static", "simpledynamic", "scadk" | +| CARBON_METHOD | static | "static", "simpledynamic", "casdk" | | CARBON_INTENSITY | "0.00011583333" | Carbon intensity in grams CO2 / Joule | | CARBON_INTENSITY_URL | | Web API to query carbon intensity | | CARBON_LOCATION | | Location for carbon intensity query | diff --git a/doc/openshift-installation.md b/doc/openshift-installation.md index 2e64d29..c0f250c 100644 --- a/doc/openshift-installation.md +++ b/doc/openshift-installation.md @@ -24,7 +24,7 @@ Use the OpenShift web console to install the SusQL Operator: Before deploying the SusQL Operator create a `ConfigMap` called `susql-config` in the same namespace that the operator will run in. -[susql-config.yaml](susql-config.yaml) is a good starting point. If you download it first, you +[susql-config.yaml](../samples/susql-config.yaml) is a good starting point. If you download it first, you could create the ConfigMap with `oc apply -n -f susql-config.yaml`. If you update (or create) the ConfigMap after the SusQL Operator has been installed, then restarting the SusQL Operator controller pod will enable the changes. (e.g., Delete the pod, and allow it to be recreated automatically.) 
diff --git a/doc/operatorhub-installation.md b/doc/operatorhub-installation.md index 5f91caa..7224eb4 100644 --- a/doc/operatorhub-installation.md +++ b/doc/operatorhub-installation.md @@ -49,7 +49,7 @@ Next, use the OpenShift web console to install the SusQL Operator: Before deploying the SusQL Operator create a `ConfigMap` called `susql-config` in the same namespace that the operator will run in. -[susql-config.yaml](susql-config.yaml) is a good starting point. If you download it first, you +[susql-config.yaml](../samples/susql-config.yaml) is a good starting point. If you download it first, you could create the ConfigMap with `oc apply -n -f susql-config.yaml`. If you update (or create) the ConfigMap after the SusQL Operator has been installed, then restarting the SusQL Operator controller pod will enable the changes. (e.g., Delete the pod, and allow it to be recreated automatically.) diff --git a/internal/controller/carbon_query.go b/internal/controller/carbon_query.go index 2ffff31..b037468 100644 --- a/internal/controller/carbon_query.go +++ b/internal/controller/carbon_query.go @@ -25,17 +25,16 @@ import ( ) func queryCarbonIntensity(url string, location string, filter string, conv2J float64) (float64, error) { + queryUrl := fmt.Sprintf(url, location) - fmt.Println("CARBON QUERY: url=" + fmt.Sprintf(url, location)) - - response, err := http.Get(fmt.Sprintf(url, location)) + response, err := http.Get(queryUrl) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s", err, queryUrl) } responseData, err := ioutil.ReadAll(response.Body) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s\nresponse=%s", err, queryUrl, string(responseData)) } length := gjson.Get(string(responseData), "#").Int() - 1 @@ -46,7 +45,7 @@ func queryCarbonIntensity(url string, location string, filter string, conv2J flo carbonIntensityFloat, err := strconv.ParseFloat(carbonIntensityString, 64) if err != nil { - 
return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s\nresponse=%s\nfilter=%s\nresult=%s", err, queryUrl, string(responseData), newFilter, carbonIntensityString) } // return nil error since no error @@ -54,22 +53,23 @@ func queryCarbonIntensity(url string, location string, filter string, conv2J flo } func querySimpleCarbonIntensity(url string, location string, filter string, conv2J float64) (float64, error) { + queryUrl := fmt.Sprintf(url, location) response, err := http.Get(fmt.Sprintf(url, location)) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("querySimpleCarbonIntensity: %w\nURL=%s", err, queryUrl) } responseData, err := ioutil.ReadAll(response.Body) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonSimpleIntensity: %w\nURL=%s\nresponse=%s", err, queryUrl, string(responseData)) } carbonIntensityString := gjson.Get(string(responseData), filter).String() carbonIntensityFloat, err := strconv.ParseFloat(carbonIntensityString, 64) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonSimpleIntensity: %w\nURL=%s\nresponse=%s\nfilter=%s\nresult=%s", err, queryUrl, string(responseData), filter, carbonIntensityString) } // return nil error since no error diff --git a/internal/controller/labelgroup_controller.go b/internal/controller/labelgroup_controller.go index 030d521..9b5c83c 100644 --- a/internal/controller/labelgroup_controller.go +++ b/internal/controller/labelgroup_controller.go @@ -36,21 +36,22 @@ import ( // LabelGroupReconciler reconciles a LabelGroup object type LabelGroupReconciler struct { client.Client - Scheme *runtime.Scheme - KeplerPrometheusUrl string - KeplerMetricName string - SusQLPrometheusDatabaseUrl string - SusQLPrometheusMetricsUrl string - SamplingRate time.Duration // Sampling rate for all LabelGroups - CarbonMethod string - CarbonIntensity float64 - CarbonIntensityUrl string - CarbonIntensityTimeStamp int64 - CarbonLocation string - CarbonQueryRate int64 - 
CarbonQueryFilter string - CarbonQueryConv2J float64 - Logger logr.Logger + Scheme *runtime.Scheme + KeplerPrometheusUrl string + KeplerMetricName string + SusQLPrometheusDatabaseUrl string + SusQLPrometheusMetricsUrl string + SamplingRate time.Duration // Sampling rate for all LabelGroups + CarbonMethod string + CarbonIntensity float64 + CarbonIntensityUrl string + CarbonIntensityTimeStamp int64 + CarbonIntensityErrorTimeStamp int64 + CarbonLocation string + CarbonQueryRate int64 + CarbonQueryFilter string + CarbonQueryConv2J float64 + Logger logr.Logger } const ( @@ -59,6 +60,7 @@ const ( fixingDelay = 15 * time.Second // Time to wait in the event the LabelGroup was badly constructed nopodDelay = 15 * time.Second // Time to wait in the event no pods are found errorDelay = 1 * time.Second // Time to wait when an error happens due to network connectivity issues + carbonRetryDelay = 300 // Number of seconds to wait for retry after carbon query failure ) var ( @@ -115,27 +117,31 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) // TODO: put this code only in Reloading and Aggregating cases if r.CarbonMethod == "simpledynamic" { currentEpoch := time.Now().Unix() - if (currentEpoch - r.CarbonIntensityTimeStamp) > r.CarbonQueryRate { + if (currentEpoch-r.CarbonIntensityTimeStamp) > r.CarbonQueryRate && (currentEpoch-r.CarbonIntensityErrorTimeStamp) > carbonRetryDelay { newCarbonIntensity, err := querySimpleCarbonIntensity(r.CarbonIntensityUrl, r.CarbonLocation, r.CarbonQueryFilter, r.CarbonQueryConv2J) if err == nil { r.CarbonIntensity = newCarbonIntensity r.CarbonIntensityTimeStamp = currentEpoch + r.CarbonIntensityErrorTimeStamp = 0 r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) } else { + r.CarbonIntensityErrorTimeStamp = currentEpoch r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.") } } } if r.CarbonMethod == "casdk" { 
currentEpoch := time.Now().Unix() - if (currentEpoch - r.CarbonIntensityTimeStamp) > r.CarbonQueryRate { + if (currentEpoch-r.CarbonIntensityTimeStamp) > r.CarbonQueryRate && (currentEpoch-r.CarbonIntensityErrorTimeStamp) > carbonRetryDelay { newCarbonIntensity, err := queryCarbonIntensity(r.CarbonIntensityUrl, r.CarbonLocation, r.CarbonQueryFilter, r.CarbonQueryConv2J) if err == nil { r.CarbonIntensity = newCarbonIntensity r.CarbonIntensityTimeStamp = currentEpoch - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) + r.CarbonIntensityErrorTimeStamp = 0 + r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-casdk] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) } else { - r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.") + r.CarbonIntensityErrorTimeStamp = currentEpoch + r.Logger.V(0).Error(err, "[Reconcile-casdk] Unable to query carbon intensity.") } } } diff --git a/doc/susql-config.yaml b/samples/susql-config.yaml similarity index 100% rename from doc/susql-config.yaml rename to samples/susql-config.yaml