From c46f561dd479d30f9b6a5f2bca4161368a44c271 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Sun, 29 Sep 2024 18:29:16 +0900 Subject: [PATCH 01/11] carbon3: improve docs & error output and handling Signed-off-by: Scott Trent --- README.md | 13 +++--- VERSION | 2 +- .../susql-operator.clusterserviceversion.yaml | 12 ++--- cmd/main.go | 33 +++++++------- config/manager/kustomization.yaml | 2 +- .../susql-operator.clusterserviceversion.yaml | 2 +- doc/c3-susql-config.yaml | 21 +++++++++ doc/carbon.md | 17 ++++--- doc/helm-installation.md | 2 +- doc/openshift-installation.md | 2 +- doc/operatorhub-installation.md | 2 +- internal/controller/carbon_query.go | 18 ++++---- internal/controller/labelgroup_controller.go | 44 +++++++++++-------- {doc => samples}/susql-config.yaml | 0 14 files changed, 102 insertions(+), 68 deletions(-) create mode 100644 doc/c3-susql-config.yaml rename {doc => samples}/susql-config.yaml (100%) diff --git a/README.md b/README.md index a6532f6..426cf69 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SusQL Operator -SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are taken from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a video with a demonstration by clicking the following link. +SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a demonstration video by clicking the following link. 
https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-09.mp4 @@ -9,21 +9,24 @@ https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo ## Getting Started -SusQL is an operator that can be deployed in a Kubernetes/OpenShift cluster. You can use [kind](https://sigs.k8s.io/kind) or [minikube](https://minikube.sigs.k8s.io/) to get a local cluster for testing, or run against a remote cluster. +SusQL is an operator that can be deployed in a Kubernetes/OpenShift cluster. You can also use [kind](https://sigs.k8s.io/kind) or [minikube](https://minikube.sigs.k8s.io/) as a local cluster for testing, or run against a remote cluster. ## Carbon Dioxide Emission Calculation By default SusQL calculates carbon dioxide emission in grams of CO2 using a carbon intensity value from [US EPA](https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references). +SusQL can be configured to use other static carbon intensity values or query carbon intensity values for a +given location from web API's such as those provided by +the Green Software Foundation's [Carbon Aware SDK](https://github.com/Green-Software-Foundation/carbon-aware-sdk). Detailed information on configuration of CO2 emission calculation in SusQL is available in the [SusQL carbon calculation documentation.](doc/carbon.md) -### Prerequisites +## Prerequisites Kepler is assumed to be installed in the cluster. -### Installation +## Installation - Follow these instructions for easy SusQL installation from the Red Hat Community Operator catalog on an OpenShift cluster. - [Installation on OpenShift](doc/openshift-installation.md) @@ -31,7 +34,7 @@ Kepler is assumed to be installed in the cluster. - Follow these instructions to install the SusQL Operator from [OperatorHub.io](https://operatorhub.io) on a Kubernetes cluster including OpenShift. 
- [Installation from OperatorHub.io](doc/operatorhub-installation.md) -- Follow these instructions to install the SusQL Operator with Helm on a Kubernetes cluster including OpenShift. +- Follow these instructions to install the SusQL Operator from a Helm chart on a Kubernetes cluster, including OpenShift. - [Installation with Helm](doc/helm-installation.md) diff --git a/VERSION b/VERSION index d788d43..78bae5b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.31 +0.0.32 diff --git a/bundle/manifests/susql-operator.clusterserviceversion.yaml b/bundle/manifests/susql-operator.clusterserviceversion.yaml index d956a76..9a65c77 100644 --- a/bundle/manifests/susql-operator.clusterserviceversion.yaml +++ b/bundle/manifests/susql-operator.clusterserviceversion.yaml @@ -27,15 +27,15 @@ metadata: ] capabilities: Basic Install categories: Monitoring - containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.31 - createdAt: "2024-09-28T12:08:54Z" + containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.32 + createdAt: "2024-09-29T09:27:06Z" description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' operators.operatorframework.io/builder: operator-sdk-v1.36.1 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 repository: https://github.com/sustainable-computing-io/susql-operator support: https://github.com/sustainable-computing-io/susql-operator/issues - name: susql-operator.v0.0.31 + name: susql-operator.v0.0.32 namespace: placeholder spec: apiservicedefinitions: {} @@ -301,7 +301,7 @@ spec: key: CARBON-QUERY-CONV-2J name: susql-config optional: true - image: quay.io/sustainable_computing_io/susql_operator:0.0.31 + image: quay.io/sustainable_computing_io/susql_operator:0.0.32 imagePullPolicy: Always livenessProbe: httpGet: @@ -408,5 +408,5 @@ spec: provider: name: SusQL Operator Contributors url: https://github.com/sustainable-computing-io/susql-operator - replaces: susql-operator.v0.0.29 - version: 
0.0.31 + replaces: susql-operator.v0.0.30 + version: 0.0.32 diff --git a/cmd/main.go b/cmd/main.go index 28f13fc..801984f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -230,22 +230,23 @@ func main() { susqlLog.Info("Setting up labelGroupReconciler.") if err = (&controller.LabelGroupReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - KeplerPrometheusUrl: keplerPrometheusUrl, - KeplerMetricName: keplerMetricName, - SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl, - SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl, - SamplingRate: time.Duration(samplingRateInteger) * time.Second, - CarbonMethod: carbonMethod, - CarbonIntensity: carbonIntensityFloat, - CarbonIntensityUrl: carbonIntensityUrl, - CarbonIntensityTimeStamp: 0, - CarbonLocation: carbonLocation, - CarbonQueryRate: carbonQueryRateInteger, - CarbonQueryFilter: carbonQueryFilter, - CarbonQueryConv2J: carbonQueryConv2JFloat, - Logger: susqlLog, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + KeplerPrometheusUrl: keplerPrometheusUrl, + KeplerMetricName: keplerMetricName, + SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl, + SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl, + SamplingRate: time.Duration(samplingRateInteger) * time.Second, + CarbonMethod: carbonMethod, + CarbonIntensity: carbonIntensityFloat, + CarbonIntensityUrl: carbonIntensityUrl, + CarbonIntensityTimeStamp: 0, + CarbonIntensityErrorTimeStamp: 0, + CarbonLocation: carbonLocation, + CarbonQueryRate: carbonQueryRateInteger, + CarbonQueryFilter: carbonQueryFilter, + CarbonQueryConv2J: carbonQueryConv2JFloat, + Logger: susqlLog, }).SetupWithManager(mgr); err != nil { susqlLog.Error(err, "unable to create controller", "controller", "LabelGroup") os.Exit(1) diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 851a8cd..c86c163 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller 
newName: quay.io/sustainable_computing_io/susql_operator - newTag: 0.0.31 + newTag: 0.0.32 diff --git a/config/manifests/bases/susql-operator.clusterserviceversion.yaml b/config/manifests/bases/susql-operator.clusterserviceversion.yaml index a3d74f8..91ad44d 100644 --- a/config/manifests/bases/susql-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/susql-operator.clusterserviceversion.yaml @@ -116,5 +116,5 @@ spec: provider: name: SusQL Operator Contributors url: https://github.com/sustainable-computing-io/susql-operator - replaces: susql-operator.v0.0.29 + replaces: susql-operator.v0.0.30 version: 0.0.0 diff --git a/doc/c3-susql-config.yaml b/doc/c3-susql-config.yaml new file mode 100644 index 0000000..3383f9b --- /dev/null +++ b/doc/c3-susql-config.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: susql-config +data: + KEPLER-PROMETHEUS-URL: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091" + KEPLER-METRIC-NAME: "kepler_container_joules_total" + SUSQL-PROMETHEUS-DATABASE-URL: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091" + SUSQL-PROMETHEUS-METRICS-URL: "http://0.0.0.0:8082" + SAMPLING-RATE: "2" + LEADER-ELECT: "false" + HEALTH-PROBE-BIND-ADDRESS: ":8081" + SUSQL-LOG-LEVEL: "-5" + CARBON-METHOD: "casdk" + CARBON-INTENSITY: "0.0001158333333333" + CARBON-INTENSITY-URL: "https://api.electricitymap.org/v3/carbon-intensity/latest?zone=%s" + CARBON-INTENSITY-URL: "http://webapi-green-software-foundation.apps.multi-nic-v2.llmdev.res.ibm.com/emissions/bylocation?location=%s" + CARBON-LOCATION: "japanwestERROR" + CARBON-QUERY-RATE: "7200" + CARBON-QUERY-FILTER: "rating" + CARBON-QUERY-CONV-2J: "0.0000002777777778" diff --git a/doc/carbon.md b/doc/carbon.md index 6335e5f..c981f6f 100644 --- a/doc/carbon.md +++ b/doc/carbon.md @@ -4,11 +4,14 @@ There are three primary CO2 emission calculation methods. 
"Out-of-the-box" SusQL reports an estimated CO2 emission value for all measured workloads using the `static` method: +The behavior of SusQL carbon calculation can be tuned by modifying the `susql-config` `ConfigMap` in the same namespace that the SusQL operator is running in. +A sample file is provided in `samples/susql-config.yaml`. + ## `static` Method - This `static` method uses a static "carbon intensity value" as a coefficient to calculate grams of CO2 emitted. This calculation method is used when the `CARBON-METHOD` `ConfigMap` value is set to `static`. -#### `static` Method `ConfigMap` Configurable items +#### `static` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `static` method is enabled when this is set to `static`. - `CARBON-INTENSITY` - Carbon intensity value. A coefficient used to convert Joules to grams of CO2 per Joule. The unit definition is grams of CO2 per Joule. The default carbon intensity value is based on [US EPA](https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references) data. @@ -34,7 +37,7 @@ There are three primary CO2 emission calculation methods. #### Configuring and installing Carbon Aware SDK - Following guidance in https://github.com/Green-Software-Foundation/carbon-aware-sdk/blob/dev/casdk-docs/docs/overview/enablement.md, the Carbon Aware SDK can be easily installed on a Kubernetes cluster such as OpenShift. -- Preparation: clone the repository and edit `helm-chart/values.yaml` as needed to reflect private password, configuration, etc. +- Preparation: clone the Carbon Aware SDK repository and edit `helm-chart/values.yaml` as needed to reflect private password, configuration, etc. 
(Useful configuration tips available at https://github.com/Green-Software-Foundation/carbon-aware-sdk/blob/dev/casdk-docs/docs/tutorial-extras/configuration.md ) ``` @@ -43,8 +46,8 @@ vi helm-chart/values.yaml ``` - Preparation: required software and permission - Ensure that `helm`, and `kubectl` (or `oc`) are installed - - Ensure that CLI user is logged in to cluster with sufficient permissions -- Perform installation + - Ensure that CLI user is logged in to the cluster with sufficient permissions +- Perform installation: (The example installs into namespace `gsf`. However, other namespaces may be used.) ``` cd carbon-aware-sdk helm upgrade --install --wait carbon-aware-sdk helm-chart --create-namespace gsf @@ -56,11 +59,11 @@ Note the value reported for "HOST/PORT". This will be used in the next configura Update the following items in the `susql-config.yaml` file: ``` CARBON-METHOD: "casdk" - CARBON-INTENSITY-URL: "http:///emissions/bylocation?location=%s" + CARBON-INTENSITY-URL: "http:///emissions/bylocation?location=%s" CARBON-LOCATION: "" CARBON-QUERY-FILTER: "rating" ``` -Tip: try this command to verify sdk container functionality and also view available locations: `curl -s "http:/locations"` +Tip: try this command to verify sdk container functionality and also view available locations: `curl -s "http:/locations"` Apply the updated `susql-config.yaml` file: @@ -68,7 +71,7 @@ Apply the updated `susql-config.yaml` file: oc apply -f susql-config.yaml -n ``` You are now ready to install and use the SusQL operator. -If the SusQL Operator is alreay installed, then restart the control pod. +If the SusQL Operator is already installed, then restart the control pod. #### `casdk` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `casdk` method is enabled when this is set to `casdk`. 
diff --git a/doc/helm-installation.md b/doc/helm-installation.md index 56129e8..8005b74 100644 --- a/doc/helm-installation.md +++ b/doc/helm-installation.md @@ -65,7 +65,7 @@ The following environment variables will influence the way that the SusQL Operat | SUSQL_REGISTRY | quay.io/sustainable_computing_io | Container registry that SusQL is stored in | | SUSQL_IMAGE_NAME | susql_operator | Image name used on SusQL container registry | | SUSQL_IMAGE_TAG | latest | Tag for SusQL container | -| CARBON_METHOD | static | "static", "simpledynamic", "scadk" | +| CARBON_METHOD | static | "static", "simpledynamic", "casdk" | | CARBON_INTENSITY | "0.00011583333" | Carbon intensity in grams CO2 / Joule | | CARBON_INTENSITY_URL | | Web API to query carbon intensity | | CARBON_LOCATION | | Location for carbon intensity query | diff --git a/doc/openshift-installation.md b/doc/openshift-installation.md index 2e64d29..c0f250c 100644 --- a/doc/openshift-installation.md +++ b/doc/openshift-installation.md @@ -24,7 +24,7 @@ Use the OpenShift web console to install the SusQL Operator: Before deploying the SusQL Operator create a `ConfigMap` called `susql-config` in the same namespace that the operator will run in. -[susql-config.yaml](susql-config.yaml) is a good starting point. If you download it first, you +[susql-config.yaml](../samples/susql-config.yaml) is a good starting point. If you download it first, you could create the ConfigMap with `oc apply -n -f susql-config.yaml`. If you update (or create) the ConfigMap after the SusQL Operator has been installed, then restarting the SusQL Operator controller pod will enable the changes. (e.g., Delete the pod, and allow it to be recreated automatically.) 
diff --git a/doc/operatorhub-installation.md b/doc/operatorhub-installation.md index 5f91caa..7224eb4 100644 --- a/doc/operatorhub-installation.md +++ b/doc/operatorhub-installation.md @@ -49,7 +49,7 @@ Next, use the OpenShift web console to install the SusQL Operator: Before deploying the SusQL Operator create a `ConfigMap` called `susql-config` in the same namespace that the operator will run in. -[susql-config.yaml](susql-config.yaml) is a good starting point. If you download it first, you +[susql-config.yaml](../samples/susql-config.yaml) is a good starting point. If you download it first, you could create the ConfigMap with `oc apply -n -f susql-config.yaml`. If you update (or create) the ConfigMap after the SusQL Operator has been installed, then restarting the SusQL Operator controller pod will enable the changes. (e.g., Delete the pod, and allow it to be recreated automatically.) diff --git a/internal/controller/carbon_query.go b/internal/controller/carbon_query.go index 2ffff31..b037468 100644 --- a/internal/controller/carbon_query.go +++ b/internal/controller/carbon_query.go @@ -25,17 +25,16 @@ import ( ) func queryCarbonIntensity(url string, location string, filter string, conv2J float64) (float64, error) { + queryUrl := fmt.Sprintf(url, location) - fmt.Println("CARBON QUERY: url=" + fmt.Sprintf(url, location)) - - response, err := http.Get(fmt.Sprintf(url, location)) + response, err := http.Get(queryUrl) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s", err, queryUrl) } responseData, err := ioutil.ReadAll(response.Body) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s\nresponse=%s", err, queryUrl, string(responseData)) } length := gjson.Get(string(responseData), "#").Int() - 1 @@ -46,7 +45,7 @@ func queryCarbonIntensity(url string, location string, filter string, conv2J flo carbonIntensityFloat, err := strconv.ParseFloat(carbonIntensityString, 64) if err != nil { - 
return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s\nresponse=%s\nfilter=%s\nresult=%s", err, queryUrl, string(responseData), newFilter, carbonIntensityString) } // return nil error since no error @@ -54,22 +53,23 @@ func queryCarbonIntensity(url string, location string, filter string, conv2J flo } func querySimpleCarbonIntensity(url string, location string, filter string, conv2J float64) (float64, error) { + queryUrl := fmt.Sprintf(url, location) response, err := http.Get(fmt.Sprintf(url, location)) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("querySimpleCarbonIntensity: %w\nURL=%s", err, queryUrl) } responseData, err := ioutil.ReadAll(response.Body) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonSimpleIntensity: %w\nURL=%s\nresponse=%s", err, queryUrl, string(responseData)) } carbonIntensityString := gjson.Get(string(responseData), filter).String() carbonIntensityFloat, err := strconv.ParseFloat(carbonIntensityString, 64) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonSimpleIntensity: %w\nURL=%s\nresponse=%s\nfilter=%s\nresult=%s", err, queryUrl, string(responseData), filter, carbonIntensityString) } // return nil error since no error diff --git a/internal/controller/labelgroup_controller.go b/internal/controller/labelgroup_controller.go index 030d521..9b5c83c 100644 --- a/internal/controller/labelgroup_controller.go +++ b/internal/controller/labelgroup_controller.go @@ -36,21 +36,22 @@ import ( // LabelGroupReconciler reconciles a LabelGroup object type LabelGroupReconciler struct { client.Client - Scheme *runtime.Scheme - KeplerPrometheusUrl string - KeplerMetricName string - SusQLPrometheusDatabaseUrl string - SusQLPrometheusMetricsUrl string - SamplingRate time.Duration // Sampling rate for all LabelGroups - CarbonMethod string - CarbonIntensity float64 - CarbonIntensityUrl string - CarbonIntensityTimeStamp int64 - CarbonLocation string - CarbonQueryRate int64 - 
CarbonQueryFilter string - CarbonQueryConv2J float64 - Logger logr.Logger + Scheme *runtime.Scheme + KeplerPrometheusUrl string + KeplerMetricName string + SusQLPrometheusDatabaseUrl string + SusQLPrometheusMetricsUrl string + SamplingRate time.Duration // Sampling rate for all LabelGroups + CarbonMethod string + CarbonIntensity float64 + CarbonIntensityUrl string + CarbonIntensityTimeStamp int64 + CarbonIntensityErrorTimeStamp int64 + CarbonLocation string + CarbonQueryRate int64 + CarbonQueryFilter string + CarbonQueryConv2J float64 + Logger logr.Logger } const ( @@ -59,6 +60,7 @@ const ( fixingDelay = 15 * time.Second // Time to wait in the event the LabelGroup was badly constructed nopodDelay = 15 * time.Second // Time to wait in the event no pods are found errorDelay = 1 * time.Second // Time to wait when an error happens due to network connectivity issues + carbonRetryDelay = 300 // Number of seconds to wait for retry after carbon query failure ) var ( @@ -115,27 +117,31 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) // TODO: put this code only in Reloading and Aggregating cases if r.CarbonMethod == "simpledynamic" { currentEpoch := time.Now().Unix() - if (currentEpoch - r.CarbonIntensityTimeStamp) > r.CarbonQueryRate { + if (currentEpoch-r.CarbonIntensityTimeStamp) > r.CarbonQueryRate && (currentEpoch-r.CarbonIntensityErrorTimeStamp) > carbonRetryDelay { newCarbonIntensity, err := querySimpleCarbonIntensity(r.CarbonIntensityUrl, r.CarbonLocation, r.CarbonQueryFilter, r.CarbonQueryConv2J) if err == nil { r.CarbonIntensity = newCarbonIntensity r.CarbonIntensityTimeStamp = currentEpoch + r.CarbonIntensityErrorTimeStamp = 0 r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) } else { + r.CarbonIntensityErrorTimeStamp = currentEpoch r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.") } } } if r.CarbonMethod == "casdk" { 
currentEpoch := time.Now().Unix() - if (currentEpoch - r.CarbonIntensityTimeStamp) > r.CarbonQueryRate { + if (currentEpoch-r.CarbonIntensityTimeStamp) > r.CarbonQueryRate && (currentEpoch-r.CarbonIntensityErrorTimeStamp) > carbonRetryDelay { newCarbonIntensity, err := queryCarbonIntensity(r.CarbonIntensityUrl, r.CarbonLocation, r.CarbonQueryFilter, r.CarbonQueryConv2J) if err == nil { r.CarbonIntensity = newCarbonIntensity r.CarbonIntensityTimeStamp = currentEpoch - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) + r.CarbonIntensityErrorTimeStamp = 0 + r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-casdk] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) } else { - r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.") + r.CarbonIntensityErrorTimeStamp = currentEpoch + r.Logger.V(0).Error(err, "[Reconcile-casdk] Unable to query carbon intensity.") } } } diff --git a/doc/susql-config.yaml b/samples/susql-config.yaml similarity index 100% rename from doc/susql-config.yaml rename to samples/susql-config.yaml From 75a3e98e3acf8042d44bce2ce34591b2af491818 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Mon, 30 Sep 2024 11:40:08 +0900 Subject: [PATCH 02/11] add required Red Hat catalog annotations Signed-off-by: Scott Trent --- .gitignore | 1 + .../susql-operator.clusterserviceversion.yaml | 9 +++++++- .../susql-operator.clusterserviceversion.yaml | 7 +++++++ doc/c3-susql-config.yaml | 21 ------------------- 4 files changed, 16 insertions(+), 22 deletions(-) delete mode 100644 doc/c3-susql-config.yaml diff --git a/.gitignore b/.gitignore index fd38148..eea678d 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ volume temp-prometheus.yaml .susql-deploy-info.txt .susql-deploy-info-last.txt +c3-susql-config.yaml diff --git a/bundle/manifests/susql-operator.clusterserviceversion.yaml b/bundle/manifests/susql-operator.clusterserviceversion.yaml 
index 9a65c77..0bba6d3 100644 --- a/bundle/manifests/susql-operator.clusterserviceversion.yaml +++ b/bundle/manifests/susql-operator.clusterserviceversion.yaml @@ -28,9 +28,16 @@ metadata: capabilities: Basic Install categories: Monitoring containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.32 - createdAt: "2024-09-29T09:27:06Z" + createdAt: "2024-09-30T02:37:03Z" description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' + features.operators.openshift.io/disconnected: "false" + features.operators.openshift.io/fips-compliant: "false" + features.operators.openshift.io/proxy-aware: "false" + features.operators.openshift.io/tls-profiles: "false" + features.operators.openshift.io/token-auth-aws: "false" + features.operators.openshift.io/token-auth-azure: "false" + features.operators.openshift.io/token-auth-gcp: "false" operators.operatorframework.io/builder: operator-sdk-v1.36.1 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 repository: https://github.com/sustainable-computing-io/susql-operator diff --git a/config/manifests/bases/susql-operator.clusterserviceversion.yaml b/config/manifests/bases/susql-operator.clusterserviceversion.yaml index 91ad44d..2796234 100644 --- a/config/manifests/bases/susql-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/susql-operator.clusterserviceversion.yaml @@ -8,6 +8,13 @@ metadata: containerImage: description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' + features.operators.openshift.io/disconnected: "false" + features.operators.openshift.io/fips-compliant: "false" + features.operators.openshift.io/proxy-aware: "false" + features.operators.openshift.io/tls-profiles: "false" + features.operators.openshift.io/token-auth-aws: "false" + features.operators.openshift.io/token-auth-azure: "false" + features.operators.openshift.io/token-auth-gcp: "false" repository: https://github.com/sustainable-computing-io/susql-operator 
support: https://github.com/sustainable-computing-io/susql-operator/issues name: susql-operator.v0.0.0 diff --git a/doc/c3-susql-config.yaml b/doc/c3-susql-config.yaml deleted file mode 100644 index 3383f9b..0000000 --- a/doc/c3-susql-config.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: susql-config -data: - KEPLER-PROMETHEUS-URL: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091" - KEPLER-METRIC-NAME: "kepler_container_joules_total" - SUSQL-PROMETHEUS-DATABASE-URL: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091" - SUSQL-PROMETHEUS-METRICS-URL: "http://0.0.0.0:8082" - SAMPLING-RATE: "2" - LEADER-ELECT: "false" - HEALTH-PROBE-BIND-ADDRESS: ":8081" - SUSQL-LOG-LEVEL: "-5" - CARBON-METHOD: "casdk" - CARBON-INTENSITY: "0.0001158333333333" - CARBON-INTENSITY-URL: "https://api.electricitymap.org/v3/carbon-intensity/latest?zone=%s" - CARBON-INTENSITY-URL: "http://webapi-green-software-foundation.apps.multi-nic-v2.llmdev.res.ibm.com/emissions/bylocation?location=%s" - CARBON-LOCATION: "japanwestERROR" - CARBON-QUERY-RATE: "7200" - CARBON-QUERY-FILTER: "rating" - CARBON-QUERY-CONV-2J: "0.0000002777777778" From 6148ebe38dca8e885dd776a0b5b001292a27be02 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Mon, 30 Sep 2024 14:11:54 +0900 Subject: [PATCH 03/11] boost default pod security Signed-off-by: Scott Trent --- .../manifests/susql-operator.clusterserviceversion.yaml | 9 ++------- config/default/manager_auth_proxy_patch.yaml | 2 -- config/default/manager_config_patch.yaml | 3 +-- config/manager/manager.yaml | 5 +---- deployment/susql-controller/templates/deployment.yaml | 3 +-- 5 files changed, 5 insertions(+), 17 deletions(-) diff --git a/bundle/manifests/susql-operator.clusterserviceversion.yaml b/bundle/manifests/susql-operator.clusterserviceversion.yaml index 0bba6d3..72b5482 100644 --- a/bundle/manifests/susql-operator.clusterserviceversion.yaml +++ 
b/bundle/manifests/susql-operator.clusterserviceversion.yaml @@ -28,7 +28,7 @@ metadata: capabilities: Basic Install categories: Monitoring containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.32 - createdAt: "2024-09-30T02:37:03Z" + createdAt: "2024-09-30T05:10:03Z" description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' features.operators.openshift.io/disconnected: "false" @@ -212,9 +212,7 @@ spec: drop: - ALL readOnlyRootFilesystem: true - runAsGroup: 14001 runAsNonRoot: true - runAsUser: 14001 - command: - /manager env: @@ -339,12 +337,9 @@ spec: capabilities: drop: - ALL - runAsGroup: 12001 - runAsUser: 12001 + readOnlyRootFilesystem: true securityContext: - runAsGroup: 11001 runAsNonRoot: true - runAsUser: 11001 serviceAccountName: susql-operator-susql-controller-manager terminationGracePeriodSeconds: 10 permissions: diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index 3b1abf2..73f91cc 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -14,8 +14,6 @@ spec: allowPrivilegeEscalation: false readOnlyRootFilesystem: true runAsNonRoot: true - runAsUser: 14001 - runAsGroup: 14001 capabilities: drop: - "ALL" diff --git a/config/default/manager_config_patch.yaml b/config/default/manager_config_patch.yaml index 550ee39..bd993a2 100644 --- a/config/default/manager_config_patch.yaml +++ b/config/default/manager_config_patch.yaml @@ -10,9 +10,8 @@ spec: - name: manager imagePullPolicy: Always securityContext: - runAsUser: 11001 - runAsGroup: 11001 allowPrivilegeEscalation: false + readOnlyRootFilesystem: true runAsNonRoot: true capabilities: drop: diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 67a81de..c3c2fc2 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -57,8 +57,6 @@ spec: # - linux securityContext: runAsNonRoot: true - runAsUser: 11001 - 
runAsGroup: 11001 # TODO(user): For common cases that do not require escalating privileges # it is recommended to ensure that all your Pods/Containers are restrictive. # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted @@ -165,9 +163,8 @@ spec: imagePullPolicy: Always name: manager securityContext: - runAsUser: 12001 - runAsGroup: 12001 allowPrivilegeEscalation: false + readOnlyRootFilesystem : true capabilities: drop: - "ALL" diff --git a/deployment/susql-controller/templates/deployment.yaml b/deployment/susql-controller/templates/deployment.yaml index 037dd4b..f45dd34 100644 --- a/deployment/susql-controller/templates/deployment.yaml +++ b/deployment/susql-controller/templates/deployment.yaml @@ -24,9 +24,8 @@ spec: image: {{ required "Please specify a 'containerImage' in the user file" .Values.containerImage }} imagePullPolicy: {{ .Values.imagePullPolicy | default "Always" }} securityContext: - runAsUser: 10001 - runAsGroup: 10001 allowPrivilegeEscalation: false + readOnlyRootFilesystem: true runAsNonRoot: true capabilities: drop: From cdad0e74f022c7449d5c4a7ce65326b40994f269 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Mon, 30 Sep 2024 15:07:14 +0900 Subject: [PATCH 04/11] update demo link in readme Signed-off-by: Scott Trent --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 426cf69..cf519c0 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a demonstration video by clicking the following link. 
-https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-09.mp4 +https://raw.githubusercontent.com/wiki/sustainable-computing-io/susql-operator/files/SusQL-Demo-2024-09.mp4 ![SusQL Architecture](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-Thumbnail.png) From 071ff0b740b9eea2b6baf2d0f257e7ae8b8785fa Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Mon, 30 Sep 2024 16:00:51 +0900 Subject: [PATCH 05/11] first attempt at new video location Signed-off-by: Scott Trent --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cf519c0..8976b87 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a demonstration video by clicking the following link. -https://raw.githubusercontent.com/wiki/sustainable-computing-io/susql-operator/files/SusQL-Demo-2024-09.mp4 +https://youtu.be/lGQL1GhflD0 ![SusQL Architecture](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-Thumbnail.png) From 10149b30119d87e0ee340fcfe8fc357a4a76a2f4 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Mon, 30 Sep 2024 16:07:18 +0900 Subject: [PATCH 06/11] better demo link Signed-off-by: Scott Trent --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index 8976b87..9f831d5 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,7 @@ SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. 
Watch a demonstration video by clicking the following link. -https://youtu.be/lGQL1GhflD0 - -![SusQL Architecture](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-Thumbnail.png) - +[![SusQL Demo](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-Thumbnail.png)](https://youtu.be/lGQL1GhflD0) ## Getting Started From 1c0aeef1dcda588246379f9ee05573ac2444cba9 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Mon, 30 Sep 2024 16:15:49 +0900 Subject: [PATCH 07/11] Improved description Signed-off-by: Scott Trent --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f831d5..fc631a1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SusQL Operator -SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a demonstration video by clicking the following link. +SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Click the picture below to watch the demo video. [![SusQL Demo](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-Thumbnail.png)](https://youtu.be/lGQL1GhflD0) From b38201941f20d25defb22054970da96b7f44c8c4 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Tue, 1 Oct 2024 16:33:40 +0900 Subject: [PATCH 08/11] Allow script to run independent of cwd and use VERSION file. 
Signed-off-by: Scott Trent --- deployment/setup-local.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/deployment/setup-local.sh b/deployment/setup-local.sh index 24db11d..f5c19e8 100644 --- a/deployment/setup-local.sh +++ b/deployment/setup-local.sh @@ -1,5 +1,11 @@ #!/usr/bin/env bash +# no matter how the script is invoked, TOP will point to the "deployment" +# directory and hence can be used to access files relative +# to the known directory of the setup-local.sh script. + +TOP=$(dirname $0) + set -e set -o pipefail @@ -17,7 +23,7 @@ export CLUSTER_NAME="local-cluster" if kind get clusters | grep -q "$CLUSTER_NAME"; then echo "> Kind cluster $CLUSTER_NAME already exists" else - kind create cluster --name="$CLUSTER_NAME" --config=./deployment/local-cluster-config.yaml + kind create cluster --name="$CLUSTER_NAME" --config=${TOP}/local-cluster-config.yaml fi # Install Prometheus via Helm @@ -53,7 +59,7 @@ GF_POD=$( -l app.kubernetes.io/name=grafana \ -o jsonpath="{.items[0].metadata.name}" ) -kubectl cp deployment/kepler_dashboard.json monitoring/$GF_POD:/tmp/dashboards/kepler_dashboard.json +kubectl cp ${TOP}/kepler_dashboard.json monitoring/$GF_POD:/tmp/dashboards/kepler_dashboard.json # echo "> Install OLM" @@ -65,10 +71,10 @@ kubectl cp deployment/kepler_dashboard.json monitoring/$GF_POD:/tmp/dashboards/k # kubectl create -f https://operatorhub.io/install/susql-operator.yaml # echo "> Wait for susql to be ready" -# kubectl wait --for=condition=Installed csv/susql-operator.v0.0.30 -n operators --timeout=300s +# kubectl wait --for=condition=Installed csv/susql-operator.v$(cat ${TOP}/../VERSION) -n operators --timeout=300s # Optional: Delete Kind cluster after use # kind delete cluster --name="$CLUSTER_NAME" - echo "> Done" \ No newline at end of file + echo "> Done" From 1f946fcf655023871b552619f0b0b17dca7aa996 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Tue, 1 Oct 2024 16:36:37 +0900 Subject: [PATCH 09/11] improve docs 
Signed-off-by: Scott Trent --- deployment/README.md | 32 +++++++++++++++++++++++++++++ doc/carbon.md | 12 +++++------ doc/openshift-installation.md | 5 +++-- samples/README.md | 38 +++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 8 deletions(-) create mode 100644 deployment/README.md create mode 100644 samples/README.md diff --git a/deployment/README.md b/deployment/README.md new file mode 100644 index 0000000..e664591 --- /dev/null +++ b/deployment/README.md @@ -0,0 +1,32 @@ +# Deployment files + +- `deploy.sh` + Deploy SusQL via a Helm chart. +- `kepler-check.yaml` + Used by `deploy.sh` to verify that Kepler is available. +- `kepler_dashboard.json` + A grafana dashboard. +- `local-cluster-config.yaml` + A yaml file used to deploy a local `kind` cluster. +- `prometheus.yaml` + Used by `deploy.sh` to deploy Prometheus in the cluster. +- `setup-local.sh` + Deploy SusQL on a local `kind` cluster. +- `susql-controller` + Helm Chart files used to deploy SusQL. + +## License + +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/doc/carbon.md b/doc/carbon.md index c981f6f..0b86092 100644 --- a/doc/carbon.md +++ b/doc/carbon.md @@ -1,10 +1,10 @@ # Carbon Dioxide Emission Estimation -There are three primary CO2 emission calculation methods. +SusQL supports three primary CO2 emission calculation methods. 
"Out-of-the-box" SusQL reports an estimated CO2 emission value for all measured workloads using the `static` method: -The behavior of SusQL carbon calculation can be tuned by modyfing the `susql-config` `ConfigMap` in the same namespace that the SusQL operator is running in. +The behavior of SusQL carbon calculation can be tuned by modifying the `susql-config` `ConfigMap` in the same namespace that the SusQL operator is running in. A sample file is provided in `samples/susql-config.yaml`. ## `static` Method @@ -13,7 +13,7 @@ A sample file is provided in `samples/susql-config.yaml`. #### `static` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `static` method is enabled when this is set to `static`. - - `CARBON-INTENSITY` - Carbon intensity value. A coefficient used to convert Joules to grams of CO2 per Joule. The unit definition is grams of CO2 per Joule. + - `CARBON-INTENSITY` - Carbon intensity value. A coefficient used to convert Joules to grams of CO2 per Joule. The unit definition is grams of CO2 per Joule. (If you have a custom grams per kWh carbon intensity value, you can multiply it by 0.0000002777777778 to get grams CO2 per Joule.) The default carbon intensity value is based on [US EPA](https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references) data. ## `simpledynamic` Method @@ -46,8 +46,8 @@ vi helm-chart/values.yaml ``` - Preparation: required software and permission - Ensure that `helm`, and `kubectl` (or `oc`) are installed - - Ensure that CLI user is logged in to the cluster with sufficient permissions -- Perform installation: (The example installes into namespace `gsf`. However, other namespaces may be used.) + - Ensure that the CLI user is logged in to the cluster with sufficient permissions +- Perform installation: (The example installs into namespace `gsf`. However, other namespaces may be used.) 
``` cd carbon-aware-sdk helm upgrade --install --wait carbon-aware-sdk helm-chart --create-namespace gsf @@ -71,7 +71,7 @@ Apply the updated `susql-config.yaml` file: oc apply -f susql-config.yaml -n ``` You are now ready to install and use the SusQL operator. -If the SusQL Operator is already installed, then restart the control pod. +If the SusQL Operator is already installed, then restart the control pod to enable new `susql-config` values. #### `casdk` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `casdk` method is enabled when this is set to `casdk`. diff --git a/doc/openshift-installation.md b/doc/openshift-installation.md index c0f250c..c0e2e75 100644 --- a/doc/openshift-installation.md +++ b/doc/openshift-installation.md @@ -1,6 +1,7 @@ -# SusQL Operator Installation via OpenShift Community Operator Catalog +# SusQL Operator Installation via OpenShift Red Hat Community Operator Catalog -Installation of the SusQL Operator on OpenShift is very easy. +Installation of the SusQL Operator on OpenShift using the OpenShift +Red Hat Community Operator Catalog is very easy. ## Prerequisites diff --git a/samples/README.md b/samples/README.md new file mode 100644 index 0000000..cfe62b7 --- /dev/null +++ b/samples/README.md @@ -0,0 +1,38 @@ +# samples + +## scripts +- `start.sh` - deploy labels and start workloads +- `clean.sh` - cleanup labels and workloads +- `labelgroups.sh` - view LabelGroup information directly from LabelGroup CR +- `susqltop` - show top energy consuming groups +- `susqltopmon` - run susqltop periodically + +## configuration yaml file +- `susql-config.yaml` + +## labelgroup yaml files +- `labelgroups.yaml` + +## workload yaml files +- `energy-consumer-job.yaml` +- `gpu-consumer-job.yaml` +- `training-job-1.yaml` +- `training-job-2.yaml` +- `rhosaij.yaml` + + +## License + +Copyright 2023, 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. From 360bd4f96fc1aa93d4ad58db4f0a232c3957fb03 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Fri, 4 Oct 2024 12:02:24 +0900 Subject: [PATCH 10/11] update thumbnail Signed-off-by: Scott Trent --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fc631a1..884f102 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Click the picture below to watch the demo video. -[![SusQL Demo](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-Thumbnail.png)](https://youtu.be/lGQL1GhflD0) +[![SusQL Demo](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-10-Thumbnail.png)](https://youtu.be/lGQL1GhflD0) ## Getting Started From 8d7065049167d2e8a2a3e6511e5627738da33760 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Fri, 4 Oct 2024 14:20:51 +0900 Subject: [PATCH 11/11] update video link Signed-off-by: Scott Trent --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 884f102..845f56b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. 
The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Click the picture below to watch the demo video. -[![SusQL Demo](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-10-Thumbnail.png)](https://youtu.be/lGQL1GhflD0) +[![SusQL Demo](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-10-Thumbnail.png)](https://youtu.be/9CwuhOfVtjE) ## Getting Started