diff --git a/.gitignore b/.gitignore index fd38148..eea678d 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ volume temp-prometheus.yaml .susql-deploy-info.txt .susql-deploy-info-last.txt +c3-susql-config.yaml diff --git a/README.md b/README.md index a6532f6..845f56b 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,29 @@ # SusQL Operator -SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are taken from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Watch a video with a demonstration by clicking the following link. - -https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-09.mp4 - -![SusQL Architecture](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-Thumbnail.png) +SusQL is a Kubernetes operator that aggregates energy and estimated carbon dioxide emission data for pods tagged with SusQL specific labels. The energy measurements are obtained from [Kepler](https://sustainable-computing.io/) which should be deployed on the cluster before using SusQL. Click the picture below to watch the demo video. +[![SusQL Demo](https://github.com/sustainable-computing-io/susql-operator/wiki/files/SusQL-Demo-2024-10-Thumbnail.png)](https://youtu.be/9CwuhOfVtjE) ## Getting Started -SusQL is an operator that can be deployed in a Kubernetes/OpenShift cluster. You can use [kind](https://sigs.k8s.io/kind) or [minikube](https://minikube.sigs.k8s.io/) to get a local cluster for testing, or run against a remote cluster. +SusQL is an operator that can be deployed in a Kubernetes/OpenShift cluster. You can also use [kind](https://sigs.k8s.io/kind) or [minikube](https://minikube.sigs.k8s.io/) as a local cluster for testing, or run against a remote cluster. 
## Carbon Dioxide Emission Calculation By default SusQL calculates carbon dioxide emission in grams of CO2 using a carbon intensity value from [US EPA](https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references). +SusQL can be configured to use other static carbon intensity values or query carbon intensity values for a +given location from web APIs such as those provided by +the Green Software Foundation's [Carbon Aware SDK](https://github.com/Green-Software-Foundation/carbon-aware-sdk). Detailed information on configuration of CO2 emission calculation in SusQL is available in the [SusQL carbon calculation documentation.](doc/carbon.md) -### Prerequisites +## Prerequisites Kepler is assumed to be installed in the cluster. -### Installation +## Installation - Follow these instructions for easy SusQL installation from the Red Hat Community Operator catalog on an OpenShift cluster. - [Installation on OpenShift](doc/openshift-installation.md) @@ -31,7 +31,7 @@ Kepler is assumed to be installed in the cluster. - Follow these instructions to install the SusQL Operator from [OperatorHub.io](https://operatorhub.io) on a Kubernetes cluster including OpenShift. - [Installation from OperatorHub.io](doc/operatorhub-installation.md) -- Follow these instructions to install the SusQL Operator with Helm on a Kubernetes cluster including OpenShift. +- Follow these instructions to install the SusQL Operator from a Helm chart on a Kubernetes cluster, including OpenShift. 
- [Installation with Helm](doc/helm-installation.md) diff --git a/VERSION b/VERSION index d788d43..78bae5b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.31 +0.0.32 diff --git a/bundle/manifests/susql-operator.clusterserviceversion.yaml b/bundle/manifests/susql-operator.clusterserviceversion.yaml index d956a76..72b5482 100644 --- a/bundle/manifests/susql-operator.clusterserviceversion.yaml +++ b/bundle/manifests/susql-operator.clusterserviceversion.yaml @@ -27,15 +27,22 @@ metadata: ] capabilities: Basic Install categories: Monitoring - containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.31 - createdAt: "2024-09-28T12:08:54Z" + containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.32 + createdAt: "2024-09-30T05:10:03Z" description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' + features.operators.openshift.io/disconnected: "false" + features.operators.openshift.io/fips-compliant: "false" + features.operators.openshift.io/proxy-aware: "false" + features.operators.openshift.io/tls-profiles: "false" + features.operators.openshift.io/token-auth-aws: "false" + features.operators.openshift.io/token-auth-azure: "false" + features.operators.openshift.io/token-auth-gcp: "false" operators.operatorframework.io/builder: operator-sdk-v1.36.1 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 repository: https://github.com/sustainable-computing-io/susql-operator support: https://github.com/sustainable-computing-io/susql-operator/issues - name: susql-operator.v0.0.31 + name: susql-operator.v0.0.32 namespace: placeholder spec: apiservicedefinitions: {} @@ -205,9 +212,7 @@ spec: drop: - ALL readOnlyRootFilesystem: true - runAsGroup: 14001 runAsNonRoot: true - runAsUser: 14001 - command: - /manager env: @@ -301,7 +306,7 @@ spec: key: CARBON-QUERY-CONV-2J name: susql-config optional: true - image: quay.io/sustainable_computing_io/susql_operator:0.0.31 + image: 
quay.io/sustainable_computing_io/susql_operator:0.0.32 imagePullPolicy: Always livenessProbe: httpGet: @@ -332,12 +337,9 @@ spec: capabilities: drop: - ALL - runAsGroup: 12001 - runAsUser: 12001 + readOnlyRootFilesystem: true securityContext: - runAsGroup: 11001 runAsNonRoot: true - runAsUser: 11001 serviceAccountName: susql-operator-susql-controller-manager terminationGracePeriodSeconds: 10 permissions: @@ -408,5 +410,5 @@ spec: provider: name: SusQL Operator Contributors url: https://github.com/sustainable-computing-io/susql-operator - replaces: susql-operator.v0.0.29 - version: 0.0.31 + replaces: susql-operator.v0.0.30 + version: 0.0.32 diff --git a/cmd/main.go b/cmd/main.go index 28f13fc..801984f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -230,22 +230,23 @@ func main() { susqlLog.Info("Setting up labelGroupReconciler.") if err = (&controller.LabelGroupReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - KeplerPrometheusUrl: keplerPrometheusUrl, - KeplerMetricName: keplerMetricName, - SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl, - SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl, - SamplingRate: time.Duration(samplingRateInteger) * time.Second, - CarbonMethod: carbonMethod, - CarbonIntensity: carbonIntensityFloat, - CarbonIntensityUrl: carbonIntensityUrl, - CarbonIntensityTimeStamp: 0, - CarbonLocation: carbonLocation, - CarbonQueryRate: carbonQueryRateInteger, - CarbonQueryFilter: carbonQueryFilter, - CarbonQueryConv2J: carbonQueryConv2JFloat, - Logger: susqlLog, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + KeplerPrometheusUrl: keplerPrometheusUrl, + KeplerMetricName: keplerMetricName, + SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl, + SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl, + SamplingRate: time.Duration(samplingRateInteger) * time.Second, + CarbonMethod: carbonMethod, + CarbonIntensity: carbonIntensityFloat, + CarbonIntensityUrl: carbonIntensityUrl, + CarbonIntensityTimeStamp: 0, + 
CarbonIntensityErrorTimeStamp: 0, + CarbonLocation: carbonLocation, + CarbonQueryRate: carbonQueryRateInteger, + CarbonQueryFilter: carbonQueryFilter, + CarbonQueryConv2J: carbonQueryConv2JFloat, + Logger: susqlLog, }).SetupWithManager(mgr); err != nil { susqlLog.Error(err, "unable to create controller", "controller", "LabelGroup") os.Exit(1) diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index 3b1abf2..73f91cc 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -14,8 +14,6 @@ spec: allowPrivilegeEscalation: false readOnlyRootFilesystem: true runAsNonRoot: true - runAsUser: 14001 - runAsGroup: 14001 capabilities: drop: - "ALL" diff --git a/config/default/manager_config_patch.yaml b/config/default/manager_config_patch.yaml index 550ee39..bd993a2 100644 --- a/config/default/manager_config_patch.yaml +++ b/config/default/manager_config_patch.yaml @@ -10,9 +10,8 @@ spec: - name: manager imagePullPolicy: Always securityContext: - runAsUser: 11001 - runAsGroup: 11001 allowPrivilegeEscalation: false + readOnlyRootFilesystem: true runAsNonRoot: true capabilities: drop: diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 851a8cd..c86c163 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: quay.io/sustainable_computing_io/susql_operator - newTag: 0.0.31 + newTag: 0.0.32 diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 67a81de..c3c2fc2 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -57,8 +57,6 @@ spec: # - linux securityContext: runAsNonRoot: true - runAsUser: 11001 - runAsGroup: 11001 # TODO(user): For common cases that do not require escalating privileges # it is recommended to ensure that all your Pods/Containers are restrictive. 
# More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted @@ -165,9 +163,8 @@ spec: imagePullPolicy: Always name: manager securityContext: - runAsUser: 12001 - runAsGroup: 12001 allowPrivilegeEscalation: false + readOnlyRootFilesystem : true capabilities: drop: - "ALL" diff --git a/config/manifests/bases/susql-operator.clusterserviceversion.yaml b/config/manifests/bases/susql-operator.clusterserviceversion.yaml index a3d74f8..2796234 100644 --- a/config/manifests/bases/susql-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/susql-operator.clusterserviceversion.yaml @@ -8,6 +8,13 @@ metadata: containerImage: description: 'Aggregates energy and CO2 emission data for pods tagged with SusQL labels ' + features.operators.openshift.io/disconnected: "false" + features.operators.openshift.io/fips-compliant: "false" + features.operators.openshift.io/proxy-aware: "false" + features.operators.openshift.io/tls-profiles: "false" + features.operators.openshift.io/token-auth-aws: "false" + features.operators.openshift.io/token-auth-azure: "false" + features.operators.openshift.io/token-auth-gcp: "false" repository: https://github.com/sustainable-computing-io/susql-operator support: https://github.com/sustainable-computing-io/susql-operator/issues name: susql-operator.v0.0.0 @@ -116,5 +123,5 @@ spec: provider: name: SusQL Operator Contributors url: https://github.com/sustainable-computing-io/susql-operator - replaces: susql-operator.v0.0.29 + replaces: susql-operator.v0.0.30 version: 0.0.0 diff --git a/deployment/README.md b/deployment/README.md new file mode 100644 index 0000000..e664591 --- /dev/null +++ b/deployment/README.md @@ -0,0 +1,32 @@ +# Deployment files + +- `deploy.sh` + Deploy SusQL via a Helm chart. +- `kepler-check.yaml` + Used by `deploy.sh` to verify that Kepler is available. +- `kepler_dashboard.json` + A grafana dashboard. +- `local-cluster-config.yaml` + A yaml file used to deploy a local `kind` cluster. 
+- `prometheus.yaml` + Used by `deploy.sh` to deploy Prometheus in the cluster. +- `setup-local.sh` + Deploy SusQL on a local `kind` cluster. +- `susql-controller` + Helm Chart files used to deploy SusQL. + +## License + +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/deployment/setup-local.sh b/deployment/setup-local.sh index 0bc7af2..0664099 100644 --- a/deployment/setup-local.sh +++ b/deployment/setup-local.sh @@ -1,5 +1,11 @@ #!/usr/bin/env bash +# no matter how the script is invoked, TOP will point to the "deployment" +# directory and hence can be used to access files relative +# to the known directory of the setup-local.sh script. 
+ +TOP=$(dirname $0) + set -e set -o pipefail @@ -19,7 +25,7 @@ export CLUSTER_NAME="local-cluster" if kind get clusters | grep -q "$CLUSTER_NAME"; then echo "> Kind cluster $CLUSTER_NAME already exists" else - kind create cluster --name="$CLUSTER_NAME" --config=./deployment/local-cluster-config.yaml + kind create cluster --name="$CLUSTER_NAME" --config=${TOP}/local-cluster-config.yaml fi # Install Prometheus via Helm @@ -55,7 +61,7 @@ GF_POD=$( -l app.kubernetes.io/name=grafana \ -o jsonpath="{.items[0].metadata.name}" ) -kubectl cp deployment/kepler_dashboard.json monitoring/$GF_POD:/tmp/dashboards/kepler_dashboard.json +kubectl cp ${TOP}/kepler_dashboard.json monitoring/$GF_POD:/tmp/dashboards/kepler_dashboard.json # echo "> Install OLM" @@ -67,7 +73,7 @@ kubectl cp deployment/kepler_dashboard.json monitoring/$GF_POD:/tmp/dashboards/k # kubectl create -f https://operatorhub.io/install/susql-operator.yaml # echo "> Wait for susql to be ready" -# kubectl wait --for=condition=Installed csv/susql-operator.v0.0.30 -n operators --timeout=300s +# kubectl wait --for=condition=Installed csv/susql-operator.v$(cat ${TOP}/../VERSION) -n operators --timeout=300s # Optional: Delete Kind cluster after use diff --git a/deployment/susql-controller/templates/deployment.yaml b/deployment/susql-controller/templates/deployment.yaml index 037dd4b..f45dd34 100644 --- a/deployment/susql-controller/templates/deployment.yaml +++ b/deployment/susql-controller/templates/deployment.yaml @@ -24,9 +24,8 @@ spec: image: {{ required "Please specify a 'containerImage' in the user file" .Values.containerImage }} imagePullPolicy: {{ .Values.imagePullPolicy | default "Always" }} securityContext: - runAsUser: 10001 - runAsGroup: 10001 allowPrivilegeEscalation: false + readOnlyRootFilesystem: true runAsNonRoot: true capabilities: drop: diff --git a/doc/carbon.md b/doc/carbon.md index 6335e5f..0b86092 100644 --- a/doc/carbon.md +++ b/doc/carbon.md @@ -1,16 +1,19 @@ # Carbon Dioxide Emission Estimation 
-There are three primary CO2 emission calculation methods. +SusQL supports three primary CO2 emission calculation methods. "Out-of-the-box" SusQL reports an estimated CO2 emission value for all measured workloads using the `static` method: +The behavior of SusQL carbon calculation can be tuned by modifying the `susql-config` `ConfigMap` in the same namespace that the SusQL operator is running in. +A sample file is provided in `samples/susql-config.yaml`. + ## `static` Method - This `static` method uses a static "carbon intensity value" as a coefficient to calculate grams of CO2 emitted. This calculation method is used when the `CARBON-METHOD` `ConfigMap` value is set to `static`. -#### `static` Method `ConfigMap` Configurable items +#### `static` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `static` method is enabled when this is set to `static`. - - `CARBON-INTENSITY` - Carbon intensity value. A coefficient used to convert Joules to grams of CO2 per Joule. The unit definition is grams of CO2 per Joule. + - `CARBON-INTENSITY` - Carbon intensity value. A coefficient used to convert Joules to grams of CO2. The unit definition is grams of CO2 per Joule. (If you have a custom grams per kWh carbon intensity value, you can multiply it by 0.0000002777777778 to get grams CO2 per Joule.) The default carbon intensity value is based on [US EPA](https://www.epa.gov/energy/greenhouse-gases-equivalencies-calculator-calculations-and-references) data. ## `simpledynamic` Method @@ -34,7 +37,7 @@ There are three primary CO2 emission calculation methods. #### Configuring and installing Carbon Aware SDK - Following guidance in https://github.com/Green-Software-Foundation/carbon-aware-sdk/blob/dev/casdk-docs/docs/overview/enablement.md, the Carbon Aware SDK can be easily installed on a Kubernetes cluster such as OpenShift. -- Preparation: clone the repository and edit `helm-chart/values.yaml` as needed to reflect private password, configuration, etc. 
+- Preparation: clone the Carbon Aware SDK repository and edit `helm-chart/values.yaml` as needed to reflect private password, configuration, etc. (Useful configuration tips available at https://github.com/Green-Software-Foundation/carbon-aware-sdk/blob/dev/casdk-docs/docs/tutorial-extras/configuration.md ) ``` @@ -43,8 +46,8 @@ vi helm-chart/values.yaml ``` - Preparation: required software and permission - Ensure that `helm`, and `kubectl` (or `oc`) are installed - - Ensure that CLI user is logged in to cluster with sufficient permissions -- Perform installation + - Ensure that the CLI user is logged in to the cluster with sufficient permissions +- Perform installation: (The example installs into namespace `gsf`. However, other namespaces may be used.) ``` cd carbon-aware-sdk helm upgrade --install --wait carbon-aware-sdk helm-chart --create-namespace gsf @@ -56,11 +59,11 @@ Note the value reported for "HOST/PORT". This will be used in the next configura Update the following items in the `susql-config.yaml` file: ``` CARBON-METHOD: "casdk" - CARBON-INTENSITY-URL: "http:///emissions/bylocation?location=%s" + CARBON-INTENSITY-URL: "http:///emissions/bylocation?location=%s" CARBON-LOCATION: "" CARBON-QUERY-FILTER: "rating" ``` -Tip: try this command to verify sdk container functionality and also view available locations: `curl -s "http:/locations"` +Tip: try this command to verify sdk container functionality and also view available locations: `curl -s "http:/locations"` Apply the updated `susql-config.yaml` file: @@ -68,7 +71,7 @@ Apply the updated `susql-config.yaml` file: oc apply -f susql-config.yaml -n ``` You are now ready to install and use the SusQL operator. -If the SusQL Operator is alreay installed, then restart the control pod. +If the SusQL Operator is already installed, then restart the control pod to enable new `susql-config` values. 
#### `casdk` Method `ConfigMap` Configurable Items - `CARBON-METHOD` - The `casdk` method is enabled when this is set to `casdk`. diff --git a/doc/helm-installation.md b/doc/helm-installation.md index 56129e8..8005b74 100644 --- a/doc/helm-installation.md +++ b/doc/helm-installation.md @@ -65,7 +65,7 @@ The following environment variables will influence the way that the SusQL Operat | SUSQL_REGISTRY | quay.io/sustainable_computing_io | Container registry that SusQL is stored in | | SUSQL_IMAGE_NAME | susql_operator | Image name used on SusQL container registry | | SUSQL_IMAGE_TAG | latest | Tag for SusQL container | -| CARBON_METHOD | static | "static", "simpledynamic", "scadk" | +| CARBON_METHOD | static | "static", "simpledynamic", "casdk" | | CARBON_INTENSITY | "0.00011583333" | Carbon intensity in grams CO2 / Joule | | CARBON_INTENSITY_URL | | Web API to query carbon intensity | | CARBON_LOCATION | | Location for carbon intensity query | diff --git a/doc/openshift-installation.md b/doc/openshift-installation.md index 2e64d29..c0e2e75 100644 --- a/doc/openshift-installation.md +++ b/doc/openshift-installation.md @@ -1,6 +1,7 @@ -# SusQL Operator Installation via OpenShift Community Operator Catalog +# SusQL Operator Installation via OpenShift Red Hat Community Operator Catalog -Installation of the SusQL Operator on OpenShift is very easy. +Installation of the SusQL Operator on OpenShift using the OpenShift +Red Hat Community Operator Catalog is very easy. ## Prerequisites @@ -24,7 +25,7 @@ Use the OpenShift web console to install the SusQL Operator: Before deploying the SusQL Operator create a `ConfigMap` called `susql-config` in the same namespace that the operator will run in. -[susql-config.yaml](susql-config.yaml) is a good starting point. If you download it first, you +[susql-config.yaml](../samples/susql-config.yaml) is a good starting point. If you download it first, you could create the ConfigMap with `oc apply -n -f susql-config.yaml`. 
If you update (or create) the ConfigMap after the SusQL Operator has been installed, then restarting the SusQL Operator controller pod will enable the changes. (e.g., Delete the pod, and allow it to be recreated automatically.) diff --git a/doc/operatorhub-installation.md b/doc/operatorhub-installation.md index 5f91caa..7224eb4 100644 --- a/doc/operatorhub-installation.md +++ b/doc/operatorhub-installation.md @@ -49,7 +49,7 @@ Next, use the OpenShift web console to install the SusQL Operator: Before deploying the SusQL Operator create a `ConfigMap` called `susql-config` in the same namespace that the operator will run in. -[susql-config.yaml](susql-config.yaml) is a good starting point. If you download it first, you +[susql-config.yaml](../samples/susql-config.yaml) is a good starting point. If you download it first, you could create the ConfigMap with `oc apply -n -f susql-config.yaml`. If you update (or create) the ConfigMap after the SusQL Operator has been installed, then restarting the SusQL Operator controller pod will enable the changes. (e.g., Delete the pod, and allow it to be recreated automatically.) 
diff --git a/internal/controller/carbon_query.go b/internal/controller/carbon_query.go index 2ffff31..b037468 100644 --- a/internal/controller/carbon_query.go +++ b/internal/controller/carbon_query.go @@ -25,17 +25,16 @@ import ( ) func queryCarbonIntensity(url string, location string, filter string, conv2J float64) (float64, error) { + queryUrl := fmt.Sprintf(url, location) - fmt.Println("CARBON QUERY: url=" + fmt.Sprintf(url, location)) - - response, err := http.Get(fmt.Sprintf(url, location)) + response, err := http.Get(queryUrl) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s", err, queryUrl) } responseData, err := ioutil.ReadAll(response.Body) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s\nresponse=%s", err, queryUrl, string(responseData)) } length := gjson.Get(string(responseData), "#").Int() - 1 @@ -46,7 +45,7 @@ func queryCarbonIntensity(url string, location string, filter string, conv2J flo carbonIntensityFloat, err := strconv.ParseFloat(carbonIntensityString, 64) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonIntensity: %w\nURL=%s\nresponse=%s\nfilter=%s\nresult=%s", err, queryUrl, string(responseData), newFilter, carbonIntensityString) } // return nil error since no error @@ -54,22 +53,23 @@ func queryCarbonIntensity(url string, location string, filter string, conv2J flo } func querySimpleCarbonIntensity(url string, location string, filter string, conv2J float64) (float64, error) { + queryUrl := fmt.Sprintf(url, location) response, err := http.Get(fmt.Sprintf(url, location)) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("querySimpleCarbonIntensity: %w\nURL=%s", err, queryUrl) } responseData, err := ioutil.ReadAll(response.Body) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonSimpleIntensity: %w\nURL=%s\nresponse=%s", err, queryUrl, string(responseData)) } carbonIntensityString := 
gjson.Get(string(responseData), filter).String() carbonIntensityFloat, err := strconv.ParseFloat(carbonIntensityString, 64) if err != nil { - return 0.0, err + return 0.0, fmt.Errorf("queryCarbonSimpleIntensity: %w\nURL=%s\nresponse=%s\nfilter=%s\nresult=%s", err, queryUrl, string(responseData), filter, carbonIntensityString) } // return nil error since no error diff --git a/internal/controller/labelgroup_controller.go b/internal/controller/labelgroup_controller.go index 030d521..9b5c83c 100644 --- a/internal/controller/labelgroup_controller.go +++ b/internal/controller/labelgroup_controller.go @@ -36,21 +36,22 @@ import ( // LabelGroupReconciler reconciles a LabelGroup object type LabelGroupReconciler struct { client.Client - Scheme *runtime.Scheme - KeplerPrometheusUrl string - KeplerMetricName string - SusQLPrometheusDatabaseUrl string - SusQLPrometheusMetricsUrl string - SamplingRate time.Duration // Sampling rate for all LabelGroups - CarbonMethod string - CarbonIntensity float64 - CarbonIntensityUrl string - CarbonIntensityTimeStamp int64 - CarbonLocation string - CarbonQueryRate int64 - CarbonQueryFilter string - CarbonQueryConv2J float64 - Logger logr.Logger + Scheme *runtime.Scheme + KeplerPrometheusUrl string + KeplerMetricName string + SusQLPrometheusDatabaseUrl string + SusQLPrometheusMetricsUrl string + SamplingRate time.Duration // Sampling rate for all LabelGroups + CarbonMethod string + CarbonIntensity float64 + CarbonIntensityUrl string + CarbonIntensityTimeStamp int64 + CarbonIntensityErrorTimeStamp int64 + CarbonLocation string + CarbonQueryRate int64 + CarbonQueryFilter string + CarbonQueryConv2J float64 + Logger logr.Logger } const ( @@ -59,6 +60,7 @@ const ( fixingDelay = 15 * time.Second // Time to wait in the event the LabelGroup was badly constructed nopodDelay = 15 * time.Second // Time to wait in the event no pods are found errorDelay = 1 * time.Second // Time to wait when an error happens due to network connectivity issues + 
carbonRetryDelay = 300 // Number of seconds to wait for retry after carbon query failure ) var ( @@ -115,27 +117,31 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) // TODO: put this code only in Reloading and Aggregating cases if r.CarbonMethod == "simpledynamic" { currentEpoch := time.Now().Unix() - if (currentEpoch - r.CarbonIntensityTimeStamp) > r.CarbonQueryRate { + if (currentEpoch-r.CarbonIntensityTimeStamp) > r.CarbonQueryRate && (currentEpoch-r.CarbonIntensityErrorTimeStamp) > carbonRetryDelay { newCarbonIntensity, err := querySimpleCarbonIntensity(r.CarbonIntensityUrl, r.CarbonLocation, r.CarbonQueryFilter, r.CarbonQueryConv2J) if err == nil { r.CarbonIntensity = newCarbonIntensity r.CarbonIntensityTimeStamp = currentEpoch + r.CarbonIntensityErrorTimeStamp = 0 r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) } else { + r.CarbonIntensityErrorTimeStamp = currentEpoch r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.") } } } if r.CarbonMethod == "casdk" { currentEpoch := time.Now().Unix() - if (currentEpoch - r.CarbonIntensityTimeStamp) > r.CarbonQueryRate { + if (currentEpoch-r.CarbonIntensityTimeStamp) > r.CarbonQueryRate && (currentEpoch-r.CarbonIntensityErrorTimeStamp) > carbonRetryDelay { newCarbonIntensity, err := queryCarbonIntensity(r.CarbonIntensityUrl, r.CarbonLocation, r.CarbonQueryFilter, r.CarbonQueryConv2J) if err == nil { r.CarbonIntensity = newCarbonIntensity r.CarbonIntensityTimeStamp = currentEpoch - r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-simpledynamic] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) + r.CarbonIntensityErrorTimeStamp = 0 + r.Logger.V(5).Info(fmt.Sprintf("[Reconcile-casdk] Obtained dynamic carbon intensity of %.10f.", newCarbonIntensity)) } else { - r.Logger.V(0).Error(err, "[Reconcile-simpledynamic] Unable to query carbon intensity.") + 
r.CarbonIntensityErrorTimeStamp = currentEpoch + r.Logger.V(0).Error(err, "[Reconcile-casdk] Unable to query carbon intensity.") } } } diff --git a/samples/README.md b/samples/README.md new file mode 100644 index 0000000..cfe62b7 --- /dev/null +++ b/samples/README.md @@ -0,0 +1,38 @@ +# samples + +## scripts +- `start.sh` - deploy labels and start workloads +- `clean.sh` - cleanup labels and workloads +- `labelgroups.sh` - view LabelGroup information directly from LabelGroup CR +- `susqltop` - show top energy consuming groups +- `susqltopmon` - run susqltop periodically + +## configuration yaml file +- `susql-config.yaml` + +## labelgroup yaml files +- `labelgroups.yaml` + +## workload yaml files +- `energy-consumer-job.yaml` +- `gpu-consumer-job.yaml` +- `training-job-1.yaml` +- `training-job-2.yaml` +- `rhosaij.yaml` + + +## License + +Copyright 2023, 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/doc/susql-config.yaml b/samples/susql-config.yaml similarity index 100% rename from doc/susql-config.yaml rename to samples/susql-config.yaml