From f38ebee8f36e1135d6de3beed3af5a45ba0b722a Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Fri, 21 Jun 2024 13:05:10 +0900 Subject: [PATCH] Improve configuration and debugging Signed-off-by: Scott Trent --- Dockerfile | 3 +++ .../susql-operator.clusterserviceversion.yaml | 14 ++++++++++---- cmd/debug-entrypoint.sh | 16 ++++++++++++++++ cmd/main.go | 17 +++++++++++------ config/manager/manager.yaml | 13 ++++++++++--- .../susql-controller/templates/deployment.yaml | 5 ++++- deployment/susql-controller/values.yaml | 3 +++ 7 files changed, 57 insertions(+), 14 deletions(-) create mode 100755 cmd/debug-entrypoint.sh diff --git a/Dockerfile b/Dockerfile index 6981bf2..ce43f79 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,8 +26,11 @@ RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o ma # Use distroless as minimal base image to package the manager binary # Refer to https://github.com/GoogleContainerTools/distroless for more details FROM gcr.io/distroless/static:nonroot +# FROM gcr.io/distroless/static:debug WORKDIR / COPY --from=builder /workspace/manager . +COPY cmd/debug-entrypoint.sh . 
USER 65532:65532 ENTRYPOINT ["/manager"] +# ENTRYPOINT ["/debug-entrypoint.sh"] diff --git a/bundle/manifests/susql-operator.clusterserviceversion.yaml b/bundle/manifests/susql-operator.clusterserviceversion.yaml index befe36e..5f40f66 100644 --- a/bundle/manifests/susql-operator.clusterserviceversion.yaml +++ b/bundle/manifests/susql-operator.clusterserviceversion.yaml @@ -23,7 +23,7 @@ metadata: capabilities: Basic Install categories: Monitoring containerImage: quay.io/sustainable_computing_io/susql_operator:0.0.19 - createdAt: "2024-06-14T17:42:05Z" + createdAt: "2024-06-21T03:56:29Z" description: 'Aggregates energy data from pods tagged with SusQL labels ' operators.operatorframework.io/builder: operator-sdk-v1.34.1 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 @@ -197,14 +197,14 @@ spec: drop: - ALL - args: - - --leader-elect + - --leader-elect=$(LEADER-ELECT) - --kepler-prometheus-url=$(KEPLER-PROMETHEUS-URL) - --kepler-metric-name=$(KEPLER-METRIC-NAME) - --susql-prometheus-database-url=$(SUSQL-PROMETHEUS-DATABASE-URL) - --susql-prometheus-metrics-url=$(SUSQL-PROMETHEUS-METRICS-URL) - --sampling-rate=$(SAMPLING-RATE) - - --health-probe-bind-address=:8081 - - --metrics-bind-address=127.0.0.1:9999 + - --health-probe-bind-address=$(HEALTH-PROBE-BIND-ADDRESS) + - --metrics-bind-address=$(METRICS-BIND-ADDRESS) command: - /manager env: @@ -218,6 +218,12 @@ spec: value: http://0.0.0.0:8082 - name: SAMPLING-RATE value: "2" + - name: LEADER-ELECT + value: "true" + - name: HEALTH-PROBE-BIND-ADDRESS + value: :8081 + - name: METRICS-BIND-ADDRESS + value: 127.0.0.1:9999 image: quay.io/sustainable_computing_io/susql_operator:0.0.19 imagePullPolicy: IfNotPresent livenessProbe: diff --git a/cmd/debug-entrypoint.sh b/cmd/debug-entrypoint.sh new file mode 100755 index 0000000..6972314 --- /dev/null +++ b/cmd/debug-entrypoint.sh @@ -0,0 +1,16 @@ +#!/busybox/sh + +echo +date +echo entered entrypoint.sh + +/manager +ec=$? + +echo +echo MANAGER TERMINATED! 
ERROR=${ec} AT $(date) +echo + +echo sleeping +sleep 9999 +echo terminating diff --git a/cmd/main.go b/cmd/main.go index ec07534..15b5cce 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -61,24 +61,29 @@ func main() { var susqlPrometheusDatabaseUrl string var samplingRate string - flag.StringVar(&metricsAddr, "metrics-bind-address", ":8082", "The address the metric endpoint binds to.") - flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") - flag.BoolVar(&enableLeaderElection, "leader-elect", false, - "Enable leader election for controller manager. "+ - "Enabling this will ensure there is only one active controller manager.") - // NOTE: these can be set as env or flag, flag takes precedence over env keplerPrometheusUrlEnv := os.Getenv("KEPLER-PROMETHEUS-URL") keplerMetricNameEnv := os.Getenv("KEPLER-METRIC-NAME") susqlPrometheusDatabaseUrlEnv := os.Getenv("SUSQL-PROMETHEUS-DATABASE-URL") susqlPrometheusMetricsUrlEnv := os.Getenv("SUSQL-PROMETHEUS-METRICS-URL") samplingRateEnv := os.Getenv("SAMPLING-RATE") + metricsAddrEnv := os.Getenv("METRICS-BIND-ADDRESS") + probeAddrEnv := os.Getenv("HEALTH-PROBE-BIND-ADDRESS") + enableLeaderElectionEnv, err := strconv.ParseBool(os.Getenv("LEADER-ELECT")) + if err != nil { + enableLeaderElectionEnv = false + } flag.StringVar(&keplerPrometheusUrl, "kepler-prometheus-url", keplerPrometheusUrlEnv, "The URL for the Prometheus server where Kepler stores the energy data") flag.StringVar(&keplerMetricName, "kepler-metric-name", keplerMetricNameEnv, "The metric name to be queried in the kepler Prometheus server") flag.StringVar(&susqlPrometheusDatabaseUrl, "susql-prometheus-database-url", susqlPrometheusDatabaseUrlEnv, "The URL for the Prometheus database where SusQL stores the energy data") flag.StringVar(&susqlPrometheusMetricsUrl, "susql-prometheus-metrics-url", susqlPrometheusMetricsUrlEnv, "The URL for the Prometheus metrics where SusQL exposes the energy data") 
flag.StringVar(&samplingRate, "sampling-rate", samplingRateEnv, "Sampling rate in seconds") + flag.StringVar(&metricsAddr, "metrics-bind-address", metricsAddrEnv, "The address the metric endpoint binds to.") + flag.StringVar(&probeAddr, "health-probe-bind-address", probeAddrEnv, "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", enableLeaderElectionEnv, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") opts := zap.Options{ Development: true, diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index b060fad..9b8ace9 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -67,6 +67,7 @@ spec: containers: - command: - /manager + # - /debug-entrypoint.sh env: - name: KEPLER-PROMETHEUS-URL value: "https://thanos-querier.openshift-monitoring.svc.cluster.local:9091" @@ -79,16 +80,22 @@ spec: value: "http://0.0.0.0:8082" - name: SAMPLING-RATE value: "2" + - name: LEADER-ELECT + value: "true" + - name: HEALTH-PROBE-BIND-ADDRESS + value: ":8081" + - name: METRICS-BIND-ADDRESS + value: "127.0.0.1:9999" args: - - --leader-elect + - --leader-elect=$(LEADER-ELECT) # - --deployment-namespace=susql-operator - --kepler-prometheus-url=$(KEPLER-PROMETHEUS-URL) - --kepler-metric-name=$(KEPLER-METRIC-NAME) - --susql-prometheus-database-url=$(SUSQL-PROMETHEUS-DATABASE-URL) - --susql-prometheus-metrics-url=$(SUSQL-PROMETHEUS-METRICS-URL) - --sampling-rate=$(SAMPLING-RATE) - - --health-probe-bind-address=:8081 - - --metrics-bind-address=127.0.0.1:9999 + - --health-probe-bind-address=$(HEALTH-PROBE-BIND-ADDRESS) + - --metrics-bind-address=$(METRICS-BIND-ADDRESS) image: '' imagePullPolicy: IfNotPresent name: manager diff --git a/deployment/susql-controller/templates/deployment.yaml b/deployment/susql-controller/templates/deployment.yaml index f026d2c..67f049d 100644 --- a/deployment/susql-controller/templates/deployment.yaml 
+++ b/deployment/susql-controller/templates/deployment.yaml @@ -25,10 +25,13 @@ spec: imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }} args: - "--kepler-prometheus-url={{ .Values.keplerPrometheusUrl }}" - - "--kepler-metric-name={{ .Values.KeplerMetricName }}" + - "--kepler-metric-name={{ .Values.keplerMetricName }}" - "--susql-prometheus-database-url={{ .Values.susqlPrometheusDatabaseUrl }}" - "--susql-prometheus-metrics-url={{ .Values.susqlPrometheusMetricsUrl }}" - "--sampling-rate={{ .Values.samplingRate }}" + - "--metrics-bind-address={{ .Values.metricsAddr }}" + - "--health-probe-bind-address={{ .Values.healthProbeAddr }}" + - "--leader-elect={{ .Values.leaderElect }}" ports: - name: metrics containerPort: 8082 diff --git a/deployment/susql-controller/values.yaml b/deployment/susql-controller/values.yaml index f1d7638..2edb672 100644 --- a/deployment/susql-controller/values.yaml +++ b/deployment/susql-controller/values.yaml @@ -24,3 +24,6 @@ keplerMetricName: "kepler_container_joules_total" susqlPrometheusDatabaseUrl: "http://prometheus-susql.openshift-kepler-operator.svc.cluster.local:9090" susqlPrometheusMetricsUrl: "http://0.0.0.0:8082" samplingRate: "2" +metricsAddr: "127.0.0.1:9999" +healthProbeAddr: ":8081" +leaderElect: "true"