From 6809ee60bf7c449434982b3ce02590d0141c5cca Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Tue, 5 Mar 2024 15:30:20 +0900 Subject: [PATCH 1/2] exploratory output Signed-off-by: Scott Trent --- internal/controller/resource_manager.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/controller/resource_manager.go b/internal/controller/resource_manager.go index 924cce5..47c283e 100644 --- a/internal/controller/resource_manager.go +++ b/internal/controller/resource_manager.go @@ -19,6 +19,8 @@ package controller import ( "context" + "fmt" + "sigs.k8s.io/controller-runtime/pkg/client" susql "github.com/sustainable-computing-io/susql-operator/api/v1" @@ -37,6 +39,7 @@ func (r *LabelGroupReconciler) GetPodNamesMatchingLabels(ctx context.Context, la for _, pod := range pods.Items { podNames = append(podNames, pod.Name) + fmt.Printf("%+v\n", pod) } return podNames, nil From 5e87d394d598d1ba2489118b0a5a8a08b4ac6fe3 Mon Sep 17 00:00:00 2001 From: Scott Trent Date: Tue, 12 Mar 2024 16:43:40 +0900 Subject: [PATCH 2/2] include namespace with pod search to resolve issue 2 Signed-off-by: Scott Trent --- internal/controller/labelgroup_controller.go | 4 ++-- internal/controller/prometheus_manager.go | 21 +++++++++++++++++--- internal/controller/resource_manager.go | 11 +++++----- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/internal/controller/labelgroup_controller.go b/internal/controller/labelgroup_controller.go index ac4c074..998811d 100644 --- a/internal/controller/labelgroup_controller.go +++ b/internal/controller/labelgroup_controller.go @@ -167,7 +167,7 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) case susql.Aggregating: // Get list of pods matching the label group - podNames, err := r.GetPodNamesMatchingLabels(ctx, labelGroup) + podNames, namespaceNames, err := r.GetPodNamesMatchingLabels(ctx, labelGroup) if err != nil { fmt.Printf("ERROR [Reconcile]: Couldn't get pods for the labels provided\n") @@ 
-175,7 +175,7 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) } // Aggregate Kepler measurements for these set of pods - metricValues, err := r.GetMetricValuesForPodNames(keplerMetricName, podNames) + metricValues, err := r.GetMetricValuesForPodNames(keplerMetricName, podNames, namespaceNames) if err != nil { fmt.Printf("ERROR [Reconcile]: Querying Prometheus didn't work: %v\n", err) diff --git a/internal/controller/prometheus_manager.go b/internal/controller/prometheus_manager.go index bb3e1c2..721c4bd 100644 --- a/internal/controller/prometheus_manager.go +++ b/internal/controller/prometheus_manager.go @@ -86,7 +86,12 @@ func (r *LabelGroupReconciler) GetMostRecentValue(susqlPrometheusQuery string) ( } } -func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, podNames []string) (map[string]float64, error) { +func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, podNames []string, namespaceNames[]string) (map[string]float64, error) { + if len(podNames) == 0 { + fmt.Printf("ERROR [GetMetricValuesForPodNames]: No pods under observation. Currently len(podNames)=0.\n") + return nil, nil + } + var roundtripper http.RoundTripper = nil if strings.HasPrefix(r.KeplerPrometheusUrl, "https://") { rttls := &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}} @@ -108,13 +113,22 @@ func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, pod ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - queryString := fmt.Sprintf("%s{pod_name=~\"%s\",mode=\"dynamic\"}", metricName, strings.Join(podNames, "|")) +/* original query */ +/* oldQueryString := fmt.Sprintf("%s{pod_name=~\"%s\",mode=\"dynamic\"}", metricName, strings.Join(podNames, "|")) */ + +/* new query for issue 2: can improve runtime efficiency... 
*/ + queryString := fmt.Sprintf("sum(%s{pod_name=\"%s\",container_namespace=\"%s\",mode=\"dynamic\"})", metricName, podNames[0], namespaceNames[0]) + for i := 1; i