diff --git a/cmd/main.go b/cmd/main.go index e923eea..f41c66d 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -32,6 +32,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "go.uber.org/zap/zapcore" susqlv1 "github.com/sustainable-computing-io/susql-operator/api/v1" "github.com/sustainable-computing-io/susql-operator/internal/controller" @@ -40,7 +41,7 @@ import ( var ( scheme = runtime.NewScheme() - setupLog = ctrl.Log.WithName("setup") + susqlLog = ctrl.Log.WithName("susql") ) func init() { @@ -71,6 +72,7 @@ func main() { opts := zap.Options{ Development: true, + Level: zapcore.Level(-2), } opts.BindFlags(flag.CommandLine) flag.Parse() @@ -97,7 +99,7 @@ func main() { // LeaderElectionReleaseOnCancel: true, }) if err != nil { - setupLog.Error(err, "unable to start manager") + susqlLog.Error(err, "unable to start manager") os.Exit(1) } @@ -114,24 +116,25 @@ func main() { SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl, SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl, SamplingRate: time.Duration(samplingRateInteger) * time.Second, + Logger: susqlLog, }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "LabelGroup") + susqlLog.Error(err, "unable to create controller", "controller", "LabelGroup") os.Exit(1) } //+kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { - setupLog.Error(err, "unable to set up health check") + susqlLog.Error(err, "unable to set up health check") os.Exit(1) } if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { - setupLog.Error(err, "unable to set up ready check") + susqlLog.Error(err, "unable to set up ready check") os.Exit(1) } - setupLog.Info("starting manager") + susqlLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { - setupLog.Error(err, "problem running manager") + susqlLog.Error(err, "problem running manager") os.Exit(1) } } diff --git a/deployment/deploy.sh b/deployment/deploy.sh index a6f1778..4cc25b5 100644 --- a/deployment/deploy.sh +++ b/deployment/deploy.sh @@ -11,7 +11,7 @@ if ! command -v kubectl &> /dev/null; then fi if [[ ! -z ${SUSQL_ENHANCED} ]]; then - echo "->Deploying enhanced SusQL configuration." + echo "->Using enhanced SusQL configuration." fi # Check if there is a current context @@ -81,7 +81,7 @@ if [[ -z $(kubectl get namespaces --no-headers -o custom-columns=':{.metadata.na echo "Namespace '${SUSQL_NAMESPACE}' doesn't exist. Creating it." kubectl create namespace ${SUSQL_NAMESPACE} else - echo "Namespace '${SUSQL_NAMESPACE}' found. Deploying using it." + echo "Namespace '${SUSQL_NAMESPACE}' found. Using it." fi # Set SusQL installation variables @@ -117,8 +117,6 @@ echo "========================================================================== # Actions to perform, separated by comma actions=${1:-"kepler-check,prometheus-undeploy,prometheus-deploy,susql-undeploy,susql-deploy"} -exit - # output deploy information LOGFILE=.susql-deploy-info.txt LASTLOGFILE=.susql-deploy-info-last.txt diff --git a/internal/controller/labelgroup_controller.go b/internal/controller/labelgroup_controller.go index 9f7c861..363bec7 100644 --- a/internal/controller/labelgroup_controller.go +++ b/internal/controller/labelgroup_controller.go @@ -26,6 +26,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" + "github.com/go-logr/logr" susql "github.com/sustainable-computing-io/susql-operator/api/v1" ) @@ -39,6 +40,7 @@ type LabelGroupReconciler struct { SusQLPrometheusDatabaseUrl string SusQLPrometheusMetricsUrl string SamplingRate time.Duration // Sampling rate for all the label groups + Logger logr.Logger } const ( @@ -79,12 +81,12 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Check that the susql prometheus labels are created if len(labelGroup.Status.PrometheusLabels) == 0 && labelGroup.Status.Phase != susql.Initializing { - fmt.Printf("WARNING [Reconcile]: The SusQL prometheus labels for LabelGroup '%s' in namespace '%s' have not been created. Reinitializing this LabelGroup.\n", labelGroup.Name, labelGroup.Namespace) + r.Logger.V(1).Info(fmt.Sprintf("[Reconcile] The SusQL prometheus labels for LabelGroup '%s' in namespace '%s' have not been created. Reinitializing this LabelGroup.", labelGroup.Name, labelGroup.Namespace)) labelGroup.Status.Phase = susql.Initializing if err := r.Status().Update(ctx, labelGroup); err != nil { - fmt.Printf("ERROR [Reconcile]: Couldn't update the phase\n") + r.Logger.V(0).Error(err,"[Reconcile] Couldn't update the phase.") } return ctrl.Result{}, nil @@ -93,8 +95,9 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Decide what action to take based on the state of the labelGroup switch labelGroup.Status.Phase { case susql.Initializing: + r.Logger.V(2).Info("[Reconcile] Entered initializing case.") if len(labelGroup.Spec.Labels) > len(susqlPrometheusLabelNames) { - fmt.Printf("ERROR [Reconcile]: The number of provided labels is greater than the maximum number of supported labels (e.g., up to %d labels)\n", len(susqlPrometheusLabelNames)) + r.Logger.V(0).Error(fmt.Errorf("[Reconcile] The number of provided labels is greater than the maximum number of supported labels (e.g., up to %d labels).", len(susqlPrometheusLabelNames)),"") return ctrl.Result{RequeueAfter: fixingDelay}, nil } @@ -135,7 +138,7 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) labelGroup.Status.Phase = susql.Reloading if err := r.Status().Update(ctx, labelGroup); err != nil { - fmt.Printf("ERROR [Reconcile]: Couldn't update status of the LabelGroup\n") + r.Logger.V(0).Error(err, "[Reconcile] Couldn't update status of the LabelGroup.") return ctrl.Result{RequeueAfter: fixingDelay}, nil } @@ -143,12 +146,13 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil case susql.Reloading: + r.Logger.V(2).Info("[Reconcile] Entered reloading case.") // Reload data from existing database if !labelGroup.Spec.DisableUsingMostRecentValue { totalEnergy, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusQuery) if err != nil { - fmt.Printf("ERROR [Reconcile]: Couldn't retrieve most recent value\n") + r.Logger.V(0).Error(err,"[Reconcile] Couldn't retrieve most recent value.") return ctrl.Result{RequeueAfter: fixingDelay}, nil } @@ -158,7 +162,7 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) labelGroup.Status.Phase = susql.Aggregating if err := r.Status().Update(ctx, labelGroup); err != nil { - fmt.Printf("ERROR [Reconcile]: Couldn't update status of the LabelGroup\n") + r.Logger.V(0).Error(err, "[Reconcile] Couldn't update status of the LabelGroup.") return ctrl.Result{RequeueAfter: fixingDelay}, nil } @@ -166,11 +170,12 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil case susql.Aggregating: +// r.Logger.V(2).Info("[Reconcile] Entered aggregating case.") // trace // Get list of pods matching the label group podNames, namespaceNames, err := r.GetPodNamesMatchingLabels(ctx, labelGroup) if err != nil { - fmt.Printf("ERROR [Reconcile]: Couldn't get pods for the labels provided\n") + r.Logger.V(0).Error(err,"[Reconcile] Couldn't get pods for the labels provided.") return ctrl.Result{}, err } @@ -178,7 +183,7 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) metricValues, err := r.GetMetricValuesForPodNames(r.KeplerMetricName, podNames, namespaceNames) if err != nil { - fmt.Printf("ERROR [Reconcile]: Querying Prometheus didn't work: %v\n", err) + r.Logger.V(0).Error(err,"[Reconcile] Querying Prometheus didn't work.") return ctrl.Result{RequeueAfter: errorDelay}, nil } @@ -230,11 +235,12 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{RequeueAfter: r.SamplingRate}, nil default: + r.Logger.V(2).Info("[Reconcile] Entered default case.") // First time seeing this object labelGroup.Status.Phase = susql.Initializing if err := r.Status().Update(ctx, labelGroup); err != nil { - fmt.Printf("ERROR [Reconcile]: Couldn't set object to 'Initializing'\n") + r.Logger.V(0).Error(err,"[Reconcile] Couldn't set object to 'Initializing'.") } return ctrl.Result{}, nil diff --git a/internal/controller/prometheus_manager.go b/internal/controller/prometheus_manager.go index 721c4bd..fbefcfc 100644 --- a/internal/controller/prometheus_manager.go +++ b/internal/controller/prometheus_manager.go @@ -53,9 +53,9 @@ func (r *LabelGroupReconciler) GetMostRecentValue(susqlPrometheusQuery string) ( }) if err != nil { - fmt.Printf("ERROR [GetMostRecentValue]: Couldn't create HTTP client: %v\n", err) - fmt.Printf("Query: %s\n", susqlPrometheusQuery) - fmt.Printf("SusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl) + r.Logger.V(0).Error(err, fmt.Sprintf("[GetMostRecentValue] Couldn't create HTTP client.\n") + + fmt.Sprintf("\tQuery: %s\n", susqlPrometheusQuery) + + fmt.Sprintf("\tSusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl) ) os.Exit(1) } @@ -66,16 +66,19 @@ func (r *LabelGroupReconciler) GetMostRecentValue(susqlPrometheusQuery string) ( queryString := fmt.Sprintf("max_over_time(%s[%s])", susqlPrometheusQuery, maxQueryTime) results, warnings, err := v1api.Query(ctx, queryString, time.Now(), v1.WithTimeout(0*time.Second)) + r.Logger.V(2).Info(fmt.Sprintf("[GetMostRecentValue] Query: %s", queryString)) // trace + r.Logger.V(2).Info(fmt.Sprintf("[GetMostRecentValue] Results: '%v'", results)) // trace + if len(warnings) > 0 { - fmt.Printf("WARNING [GetMostRecentValue]: %v\n", warnings) - fmt.Printf("Query: %s\n", susqlPrometheusQuery) - fmt.Printf("SusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl) + r.Logger.V(0).Info(fmt.Sprintf("WARNING [GetMostRecentValue] %v\n", warnings) + + fmt.Sprintf("\tQuery: %s\n", susqlPrometheusQuery) + + fmt.Sprintf("\tSusQLPrometheusDatabaseUrl: %s", r.SusQLPrometheusDatabaseUrl) ) } if err != nil { - fmt.Printf("ERROR [GetMostRecentValue]: Querying Prometheus didn't work: %v\n", err) - fmt.Printf("Query: %s\n", susqlPrometheusQuery) - fmt.Printf("SusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl) + r.Logger.V(0).Error(err, "[GetMostRecentValue] Querying Prometheus didn't work.\n" + + fmt.Sprintf("\tQuery: %s\n", susqlPrometheusQuery) + + fmt.Sprintf("\tSusQLPrometheusDatabaseUrl: %s", r.SusQLPrometheusDatabaseUrl) ) return 0.0, err } @@ -88,7 +91,7 @@ func (r *LabelGroupReconciler) GetMostRecentValue(susqlPrometheusQuery string) ( func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, podNames []string, namespaceNames[]string) (map[string]float64, error) { if len(podNames) == 0 { - fmt.Printf("ERROR [GetMetricValuesForPodNames]: No pods under observation. Currently len(podNames)=0.\n") + r.Logger.V(1).Info("[GetMetricValuesForPodNames] No pods under observation. Currently len(podNames)=0.") return nil, nil } @@ -103,9 +106,9 @@ func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, pod }) if err != nil { - fmt.Printf("ERROR [GetMetricValuesForPodNames]: Couldn't create an HTTP client: %v\n", err) - fmt.Printf("metricName: %s\n", metricName) - fmt.Printf("KeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) + r.Logger.V(0).Error(err, "[GetMetricValuesForPodNames] Couldn't create an HTTP client.\n" + + fmt.Sprintf("\tmetricName: %s\n", metricName) + + fmt.Sprintf("\tKeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) ) os.Exit(1) } @@ -125,18 +128,18 @@ func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, pod results, warnings, err := v1api.Query(ctx, queryString, time.Now(), v1.WithTimeout(0*time.Second)) if err != nil || results == nil { - fmt.Printf("ERROR [GetMetricValuesForPodNames]: Querying Prometheus didn't work: %v\n", err) - fmt.Printf("metricName: %s\n", metricName) - fmt.Printf("KeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) - fmt.Printf("queryString: %s\n", queryString) + r.Logger.V(0).Error(err, "[GetMetricValuesForPodNames] Querying Prometheus didn't work.\n" + + fmt.Sprintf("\tmetricName: %s\n", metricName) + + fmt.Sprintf("\tKeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) + + fmt.Sprintf("\tqueryString: %s", queryString) ) return nil, err } if len(warnings) > 0 { - fmt.Printf("WARNING [GetMetricValuesForPodNames]: %v\n", warnings) - fmt.Printf("metricName: %s\n", metricName) - fmt.Printf("KeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) - fmt.Printf("queryString: %s\n", queryString) + r.Logger.V(0).Info(fmt.Sprintf("WARNING [GetMetricValuesForPodNames] %v\n", warnings) + + fmt.Sprintf("\tmetricName: %s\n", metricName) + + fmt.Sprintf("\tKeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) + + fmt.Sprintf("\tqueryString: %s", queryString) ) } metricValues := make(map[string]float64, len(results.(model.Vector))) @@ -175,17 +178,19 @@ func (r *LabelGroupReconciler) InitializeMetricsExporter() { http.Handle("/metrics", prometheusHandler) if metricsUrl, parseErr := url.Parse(r.SusQLPrometheusMetricsUrl); parseErr == nil { - fmt.Printf("Serving metrics at '%s:%s'...\n", metricsUrl.Hostname(), metricsUrl.Port()) + r.Logger.V(2).Info(fmt.Sprintf("[InitializeMetricsExporter] Serving metrics at '%s:%s'...", metricsUrl.Hostname(), metricsUrl.Port())) go func() { err := http.ListenAndServe(metricsUrl.Hostname() + ":" + metricsUrl.Port(), nil) if err != nil { - panic("PANIC [SetAggregatedEnergyForLabels]: ListenAndServe: " + err.Error()) + r.Logger.V(0).Error(err,"PANIC [SetAggregatedEnergyForLabels] ListenAndServe" ) + panic("PANIC [SetAggregatedEnergyForLabels] ListenAndServe: " + err.Error()) } }() } else { - panic(fmt.Sprintf("PANIC [SetAggregatedEnergyForLabels]: Parsing the URL '%s' to set the metrics address didn't work (%v)", r.SusQLPrometheusMetricsUrl, parseErr)) + r.Logger.V(0).Error(parseErr,fmt.Sprintf("PANIC [SetAggregatedEnergyForLabels] Parsing the URL '%s' to set the metrics address didn't work.", r.SusQLPrometheusMetricsUrl)) + panic(fmt.Sprintf("PANIC [SetAggregatedEnergyForLabels] Parsing the URL '%s' to set the metrics address didn't work (%v)", r.SusQLPrometheusMetricsUrl, parseErr)) } } } @@ -194,5 +199,7 @@ func (r *LabelGroupReconciler) SetAggregatedEnergyForLabels(totalEnergy float64, // Save aggregated energy to Prometheus table susqlMetrics.totalEnergy.With(prometheusLabels).Set(totalEnergy) + r.Logger.V(2).Info(fmt.Sprintf("[SetAggregatedEnergyForLabels] Setting energy %f for %v.", totalEnergy, prometheusLabels)) // trace + return nil }