Skip to content

Commit

Permalink
Merge pull request #65 from sustainable-computing-io/logging
Browse files Browse the repository at this point in the history
refactor internal logging
  • Loading branch information
trent-s authored Mar 19, 2024
2 parents 9d5c44c + 76f6886 commit 0510f5e
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 44 deletions.
17 changes: 10 additions & 7 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"go.uber.org/zap/zapcore"

susqlv1 "github.com/sustainable-computing-io/susql-operator/api/v1"
"github.com/sustainable-computing-io/susql-operator/internal/controller"
Expand All @@ -40,7 +41,7 @@ import (

var (
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
susqlLog = ctrl.Log.WithName("susql")
)

func init() {
Expand Down Expand Up @@ -71,6 +72,7 @@ func main() {

opts := zap.Options{
Development: true,
Level: zapcore.Level(-2),
}
opts.BindFlags(flag.CommandLine)
flag.Parse()
Expand All @@ -97,7 +99,7 @@ func main() {
// LeaderElectionReleaseOnCancel: true,
})
if err != nil {
setupLog.Error(err, "unable to start manager")
susqlLog.Error(err, "unable to start manager")
os.Exit(1)
}

Expand All @@ -114,24 +116,25 @@ func main() {
SusQLPrometheusDatabaseUrl: susqlPrometheusDatabaseUrl,
SusQLPrometheusMetricsUrl: susqlPrometheusMetricsUrl,
SamplingRate: time.Duration(samplingRateInteger) * time.Second,
Logger: susqlLog,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "LabelGroup")
susqlLog.Error(err, "unable to create controller", "controller", "LabelGroup")
os.Exit(1)
}
//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
susqlLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up ready check")
susqlLog.Error(err, "unable to set up ready check")
os.Exit(1)
}

setupLog.Info("starting manager")
susqlLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
susqlLog.Error(err, "problem running manager")
os.Exit(1)
}
}
6 changes: 2 additions & 4 deletions deployment/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ if ! command -v kubectl &> /dev/null; then
fi

if [[ ! -z ${SUSQL_ENHANCED} ]]; then
echo "->Deploying enhanced SusQL configuration."
echo "->Using enhanced SusQL configuration."
fi

# Check if there is a current context
Expand Down Expand Up @@ -81,7 +81,7 @@ if [[ -z $(kubectl get namespaces --no-headers -o custom-columns=':{.metadata.na
echo "Namespace '${SUSQL_NAMESPACE}' doesn't exist. Creating it."
kubectl create namespace ${SUSQL_NAMESPACE}
else
echo "Namespace '${SUSQL_NAMESPACE}' found. Deploying using it."
echo "Namespace '${SUSQL_NAMESPACE}' found. Using it."
fi

# Set SusQL installation variables
Expand Down Expand Up @@ -117,8 +117,6 @@ echo "==========================================================================
# Actions to perform, separated by comma
actions=${1:-"kepler-check,prometheus-undeploy,prometheus-deploy,susql-undeploy,susql-deploy"}

exit

# output deploy information
LOGFILE=.susql-deploy-info.txt
LASTLOGFILE=.susql-deploy-info-last.txt
Expand Down
24 changes: 15 additions & 9 deletions internal/controller/labelgroup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"github.com/go-logr/logr"

susql "github.com/sustainable-computing-io/susql-operator/api/v1"
)
Expand All @@ -39,6 +40,7 @@ type LabelGroupReconciler struct {
SusQLPrometheusDatabaseUrl string
SusQLPrometheusMetricsUrl string
SamplingRate time.Duration // Sampling rate for all the label groups
Logger logr.Logger
}

const (
Expand Down Expand Up @@ -79,12 +81,12 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)

// Check that the susql prometheus labels are created
if len(labelGroup.Status.PrometheusLabels) == 0 && labelGroup.Status.Phase != susql.Initializing {
fmt.Printf("WARNING [Reconcile]: The SusQL prometheus labels for LabelGroup '%s' in namespace '%s' have not been created. Reinitializing this LabelGroup.\n", labelGroup.Name, labelGroup.Namespace)
r.Logger.V(1).Info(fmt.Sprintf("[Reconcile] The SusQL prometheus labels for LabelGroup '%s' in namespace '%s' have not been created. Reinitializing this LabelGroup.", labelGroup.Name, labelGroup.Namespace))

labelGroup.Status.Phase = susql.Initializing

if err := r.Status().Update(ctx, labelGroup); err != nil {
fmt.Printf("ERROR [Reconcile]: Couldn't update the phase\n")
r.Logger.V(0).Error(err,"[Reconcile] Couldn't update the phase.")
}

return ctrl.Result{}, nil
Expand All @@ -93,8 +95,9 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// Decide what action to take based on the state of the labelGroup
switch labelGroup.Status.Phase {
case susql.Initializing:
r.Logger.V(2).Info("[Reconcile] Entered initializing case.")
if len(labelGroup.Spec.Labels) > len(susqlPrometheusLabelNames) {
fmt.Printf("ERROR [Reconcile]: The number of provided labels is greater than the maximum number of supported labels (e.g., up to %d labels)\n", len(susqlPrometheusLabelNames))
r.Logger.V(0).Error(fmt.Errorf("[Reconcile] The number of provided labels is greater than the maximum number of supported labels (e.g., up to %d labels).", len(susqlPrometheusLabelNames)),"")
return ctrl.Result{RequeueAfter: fixingDelay}, nil
}

Expand Down Expand Up @@ -135,20 +138,21 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
labelGroup.Status.Phase = susql.Reloading

if err := r.Status().Update(ctx, labelGroup); err != nil {
fmt.Printf("ERROR [Reconcile]: Couldn't update status of the LabelGroup\n")
r.Logger.V(0).Error(err, "[Reconcile] Couldn't update status of the LabelGroup.")
return ctrl.Result{RequeueAfter: fixingDelay}, nil
}

// Requeue
return ctrl.Result{}, nil

case susql.Reloading:
r.Logger.V(2).Info("[Reconcile] Entered reloading case.")
// Reload data from existing database
if !labelGroup.Spec.DisableUsingMostRecentValue {
totalEnergy, err := r.GetMostRecentValue(labelGroup.Status.SusQLPrometheusQuery)

if err != nil {
fmt.Printf("ERROR [Reconcile]: Couldn't retrieve most recent value\n")
r.Logger.V(0).Error(err,"[Reconcile] Couldn't retrieve most recent value.")
return ctrl.Result{RequeueAfter: fixingDelay}, nil
}

Expand All @@ -158,27 +162,28 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
labelGroup.Status.Phase = susql.Aggregating

if err := r.Status().Update(ctx, labelGroup); err != nil {
fmt.Printf("ERROR [Reconcile]: Couldn't update status of the LabelGroup\n")
r.Logger.V(0).Error(err, "[Reconcile] Couldn't update status of the LabelGroup.")
return ctrl.Result{RequeueAfter: fixingDelay}, nil
}

// Requeue
return ctrl.Result{}, nil

case susql.Aggregating:
// r.Logger.V(2).Info("[Reconcile] Entered aggregating case.") // trace
// Get list of pods matching the label group
podNames, namespaceNames, err := r.GetPodNamesMatchingLabels(ctx, labelGroup)

if err != nil {
fmt.Printf("ERROR [Reconcile]: Couldn't get pods for the labels provided\n")
r.Logger.V(0).Error(err,"[Reconcile] Couldn't get pods for the labels provided.")
return ctrl.Result{}, err
}

// Aggregate Kepler measurements for these set of pods
metricValues, err := r.GetMetricValuesForPodNames(r.KeplerMetricName, podNames, namespaceNames)

if err != nil {
fmt.Printf("ERROR [Reconcile]: Querying Prometheus didn't work: %v\n", err)
r.Logger.V(0).Error(err,"[Reconcile] Querying Prometheus didn't work.")
return ctrl.Result{RequeueAfter: errorDelay}, nil
}

Expand Down Expand Up @@ -230,11 +235,12 @@ func (r *LabelGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{RequeueAfter: r.SamplingRate}, nil

default:
r.Logger.V(2).Info("[Reconcile] Entered default case.")
// First time seeing this object
labelGroup.Status.Phase = susql.Initializing

if err := r.Status().Update(ctx, labelGroup); err != nil {
fmt.Printf("ERROR [Reconcile]: Couldn't set object to 'Initializing'\n")
r.Logger.V(0).Error(err,"[Reconcile] Couldn't set object to 'Initializing'.")
}

return ctrl.Result{}, nil
Expand Down
55 changes: 31 additions & 24 deletions internal/controller/prometheus_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ func (r *LabelGroupReconciler) GetMostRecentValue(susqlPrometheusQuery string) (
})

if err != nil {
fmt.Printf("ERROR [GetMostRecentValue]: Couldn't create HTTP client: %v\n", err)
fmt.Printf("Query: %s\n", susqlPrometheusQuery)
fmt.Printf("SusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl)
r.Logger.V(0).Error(err, fmt.Sprintf("[GetMostRecentValue] Couldn't create HTTP client.\n") +
fmt.Sprintf("\tQuery: %s\n", susqlPrometheusQuery) +
fmt.Sprintf("\tSusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl) )
os.Exit(1)
}

Expand All @@ -66,16 +66,19 @@ func (r *LabelGroupReconciler) GetMostRecentValue(susqlPrometheusQuery string) (
queryString := fmt.Sprintf("max_over_time(%s[%s])", susqlPrometheusQuery, maxQueryTime)
results, warnings, err := v1api.Query(ctx, queryString, time.Now(), v1.WithTimeout(0*time.Second))

r.Logger.V(2).Info(fmt.Sprintf("[GetMostRecentValue] Query: %s", queryString)) // trace
r.Logger.V(2).Info(fmt.Sprintf("[GetMostRecentValue] Results: '%v'", results)) // trace

if len(warnings) > 0 {
fmt.Printf("WARNING [GetMostRecentValue]: %v\n", warnings)
fmt.Printf("Query: %s\n", susqlPrometheusQuery)
fmt.Printf("SusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl)
r.Logger.V(0).Info(fmt.Sprintf("WARNING [GetMostRecentValue] %v\n", warnings) +
fmt.Sprintf("\tQuery: %s\n", susqlPrometheusQuery) +
fmt.Sprintf("\tSusQLPrometheusDatabaseUrl: %s", r.SusQLPrometheusDatabaseUrl) )
}

if err != nil {
fmt.Printf("ERROR [GetMostRecentValue]: Querying Prometheus didn't work: %v\n", err)
fmt.Printf("Query: %s\n", susqlPrometheusQuery)
fmt.Printf("SusQLPrometheusDatabaseUrl: %s\n", r.SusQLPrometheusDatabaseUrl)
r.Logger.V(0).Error(err, "[GetMostRecentValue] Querying Prometheus didn't work.\n" +
fmt.Sprintf("\tQuery: %s\n", susqlPrometheusQuery) +
fmt.Sprintf("\tSusQLPrometheusDatabaseUrl: %s", r.SusQLPrometheusDatabaseUrl) )
return 0.0, err
}

Expand All @@ -88,7 +91,7 @@ func (r *LabelGroupReconciler) GetMostRecentValue(susqlPrometheusQuery string) (

func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, podNames []string, namespaceNames[]string) (map[string]float64, error) {
if len(podNames) == 0 {
fmt.Printf("ERROR [GetMetricValuesForPodNames]: No pods under observation. Currently len(podNames)=0.\n")
r.Logger.V(1).Info("[GetMetricValuesForPodNames] No pods under observation. Currently len(podNames)=0.")
return nil, nil
}

Expand All @@ -103,9 +106,9 @@ func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, pod
})

if err != nil {
fmt.Printf("ERROR [GetMetricValuesForPodNames]: Couldn't create an HTTP client: %v\n", err)
fmt.Printf("metricName: %s\n", metricName)
fmt.Printf("KeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl)
r.Logger.V(0).Error(err, "[GetMetricValuesForPodNames] Couldn't create an HTTP client.\n" +
fmt.Sprintf("\tmetricName: %s\n", metricName) +
fmt.Sprintf("\tKeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) )
os.Exit(1)
}

Expand All @@ -125,18 +128,18 @@ func (r *LabelGroupReconciler) GetMetricValuesForPodNames(metricName string, pod
results, warnings, err := v1api.Query(ctx, queryString, time.Now(), v1.WithTimeout(0*time.Second))

if err != nil || results == nil {
fmt.Printf("ERROR [GetMetricValuesForPodNames]: Querying Prometheus didn't work: %v\n", err)
fmt.Printf("metricName: %s\n", metricName)
fmt.Printf("KeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl)
fmt.Printf("queryString: %s\n", queryString)
r.Logger.V(0).Error(err, "[GetMetricValuesForPodNames] Querying Prometheus didn't work.\n" +
fmt.Sprintf("\tmetricName: %s\n", metricName) +
fmt.Sprintf("\tKeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) +
fmt.Sprintf("\tqueryString: %s", queryString) )
return nil, err
}

if len(warnings) > 0 {
fmt.Printf("WARNING [GetMetricValuesForPodNames]: %v\n", warnings)
fmt.Printf("metricName: %s\n", metricName)
fmt.Printf("KeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl)
fmt.Printf("queryString: %s\n", queryString)
r.Logger.V(0).Info(fmt.Sprintf("WARNING [GetMetricValuesForPodNames] %v\n", warnings) +
fmt.Sprintf("\tmetricName: %s\n", metricName) +
fmt.Sprintf("\tKeplerPrometheusUrl: %s\n", r.KeplerPrometheusUrl) +
fmt.Sprintf("\tqueryString: %s", queryString) )
}

metricValues := make(map[string]float64, len(results.(model.Vector)))
Expand Down Expand Up @@ -175,17 +178,19 @@ func (r *LabelGroupReconciler) InitializeMetricsExporter() {
http.Handle("/metrics", prometheusHandler)

if metricsUrl, parseErr := url.Parse(r.SusQLPrometheusMetricsUrl); parseErr == nil {
fmt.Printf("Serving metrics at '%s:%s'...\n", metricsUrl.Hostname(), metricsUrl.Port())
r.Logger.V(2).Info(fmt.Sprintf("[InitializeMetricsExporter] Serving metrics at '%s:%s'...", metricsUrl.Hostname(), metricsUrl.Port()))

go func() {
err := http.ListenAndServe(metricsUrl.Hostname() + ":" + metricsUrl.Port(), nil)

if err != nil {
panic("PANIC [SetAggregatedEnergyForLabels]: ListenAndServe: " + err.Error())
r.Logger.V(0).Error(err,"PANIC [SetAggregatedEnergyForLabels] ListenAndServe" )
panic("PANIC [SetAggregatedEnergyForLabels] ListenAndServe: " + err.Error())
}
}()
} else {
panic(fmt.Sprintf("PANIC [SetAggregatedEnergyForLabels]: Parsing the URL '%s' to set the metrics address didn't work (%v)", r.SusQLPrometheusMetricsUrl, parseErr))
r.Logger.V(0).Error(parseErr,fmt.Sprintf("PANIC [SetAggregatedEnergyForLabels] Parsing the URL '%s' to set the metrics address didn't work.", r.SusQLPrometheusMetricsUrl))
panic(fmt.Sprintf("PANIC [SetAggregatedEnergyForLabels] Parsing the URL '%s' to set the metrics address didn't work (%v)", r.SusQLPrometheusMetricsUrl, parseErr))
}
}
}
Expand All @@ -194,5 +199,7 @@ func (r *LabelGroupReconciler) SetAggregatedEnergyForLabels(totalEnergy float64,
// Save aggregated energy to Prometheus table
susqlMetrics.totalEnergy.With(prometheusLabels).Set(totalEnergy)

r.Logger.V(2).Info(fmt.Sprintf("[SetAggregatedEnergyForLabels] Setting energy %f for %v.", totalEnergy, prometheusLabels)) // trace

return nil
}

0 comments on commit 0510f5e

Please sign in to comment.