From b55de4794076b66c8b6fb162aabaaeeda94055dd Mon Sep 17 00:00:00 2001
From: Anna Kapuscinska
Date: Thu, 27 Jul 2023 15:31:48 +0100
Subject: [PATCH] [WIP] Delete metrics for deleted pods

Register a pod informer delete handler that removes the metrics labelled
with a deleted pod, so that the agent stops exposing them.

Signed-off-by: Anna Kapuscinska
---
 pkg/metrics/metrics.go | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index e7e4d41723c..40024721d12 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -7,11 +7,93 @@ import (
 	"net/http"
 
 	"github.com/cilium/tetragon/pkg/logger"
+	"github.com/cilium/tetragon/pkg/podhooks"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/client-go/tools/cache"
 )
 
+// init registers a pod informer callback that removes the metrics of deleted
+// pods.
+//
+// Without it, Tetragon agent keeps exposing metrics for all deleted pods.
+// This causes continuous increase in memory usage in Tetragon agent, as
+// well as in the metrics scraper.
+func init() {
+	// TODO: check if metrics are enabled
+	// TODO: consider delaying deletion a bit, to allow metrics scraper scrape
+	// final values
+	logger.GetLogger().Info("Registering pod deletion event handler")
+	podhooks.RegisterCallbacksAtInit(podhooks.Callbacks{
+		PodCallbacks: func(podInformer cache.SharedIndexInformer) {
+			// NOTE(review): recent client-go releases return a registration
+			// handle and an error from AddEventHandler; confirm against the
+			// vendored version and handle the error if so.
+			podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
+				DeleteFunc: func(obj interface{}) {
+					pod := podFromDeleteEvent(obj)
+					if pod == nil {
+						return
+					}
+					for _, metric := range listMetrics() {
+						deleteMetricForPod(metric, pod)
+					}
+				},
+			})
+		},
+	})
+}
+
+// podFromDeleteEvent extracts the pod from the object passed to an informer
+// delete handler. It returns nil when obj is neither a pod nor a tombstone
+// wrapping one.
+func podFromDeleteEvent(obj interface{}) *corev1.Pod {
+	switch concreteObj := obj.(type) {
+	case *corev1.Pod:
+		return concreteObj
+	case cache.DeletedFinalStateUnknown:
+		// Handle the case when the watcher missed the deletion event
+		// (e.g. due to a lost apiserver connection).
+		if pod, ok := concreteObj.Obj.(*corev1.Pod); ok {
+			return pod
+		}
+	}
+	return nil
+}
+
 func EnableMetrics(address string) {
 	logger.GetLogger().WithField("addr", address).Info("Starting metrics server")
 	http.Handle("/metrics", promhttp.Handler())
 	http.ListenAndServe(address, nil)
 }
+
+// listMetrics returns the metric vectors to clean up when a pod is deleted.
+// It is a stub for now and returns no metrics.
+func listMetrics() []*prometheus.MetricVec {
+	// TODO
+	return nil
+}
+
+// deleteMetricForPod deletes from metric all series whose "pod"/"namespace"
+// or "dstpod"/"dstnamespace" labels match the name and namespace of pod.
+// It does nothing when metric or pod is nil.
+func deleteMetricForPod(metric *prometheus.MetricVec, pod *corev1.Pod) {
+	const (
+		podLabel          = "pod"
+		namespaceLabel    = "namespace"
+		dstPodLabel       = "dstpod"
+		dstNamespaceLabel = "dstnamespace"
+	)
+	if metric == nil || pod == nil {
+		return
+	}
+	metric.DeletePartialMatch(prometheus.Labels{
+		podLabel:       pod.Name,
+		namespaceLabel: pod.Namespace,
+	})
+	metric.DeletePartialMatch(prometheus.Labels{
+		dstPodLabel:       pod.Name,
+		dstNamespaceLabel: pod.Namespace,
+	})
+}