From 0db7253bfa836609511da218f7f47cca57fae3cb Mon Sep 17 00:00:00 2001 From: Rodrigo Queiro Date: Fri, 11 Oct 2024 15:22:34 +0200 Subject: [PATCH] Drop more heavy metrics (#455) After these, we're down to useful metrics like node_cpu_seconds_total or grpc_io_client_roundtrip_latency_bucket, which we wouldn't want to drop but could perhaps relabel (for example, to aggregate CPU usage across all CPUs rather than generating 8 metrics for each of the 30 or 100 vCPUs on every node, or to drop pod IDs from gRPC client metrics so they're aggregated across different instances of the same client). I've never tried this, though. Tested as for #453. --- src/app_charts/prometheus/prometheus-robot.values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/app_charts/prometheus/prometheus-robot.values.yaml b/src/app_charts/prometheus/prometheus-robot.values.yaml index 351cd9ca..5847f076 100644 --- a/src/app_charts/prometheus/prometheus-robot.values.yaml +++ b/src/app_charts/prometheus/prometheus-robot.values.yaml @@ -124,7 +124,7 @@ kubeApiServer: metricRelabelings: # Drop high cardinality apiserver metrics. - action: drop - regex: "apiserver_request.*|etcd_request.*|apiserver_watch.*|code_*" + regex: "apiserver_(request|response|watch|admission).*|etcd_request.*|code_*" sourceLabels: [__name__] relabelings: - sourceLabels: [__meta_kubernetes_pod_node_name] @@ -148,7 +148,7 @@ kubelet: regex: 'container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)' - sourceLabels: [__name__] action: drop - regex: 'container_memory_(mapped_file|swap)' + regex: 'container_memory_(mapped_file|swap|failures_total)' - sourceLabels: [__name__] action: drop regex: 'container_(file_descriptors|tasks_state|threads_max)'