From 90cfbf14996d4877ade4499850c4b9e852229484 Mon Sep 17 00:00:00 2001 From: Peter Stranak Date: Mon, 8 Jan 2024 12:54:40 +0100 Subject: [PATCH] NH-67458: Make sure no histogram metrics are sent to SWO For Istio metrics, publish k8s.istio_request_bytes.rate, k8s.istio_response_bytes.rate and k8s.istio_request_duration_milliseconds.rate instead. --- deploy/helm/CHANGELOG.md | 5 ++ deploy/helm/Chart.yaml | 2 +- deploy/helm/metrics-collector-config.yaml | 6 ++ deploy/helm/node-collector-config.yaml | 32 +++++++++++ .../logs-fargate-config-map_test.yaml.snap | 4 +- ...ollector-config-map-fargate_test.yaml.snap | 5 ++ ...etrics-collector-config-map_test.yaml.snap | 15 +++++ ...ollector-config-map-windows_test.yaml.snap | 29 ++++++++++ .../node-collector-config-map_test.yaml.snap | 57 +++++++++++++++++++ doc/exported_metrics.md | 5 +- 10 files changed, 156 insertions(+), 4 deletions(-) diff --git a/deploy/helm/CHANGELOG.md b/deploy/helm/CHANGELOG.md index ff2e8928..dbdcb6cb 100644 --- a/deploy/helm/CHANGELOG.md +++ b/deploy/helm/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +## [3.2.0-alpha.13] - 2024-01-08 + +- Make sure discovered histogram metrics are not sent to SWO +- Publish custom Istio metrics, when available: `k8s.istio_request_bytes.rate`, `k8s.istio_response_bytes.rate` and `k8s.istio_request_duration_milliseconds.rate`. 
+ ## [3.2.0-alpha.12] - 2024-01-04 - Upgraded OTEL collector image to `0.9.2` (see [Release notes](https://github.com/solarwinds/swi-k8s-opentelemetry-collector/releases/tag/0.9.2)) which brings following changes diff --git a/deploy/helm/Chart.yaml b/deploy/helm/Chart.yaml index 683b6825..4e621224 100644 --- a/deploy/helm/Chart.yaml +++ b/deploy/helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: swo-k8s-collector -version: 3.2.0-alpha.12 +version: 3.2.0-alpha.13 appVersion: "0.9.2" description: SolarWinds Kubernetes Integration keywords: diff --git a/deploy/helm/metrics-collector-config.yaml b/deploy/helm/metrics-collector-config.yaml index ea1868f5..22ca3481 100644 --- a/deploy/helm/metrics-collector-config.yaml +++ b/deploy/helm/metrics-collector-config.yaml @@ -1010,6 +1010,11 @@ processors: - "^kubelet_volume_stats_available_percent$" - "^kubernetes_build_info$" + filter/histograms: + metrics: + metric: + - 'type == METRIC_DATA_TYPE_HISTOGRAM' + connectors: forward/prometheus: forward/metric-exporter: @@ -1284,6 +1289,7 @@ service: - otlp processors: - memory_limiter + - filter/histograms - batch receivers: - forward/metric-exporter diff --git a/deploy/helm/node-collector-config.yaml b/deploy/helm/node-collector-config.yaml index adf64e8a..5ba77700 100644 --- a/deploy/helm/node-collector-config.yaml +++ b/deploy/helm/node-collector-config.yaml @@ -284,6 +284,34 @@ processors: {{ toYaml .Values.otel.metrics.autodiscovery.prometheusEndpoints.filter | indent 6 }} {{- end }} + filter/histograms: + metrics: + metric: + - 'type == METRIC_DATA_TYPE_HISTOGRAM' + + transform/istio-histograms: + metric_statements: + - context: metric + statements: + - extract_sum_metric(true) where (name == "k8s.istio_request_bytes" or name == "k8s.istio_response_bytes" or name == "k8s.istio_request_duration_milliseconds") + - set(name, "k8s.istio_request_bytes.rate") where name == "k8s.istio_request_bytes_sum" + - set(name, "k8s.istio_response_bytes.rate") where name == 
"k8s.istio_response_bytes_sum" + - set(name, "k8s.istio_request_duration_milliseconds.rate") where name == "k8s.istio_request_duration_milliseconds_sum" + + cumulativetodelta/istio-histograms: + include: + metrics: + - k8s.istio_request_bytes.rate + - k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate + match_type: strict + + deltatorate/istio-histograms: + metrics: + - k8s.istio_request_bytes.rate + - k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate + connectors: {{- if and .Values.otel.metrics.enabled (or (not .Values.aws_fargate.enabled) .Values.otel.metrics.autodiscovery.prometheusEndpoints.enabled) }} forward/metric-exporter: @@ -570,6 +598,9 @@ service: - filter/metrics-discovery {{- end }} - metricstransform/rename + - transform/istio-histograms + - cumulativetodelta/istio-histograms + - deltatorate/istio-histograms {{- if .Values.otel.metrics.autodiscovery.prometheusEndpoints.customTransformations.counterToRate }} - cumulativetodelta/discovery - deltatorate/discovery @@ -611,6 +642,7 @@ service: - otlp processors: - memory_limiter + - filter/histograms - k8sattributes {{- if not (empty .Values.otel.metrics.k8s_instrumentation.labels.excludePattern) }} - resource/k8sattributes_labels_filter diff --git a/deploy/helm/tests/__snapshot__/logs-fargate-config-map_test.yaml.snap b/deploy/helm/tests/__snapshot__/logs-fargate-config-map_test.yaml.snap index 7fa8c161..cb22e7b9 100644 --- a/deploy/helm/tests/__snapshot__/logs-fargate-config-map_test.yaml.snap +++ b/deploy/helm/tests/__snapshot__/logs-fargate-config-map_test.yaml.snap @@ -24,7 +24,7 @@ Fargate logging ConfigMap spec should include additional filters when they are c Match * Add sw.k8s.cluster.uid Add sw.k8s.log.type container - Add sw.k8s.agent.manifest.version "3.2.0-alpha.12" + Add sw.k8s.agent.manifest.version "3.2.0-alpha.13" flb_log_cw: "false" output.conf: | [OUTPUT] @@ -64,7 +64,7 @@ Fargate logging ConfigMap spec should match snapshot when 
Fargate logging is ena Match * Add sw.k8s.cluster.uid Add sw.k8s.log.type container - Add sw.k8s.agent.manifest.version "3.2.0-alpha.12" + Add sw.k8s.agent.manifest.version "3.2.0-alpha.13" flb_log_cw: "false" output.conf: | [OUTPUT] diff --git a/deploy/helm/tests/__snapshot__/metrics-collector-config-map-fargate_test.yaml.snap b/deploy/helm/tests/__snapshot__/metrics-collector-config-map-fargate_test.yaml.snap index acb163c1..eaa3fbac 100644 --- a/deploy/helm/tests/__snapshot__/metrics-collector-config-map-fargate_test.yaml.snap +++ b/deploy/helm/tests/__snapshot__/metrics-collector-config-map-fargate_test.yaml.snap @@ -518,6 +518,10 @@ Metrics config should match snapshot when using default values: match_type: regexp metric_names: - ^ebpf_net.*$ + filter/histograms: + metrics: + metric: + - type == METRIC_DATA_TYPE_HISTOGRAM filter/kube-state-metrics: metrics: include: @@ -2239,6 +2243,7 @@ Metrics config should match snapshot when using default values: - otlp processors: - memory_limiter + - filter/histograms - batch receivers: - forward/metric-exporter diff --git a/deploy/helm/tests/__snapshot__/metrics-collector-config-map_test.yaml.snap b/deploy/helm/tests/__snapshot__/metrics-collector-config-map_test.yaml.snap index 4016c36f..b54631fc 100644 --- a/deploy/helm/tests/__snapshot__/metrics-collector-config-map_test.yaml.snap +++ b/deploy/helm/tests/__snapshot__/metrics-collector-config-map_test.yaml.snap @@ -518,6 +518,10 @@ Metrics config should match snapshot when fargate is enabled: match_type: regexp metric_names: - ^ebpf_net.*$ + filter/histograms: + metrics: + metric: + - type == METRIC_DATA_TYPE_HISTOGRAM filter/kube-state-metrics: metrics: include: @@ -2239,6 +2243,7 @@ Metrics config should match snapshot when fargate is enabled: - otlp processors: - memory_limiter + - filter/histograms - batch receivers: - forward/metric-exporter @@ -2882,6 +2887,10 @@ Metrics config should match snapshot when using Prometheus url with extra_scrape match_type: regexp 
metric_names: - ^ebpf_net.*$ + filter/histograms: + metrics: + metric: + - type == METRIC_DATA_TYPE_HISTOGRAM filter/kube-state-metrics: metrics: include: @@ -4538,6 +4547,7 @@ Metrics config should match snapshot when using Prometheus url with extra_scrape - otlp processors: - memory_limiter + - filter/histograms - batch receivers: - forward/metric-exporter @@ -5188,6 +5198,10 @@ Metrics config should match snapshot when using default values: match_type: regexp metric_names: - ^ebpf_net.*$ + filter/histograms: + metrics: + metric: + - type == METRIC_DATA_TYPE_HISTOGRAM filter/kube-state-metrics: metrics: include: @@ -6829,6 +6843,7 @@ Metrics config should match snapshot when using default values: - otlp processors: - memory_limiter + - filter/histograms - batch receivers: - forward/metric-exporter diff --git a/deploy/helm/tests/__snapshot__/node-collector-config-map-windows_test.yaml.snap b/deploy/helm/tests/__snapshot__/node-collector-config-map-windows_test.yaml.snap index 9ca8dd2f..5f07f941 100644 --- a/deploy/helm/tests/__snapshot__/node-collector-config-map-windows_test.yaml.snap +++ b/deploy/helm/tests/__snapshot__/node-collector-config-map-windows_test.yaml.snap @@ -98,6 +98,13 @@ Node collector config for windows nodes should match snapshot when using default - k8s.node.network.packets_transmitted - k8s.node.network.receive_packets_dropped - k8s.node.network.transmit_packets_dropped + cumulativetodelta/istio-histograms: + include: + match_type: strict + metrics: + - k8s.istio_request_bytes.rate + - k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate deltatorate/cadvisor: metrics: - k8s.node.cpu.usage.seconds.rate @@ -127,6 +134,15 @@ Node collector config for windows nodes should match snapshot when using default - k8s.node.network.packets_transmitted - k8s.node.network.receive_packets_dropped - k8s.node.network.transmit_packets_dropped + deltatorate/istio-histograms: + metrics: + - k8s.istio_request_bytes.rate + - 
k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate + filter/histograms: + metrics: + metric: + - type == METRIC_DATA_TYPE_HISTOGRAM filter/logs: logs: include: @@ -839,6 +855,15 @@ Node collector config for windows nodes should match snapshot when using default - action: insert from_attribute: persistentvolumeclaim key: k8s.persistentvolumeclaim.name + transform/istio-histograms: + metric_statements: + - context: metric + statements: + - extract_sum_metric(true) where (name == "k8s.istio_request_bytes" or name + == "k8s.istio_response_bytes" or name == "k8s.istio_request_duration_milliseconds") + - set(name, "k8s.istio_request_bytes.rate") where name == "k8s.istio_request_bytes_sum" + - set(name, "k8s.istio_response_bytes.rate") where name == "k8s.istio_response_bytes_sum" + - set(name, "k8s.istio_request_duration_milliseconds.rate") where name == "k8s.istio_request_duration_milliseconds_sum" transform/syslogify: error_mode: ignore log_statements: @@ -1040,6 +1065,7 @@ Node collector config for windows nodes should match snapshot when using default - otlp processors: - memory_limiter + - filter/histograms - k8sattributes - batch/metrics receivers: @@ -1050,6 +1076,9 @@ Node collector config for windows nodes should match snapshot when using default processors: - memory_limiter - metricstransform/rename + - transform/istio-histograms + - cumulativetodelta/istio-histograms + - deltatorate/istio-histograms - groupbyattrs/common-all - resource/all receivers: diff --git a/deploy/helm/tests/__snapshot__/node-collector-config-map_test.yaml.snap b/deploy/helm/tests/__snapshot__/node-collector-config-map_test.yaml.snap index 3d8e0338..54d0c2f6 100644 --- a/deploy/helm/tests/__snapshot__/node-collector-config-map_test.yaml.snap +++ b/deploy/helm/tests/__snapshot__/node-collector-config-map_test.yaml.snap @@ -95,6 +95,13 @@ Node collector config should match snapshot when fargate is enabled: - k8s.node.network.packets_transmitted - 
k8s.node.network.receive_packets_dropped - k8s.node.network.transmit_packets_dropped + cumulativetodelta/istio-histograms: + include: + match_type: strict + metrics: + - k8s.istio_request_bytes.rate + - k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate deltatorate/cadvisor: metrics: - k8s.node.cpu.usage.seconds.rate @@ -124,6 +131,15 @@ Node collector config should match snapshot when fargate is enabled: - k8s.node.network.packets_transmitted - k8s.node.network.receive_packets_dropped - k8s.node.network.transmit_packets_dropped + deltatorate/istio-histograms: + metrics: + - k8s.istio_request_bytes.rate + - k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate + filter/histograms: + metrics: + metric: + - type == METRIC_DATA_TYPE_HISTOGRAM filter/logs: logs: include: @@ -856,6 +872,15 @@ Node collector config should match snapshot when fargate is enabled: - action: insert from_attribute: persistentvolumeclaim key: k8s.persistentvolumeclaim.name + transform/istio-histograms: + metric_statements: + - context: metric + statements: + - extract_sum_metric(true) where (name == "k8s.istio_request_bytes" or name + == "k8s.istio_response_bytes" or name == "k8s.istio_request_duration_milliseconds") + - set(name, "k8s.istio_request_bytes.rate") where name == "k8s.istio_request_bytes_sum" + - set(name, "k8s.istio_response_bytes.rate") where name == "k8s.istio_response_bytes_sum" + - set(name, "k8s.istio_request_duration_milliseconds.rate") where name == "k8s.istio_request_duration_milliseconds_sum" transform/syslogify: error_mode: ignore log_statements: @@ -1035,6 +1060,9 @@ Node collector config should match snapshot when fargate is enabled: processors: - memory_limiter - metricstransform/rename + - transform/istio-histograms + - cumulativetodelta/istio-histograms + - deltatorate/istio-histograms - groupbyattrs/common-all - resource/all receivers: @@ -1147,6 +1175,13 @@ Node collector config should match snapshot when using 
default values: - k8s.node.network.packets_transmitted - k8s.node.network.receive_packets_dropped - k8s.node.network.transmit_packets_dropped + cumulativetodelta/istio-histograms: + include: + match_type: strict + metrics: + - k8s.istio_request_bytes.rate + - k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate deltatorate/cadvisor: metrics: - k8s.node.cpu.usage.seconds.rate @@ -1176,6 +1211,15 @@ Node collector config should match snapshot when using default values: - k8s.node.network.packets_transmitted - k8s.node.network.receive_packets_dropped - k8s.node.network.transmit_packets_dropped + deltatorate/istio-histograms: + metrics: + - k8s.istio_request_bytes.rate + - k8s.istio_response_bytes.rate + - k8s.istio_request_duration_milliseconds.rate + filter/histograms: + metrics: + metric: + - type == METRIC_DATA_TYPE_HISTOGRAM filter/logs: logs: include: @@ -1908,6 +1952,15 @@ Node collector config should match snapshot when using default values: - action: insert from_attribute: persistentvolumeclaim key: k8s.persistentvolumeclaim.name + transform/istio-histograms: + metric_statements: + - context: metric + statements: + - extract_sum_metric(true) where (name == "k8s.istio_request_bytes" or name + == "k8s.istio_response_bytes" or name == "k8s.istio_request_duration_milliseconds") + - set(name, "k8s.istio_request_bytes.rate") where name == "k8s.istio_request_bytes_sum" + - set(name, "k8s.istio_response_bytes.rate") where name == "k8s.istio_response_bytes_sum" + - set(name, "k8s.istio_request_duration_milliseconds.rate") where name == "k8s.istio_request_duration_milliseconds_sum" transform/syslogify: error_mode: ignore log_statements: @@ -2123,6 +2176,7 @@ Node collector config should match snapshot when using default values: - otlp processors: - memory_limiter + - filter/histograms - k8sattributes - batch/metrics receivers: @@ -2133,6 +2187,9 @@ Node collector config should match snapshot when using default values: processors: - 
memory_limiter - metricstransform/rename + - transform/istio-histograms + - cumulativetodelta/istio-histograms + - deltatorate/istio-histograms - groupbyattrs/common-all - resource/all receivers: diff --git a/doc/exported_metrics.md b/doc/exported_metrics.md index 59b96f8b..b8399e21 100644 --- a/doc/exported_metrics.md +++ b/doc/exported_metrics.md @@ -304,4 +304,7 @@ The following tables contain the list of all metrics exported by the swi-k8s-ope | k8s.dns.responses | Counter | | The total number of DNS responses sent between the source and destination measured for the prior thirty seconds. | custom | | k8s.dns.timeouts | Counter | | The total number of DNS timeouts between the source and destination measured for the prior thirty seconds. | custom | | k8s.dns.client.duration_average | Counter | | This metric is the average duration in microseconds from when the client sends a DNS request, until the response is received back from the server. As such, it includes the communication round-trip times, plus the server processing latency. Computed by the summation of all times, divided by dns.responses. | custom | -| k8s.dns.server.duration_average | Counter | | This metric is the average duration in microseconds for the server to respond to a request received locally. Thus, it does not include the network latency from or to the client. Computed by the summation of all times, divided by dns.responses. | custom | \ No newline at end of file +| k8s.dns.server.duration_average | Counter | | This metric is the average duration in microseconds for the server to respond to a request received locally. Thus, it does not include the network latency from or to the client. Computed by the summation of all times, divided by dns.responses. | custom | +| k8s.istio_request_bytes.rate | Gauge | | Bytes per second used for HTTP request bodies. | custom | +| k8s.istio_response_bytes.rate | Gauge | | Bytes per second used for HTTP response bodies. 
| custom | +| k8s.istio_request_duration_milliseconds.rate | Gauge | | Duration of requests per second. | custom |