From e3dec5db6cd0ade99cb99156151fdd2c46097e71 Mon Sep 17 00:00:00 2001 From: Sam DeHaan Date: Wed, 13 Nov 2024 13:05:53 -0500 Subject: [PATCH 1/2] Capture second metrics sample to provide metrics delta for investigating issues --- CHANGELOG.md | 4 + docs/sources/troubleshoot/support_bundle.md | 3 +- internal/service/http/supportbundle.go | 109 +++++++++++--------- 3 files changed, 66 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28fc106bc..d34beb380 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ Main (unreleased) - Add `otelcol.exporter.splunkhec` allowing to export otel data to Splunk HEC (@adlotsof) +### Enhancements + +- Add second metrics sample to the support bundle to provide delta information (@dehaansa) + ### Bugfixes - Fixed an issue in the `prometheus.exporter.postgres` component that would leak goroutines when the target was not reachable (@dehaansa) diff --git a/docs/sources/troubleshoot/support_bundle.md b/docs/sources/troubleshoot/support_bundle.md index 2bb870bc5..db543911e 100644 --- a/docs/sources/troubleshoot/support_bundle.md +++ b/docs/sources/troubleshoot/support_bundle.md @@ -38,7 +38,8 @@ A support bundle contains the following data: `/api/v0/web/components` endpoint. * `alloy-logs.txt` contains the logs during the bundle generation. * `alloy-metadata.yaml` contains the {{< param "PRODUCT_NAME" >}} build version and the installation's operating system, architecture, and uptime. -* `alloy-metrics.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}}. +* `alloy-metrics-sample-1.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the start of the bundle collection. +* `alloy-metrics-sample-2.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the end of the bundle collection. * `alloy-peers.json` contains information about the identified cluster peers of this {{< param "PRODUCT_NAME" >}} instance, generated by the `/api/v0/web/peers` endpoint. * `alloy-runtime-flags.txt` contains the values of the runtime flags available in {{< param "PRODUCT_NAME" >}}. diff --git a/internal/service/http/supportbundle.go b/internal/service/http/supportbundle.go index 3c75c3515..783a5c3d0 100644 --- a/internal/service/http/supportbundle.go +++ b/internal/service/http/supportbundle.go @@ -28,16 +28,17 @@ type SupportBundleContext struct { // Bundle collects all the data that is exposed as a support bundle. type Bundle struct { - meta []byte - alloyMetrics []byte - components []byte - peers []byte - runtimeFlags []byte - heapBuf *bytes.Buffer - goroutineBuf *bytes.Buffer - blockBuf *bytes.Buffer - mutexBuf *bytes.Buffer - cpuBuf *bytes.Buffer + meta []byte + alloyMetricsStart []byte + alloyMetricsEnd []byte + components []byte + peers []byte + runtimeFlags []byte + heapBuf *bytes.Buffer + goroutineBuf *bytes.Buffer + blockBuf *bytes.Buffer + mutexBuf *bytes.Buffer + cpuBuf *bytes.Buffer } // Metadata contains general runtime information about the current Alloy environment. @@ -50,6 +51,26 @@ type Metadata struct { // ExportSupportBundle gathers the information required for the support bundle. func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress string, dialContext server.DialContextFunc) (*Bundle, error) { + var httpClient http.Client + httpClient.Transport = &http.Transport{DialContext: dialContext} + + // Gather Alloy's own metrics. + alloyMetricsStart, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics") + if err != nil { + return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err) + } + + // Gather running component configuration + components, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/components") + if err != nil { + return nil, fmt.Errorf("failed to get component details: %s", err) + } + // Gather cluster peers information + peers, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/peers") + if err != nil { + return nil, fmt.Errorf("failed to get peer details: %s", err) + } + // The block profiler is disabled by default. Temporarily enable recording // of all blocking events. Also, temporarily record all mutex contentions, // and defer restoring of earlier mutex profiling fraction. @@ -76,24 +97,6 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress return nil, fmt.Errorf("failed to marshal support bundle metadata: %s", err) } - var httpClient http.Client - httpClient.Transport = &http.Transport{DialContext: dialContext} - // Gather Alloy's own metrics. - alloyMetrics, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics") - if err != nil { - return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err) - } - // Gather running component configuration - components, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/components") - if err != nil { - return nil, fmt.Errorf("failed to get component details: %s", err) - } - // Gather cluster peers information - peers, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/peers") - if err != nil { - return nil, fmt.Errorf("failed to get peer details: %s", err) - } - // Export pprof data. var ( cpuBuf bytes.Buffer @@ -129,19 +132,26 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress return nil, err } + // Gather Alloy's own metrics after the profile completes + alloyMetricsEnd, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics") + if err != nil { + return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err) + } + // Finally, bundle everything up to be served, either as a zip from // memory, or exported to a directory. bundle := &Bundle{ - meta: meta, - alloyMetrics: alloyMetrics, - components: components, - peers: peers, - runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")), - heapBuf: &heapBuf, - goroutineBuf: &goroutineBuf, - blockBuf: &blockBuf, - mutexBuf: &mutexBuf, - cpuBuf: &cpuBuf, + meta: meta, + alloyMetricsStart: alloyMetricsStart, + alloyMetricsEnd: alloyMetricsEnd, + components: components, + peers: peers, + runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")), + heapBuf: &heapBuf, + goroutineBuf: &goroutineBuf, + blockBuf: &blockBuf, + mutexBuf: &mutexBuf, + cpuBuf: &cpuBuf, } return bundle, nil @@ -169,17 +179,18 @@ func ServeSupportBundle(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer rw.Header().Set("Content-Disposition", "attachment; filename=\"alloy-support-bundle.zip\"") zipStructure := map[string][]byte{ - "alloy-metadata.yaml": b.meta, - "alloy-components.json": b.components, - "alloy-peers.json": b.peers, - "alloy-metrics.txt": b.alloyMetrics, - "alloy-runtime-flags.txt": b.runtimeFlags, - "alloy-logs.txt": logsBuf.Bytes(), - "pprof/cpu.pprof": b.cpuBuf.Bytes(), - "pprof/heap.pprof": b.heapBuf.Bytes(), - "pprof/goroutine.pprof": b.goroutineBuf.Bytes(), - "pprof/mutex.pprof": b.mutexBuf.Bytes(), - "pprof/block.pprof": b.blockBuf.Bytes(), + "alloy-metadata.yaml": b.meta, + "alloy-components.json": b.components, + "alloy-peers.json": b.peers, + "alloy-metrics-sample-1.txt": b.alloyMetricsStart, + "alloy-metrics-sample-2.txt": b.alloyMetricsEnd, + "alloy-runtime-flags.txt": b.runtimeFlags, + "alloy-logs.txt": logsBuf.Bytes(), + "pprof/cpu.pprof": b.cpuBuf.Bytes(), + "pprof/heap.pprof": b.heapBuf.Bytes(), + "pprof/goroutine.pprof": b.goroutineBuf.Bytes(), + "pprof/mutex.pprof": b.mutexBuf.Bytes(), + "pprof/block.pprof": b.blockBuf.Bytes(), } for fn, b := range zipStructure { From 368aab9b9e3dac0a5baad5b8df33d1eb47c1e057 Mon Sep 17 00:00:00 2001 From: Sam DeHaan Date: Fri, 15 Nov 2024 09:21:45 -0500 Subject: [PATCH 2/2] Update names of metrics samples --- docs/sources/troubleshoot/support_bundle.md | 4 ++-- internal/service/http/supportbundle.go | 24 ++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/sources/troubleshoot/support_bundle.md b/docs/sources/troubleshoot/support_bundle.md index db543911e..d38c9dd41 100644 --- a/docs/sources/troubleshoot/support_bundle.md +++ b/docs/sources/troubleshoot/support_bundle.md @@ -38,8 +38,8 @@ A support bundle contains the following data: `/api/v0/web/components` endpoint. * `alloy-logs.txt` contains the logs during the bundle generation. * `alloy-metadata.yaml` contains the {{< param "PRODUCT_NAME" >}} build version and the installation's operating system, architecture, and uptime. -* `alloy-metrics-sample-1.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the start of the bundle collection. -* `alloy-metrics-sample-2.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the end of the bundle collection. +* `alloy-metrics-sample-start.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the start of the bundle collection. +* `alloy-metrics-sample-end.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the end of the bundle collection. * `alloy-peers.json` contains information about the identified cluster peers of this {{< param "PRODUCT_NAME" >}} instance, generated by the `/api/v0/web/peers` endpoint. * `alloy-runtime-flags.txt` contains the values of the runtime flags available in {{< param "PRODUCT_NAME" >}}. diff --git a/internal/service/http/supportbundle.go b/internal/service/http/supportbundle.go index 783a5c3d0..ac0898ce5 100644 --- a/internal/service/http/supportbundle.go +++ b/internal/service/http/supportbundle.go @@ -179,18 +179,18 @@ func ServeSupportBundle(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer rw.Header().Set("Content-Disposition", "attachment; filename=\"alloy-support-bundle.zip\"") zipStructure := map[string][]byte{ - "alloy-metadata.yaml": b.meta, - "alloy-components.json": b.components, - "alloy-peers.json": b.peers, - "alloy-metrics-sample-1.txt": b.alloyMetricsStart, - "alloy-metrics-sample-2.txt": b.alloyMetricsEnd, - "alloy-runtime-flags.txt": b.runtimeFlags, - "alloy-logs.txt": logsBuf.Bytes(), - "pprof/cpu.pprof": b.cpuBuf.Bytes(), - "pprof/heap.pprof": b.heapBuf.Bytes(), - "pprof/goroutine.pprof": b.goroutineBuf.Bytes(), - "pprof/mutex.pprof": b.mutexBuf.Bytes(), - "pprof/block.pprof": b.blockBuf.Bytes(), + "alloy-metadata.yaml": b.meta, + "alloy-components.json": b.components, + "alloy-peers.json": b.peers, + "alloy-metrics-sample-start.txt": b.alloyMetricsStart, + "alloy-metrics-sample-end.txt": b.alloyMetricsEnd, + "alloy-runtime-flags.txt": b.runtimeFlags, + "alloy-logs.txt": logsBuf.Bytes(), + "pprof/cpu.pprof": b.cpuBuf.Bytes(), + "pprof/heap.pprof": b.heapBuf.Bytes(), + "pprof/goroutine.pprof": b.goroutineBuf.Bytes(), + "pprof/mutex.pprof": b.mutexBuf.Bytes(), + "pprof/block.pprof": b.blockBuf.Bytes(), } for fn, b := range zipStructure {