Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Capture second metrics sample in support bundle to provide metrics delta for investigating issues #2085

Merged
merged 3 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ Main (unreleased)

- Add `otelcol.exporter.splunkhec`, which allows exporting otel data to Splunk HEC (@adlotsof)

### Enhancements

- Add a second metrics sample to the support bundle to provide delta information (@dehaansa)

### Bugfixes

- Fixed an issue in the `prometheus.exporter.postgres` component that would leak goroutines when the target was not reachable (@dehaansa)
Expand Down
3 changes: 2 additions & 1 deletion docs/sources/troubleshoot/support_bundle.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ A support bundle contains the following data:
`/api/v0/web/components` endpoint.
* `alloy-logs.txt` contains the logs during the bundle generation.
* `alloy-metadata.yaml` contains the {{< param "PRODUCT_NAME" >}} build version and the installation's operating system, architecture, and uptime.
* `alloy-metrics.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}}.
* `alloy-metrics-sample-1.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the start of the bundle collection.
* `alloy-metrics-sample-2.txt` contains a snapshot of the internal metrics for {{< param "PRODUCT_NAME" >}} at the end of the bundle collection.
wildum marked this conversation as resolved.
Show resolved Hide resolved
* `alloy-peers.json` contains information about the identified cluster peers of this {{< param "PRODUCT_NAME" >}} instance, generated by the
`/api/v0/web/peers` endpoint.
* `alloy-runtime-flags.txt` contains the values of the runtime flags available in {{< param "PRODUCT_NAME" >}}.
Expand Down
109 changes: 60 additions & 49 deletions internal/service/http/supportbundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,17 @@ type SupportBundleContext struct {

// Bundle collects all the data that is exposed as a support bundle.
type Bundle struct {
meta []byte
alloyMetrics []byte
components []byte
peers []byte
runtimeFlags []byte
heapBuf *bytes.Buffer
goroutineBuf *bytes.Buffer
blockBuf *bytes.Buffer
mutexBuf *bytes.Buffer
cpuBuf *bytes.Buffer
meta []byte
alloyMetricsStart []byte
alloyMetricsEnd []byte
components []byte
peers []byte
runtimeFlags []byte
heapBuf *bytes.Buffer
goroutineBuf *bytes.Buffer
blockBuf *bytes.Buffer
mutexBuf *bytes.Buffer
cpuBuf *bytes.Buffer
}

// Metadata contains general runtime information about the current Alloy environment.
Expand All @@ -50,6 +51,26 @@ type Metadata struct {

// ExportSupportBundle gathers the information required for the support bundle.
func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress string, dialContext server.DialContextFunc) (*Bundle, error) {
var httpClient http.Client
httpClient.Transport = &http.Transport{DialContext: dialContext}

// Gather Alloy's own metrics.
alloyMetricsStart, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics")
if err != nil {
return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err)
}

// Gather running component configuration
components, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/components")
if err != nil {
return nil, fmt.Errorf("failed to get component details: %s", err)
}
// Gather cluster peers information
peers, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/peers")
if err != nil {
return nil, fmt.Errorf("failed to get peer details: %s", err)
}

// The block profiler is disabled by default. Temporarily enable recording
// of all blocking events. Also, temporarily record all mutex contentions,
// and defer restoring of earlier mutex profiling fraction.
Expand All @@ -76,24 +97,6 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress
return nil, fmt.Errorf("failed to marshal support bundle metadata: %s", err)
}

var httpClient http.Client
httpClient.Transport = &http.Transport{DialContext: dialContext}
// Gather Alloy's own metrics.
alloyMetrics, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics")
if err != nil {
return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err)
}
// Gather running component configuration
components, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/components")
if err != nil {
return nil, fmt.Errorf("failed to get component details: %s", err)
}
// Gather cluster peers information
peers, err := retrieveAPIEndpoint(httpClient, srvAddress, "api/v0/web/peers")
if err != nil {
return nil, fmt.Errorf("failed to get peer details: %s", err)
}

// Export pprof data.
var (
cpuBuf bytes.Buffer
Expand Down Expand Up @@ -129,19 +132,26 @@ func ExportSupportBundle(ctx context.Context, runtimeFlags []string, srvAddress
return nil, err
}

// Gather Alloy's own metrics after the profile completes
alloyMetricsEnd, err := retrieveAPIEndpoint(httpClient, srvAddress, "metrics")
if err != nil {
return nil, fmt.Errorf("failed to get internal Alloy metrics: %s", err)
}

// Finally, bundle everything up to be served, either as a zip from
// memory, or exported to a directory.
bundle := &Bundle{
meta: meta,
alloyMetrics: alloyMetrics,
components: components,
peers: peers,
runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")),
heapBuf: &heapBuf,
goroutineBuf: &goroutineBuf,
blockBuf: &blockBuf,
mutexBuf: &mutexBuf,
cpuBuf: &cpuBuf,
meta: meta,
alloyMetricsStart: alloyMetricsStart,
alloyMetricsEnd: alloyMetricsEnd,
components: components,
peers: peers,
runtimeFlags: []byte(strings.Join(runtimeFlags, "\n")),
heapBuf: &heapBuf,
goroutineBuf: &goroutineBuf,
blockBuf: &blockBuf,
mutexBuf: &mutexBuf,
cpuBuf: &cpuBuf,
}

return bundle, nil
Expand Down Expand Up @@ -169,17 +179,18 @@ func ServeSupportBundle(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer
rw.Header().Set("Content-Disposition", "attachment; filename=\"alloy-support-bundle.zip\"")

zipStructure := map[string][]byte{
"alloy-metadata.yaml": b.meta,
"alloy-components.json": b.components,
"alloy-peers.json": b.peers,
"alloy-metrics.txt": b.alloyMetrics,
"alloy-runtime-flags.txt": b.runtimeFlags,
"alloy-logs.txt": logsBuf.Bytes(),
"pprof/cpu.pprof": b.cpuBuf.Bytes(),
"pprof/heap.pprof": b.heapBuf.Bytes(),
"pprof/goroutine.pprof": b.goroutineBuf.Bytes(),
"pprof/mutex.pprof": b.mutexBuf.Bytes(),
"pprof/block.pprof": b.blockBuf.Bytes(),
"alloy-metadata.yaml": b.meta,
"alloy-components.json": b.components,
"alloy-peers.json": b.peers,
"alloy-metrics-sample-1.txt": b.alloyMetricsStart,
"alloy-metrics-sample-2.txt": b.alloyMetricsEnd,
"alloy-runtime-flags.txt": b.runtimeFlags,
"alloy-logs.txt": logsBuf.Bytes(),
"pprof/cpu.pprof": b.cpuBuf.Bytes(),
"pprof/heap.pprof": b.heapBuf.Bytes(),
"pprof/goroutine.pprof": b.goroutineBuf.Bytes(),
"pprof/mutex.pprof": b.mutexBuf.Bytes(),
"pprof/block.pprof": b.blockBuf.Bytes(),
}

for fn, b := range zipStructure {
Expand Down