Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support the capturing of environment variables #327

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cli_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
defaultProbabilisticInterval = 1 * time.Minute
defaultArgSendErrorFrames = false
defaultOffCPUThreshold = 0
defaultEnvVarsValue = ""

// This is the X in 2^(n + x) where n is the default hardcoded map size value
defaultArgMapScaleFactor = 0
Expand Down Expand Up @@ -67,6 +68,7 @@ var (
"Valid values are in the range [1..%d], and 0 to disable off-cpu profiling."+
"Default is %d.",
support.OffCPUThresholdMax, defaultOffCPUThreshold)
envVarsHelp = "Defines a comma seperated list of env vars that should be reported with the captured profiling samples"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
envVarsHelp = "Defines a comma seperated list of env vars that should be reported with the captured profiling samples"
envVarsHelp = "Comma seperated list of environment variables that should be reported with the captured profiling samples."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are wildcards supposed to be supported?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for our usecase

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think with wild cards the mentioned data security issues might become more relevant

)

// Package-scope variable, so that conditionally compiled other components can refer
Expand Down Expand Up @@ -123,6 +125,8 @@ func parseArgs() (*controller.Config, error) {
fs.UintVar(&args.OffCPUThreshold, "off-cpu-threshold",
defaultOffCPUThreshold, offCPUThresholdHelp)

fs.StringVar(&args.IncludeEnvVars, "env-vars", defaultEnvVarsValue, envVarsHelp)

fs.Usage = func() {
fs.PrintDefaults()
}
Expand Down
1 change: 1 addition & 0 deletions host/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,5 @@ type Trace struct {
APMTraceID libpf.APMTraceID
APMTransactionID libpf.APMTransactionID
CPU int
EnvVars map[string]string
}
2 changes: 2 additions & 0 deletions internal/controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ type Config struct {
Reporter reporter.Reporter

Fs *flag.FlagSet

IncludeEnvVars string
}

const (
Expand Down
11 changes: 11 additions & 0 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ package controller // import "go.opentelemetry.io/ebpf-profiler/internal/control
import (
"context"
"fmt"
"strings"
"time"

log "github.com/sirupsen/logrus"
"github.com/tklauser/numcpus"

"go.opentelemetry.io/ebpf-profiler/host"
"go.opentelemetry.io/ebpf-profiler/libpf"
"go.opentelemetry.io/ebpf-profiler/metrics"
"go.opentelemetry.io/ebpf-profiler/reporter"
"go.opentelemetry.io/ebpf-profiler/times"
Expand Down Expand Up @@ -71,6 +73,14 @@ func (c *Controller) Start(ctx context.Context) error {
return fmt.Errorf("failed to start reporter: %w", err)
}

envVars := libpf.Set[string]{}
splittedEnvVars := strings.Split(c.config.IncludeEnvVars, ",")
for _, envVar := range splittedEnvVars {
if envVar != "" {
envVars[envVar] = libpf.Void{}
}
}

metrics.SetReporter(c.reporter)

// Load the eBPF code and map definitions
Expand All @@ -87,6 +97,7 @@ func (c *Controller) Start(ctx context.Context) error {
ProbabilisticInterval: c.config.ProbabilisticInterval,
ProbabilisticThreshold: c.config.ProbabilisticThreshold,
OffCPUThreshold: uint32(c.config.OffCPUThreshold),
IncludeEnvVars: envVars,
})
if err != nil {
return fmt.Errorf("failed to load eBPF tracer: %w", err)
Expand Down
3 changes: 2 additions & 1 deletion processmanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ var (
// implementation.
func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInterval time.Duration,
ebpf pmebpf.EbpfHandler, fileIDMapper FileIDMapper, symbolReporter reporter.SymbolReporter,
sdp nativeunwind.StackDeltaProvider, filterErrorFrames bool) (*ProcessManager, error) {
sdp nativeunwind.StackDeltaProvider, filterErrorFrames bool, includeEnvVars libpf.Set[string]) (*ProcessManager, error) {
if fileIDMapper == nil {
var err error
fileIDMapper, err = newFileIDMapper(lruFileIDCacheSize)
Expand Down Expand Up @@ -101,6 +101,7 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter
reporter: symbolReporter,
metricsAddSlice: metrics.AddSlice,
filterErrorFrames: filterErrorFrames,
includeEnvVars: includeEnvVars,
}

collectInterpreterMetrics(ctx, pm, monitorInterval)
Expand Down
27 changes: 27 additions & 0 deletions processmanager/processinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"fmt"
"os"
"path"
"strings"
"syscall"
"time"

Expand Down Expand Up @@ -88,11 +89,26 @@ func (pm *ProcessManager) updatePidInformation(pid libpf.PID, m *Mapping) (bool,
if name, err := os.ReadFile(fmt.Sprintf("/prod/%d/comm", pid)); err == nil {
processName = string(name)
}

envVarMap := make(map[string]string)
if envVars, err := os.ReadFile(fmt.Sprintf("/proc/%d/environ", pid)); err == nil {
splittedVars := strings.Split(string(envVars), "\000")
for _, envVar := range splittedVars {
keyValuePair := strings.SplitN(envVar, "=", 2)
_, envVarShouldBeCaptured := pm.includeEnvVars[keyValuePair[0]]

if envVarShouldBeCaptured {
envVarMap[keyValuePair[0]] = keyValuePair[1]
}
Comment on lines +98 to +102
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_, envVarShouldBeCaptured := pm.includeEnvVars[keyValuePair[0]]
if envVarShouldBeCaptured {
envVarMap[keyValuePair[0]] = keyValuePair[1]
}
if _, ok := pm.includeEnvVars[keyValuePair[0]]; ok {
envVarMap[keyValuePair[0]] = keyValuePair[1]
}

}
}

info = &processInfo{
name: processName,
executable: exePath,
mappings: make(map[libpf.Address]*Mapping),
mappingsByFileID: make(map[host.FileID]map[libpf.Address]*Mapping),
envVariables: envVarMap,
tsdInfo: nil,
}
pm.pidToProcessInfo[pid] = info
Expand Down Expand Up @@ -730,3 +746,14 @@ func (pm *ProcessManager) ProcessedUntil(traceCaptureKTime times.KTime) {

// Compile time check to make sure we satisfy the interface.
var _ tracehandler.TraceProcessor = (*ProcessManager)(nil)

func (pm *ProcessManager) EnvVarsForPID(pid libpf.PID) map[string]string {
var envVars map[string]string

pm.mu.RLock()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How critical are information about environment variables, that they should hold a global lock on the ProcessManager?

Copy link
Contributor

@rockdaboot rockdaboot Jan 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a user requires this information, it has the same criticality as data from other *ForPID functions.

In loadBpfTrace() we call now three of these functions when initializing the host.Trace{} instance. This means we do an RLock three times.

I'd suggest to fetch the data with a single function call / single lock in a follow-up PR.

defer pm.mu.RUnlock()
if procInfo, ok := pm.pidToProcessInfo[pid]; ok {
envVars = procInfo.envVariables
}
return envVars
}
5 changes: 5 additions & 0 deletions processmanager/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ type ProcessManager struct {

// filterErrorFrames determines whether error frames are dropped by `ConvertTrace`.
filterErrorFrames bool

// includeEnvVars holds a list of env vars that should be captured from processes
includeEnvVars libpf.Set[string]
}

// Mapping represents an executable memory mapping of a process.
Expand Down Expand Up @@ -148,6 +151,8 @@ type processInfo struct {
mappingsByFileID map[host.FileID]map[libpf.Address]*Mapping
// C-library Thread Specific Data information
tsdInfo *tpbase.TSDInfo
// process env vars from /proc/PID/environ
envVariables map[string]string
}

// addMapping adds a mapping to the internal indices.
Expand Down
1 change: 1 addition & 0 deletions reporter/base_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceE
MappingFileOffsets: trace.MappingFileOffsets,
Timestamps: []uint64{uint64(meta.Timestamp)},
OffTimes: []int64{meta.OffTime},
EnvVars: meta.EnvVars,
}
}

Expand Down
5 changes: 5 additions & 0 deletions reporter/internal/pdata/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
log "github.com/sirupsen/logrus"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pprofile"
"go.opentelemetry.io/otel/attribute"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"

"go.opentelemetry.io/ebpf-profiler/libpf"
Expand Down Expand Up @@ -206,6 +207,10 @@ func (p *Pdata) setProfile(
attrMgr.AppendInt(sample.AttributeIndices(),
semconv.ProcessPIDKey, traceKey.Pid)

for key, value := range traceInfo.EnvVars {
attrMgr.AppendOptionalString(sample.AttributeIndices(), attribute.Key("env."+key), value)
}

if p.ExtraSampleAttrProd != nil {
extra := p.ExtraSampleAttrProd.ExtraSampleAttrs(attrMgr, traceKey.ExtraMeta)
sample.AttributeIndices().Append(extra...)
Expand Down
3 changes: 3 additions & 0 deletions reporter/samples/samples.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type TraceEventMeta struct {
CPU int
Origin libpf.Origin
OffTime int64
EnvVars map[string]string
}

// TraceEvents holds known information about a trace.
Expand All @@ -27,6 +28,7 @@ type TraceEvents struct {
MappingFileOffsets []uint64
Timestamps []uint64 // in nanoseconds
OffTimes []int64 // in nanoseconds
EnvVars map[string]string
}

// TraceAndMetaKey is the deduplication key for samples. This **must always**
Expand All @@ -44,6 +46,7 @@ type TraceAndMetaKey struct {
ProcessName string
// Executable path is retrieved from /proc/PID/exe
ExecutablePath string

// ExtraMeta stores extra meta info that may have been produced by a
// `SampleAttrProducer` instance. May be nil.
ExtraMeta any
Expand Down
3 changes: 1 addition & 2 deletions support/ebpf/errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ typedef enum ErrorCode {
// Native: Unable to read the IRQ stack link
ERR_NATIVE_CHASE_IRQ_STACK_LINK = 4010,

// Native: Unexpectedly encountered a kernel mode pointer while attempting to unwind user-mode
// stack
// Native: Unexpectedly encountered a kernel mode pointer while attempting to unwind user-mode stack
ERR_NATIVE_UNEXPECTED_KERNEL_ADDRESS = 4011,

// Native: Unable to locate the PID page mapping for the current instruction pointer
Expand Down
1 change: 1 addition & 0 deletions tracehandler/tracehandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) {
ExecutablePath: bpfTrace.ExecutablePath,
Origin: bpfTrace.Origin,
OffTime: bpfTrace.OffTime,
EnvVars: bpfTrace.EnvVars,
}

if !m.reporter.SupportsReportTraceEvent() {
Expand Down
5 changes: 4 additions & 1 deletion tracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ type Config struct {
ProbabilisticThreshold uint
// OffCPUThreshold is the user defined threshold for off-cpu profiling.
OffCPUThreshold uint32
// IncludeEnvVars holds a list of environment variables that should be captured and reported from processes
IncludeEnvVars libpf.Set[string]
}

// hookPoint specifies the group and name of the hooked point in the kernel.
Expand Down Expand Up @@ -296,7 +298,7 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) {

processManager, err := pm.New(ctx, cfg.IncludeTracers, cfg.Intervals.MonitorInterval(),
ebpfHandler, nil, cfg.Reporter, elfunwindinfo.NewStackDeltaProvider(),
cfg.FilterErrorFrames)
cfg.FilterErrorFrames, cfg.IncludeEnvVars)
if err != nil {
return nil, fmt.Errorf("failed to create processManager: %v", err)
}
Expand Down Expand Up @@ -990,6 +992,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *host.Trace {
OffTime: int64(ptr.offtime),
KTime: times.KTime(ptr.ktime),
CPU: cpu,
EnvVars: t.processManager.EnvVarsForPID(pid),
}

if trace.Origin != support.TraceOriginSampling && trace.Origin != support.TraceOriginOffCPU {
Expand Down