diff --git a/docs/content/en/docs/reference/metrics.md b/docs/content/en/docs/reference/metrics.md index ff0ee4cc92c..db4df7f736b 100644 --- a/docs/content/en/docs/reference/metrics.md +++ b/docs/content/en/docs/reference/metrics.md @@ -181,6 +181,15 @@ The total number of Tetragon events per type that are failed to sent from the ke | ----- | ------ | | `msg_op` | `11, 13, 14, 15, 23, 24, 25, 26, 5, 7` | +### `tetragon_missed_probes_total` + +The total number of Tetragon probes missed per policy and probe. + +| label | values | +| ----- | ------ | +| `attach` | `attach` | +| `policy` | `policy` | + ### `tetragon_msg_op_total` The total number of times we encounter a given message opcode. For internal use only. diff --git a/pkg/metrics/kprobemetrics/collector.go b/pkg/metrics/kprobemetrics/collector.go new file mode 100644 index 00000000000..ee987a299f5 --- /dev/null +++ b/pkg/metrics/kprobemetrics/collector.go @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package kprobemetrics + +import ( + "github.com/cilium/ebpf/link" + "github.com/cilium/tetragon/pkg/sensors" + "github.com/cilium/tetragon/pkg/sensors/program" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/sys/unix" +) + +// bpfCollector implements prometheus.Collector. It collects missed-probe counters from the link and program info of every program returned by sensors.AllPrograms(). 
+type bpfCollector struct{}
+
+// NewBPFCollector returns a prometheus.Collector backed by a bpfCollector.
+func NewBPFCollector() prometheus.Collector {
+	return &bpfCollector{}
+}
+
+// Describe sends the descriptors of the MissedLink and MissedProg metrics to ch.
+func (c *bpfCollector) Describe(ch chan<- *prometheus.Desc) {
+	ch <- MissedLink.Desc()
+	ch <- MissedProg.Desc()
+}
+
+// collectLink emits one MissedLink sample for load, labelled with load.Policy
+// and load.Attach.
+//
+// Nothing is emitted when the program has no link or when the link info cannot
+// be read. Only kprobe/kretprobe perf-event links and kprobe-multi links supply
+// a missed count; every other link type is reported with a value of 0.
+func collectLink(ch chan<- prometheus.Metric, load *program.Program) {
+	if load.Link == nil {
+		return
+	}
+
+	info, err := load.Link.Info()
+	if err != nil {
+		// Best effort: the link is skipped and no sample is emitted for it.
+		return
+	}
+
+	missed := uint64(0)
+
+	switch info.Type {
+	case link.PerfEventType:
+		pevent := info.PerfEvent()
+		switch pevent.Type {
+		case unix.BPF_PERF_EVENT_KPROBE, unix.BPF_PERF_EVENT_KRETPROBE:
+			kprobe := pevent.Kprobe()
+			// The second return value is intentionally ignored (presumably an
+			// availability flag - confirm against cilium/ebpf); the count is
+			// exported exactly as returned.
+			missed, _ = kprobe.Missed()
+		}
+	case link.KprobeMultiType:
+		kmulti := info.KprobeMulti()
+		// Second return value ignored for the same reason as above.
+		missed, _ = kmulti.Missed()
+	default:
+		// Other link types keep missed at 0.
+	}
+
+	ch <- MissedLink.MustMetric(float64(missed), load.Policy, load.Attach)
+}
+
+// collectProg emits one MissedProg sample for load, labelled with load.Policy
+// and load.Attach, whose value is taken from the program info's
+// RecursionMisses.
+//
+// Nothing is emitted when the program has no Prog handle or when the program
+// info cannot be read.
+func collectProg(ch chan<- prometheus.Metric, load *program.Program) {
+	// Mirror the nil Link guard in collectLink: without a Prog handle there is
+	// nothing to query, so skip the program instead of calling Info on nil.
+	if load.Prog == nil {
+		return
+	}
+
+	info, err := load.Prog.Info()
+	if err != nil {
+		// Best effort: the program is skipped and no sample is emitted for it.
+		return
+	}
+
+	// The second return value is intentionally ignored (presumably an
+	// availability flag - confirm against cilium/ebpf).
+	missed, _ := info.RecursionMisses()
+	ch <- MissedProg.MustMetric(float64(missed), load.Policy, load.Attach)
+}
+
+// Collect walks every program returned by sensors.AllPrograms and emits its
+// link-level and program-level missed counters.
+func (c *bpfCollector) Collect(ch chan<- prometheus.Metric) {
+	allPrograms := sensors.AllPrograms()
+	for _, prog := range allPrograms {
+		collectLink(ch, prog)
+		collectProg(ch, prog)
+	}
+}
+
+// bpfZeroCollector implements prometheus.Collector. It collects "zero" metrics.
+// It's intended to be used when BPF metrics are not collected, but we still want
+// Prometheus metrics to be exposed.
+type bpfZeroCollector struct {
+	// Embedded so that Describe can be forwarded to the regular collector.
+	bpfCollector
+}
+
+// NewBPFZeroCollector returns a prometheus.Collector backed by a
+// bpfZeroCollector that wraps an empty bpfCollector.
+func NewBPFZeroCollector() prometheus.Collector {
+	return &bpfZeroCollector{
+		bpfCollector: bpfCollector{},
+	}
+}
+
+// Describe forwards to the embedded bpfCollector, so both collectors advertise
+// the same descriptors.
+func (c *bpfZeroCollector) Describe(ch chan<- *prometheus.Desc) {
+	c.bpfCollector.Describe(ch)
+}
+
+// Collect emits a single zero-valued sample for each of MissedLink and
+// MissedProg, using the literal placeholder label values "policy" and
+// "attach". It does not look at any program.
+func (c *bpfZeroCollector) Collect(ch chan<- prometheus.Metric) {
+	ch <- MissedLink.MustMetric(0, "policy", "attach")
+	ch <- MissedProg.MustMetric(0, "policy", "attach")
+}
diff --git a/pkg/metrics/kprobemetrics/missed.go b/pkg/metrics/kprobemetrics/missed.go
new file mode 100644
index 00000000000..2c4bd36350f
--- /dev/null
+++ b/pkg/metrics/kprobemetrics/missed.go
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Authors of Tetragon
+
+package kprobemetrics
+
+import (
+	"github.com/cilium/tetragon/pkg/metrics"
+	"github.com/cilium/tetragon/pkg/metrics/consts"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+var (
+	// MissedLink describes the "missed_link_probes_total" metric (name built
+	// with consts.MetricsNamespace and an empty subsystem). Its variable labels
+	// are, in order, "policy" and "attach"; it has no constant labels.
+	// NOTE(review): the exact behaviour of metrics.NewBPFCounter is defined
+	// outside this file - confirm before relying on counter semantics.
+	MissedLink = metrics.NewBPFCounter(prometheus.NewDesc(
+		prometheus.BuildFQName(consts.MetricsNamespace, "", "missed_link_probes_total"),
+		"The total number of Tetragon probe missed by link.",
+		[]string{"policy", "attach"}, nil,
+	))
+	// MissedProg describes the "missed_prog_probes_total" metric (name built
+	// with consts.MetricsNamespace and an empty subsystem). Its variable labels
+	// are, in order, "policy" and "attach"; it has no constant labels.
+	MissedProg = metrics.NewBPFCounter(prometheus.NewDesc(
+		prometheus.BuildFQName(consts.MetricsNamespace, "", "missed_prog_probes_total"),
+		"The total number of Tetragon probe missed by program.",
+		[]string{"policy", "attach"}, nil,
+	))
+)
diff --git a/pkg/metrics/metricsconfig/initmetrics.go b/pkg/metrics/metricsconfig/initmetrics.go
index 9157b8b8b45..c57e0bdd4bd 100644
--- a/pkg/metrics/metricsconfig/initmetrics.go
+++ b/pkg/metrics/metricsconfig/initmetrics.go
@@ -61,6 +61,7 @@ func initAllHealthMetrics(registry *prometheus.Registry) {
 	// register custom collectors
 	registry.MustRegister(observer.NewBPFCollector())
 	registry.MustRegister(eventmetrics.NewBPFCollector())
+	registry.MustRegister(kprobemetrics.NewBPFCollector())
 }
 
 func InitHealthMetricsForDocs(registry *prometheus.Registry) {
@@ -72,6 +73,7 @@ func 
InitHealthMetricsForDocs(registry *prometheus.Registry) { // register custom zero collectors registry.MustRegister(observer.NewBPFZeroCollector()) registry.MustRegister(eventmetrics.NewBPFZeroCollector()) + registry.MustRegister(kprobemetrics.NewBPFZeroCollector()) } func initResourcesMetrics(registry *prometheus.Registry) {