Skip to content

Commit

Permalink
Add metrics for created operands (#837)
Browse files Browse the repository at this point in the history
* Add metrics for created operands

Signed-off-by: Ruben Vargas <[email protected]>

* go mod tidy

Signed-off-by: Ruben Vargas <[email protected]>

* Add CHANGELOG entry

Signed-off-by: Ruben Vargas <[email protected]>

* Update internal/crdmetrics/tempo_stack_test.go

Co-authored-by: Israel Blancas <[email protected]>

* Fix linting issues

Signed-off-by: Ruben Vargas <[email protected]>

* Remove unused constants

Signed-off-by: Ruben Vargas <[email protected]>

* Add jaegerUI and multitenancy metrics

Signed-off-by: Ruben Vargas <[email protected]>

---------

Signed-off-by: Ruben Vargas <[email protected]>
Co-authored-by: Israel Blancas <[email protected]>
  • Loading branch information
rubenvp8510 and iblancasa authored Mar 16, 2024
1 parent eed0197 commit 4cdfd50
Show file tree
Hide file tree
Showing 9 changed files with 490 additions and 0 deletions.
16 changes: 16 additions & 0 deletions .chloggen/crd_metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. operator, github action)
component: operator

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add storage and managed operands gauge metrics to the operator metrics.

# One or more tracking issues related to the change
issues: [838]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
6 changes: 6 additions & 0 deletions cmd/start/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
configv1alpha1 "github.com/grafana/tempo-operator/apis/config/v1alpha1"
"github.com/grafana/tempo-operator/cmd"
controllers "github.com/grafana/tempo-operator/controllers/tempo"
"github.com/grafana/tempo-operator/internal/crdmetrics"
"github.com/grafana/tempo-operator/internal/upgrade"
"github.com/grafana/tempo-operator/internal/version"
"github.com/grafana/tempo-operator/internal/webhooks"
Expand Down Expand Up @@ -117,6 +118,11 @@ func start(c *cobra.Command, args []string) {
"go-os", runtime.GOOS,
)

if err := crdmetrics.Bootstrap(mgr.GetClient()); err != nil {
setupLog.Error(err, "problem init crd metrics")
os.Exit(1)
}

if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
Expand Down
8 changes: 8 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ require (
github.com/prometheus/client_golang v1.19.0
github.com/prometheus/common v0.50.0
github.com/spf13/cobra v1.8.0
go.opentelemetry.io/otel v1.23.1
go.opentelemetry.io/otel/exporters/prometheus v0.45.2
go.opentelemetry.io/otel/metric v1.23.1
go.opentelemetry.io/otel/sdk/metric v1.23.1
go.uber.org/zap v1.27.0
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.28.5
Expand All @@ -44,8 +48,10 @@ require (
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch v5.6.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
Expand All @@ -70,6 +76,8 @@ require (
github.com/prometheus/procfs v0.12.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.2 // indirect
go.opentelemetry.io/otel/sdk v1.23.1 // indirect
go.opentelemetry.io/otel/trace v1.23.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/net v0.22.0 // indirect
Expand Down
15 changes: 15 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@ github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJ
github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg=
github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
Expand Down Expand Up @@ -135,6 +138,18 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.opentelemetry.io/otel v1.23.1 h1:Za4UzOqJYS+MUczKI320AtqZHZb7EqxO00jAHE0jmQY=
go.opentelemetry.io/otel v1.23.1/go.mod h1:Td0134eafDLcTS4y+zQ26GE8u3dEuRBiBCTUIRHaikA=
go.opentelemetry.io/otel/exporters/prometheus v0.45.2 h1:pe2Jqk1K18As0RCw7J08QhgXNqr+6npx0a5W4IgAFA8=
go.opentelemetry.io/otel/exporters/prometheus v0.45.2/go.mod h1:B38pscHKI6bhFS44FDw0eFU3iqG3ASNIvY+fZgR5sAc=
go.opentelemetry.io/otel/metric v1.23.1 h1:PQJmqJ9u2QaJLBOELl1cxIdPcpbwzbkjfEyelTl2rlo=
go.opentelemetry.io/otel/metric v1.23.1/go.mod h1:mpG2QPlAfnK8yNhNJAxDZruU9Y1/HubbC+KyH8FaCWI=
go.opentelemetry.io/otel/sdk v1.23.1 h1:O7JmZw0h76if63LQdsBMKQDWNb5oEcOThG9IrxscV+E=
go.opentelemetry.io/otel/sdk v1.23.1/go.mod h1:LzdEVR5am1uKOOwfBWFef2DCi1nu3SA8XQxx2IerWFk=
go.opentelemetry.io/otel/sdk/metric v1.23.1 h1:T9/8WsYg+ZqIpMWwdISVVrlGb/N0Jr1OHjR/alpKwzg=
go.opentelemetry.io/otel/sdk/metric v1.23.1/go.mod h1:8WX6WnNtHCgUruJ4TJ+UssQjMtpxkpX0zveQC8JG/E0=
go.opentelemetry.io/otel/trace v1.23.1 h1:4LrmmEd8AU2rFvU1zegmvqW7+kWarxtNOPyeL6HmYY8=
go.opentelemetry.io/otel/trace v1.23.1/go.mod h1:4IpnpJFwr1mo/6HL8XIPJaE9y0+u1KcVmuW7dwFSVrI=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
Expand Down
23 changes: 23 additions & 0 deletions internal/crdmetrics/bootstrap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package crdmetrics

import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// Bootstrap installs an OpenTelemetry meter provider backed by a Prometheus
// exporter and then sets up the TempoStack instance metrics.
//
// The exporter is registered with the controller-runtime metrics registry
// (metrics.Registry), and the resulting provider is installed as the global
// OpenTelemetry meter provider. The given client is handed to the TempoStack
// metrics. Any error from creating the exporter or from setting up the
// metrics is returned unchanged.
func Bootstrap(client client.Client) error {
	promReader, err := prometheus.New(prometheus.WithRegisterer(metrics.Registry))
	if err != nil {
		return err
	}
	otel.SetMeterProvider(metric.NewMeterProvider(metric.WithReader(promReader)))

	// Create metrics
	return newTempoStackMetrics(client).Setup()
}
15 changes: 15 additions & 0 deletions internal/crdmetrics/consts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package crdmetrics

const (
// meterName is the name used to obtain the OpenTelemetry meter for this package.
meterName = "grafana/tempo-operator"
)

// Metric name parts: tempoStackMetricsPrefix is joined with one of the names below by instanceMetricName.

const (
// tempoStackMetricsPrefix is the common prefix of every TempoStack instance metric.
tempoStackMetricsPrefix = "tempo_operator_tempostack"
// storageBackendMetric names the gauge counting instances per storage type.
storageBackendMetric = "storage_backend"
// managedMetric names the gauge counting instances per management state.
managedMetric = "managed"
// jaegerUIUsage names the gauge counting instances with the Jaeger UI enabled/disabled.
jaegerUIUsage = "jaeger_ui"
// multitenancy names the gauge counting instances per multi-tenancy mode.
multitenancy = "multi_tenancy"
)
57 changes: 57 additions & 0 deletions internal/crdmetrics/instance_view.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package crdmetrics

import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// countFn maps an instance to the label value it is counted under. The boolean
// result reports whether the instance should be counted at all.
type countFn func(instance client.Object) (string, bool)

// instancesView holds, for a single gauge, the number of instances counted under each label value.
type instancesView struct {
// Name is the metric name (without prefix) given to newObservation.
Name string
// Label is the attribute key attached to every reported data point.
Label string
// Count maps a label value to the number of instances recorded under it.
Count map[string]int
// Gauge is the observable gauge the counts are reported to.
Gauge metric.Int64ObservableGauge
// KeyFn derives the label value for an instance and decides whether it is counted.
KeyFn countFn
}

// reset zeroes every counter while keeping the label values already seen as
// keys of the map, so they are still present when the counts are reported.
func (i *instancesView) reset() {
	for seen := range i.Count {
		i.Count[seen] = 0
	}
}

// Record asks KeyFn for the label value of the instance and, when KeyFn says
// the instance is to be counted, increments the counter for that value.
func (i *instancesView) Record(instance client.Object) {
	value, ok := i.KeyFn(instance)
	if !ok {
		return
	}
	i.Count[value]++
}

// Report observes one gauge value per known label value, attaching the label
// value as an attribute keyed by Label.
func (i *instancesView) Report(observer metric.Observer) {
	labelKey := attribute.Key(i.Label)
	for value, total := range i.Count {
		observer.ObserveInt64(i.Gauge, int64(total), metric.WithAttributes(labelKey.String(value)))
	}
}

// newObservation creates an observable int64 gauge named after name (via
// instanceMetricName) with the given description, and returns an
// instancesView bound to it with an empty counter map.
//
// label is the attribute key used when reporting, and keyFn picks the label
// value for each recorded instance. If the gauge cannot be created, a zero
// instancesView and the error are returned.
func newObservation(meter metric.Meter, name, desc, label string, keyFn countFn) (instancesView, error) {
	gauge, err := meter.Int64ObservableGauge(instanceMetricName(name), metric.WithDescription(desc))
	if err != nil {
		return instancesView{}, err
	}

	return instancesView{
		Name:  name,
		Label: label,
		Count: map[string]int{},
		Gauge: gauge,
		KeyFn: keyFn,
	}, nil
}
119 changes: 119 additions & 0 deletions internal/crdmetrics/tempo_stack.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package crdmetrics

import (
"context"
"fmt"
"strconv"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/metric"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/grafana/tempo-operator/apis/tempo/v1alpha1"
)

// tempoStackMetrics lists TempoStack instances through client and feeds them into its observations.
type tempoStackMetrics struct {
// client is used by the metrics callback to list TempoStack instances.
client client.Client
// observations holds one view per gauge; it is populated by Setup.
observations []instancesView
}

// instanceMetricName returns the full metric name for a TempoStack instance
// metric: the shared prefix, an underscore, and the given name.
func instanceMetricName(name string) string {
	const separator = "_"
	// fmt.Sprint adds no spaces between operands when they are strings.
	return fmt.Sprint(tempoStackMetricsPrefix, separator, name)
}

// newTempoStackMetrics returns a tempoStackMetrics that uses the given client
// and has no observations yet; call Setup to create them.
func newTempoStackMetrics(client client.Client) *tempoStackMetrics {
	m := new(tempoStackMetrics)
	m.client = client
	return m
}

// Setup creates the TempoStack gauges (storage backend, management state,
// Jaeger UI usage and multi-tenancy mode), appends a view for each of them to
// the observations, and registers the callback that reports them.
//
// It returns the first error hit while creating a gauge or registering the
// callback. Views created before a failure remain in the observations.
func (i *tempoStackMetrics) Setup() error {
	meter := otel.Meter(meterName)

	// One entry per gauge, in registration order.
	definitions := []struct {
		name  string
		desc  string
		label string
		keyFn countFn
	}{
		{
			name:  storageBackendMetric,
			desc:  "Number of instances per storage type",
			label: "type",
			keyFn: func(instance client.Object) (string, bool) {
				stack := instance.(*v1alpha1.TempoStack)
				return string(stack.Spec.Storage.Secret.Type), true
			},
		},
		{
			name:  managedMetric,
			desc:  "Instances managed by the operator",
			label: "state",
			keyFn: func(instance client.Object) (string, bool) {
				stack := instance.(*v1alpha1.TempoStack)
				return string(stack.Spec.ManagementState), true
			},
		},
		{
			name:  jaegerUIUsage,
			desc:  "Instances with jaeger UI enabled/disabled",
			label: "enabled",
			keyFn: func(instance client.Object) (string, bool) {
				stack := instance.(*v1alpha1.TempoStack)
				return strconv.FormatBool(stack.Spec.Template.QueryFrontend.JaegerQuery.Enabled), true
			},
		},
		{
			name:  multitenancy,
			desc:  "Instances with multi-tenancy mode static/openshift/disabled",
			label: "type",
			keyFn: func(instance client.Object) (string, bool) {
				stack := instance.(*v1alpha1.TempoStack)
				// No tenants section, or an empty mode, counts as disabled.
				if tenants := stack.Spec.Tenants; tenants == nil || tenants.Mode == "" {
					return "disabled", true
				}
				return string(stack.Spec.Tenants.Mode), true
			},
		},
	}

	for _, def := range definitions {
		view, err := newObservation(meter, def.name, def.desc, def.label, def.keyFn)
		if err != nil {
			return err
		}
		i.observations = append(i.observations, view)
	}

	// The callback must be registered against every gauge it observes.
	gauges := make([]metric.Observable, len(i.observations))
	for idx := range i.observations {
		gauges[idx] = i.observations[idx].Gauge
	}

	_, err := meter.RegisterCallback(i.callback, gauges...)
	return err
}

// callback lists the TempoStack instances, recomputes every observation from
// them, and reports the resulting counts to observer.
//
// When listing fails, the counters are left untouched and the previously
// computed counts are still reported (best effort). The list error is then
// returned, wrapped with context, instead of being silently dropped.
// NOTE(review): this relies on the OpenTelemetry SDK surfacing callback errors
// through its error handler without discarding the observations — confirm.
func (i *tempoStackMetrics) callback(ctx context.Context, observer metric.Observer) error {
	instances := &v1alpha1.TempoStackList{}
	listErr := i.client.List(ctx, instances)

	if listErr == nil {
		// Reset observations
		for idx := range i.observations {
			i.observations[idx].reset()
		}

		for k := range instances.Items {
			// Take the address of the slice element rather than copying the
			// whole TempoStack on every iteration.
			tempoStack := &instances.Items[k]
			for idx := range i.observations {
				i.observations[idx].Record(tempoStack)
			}
		}
	}

	// Report metrics
	for idx := range i.observations {
		i.observations[idx].Report(observer)
	}

	if listErr != nil {
		return fmt.Errorf("listing TempoStack instances: %w", listErr)
	}
	return nil
}
Loading

0 comments on commit 4cdfd50

Please sign in to comment.