diff --git a/controllers/secretproviderclasspodstatus_controller.go b/controllers/secretproviderclasspodstatus_controller.go index 8fbee8c14..2532fe7da 100644 --- a/controllers/secretproviderclasspodstatus_controller.go +++ b/controllers/secretproviderclasspodstatus_controller.go @@ -65,6 +65,7 @@ type SecretProviderClassPodStatusReconciler struct { writer client.Writer eventRecorder record.EventRecorder driverName string + reporter StatsReporter } // New creates a new SecretProviderClassPodStatusReconciler @@ -73,6 +74,10 @@ func New(driverName string, mgr manager.Manager, nodeID string) (*SecretProvider kubeClient := kubernetes.NewForConfigOrDie(mgr.GetConfig()) eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "csi-secrets-store-controller"}) + sr, err := newStatsReporter() + if err != nil { + return nil, err + } return &SecretProviderClassPodStatusReconciler{ Client: mgr.GetClient(), @@ -83,6 +88,7 @@ func New(driverName string, mgr manager.Manager, nodeID string) (*SecretProvider writer: mgr.GetClient(), eventRecorder: recorder, driverName: driverName, + reporter: sr, }, nil } @@ -217,7 +223,7 @@ func (r *SecretProviderClassPodStatusReconciler) ListOptionsLabelSelector() clie // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch // +kubebuilder:rbac:groups="storage.k8s.io",resources=csidrivers,verbs=get;list;watch,resourceNames=secrets-store.csi.k8s.io -func (r *SecretProviderClassPodStatusReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *SecretProviderClassPodStatusReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, e error) { r.mutex.Lock() defer r.mutex.Unlock() @@ -266,6 +272,18 @@ func (r *SecretProviderClassPodStatusReconciler) Reconcile(ctx context.Context, return ctrl.Result{}, nil } + begin := time.Now() + providerName := 
string(spc.Spec.Provider)
+	namespace := spcPodStatus.Namespace
+	secretProviderClass := spc.Name
+	defer func() {
+		// if there is SecretObjects defined in the SPC, then report the metric if sync is successful
+		if e == nil && !res.Requeue {
+			r.reporter.ReportSyncSecretCtMetric(ctx, providerName, namespace, secretProviderClass)
+			r.reporter.ReportSyncSecretDuration(ctx, time.Since(begin).Seconds())
+		}
+	}()
+
 	// determine which pod volume this is associated with
 	podVol := k8sutil.SPCVolume(pod, r.driverName, spc.Name)
 	if podVol == nil {
diff --git a/controllers/stats_reporter.go b/controllers/stats_reporter.go
new file mode 100644
index 000000000..fb73f1e6a
--- /dev/null
+++ b/controllers/stats_reporter.go
@@ -0,0 +1,100 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controllers
+
+import (
+	"context"
+	"runtime"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+	"go.opentelemetry.io/otel/metric/global"
+)
+
+const (
+	// scope is the instrumentation scope name used to obtain the meter.
+	scope = "sigs.k8s.io/secrets-store-csi-driver"
+)
+
+// Attribute keys and values attached to the metrics recorded by reporter.
+var (
+	providerKey  = "provider"
+	osTypeKey    = "os_type"
+	runtimeOS    = runtime.GOOS
+	namespaceKey = "namespace"
+	spcKey       = "secret_provider_class"
+)
+
+// reporter is the OpenTelemetry-backed implementation of StatsReporter.
+type reporter struct {
+	syncK8sSecretTotal    metric.Int64Counter
+	syncK8sSecretDuration metric.Float64Histogram
+}
+
+// StatsReporter records metrics about syncing Kubernetes secrets.
+type StatsReporter interface {
+	// ReportSyncSecretCtMetric increments the synced-secret counter by one,
+	// labeled with the given provider, namespace and SecretProviderClass name.
+	ReportSyncSecretCtMetric(ctx context.Context, provider, namespace, spc string)
+	// ReportSyncSecretDuration records one sync duration sample. The
+	// instrument is named with a "_sec" suffix, so callers pass seconds.
+	ReportSyncSecretDuration(ctx context.Context, duration float64)
+}
+
+// Compile-time check that *reporter implements StatsReporter.
+var _ StatsReporter = (*reporter)(nil)
+
+// newStatsReporter creates the counter and histogram instruments on the
+// global meter and returns a StatsReporter backed by them. It returns the
+// first instrument-creation error, if any.
+func newStatsReporter() (StatsReporter, error) {
+	meter := global.Meter(scope)
+
+	syncTotal, err := meter.Int64Counter("sync_k8s_secret", metric.WithDescription("Total number of k8s secrets synced"))
+	if err != nil {
+		return nil, err
+	}
+	syncDuration, err := meter.Float64Histogram("sync_k8s_secret_duration_sec", metric.WithDescription("Distribution of how long it took to sync k8s secret"))
+	if err != nil {
+		return nil, err
+	}
+	return &reporter{
+		syncK8sSecretTotal:    syncTotal,
+		syncK8sSecretDuration: syncDuration,
+	}, nil
+}
+
+// ReportSyncSecretCtMetric adds one to the sync_k8s_secret counter, tagged
+// with provider, os_type, namespace and secret_provider_class attributes.
+func (r *reporter) ReportSyncSecretCtMetric(ctx context.Context, provider, namespace, spc string) {
+	opt := metric.WithAttributes(
+		attribute.Key(providerKey).String(provider),
+		attribute.Key(osTypeKey).String(runtimeOS),
+		attribute.Key(namespaceKey).String(namespace),
+		attribute.Key(spcKey).String(spc),
+	)
+	r.syncK8sSecretTotal.Add(ctx, 1, opt)
+}
+
+// ReportSyncSecretDuration records duration in the
+// sync_k8s_secret_duration_sec histogram, tagged only with os_type.
+func (r *reporter) ReportSyncSecretDuration(ctx context.Context, duration float64) {
+	opt := metric.WithAttributes(
+		attribute.Key(osTypeKey).String(runtimeOS),
+	)
+	r.syncK8sSecretDuration.Record(ctx, duration, opt)
+}
diff --git a/docs/book/src/topics/metrics.md b/docs/book/src/topics/metrics.md
index a65f7bced..675dd6366 100644
--- a/docs/book/src/topics/metrics.md
+++ b/docs/book/src/topics/metrics.md
@@ 
-12,6 +12,8 @@ Prometheus is the only exporter that's currently supported with the driver. | node_unpublish_total | Total number of successful volume unmount requests | `os_type=` | | node_publish_error_total | Total number of errors with volume mount requests | `os_type=`
`provider=`
`error_type=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | | node_unpublish_error_total | Total number of errors with volume unmount requests | `os_type=` | +| sync_k8s_secret_total | Total number of k8s secrets synced | `os_type=`
`provider=`
`namespace=`
`secret_provider_class=` | +| sync_k8s_secret_duration_sec | Distribution of how long it took to sync k8s secret | `os_type=` | | rotation_reconcile_total | Total number of rotation reconciles | `os_type=`
`rotated=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | | rotation_reconcile_error_total | Total number of rotation reconciles with error | `os_type=`
`rotated=`
`error_type=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | | rotation_reconcile_duration_sec | Distribution of how long it took to rotate secrets-store content for pods | `os_type=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | @@ -26,6 +28,30 @@ curl localhost:8095/metrics ### Sample Metrics output ```shell +# HELP sync_k8s_secret_duration_sec Distribution of how long it took to sync k8s secret +# TYPE sync_k8s_secret_duration_sec histogram +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.1"} 0 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.2"} 0 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.3"} 0 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.4"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.5"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="1"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="1.5"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="2"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="2.5"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="3"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="5"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="10"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="15"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="30"} 1 +sync_k8s_secret_duration_sec_bucket{os_type="linux",le="+Inf"} 1 +sync_k8s_secret_duration_sec_sum{os_type="linux"} 0.3115892 +sync_k8s_secret_duration_sec_count{os_type="linux"} 1 + +# HELP sync_k8s_secret_total Total number of k8s secrets synced +# TYPE sync_k8s_secret_total counter +sync_k8s_secret_total{namespace="csi-test-secret-ns",os_type="linux",provider="azure",secret_provider_class="csi-test-spc"} 1 + # HELP rotation_reconcile_duration_sec Distribution of how long it took to rotate secrets-store content for pods # TYPE rotation_reconcile_duration_sec histogram rotation_reconcile_duration_sec_bucket{os_type="linux",le="0.1"} 0 diff --git a/pkg/secrets-store/mocks/stats_reporter_mock.go b/pkg/secrets-store/mocks/stats_reporter_mock.go index 5af4afaba..c2aa00722 100644 --- a/pkg/secrets-store/mocks/stats_reporter_mock.go +++ 
b/pkg/secrets-store/mocks/stats_reporter_mock.go @@ -31,8 +31,6 @@ type FakeReporter struct { reportNodeUnPublishCtMetricInvoked int reportNodePublishErrorCtMetricInvoked int reportNodeUnPublishErrorCtMetricInvoked int - reportSyncK8SecretCtMetricInvoked int - reportSyncK8SecretDurationInvoked int metricDetails []MetricDetails } @@ -71,14 +69,6 @@ func (f *FakeReporter) ReportNodeUnPublishErrorCtMetric(ctx context.Context) { f.reportNodeUnPublishErrorCtMetricInvoked++ } -func (f *FakeReporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) { - f.reportSyncK8SecretCtMetricInvoked++ -} - -func (f *FakeReporter) ReportSyncK8SecretDuration(ctx context.Context, duration float64) { - f.reportSyncK8SecretDurationInvoked++ -} - func (f *FakeReporter) ReportNodePublishCtMetricInvoked() int { return f.reportNodePublishCtMetricInvoked } @@ -91,12 +81,6 @@ func (f *FakeReporter) ReportNodePublishErrorCtMetricInvoked() int { func (f *FakeReporter) ReportNodeUnPublishErrorCtMetricInvoked() int { return f.reportNodeUnPublishErrorCtMetricInvoked } -func (f *FakeReporter) ReportSyncK8SecretCtMetricInvoked() int { - return f.reportSyncK8SecretCtMetricInvoked -} -func (f *FakeReporter) ReportSyncK8SecretDurationInvoked() int { - return f.reportSyncK8SecretDurationInvoked -} func (f *FakeReporter) GetMetricDetails() []MetricDetails { return f.metricDetails diff --git a/pkg/secrets-store/stats_reporter.go b/pkg/secrets-store/stats_reporter.go index 57c7b8629..a8559c913 100644 --- a/pkg/secrets-store/stats_reporter.go +++ b/pkg/secrets-store/stats_reporter.go @@ -44,8 +44,6 @@ type reporter struct { nodeUnPublishTotal metric.Int64Counter nodePublishErrorTotal metric.Int64Counter nodeUnPublishErrorTotal metric.Int64Counter - syncK8sSecretTotal metric.Int64Counter - syncK8sSecretDuration metric.Float64Histogram } type StatsReporter interface { @@ -53,8 +51,6 @@ type StatsReporter interface { ReportNodeUnPublishCtMetric(ctx 
context.Context) ReportNodePublishErrorCtMetric(ctx context.Context, provider, podName, podNamespace, spc, errType string) ReportNodeUnPublishErrorCtMetric(ctx context.Context) - ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) - ReportSyncK8SecretDuration(ctx context.Context, duration float64) } func NewStatsReporter() (StatsReporter, error) { @@ -75,12 +71,6 @@ func NewStatsReporter() (StatsReporter, error) { if r.nodeUnPublishErrorTotal, err = meter.Int64Counter("node_unpublish_error", metric.WithDescription("Total number of node unpublish calls with error")); err != nil { return nil, err } - if r.syncK8sSecretTotal, err = meter.Int64Counter("sync_k8s_secret", metric.WithDescription("Total number of k8s secrets synced")); err != nil { - return nil, err - } - if r.syncK8sSecretDuration, err = meter.Float64Histogram("k8s_secret_duration_sec", metric.WithDescription("Distribution of how long it took to sync k8s secret")); err != nil { - return nil, err - } return r, nil } @@ -120,21 +110,3 @@ func (r *reporter) ReportNodeUnPublishErrorCtMetric(ctx context.Context) { ) r.nodeUnPublishErrorTotal.Add(ctx, 1, opt) } - -func (r *reporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) { - opt := metric.WithAttributes( - attribute.Key(providerKey).String(provider), - attribute.Key(osTypeKey).String(runtimeOS), - attribute.Key(podNameKey).String(podName), - attribute.Key(podNamespaceKey).String(podNamespace), - attribute.Key(spcKey).String(spc), - ) - r.syncK8sSecretTotal.Add(ctx, int64(count), opt) -} - -func (r *reporter) ReportSyncK8SecretDuration(ctx context.Context, duration float64) { - opt := metric.WithAttributes( - attribute.Key(osTypeKey).String(runtimeOS), - ) - r.syncK8sSecretDuration.Record(ctx, duration, opt) -} diff --git a/test/bats/e2e-provider.bats b/test/bats/e2e-provider.bats index f3e45c650..1f95a13ea 100644 --- a/test/bats/e2e-provider.bats 
+++ b/test/bats/e2e-provider.bats @@ -428,6 +428,7 @@ export VALIDATE_TOKENS_AUDIENCE=$(get_token_requests_audience) assert_match "node_publish_total" "${output}" assert_match "node_unpublish_total" "${output}" assert_match "rotation_reconcile_total" "${output}" + assert_match "sync_k8s_secret_total" "${output}" done }