Skip to content

Commit

Permalink
Create ServiceMonitor for operator metrics programmatically (#3371)
Browse files Browse the repository at this point in the history
* Create ServiceMonitor for operator metrics programmatically

Signed-off-by: Israel Blancas <[email protected]>

* Apply changes requested in CR

Signed-off-by: Israel Blancas <[email protected]>

* Apply changes requested in CR

Signed-off-by: Israel Blancas <[email protected]>

---------

Signed-off-by: Israel Blancas <[email protected]>
  • Loading branch information
iblancasa authored Nov 8, 2024
1 parent 0ff706a commit 24ead74
Show file tree
Hide file tree
Showing 17 changed files with 411 additions and 35 deletions.
19 changes: 19 additions & 0 deletions .chloggen/3370-create-dynamic-sm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
component: operator

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Programmatically create the `ServiceMonitor` for the operator metrics endpoint, ensuring correct namespace handling and dynamic configuration.

# One or more tracking issues related to the change
issues: [3370]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
Previously, the `ServiceMonitor` was created statically from a manifest file, causing failures when the
operator was deployed in a non-default namespace. This enhancement ensures automatic adjustment of the
`serverName` and seamless metrics scraping.
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ metadata:
categories: Logging & Tracing,Monitoring
certified: "false"
containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
createdAt: "2024-10-30T17:23:26Z"
createdAt: "2024-10-31T08:49:00Z"
description: Provides the OpenTelemetry components, including the Collector
operators.operatorframework.io/builder: operator-sdk-v1.29.0
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
apiVersion: v1
kind: Service
metadata:
annotations:
service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics
creationTimestamp: null
labels:
app.kubernetes.io/name: opentelemetry-operator
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# PrometheusRule defining recording rules for the OpenTelemetry operator.
# Each rule pre-aggregates one opentelemetry_collector_* metric by its `type`
# label and stores the result under a `type:<metric>:sum` series name.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/managed-by: operator-lifecycle-manager
app.kubernetes.io/name: opentelemetry-operator
app.kubernetes.io/part-of: opentelemetry-operator
name: opentelemetry-operator-prometheus-rules
spec:
groups:
- name: opentelemetry-operator-monitoring.rules
rules:
# One recording rule per collector component kind, plus the collector info metric.
- expr: sum by (type) (opentelemetry_collector_receivers)
record: type:opentelemetry_collector_receivers:sum
- expr: sum by (type) (opentelemetry_collector_exporters)
record: type:opentelemetry_collector_exporters:sum
- expr: sum by (type) (opentelemetry_collector_processors)
record: type:opentelemetry_collector_processors:sum
- expr: sum by (type) (opentelemetry_collector_extensions)
record: type:opentelemetry_collector_extensions:sum
- expr: sum by (type) (opentelemetry_collector_connectors)
record: type:opentelemetry_collector_connectors:sum
- expr: sum by (type) (opentelemetry_collector_info)
record: type:opentelemetry_collector_info:sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Namespaced Role granting read-only access (get/list/watch) to the core-group
# services, endpoints and pods resources. It is the Role referenced by the
# RoleBinding of the same name.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: opentelemetry-operator-prometheus
rules:
- apiGroups:
# The empty string selects the core API group.
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# RoleBinding that grants the opentelemetry-operator-prometheus Role to the
# prometheus-k8s ServiceAccount living in the openshift-monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: opentelemetry-operator-prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: opentelemetry-operator-prometheus
subjects:
# The subject is in a different namespace than the binding itself.
- kind: ServiceAccount
name: prometheus-k8s
namespace: openshift-monitoring
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ metadata:
categories: Logging & Tracing,Monitoring
certified: "false"
containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
createdAt: "2024-10-30T17:23:26Z"
createdAt: "2024-10-31T08:49:04Z"
description: Provides the OpenTelemetry components, including the Collector
operators.operatorframework.io/builder: operator-sdk-v1.29.0
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
Expand Down Expand Up @@ -479,9 +479,9 @@ spec:
- --zap-time-encoding=rfc3339nano
- --enable-nginx-instrumentation=true
- --enable-go-instrumentation=true
- --enable-multi-instrumentation=true
- --openshift-create-dashboard=true
- --feature-gates=+operator.observability.prometheus
- --enable-cr-metrics=true
env:
- name: SERVICE_ACCOUNT_NAME
valueFrom:
Expand Down Expand Up @@ -518,6 +518,10 @@ spec:
- --upstream=http://127.0.0.1:8080/
- --logtostderr=true
- --v=0
- --tls-cert-file=/var/run/tls/server/tls.crt
- --tls-private-key-file=/var/run/tls/server/tls.key
- --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256
- --tls-min-version=VersionTLS12
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1
name: kube-rbac-proxy
ports:
Expand All @@ -531,9 +535,16 @@ spec:
requests:
cpu: 5m
memory: 64Mi
volumeMounts:
- mountPath: /var/run/tls/server
name: opentelemetry-operator-metrics-cert
serviceAccountName: opentelemetry-operator-controller-manager
terminationGracePeriodSeconds: 10
volumes:
- name: opentelemetry-operator-metrics-cert
secret:
defaultMode: 420
secretName: opentelemetry-operator-metrics
- name: cert
secret:
defaultMode: 420
Expand Down
2 changes: 0 additions & 2 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ bases:
- ../manager
- ../webhook
- ../certmanager
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
#- ../prometheus

patchesStrategicMerge:
# Protect the /metrics endpoint by putting it behind auth.
Expand Down
4 changes: 4 additions & 0 deletions config/overlays/openshift/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ patches:
kind: Deployment
name: controller-manager
path: manager-patch.yaml

patchesStrategicMerge:
- metrics_service_tls_patch.yaml
- manager_auth_proxy_tls_patch.yaml
2 changes: 1 addition & 1 deletion config/overlays/openshift/manager-patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
- --zap-time-encoding=rfc3339nano
- --enable-nginx-instrumentation=true
- '--enable-go-instrumentation=true'
- '--enable-multi-instrumentation=true'
- '--openshift-create-dashboard=true'
- '--feature-gates=+operator.observability.prometheus'
- '--enable-cr-metrics=true'
29 changes: 29 additions & 0 deletions config/overlays/openshift/manager_auth_proxy_tls_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Strategic-merge patch for the controller-manager Deployment (OpenShift overlay).
# It makes the kube-rbac-proxy sidecar serve TLS with the certificate and key
# mounted from the opentelemetry-operator-metrics secret.
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
spec:
containers:
- name: manager # without this line, kustomize reorders the containers, making kube-rbac-proxy the default container
- name: kube-rbac-proxy
args:
# Listen on 8443 and forward to the manager's local endpoint on 8080.
- "--secure-listen-address=0.0.0.0:8443"
- "--upstream=http://127.0.0.1:8080/"
- "--logtostderr=true"
- "--v=0"
# Certificate and key come from the volume mounted at /var/run/tls/server below.
- "--tls-cert-file=/var/run/tls/server/tls.crt"
- "--tls-private-key-file=/var/run/tls/server/tls.key"
- "--tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256"
- "--tls-min-version=VersionTLS12"
volumeMounts:
- mountPath: /var/run/tls/server
name: opentelemetry-operator-metrics-cert
volumes:
- name: opentelemetry-operator-metrics-cert
secret:
# 420 decimal is file mode 0644.
defaultMode: 420
# secret generated by the 'service.beta.openshift.io/serving-cert-secret-name' annotation on the metrics-service
secretName: opentelemetry-operator-metrics
7 changes: 7 additions & 0 deletions config/overlays/openshift/metrics_service_tls_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Strategic-merge patch for the controller-manager metrics Service (OpenShift overlay).
# It adds the serving-cert annotation naming the opentelemetry-operator-metrics
# secret, which manager_auth_proxy_tls_patch.yaml mounts into kube-rbac-proxy.
apiVersion: v1
kind: Service
metadata:
annotations:
service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics
name: controller-manager-metrics-service
namespace: system
2 changes: 0 additions & 2 deletions config/prometheus/kustomization.yaml

This file was deleted.

26 changes: 0 additions & 26 deletions config/prometheus/monitor.yaml

This file was deleted.

145 changes: 145 additions & 0 deletions internal/operator-metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package operatormetrics

import (
"context"
"fmt"
"os"

monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager"
)

// Names and file paths used while assembling the ServiceMonitor for the
// operator metrics endpoint.
var (
	// openshiftInClusterMonitoringNamespace is the namespace in which the CA
	// bundle config map of the OpenShift in-cluster monitoring is looked up.
	openshiftInClusterMonitoringNamespace = "openshift-monitoring"

	// caBundleConfigMap is the name of the config map holding the CA bundle.
	caBundleConfigMap = "serving-certs-ca-bundle"

	// prometheusCAFile is the CA file path, derived from caBundleConfigMap,
	// that is put into the ServiceMonitor TLS configuration on OpenShift.
	prometheusCAFile = fmt.Sprintf("/etc/prometheus/configmaps/%s/service-ca.crt", caBundleConfigMap)

	// namespaceFile is the service account file the namespace is read from.
	namespaceFile = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"

	// nolint #nosec
	// bearerTokenFile is the bearer token file path set on the ServiceMonitor endpoint.
	bearerTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token"
)

// Compile-time check that *OperatorMetrics satisfies manager.Runnable.
var _ manager.Runnable = (*OperatorMetrics)(nil)

// OperatorMetrics is a manager runnable that creates the ServiceMonitor for
// the operator metrics endpoint on start and deletes it on shutdown.
type OperatorMetrics struct {
	// kubeClient is the client used for every API call made by this type.
	kubeClient client.Client
}

// NewOperatorMetrics returns an OperatorMetrics whose Kubernetes client is
// created from config and uses scheme for type registration.
//
// If the client cannot be created, the zero OperatorMetrics and the
// underlying error are returned.
func NewOperatorMetrics(config *rest.Config, scheme *runtime.Scheme) (OperatorMetrics, error) {
	opts := client.Options{Scheme: scheme}

	c, err := client.New(config, opts)
	if err != nil {
		return OperatorMetrics{}, err
	}

	om := OperatorMetrics{kubeClient: c}
	return om, nil
}

// Start creates the ServiceMonitor that scrapes the operator metrics endpoint,
// blocks until ctx is cancelled, and then deletes the ServiceMonitor again.
//
// The namespace is read from namespaceFile. When the CA bundle config map is
// found (see caConfigMapExists), the endpoint TLS configuration verifies the
// server against prometheusCAFile using the metrics service DNS name;
// otherwise certificate verification is skipped.
//
// A ServiceMonitor left behind by a previous run (for example after a crash)
// is tolerated instead of being reported as an error, and a ServiceMonitor
// that is already gone at shutdown is not an error either.
//
// It returns an error when the namespace file cannot be read, or when the
// ServiceMonitor cannot be created or deleted.
func (om OperatorMetrics) Start(ctx context.Context) error {
	rawNamespace, err := os.ReadFile(namespaceFile)
	if err != nil {
		return fmt.Errorf("error reading namespace file: %w", err)
	}
	namespace := string(rawNamespace)

	var tlsConfig *monitoringv1.TLSConfig

	if om.caConfigMapExists() {
		// The server name must match the namespace the operator runs in,
		// hence it is derived at runtime.
		serviceName := fmt.Sprintf("opentelemetry-operator-controller-manager-metrics-service.%s.svc", namespace)

		tlsConfig = &monitoringv1.TLSConfig{
			CAFile: prometheusCAFile,
			SafeTLSConfig: monitoringv1.SafeTLSConfig{
				ServerName: &serviceName,
			},
		}
	} else {
		t := true
		tlsConfig = &monitoringv1.TLSConfig{
			SafeTLSConfig: monitoringv1.SafeTLSConfig{
				// kube-rbac-proxy uses a self-signed cert by default
				InsecureSkipVerify: &t,
			},
		}
	}

	sm := monitoringv1.ServiceMonitor{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "opentelemetry-operator-metrics-monitor",
			Namespace: namespace,
			Labels: map[string]string{
				"app.kubernetes.io/name":    "opentelemetry-operator",
				"app.kubernetes.io/part-of": "opentelemetry-operator",
				"control-plane":             "controller-manager",
			},
		},
		Spec: monitoringv1.ServiceMonitorSpec{
			Selector: metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app.kubernetes.io/name": "opentelemetry-operator",
				},
			},
			Endpoints: []monitoringv1.Endpoint{
				{
					BearerTokenFile: bearerTokenFile,
					Interval:        "30s",
					Path:            "/metrics",
					Scheme:          "https",
					ScrapeTimeout:   "10s",
					TargetPort:      &intstr.IntOrString{IntVal: 8443},
					TLSConfig:       tlsConfig,
				},
			},
		},
	}

	// An existing ServiceMonitor (left over from an earlier run that did not
	// clean up) must not prevent the operator from starting.
	if err = client.IgnoreAlreadyExists(om.kubeClient.Create(ctx, &sm)); err != nil {
		return fmt.Errorf("error creating service monitor: %w", err)
	}

	<-ctx.Done()

	// ctx is cancelled at this point, so a request made with it would be
	// aborted immediately and the ServiceMonitor would never be removed.
	// Use a fresh context for the cleanup call.
	if err = client.IgnoreNotFound(om.kubeClient.Delete(context.Background(), &sm)); err != nil {
		return fmt.Errorf("error deleting service monitor: %w", err)
	}
	return nil
}

// NeedLeaderElection always reports true.
func (OperatorMetrics) NeedLeaderElection() bool {
	const required = true
	return required
}

// caConfigMapExists reports whether the caBundleConfigMap config map could be
// fetched from openshiftInClusterMonitoringNamespace.
//
// Any error from the lookup yields false; the cause of the error is not
// inspected.
func (om OperatorMetrics) caConfigMapExists() bool {
	key := client.ObjectKey{
		Namespace: openshiftInClusterMonitoringNamespace,
		Name:      caBundleConfigMap,
	}

	var cm corev1.ConfigMap
	err := om.kubeClient.Get(context.Background(), key, &cm)
	return err == nil
}
Loading

0 comments on commit 24ead74

Please sign in to comment.