From 20ba01cc3d1707d9b74360c6d095e9bc6095815b Mon Sep 17 00:00:00 2001 From: Lionel Jouin Date: Thu, 19 Oct 2023 14:46:55 +0200 Subject: [PATCH] Interface Metrics Collect kernel rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped metrics of the interfaces Stateless-lb collects the metrics for the interfaces being created via NSM. A new chain element has been implemented to watch/unwatch interfaces based on their name. Frontend collects the metrics of the interface that is passed as an environment variable. --- cmd/frontend/internal/env/config.go | 2 + cmd/frontend/main.go | 46 ++ cmd/stateless-lb/main.go | 17 + .../deployment/stateless-lb-frontend.yaml | 5 + docs/observability/dashboard.json | 588 +++++++++++++++++- docs/observability/metrics.md | 2 +- pkg/kernel/metrics.go | 264 ++++++++ pkg/metrics/const.go | 8 + pkg/nsm/metrics/server.go | 63 ++ 9 files changed, 992 insertions(+), 3 deletions(-) create mode 100644 pkg/kernel/metrics.go create mode 100644 pkg/nsm/metrics/server.go diff --git a/cmd/frontend/internal/env/config.go b/cmd/frontend/internal/env/config.go index c4816825..83f9091f 100644 --- a/cmd/frontend/internal/env/config.go +++ b/cmd/frontend/internal/env/config.go @@ -44,4 +44,6 @@ type Config struct { DelayConnectivity time.Duration `default:"1s" desc:"Delay between checks with connectivity"` DelayNoConnectivity time.Duration `default:"3s" desc:"Delay between checks without connectivity"` MaxSessionErrors int `default:"5" desc:"Max session errors when checking Bird until denounce"` + MetricsEnabled bool `default:"false" desc:"Enable the metrics collection" split_words:"true"` + MetricsPort int `default:"2224" desc:"Specify the port used to expose the metrics" split_words:"true"` } diff --git a/cmd/frontend/main.go b/cmd/frontend/main.go index 6135ae30..832fc5dc 100644 --- a/cmd/frontend/main.go +++ b/cmd/frontend/main.go @@ -28,6 +28,8 @@ import ( "github.com/go-logr/logr" "github.com/kelseyhightower/envconfig" + 
"go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/keepalive" @@ -38,7 +40,9 @@ import ( "github.com/nordix/meridio/cmd/frontend/internal/frontend" "github.com/nordix/meridio/pkg/health" "github.com/nordix/meridio/pkg/health/connection" + linuxKernel "github.com/nordix/meridio/pkg/kernel" "github.com/nordix/meridio/pkg/log" + "github.com/nordix/meridio/pkg/metrics" "github.com/nordix/meridio/pkg/retry" "github.com/nordix/meridio/pkg/security/credentials" ) @@ -85,6 +89,11 @@ func main() { ) defer cancel() + hostname, err := os.Hostname() + if err != nil { + log.Fatal(logger, "Unable to get hostname", "error", err) + } + // create and start health server ctx = health.CreateChecker(ctx) if err := health.RegisterReadinesSubservices(ctx, health.FEReadinessServices...); err != nil { @@ -155,6 +164,43 @@ func main() { // start watching events of interest via NSP go watchConfig(ctx, cancel, c, fe) + interfaceMetrics := linuxKernel.NewInterfaceMetrics([]metric.ObserveOption{ + metric.WithAttributes(attribute.String("Hostname", hostname)), + metric.WithAttributes(attribute.String("Trench", config.TrenchName)), + metric.WithAttributes(attribute.String("Attractor", config.AttractorName)), + }) + interfaceMetrics.Register(config.ExternalInterface) + + if config.MetricsEnabled { + func() { + _, err = metrics.Init(ctx) + if err != nil { + logger.Error(err, "Unable to init metrics collector") + cancel() + return + } + + err = interfaceMetrics.Collect() + if err != nil { + logger.Error(err, "Unable to start interface metrics collector") + cancel() + return + } + + metricsServer := metrics.Server{ + IP: "", + Port: config.MetricsPort, + } + go func() { + err := metricsServer.Start(ctx) + if err != nil { + logger.Error(err, "Unable to start metrics server") + cancel() + } + }() + }() + } + <-ctx.Done() logger.Info("FE shutting down") } diff --git a/cmd/stateless-lb/main.go 
b/cmd/stateless-lb/main.go index 8833f32e..7fa5c3be 100644 --- a/cmd/stateless-lb/main.go +++ b/cmd/stateless-lb/main.go @@ -56,10 +56,13 @@ import ( "github.com/nordix/meridio/pkg/networking" "github.com/nordix/meridio/pkg/nsm" "github.com/nordix/meridio/pkg/nsm/interfacemonitor" + nsmmetrics "github.com/nordix/meridio/pkg/nsm/metrics" "github.com/nordix/meridio/pkg/retry" "github.com/nordix/meridio/pkg/security/credentials" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/keepalive" @@ -177,6 +180,12 @@ func main() { log.Fatal(logger, "Unable to init lb target metrics", "error", err) } + interfaceMetrics := linuxKernel.NewInterfaceMetrics([]metric.ObserveOption{ + metric.WithAttributes(attribute.String("Hostname", hostname)), + metric.WithAttributes(attribute.String("Trench", config.TrenchName)), + metric.WithAttributes(attribute.String("Conduit", config.ConduitName)), + }) + lbFactory := nfqlb.NewLbFactory(nfqlb.WithNFQueue(config.Nfqueue)) nfa, err := nfqlb.NewNetfilterAdaptor(nfqlb.WithNFQueue(config.Nfqueue), nfqlb.WithNFQueueFanout(config.NfqueueFanout)) if err != nil { @@ -208,6 +217,7 @@ func main() { noop.MECHANISM: null.NewServer(), }), interfaceMonitorEndpoint, + nsmmetrics.NewServer(interfaceMetrics), sendfd.NewServer(), } @@ -295,6 +305,13 @@ func main() { return } + err = interfaceMetrics.Collect() + if err != nil { + logger.Error(err, "Unable to start interface metrics collector") + cancel() + return + } + metricsServer := metrics.Server{ IP: "", Port: config.MetricsPort, diff --git a/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml b/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml index 55b3e8ed..75ee7423 100644 --- a/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml +++ 
b/config/templates/charts/meridio/deployment/stateless-lb-frontend.yaml @@ -184,6 +184,9 @@ spec: - name: frontend image: {{ .Values.registry }}/{{ .Values.repository }}/{{ .Values.frontEnd.image }}:{{ .Values.version }} imagePullPolicy: # Kubernetes default according to image tag + ports: + - name: metrics + containerPort: 2224 startupProbe: # will be filled by operator if not specified exec: command: @@ -244,6 +247,8 @@ spec: value: # to be filled by operator - name: NFE_LOG_LEVEL value: # to be filled by operator + - name: NFE_METRICS_ENABLED + value: "true" securityContext: runAsNonRoot: true readOnlyRootFilesystem: true diff --git a/docs/observability/dashboard.json b/docs/observability/dashboard.json index 7a90f483..08875f47 100644 --- a/docs/observability/dashboard.json +++ b/docs/observability/dashboard.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 29, + "id": 28, "links": [], "liveNow": false, "panels": [ @@ -641,6 +641,590 @@ "title": "Stream (bytes per second)", "transformations": [], "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 4, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": 
"meridio_interface_rx_bytes_total", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "rx bytes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_tx_bytes_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx bytes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_rx_packets_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "rx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_tx_packets_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_rx_errors_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "rx errors", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + 
"exemplar": false, + "expr": "meridio_interface_tx_errors_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx errors", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_rx_dropped_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "rx dropped", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "meridio_interface_tx_dropped_total", + "format": "table", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "tx dropped", + "useBackend": false + } + ], + "title": "Interface Metrics", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "namespace": true, + "otel_scope_name": true, + "pod": true + }, + "indexByName": { + "Attractor": 1, + "Conduit": 2, + "Hostname": 3, + "Interface_Name": 4, + "Time": 5, + "Trench": 0, + "Value #rx bytes": 14, + "Value #rx dropped": 20, + "Value #rx errors": 18, + "Value #rx packets": 16, + "Value #tx bytes": 15, + "Value #tx dropped": 21, + "Value #tx errors": 19, + "Value #tx packets": 17, + "__name__": 6, + "container": 7, + "endpoint": 8, + "instance": 9, + "job": 10, + "namespace": 11, + "otel_scope_name": 12, + "pod": 13 + }, + "renameByName": {} + } + }, + { + "id": "groupBy", + "options": { + "fields": { + 
"Attractor": { + "aggregations": [], + "operation": "groupby" + }, + "Conduit": { + "aggregations": [], + "operation": "groupby" + }, + "Hostname": { + "aggregations": [], + "operation": "groupby" + }, + "Interface_Name": { + "aggregations": [], + "operation": "groupby" + }, + "Trench": { + "aggregations": [], + "operation": "groupby" + }, + "Value #rx bytes": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #rx dropped": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #rx errors": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #rx packets": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx bytes": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx dropped": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx errors": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "Value #tx packets": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Value #rx bytes (lastNotNull)": "RX Bytes", + "Value #rx dropped (lastNotNull)": "RX Dropped", + "Value #rx errors (lastNotNull)": "RX Errors", + "Value #rx packets (lastNotNull)": "RX Packets", + "Value #tx bytes (lastNotNull)": "TX Bytes", + "Value #tx dropped (lastNotNull)": "TX Dropped", + "Value #tx errors (lastNotNull)": "TX Errors", + "Value #tx packets (lastNotNull)": "TX Packets" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_bytes_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "rx_bytes.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "rx bytes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_tx_bytes_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_bytes.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx bytes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": 
false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_packets_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "rx_packets.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "rx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_tx_packets_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_packets.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx packets", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_errors_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "rx_errors.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "rx errors", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_tx_errors_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_errors.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx errors", + "useBackend": false + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_rx_dropped_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "rx_dropped.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "rx dropped", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(Trench, Conduit, Attractor) (rate(meridio_interface_tx_dropped_total[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "tx_dropped.{{Conduit}}{{Attractor}}.{{Trench}}", + "range": true, + "refId": "tx dropped", + "useBackend": false + } + ], + "title": "Interface Metrics", + "transformations": [], + "type": "timeseries" } ], "refresh": "5s", @@ -670,6 +1254,6 @@ "timezone": "", "title": "Meridio", "uid": "f0339d9f-4744-441c-972b-f8b294fb7ff8", - "version": 2, + "version": 3, "weekStart": "" } \ No newline at end of file diff --git a/docs/observability/metrics.md b/docs/observability/metrics.md index 6b7db958..9e1d8b43 100644 --- a/docs/observability/metrics.md +++ b/docs/observability/metrics.md @@ -2,7 +2,7 @@ ## Metric List -### meridio.interface.`METRIC_TYPE` (Planned) +### meridio.interface.`METRIC_TYPE` `METRIC_TYPE`: rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped diff --git a/pkg/kernel/metrics.go b/pkg/kernel/metrics.go new file mode 100644 index 00000000..a72855b2 --- /dev/null +++ b/pkg/kernel/metrics.go @@ -0,0 +1,264 @@ +/* +Copyright (c) 2023 Nordix Foundation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in 
compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kernel
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"sync"
+
+	meridioMetrics "github.com/nordix/meridio/pkg/metrics"
+	"github.com/vishvananda/netlink"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+)
+
+// InterfaceMetrics exposes kernel statistics of a set of network interfaces,
+// identified by name, as observable counters.
+type InterfaceMetrics struct {
+	// meter creates the instruments.
+	meter metric.Meter
+	// interfaces is the set of interface names to observe; guarded by mu.
+	interfaces map[string]struct{}
+	// metricAttributes are added to every observation.
+	metricAttributes []metric.ObserveOption
+	mu               sync.Mutex
+}
+
+// NewInterfaceMetrics returns an InterfaceMetrics with no interface
+// registered. Its meter is taken from the global meter provider, and
+// metricAttributes are attached to every value it observes.
+func NewInterfaceMetrics(metricAttributes []metric.ObserveOption) *InterfaceMetrics {
+	return &InterfaceMetrics{
+		meter:            otel.GetMeterProvider().Meter(meridioMetrics.METER_NAME),
+		interfaces:       make(map[string]struct{}),
+		metricAttributes: metricAttributes,
+	}
+}
+
+// Register adds interfaceName to the set of observed interfaces.
+// Registering a name that is already present is a no-op.
+func (im *InterfaceMetrics) Register(interfaceName string) {
+	im.mu.Lock()
+	defer im.mu.Unlock()
+
+	im.interfaces[interfaceName] = struct{}{}
+}
+
+// Unregister removes interfaceName from the set of observed interfaces.
+// Removing a name that is not present is a no-op.
+func (im *InterfaceMetrics) Unregister(interfaceName string) {
+	im.mu.Lock()
+	defer im.mu.Unlock()
+
+	delete(im.interfaces, interfaceName)
+}
+
+// Collect collects the metrics for the interfaces.
+func (im *InterfaceMetrics) Collect() error {
+	// Every kernel counter is exported the same way: only the instrument
+	// name, unit, description and the statistics field read differ.
+	counters := []struct {
+		name        string
+		unit        string
+		description string
+		value       func(*netlink.LinkStatistics) int64
+	}{
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_BYTES,
+			unit:        "bytes",
+			description: "Counts number of received bytes for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.RxBytes) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_BYTES,
+			unit:        "bytes",
+			description: "Counts number of transmitted bytes for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.TxBytes) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_PACKETS,
+			unit:        "packets",
+			description: "Counts number of received packets for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.RxPackets) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_PACKETS,
+			unit:        "packets",
+			description: "Counts number of transmitted packets for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.TxPackets) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_ERRORS,
+			unit:        "errors",
+			description: "Counts number of received errors for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.RxErrors) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_ERRORS,
+			unit:        "errors",
+			description: "Counts number of transmitted errors for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.TxErrors) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_RX_DROPPED,
+			unit:        "dropped",
+			description: "Counts number of received dropped for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.RxDropped) },
+		},
+		{
+			name:        meridioMetrics.MERIDIO_INTERFACE_TX_DROPPED,
+			unit:        "dropped",
+			description: "Counts number of transmitted dropped for a network interface.",
+			value:       func(s *netlink.LinkStatistics) int64 { return int64(s.TxDropped) },
+		},
+	}
+
+	for _, counter := range counters {
+		counter := counter // per-iteration copy for the callback closure (required before Go 1.22)
+		_, err := im.meter.Int64ObservableCounter(
+			counter.name,
+			metric.WithUnit(counter.unit),
+			metric.WithDescription(counter.description),
+			metric.WithInt64Callback(func(ctx context.Context, observer metric.Int64Observer) error {
+				return im.observe(ctx, observer, counter.value)
+			}),
+		)
+		if err != nil {
+			return fmt.Errorf("interface metrics, failed to Int64ObservableCounter (%s): %w", counter.name, err)
+		}
+	}
+
+	return nil
+}
+
+// observe reports one value per registered interface that is currently up.
+// The value is what valueFunc extracts from the statistics of the link; the
+// observation carries the interface name, the attributes given to
+// NewInterfaceMetrics, the MAC address and the IP addresses of the link.
+// Interfaces that cannot be found, are down or have no statistics are skipped.
+// It always returns nil.
+func (im *InterfaceMetrics) observe(ctx context.Context, observer metric.Int64Observer, valueFunc func(*netlink.LinkStatistics) int64) error {
+	for _, interfaceName := range im.registeredInterfaces() {
+		link := getLinkByName(interfaceName)
+		if link == nil {
+			continue
+		}
+		attrs := link.Attrs()
+		// Statistics is a pointer: guard against nil so that valueFunc never
+		// dereferences it inside the metrics callback.
+		if attrs.Statistics == nil {
+			continue
+		}
+		metricAttributes := make([]metric.ObserveOption, 0, len(im.metricAttributes)+3)
+		metricAttributes = append(metricAttributes, metric.WithAttributes(attribute.String("Interface Name", interfaceName)))
+		metricAttributes = append(metricAttributes, im.metricAttributes...)
+		metricAttributes = append(metricAttributes,
+			metric.WithAttributes(attribute.String("MAC Address", attrs.HardwareAddr.String())),
+			metric.WithAttributes(attribute.StringSlice("IP Addresses", listIPs(link))),
+		)
+		observer.Observe(valueFunc(attrs.Statistics), metricAttributes...)
+	}
+	return nil
+}
+
+// registeredInterfaces returns a copy of the registered interface names, so
+// that the netlink lookups made while observing do not run with mu held and
+// do not block Register and Unregister.
+func (im *InterfaceMetrics) registeredInterfaces() []string {
+	im.mu.Lock()
+	defer im.mu.Unlock()
+
+	names := make([]string, 0, len(im.interfaces))
+	for name := range im.interfaces {
+		names = append(names, name)
+	}
+	return names
+}
+
+// getLinkByName returns the link called interfaceName, or nil when the lookup
+// fails, the link has no attributes or the link does not have the up flag set.
+func getLinkByName(interfaceName string) netlink.Link {
+	link, err := netlink.LinkByName(interfaceName)
+	if err != nil ||
+		link == nil ||
+		link.Attrs() == nil ||
+		link.Attrs().Flags&net.FlagUp != net.FlagUp {
+		return nil
+	}
+	return link
+}
+
+// listIPs returns the addresses of link in CIDR notation, IPv6 link-local
+// addresses excluded. It returns an empty, non-nil slice when the addresses
+// cannot be listed.
+func listIPs(link netlink.Link) []string {
+	res := []string{}
+	addresses, err := netlink.AddrList(link, netlink.FAMILY_ALL)
+	if err != nil {
+		return res
+	}
+
+	for _, addr := range addresses {
+		if isLinkLocalIPv6(addr.IP) {
+			continue
+		}
+		res = append(res, addr.IPNet.String())
+	}
+
+	return res
+}
+
+// isLinkLocalIPv6 reports whether ip is not an IPv4 address and is a
+// link-local unicast or link-local multicast address.
+func isLinkLocalIPv6(ip net.IP) bool {
+	return ip.To4() == nil && (ip.IsLinkLocalMulticast() || ip.IsLinkLocalUnicast())
+}
diff --git a/pkg/metrics/const.go b/pkg/metrics/const.go index 5c89f30c..2d4fadf3 100644 --- a/pkg/metrics/const.go +++ b/pkg/metrics/const.go @@ -20,6 +20,14 @@ const ( MERIDIO_CONDUIT_STREAM_FLOW_MATCHES = "meridio.conduit.stream.flow.matches" MERIDIO_CONDUIT_STREAM_TARGET_HITS_PACKETS = "meridio.conduit.stream.target.hits.packets" MERIDIO_CONDUIT_STREAM_TARGET_HITS_BYTES = "meridio.conduit.stream.target.hits.bytes" + MERIDIO_INTERFACE_RX_PACKETS = "meridio.interface.rx_packets" + MERIDIO_INTERFACE_TX_PACKETS = "meridio.interface.tx_packets" + MERIDIO_INTERFACE_RX_BYTES = "meridio.interface.rx_bytes" + MERIDIO_INTERFACE_TX_BYTES = "meridio.interface.tx_bytes" + MERIDIO_INTERFACE_RX_ERRORS = "meridio.interface.rx_errors" + MERIDIO_INTERFACE_TX_ERRORS = "meridio.interface.tx_errors" + MERIDIO_INTERFACE_RX_DROPPED = "meridio.interface.rx_dropped" + 
MERIDIO_INTERFACE_TX_DROPPED = "meridio.interface.tx_dropped" METER_NAME = "Meridio" )
diff --git a/pkg/nsm/metrics/server.go b/pkg/nsm/metrics/server.go
new file mode 100644
index 00000000..cafea48a
--- /dev/null
+++ b/pkg/nsm/metrics/server.go
@@ -0,0 +1,65 @@
+/*
+Copyright (c) 2023 Nordix Foundation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package metrics provides the NSM server chain element that keeps the
+// interfaces watched by the kernel interface metrics in sync with the NSM
+// connections.
+package metrics
+
+import (
+	"context"
+
+	"github.com/networkservicemesh/api/pkg/api/networkservice"
+	"github.com/networkservicemesh/api/pkg/api/networkservice/mechanisms/common"
+	"github.com/networkservicemesh/sdk/pkg/networkservice/core/next"
+	"github.com/nordix/meridio/pkg/kernel"
+	"google.golang.org/protobuf/types/known/emptypb"
+)
+
+// metricsServer updates InterfaceMetrics with the interface name of each
+// connection before handing the call over to the next chain element.
+type metricsServer struct {
+	InterfaceMetrics *kernel.InterfaceMetrics
+}
+
+// NewServer returns a chain element that registers the interface of a
+// connection in interfaceMetrics on Request and unregisters it on Close.
+func NewServer(interfaceMetrics *kernel.InterfaceMetrics) networkservice.NetworkServiceServer {
+	return &metricsServer{
+		InterfaceMetrics: interfaceMetrics,
+	}
+}
+
+// Request registers the interface named in the mechanism parameters of the
+// requested connection, then calls the next element of the chain. A request
+// that carries no interface name is forwarded without registering anything.
+func (ms *metricsServer) Request(ctx context.Context, request *networkservice.NetworkServiceRequest) (*networkservice.Connection, error) {
+	// The generated getters accept nil receivers and reading a nil map yields
+	// "", so a missing connection, mechanism or parameter all result in "".
+	if interfaceName := request.GetConnection().GetMechanism().GetParameters()[common.InterfaceNameKey]; interfaceName != "" {
+		ms.InterfaceMetrics.Register(interfaceName)
+	}
+	return next.Server(ctx).Request(ctx, request)
+}
+
+// Close unregisters the interface named in the mechanism parameters of conn,
+// if any, then calls the next element of the chain.
+func (ms *metricsServer) Close(ctx context.Context, conn *networkservice.Connection) (*emptypb.Empty, error) {
+	if interfaceName := conn.GetMechanism().GetParameters()[common.InterfaceNameKey]; interfaceName != "" {
+		ms.InterfaceMetrics.Unregister(interfaceName)
+	}
+	return next.Server(ctx).Close(ctx, conn)
+}