diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index d29ab94728..fcc08eb328 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -290,6 +290,8 @@ func runReceive( AsyncForwardWorkerCount: conf.asyncForwardWorkerCount, ReplicationProtocol: receive.ReplicationProtocol(conf.replicationProtocol), + OtlpEnableTargetInfo: conf.otlpEnableTargetInfo, + OtlpResourceAttributes: conf.otlpResourceAttributes, }) grpcProbe := prober.NewGRPC() @@ -909,6 +911,8 @@ type receiveConfig struct { headExpandedPostingsCacheSize uint64 compactedBlocksExpandedPostingsCacheSize uint64 + otlpEnableTargetInfo bool + otlpResourceAttributes []string } func (rc *receiveConfig) registerFlag(cmd extkingpin.FlagClause) { @@ -1066,6 +1070,9 @@ func (rc *receiveConfig) registerFlag(cmd extkingpin.FlagClause) { cmd.Flag("receive.limits-config-reload-timer", "Minimum amount of time to pass for the limit configuration to be reloaded. Helps to avoid excessive reloads."). Default("1s").Hidden().DurationVar(&rc.limitsConfigReloadTimer) + cmd.Flag("receive.otlp-enable-target-info", "Enables target information in OTLP metrics ingested by Receive. If enabled, it converts the resource to the target info metric").Default("true").BoolVar(&rc.otlpEnableTargetInfo) + cmd.Flag("receive.otlp-promote-resource-attributes", "(Repeatable) Resource attributes to include in OTLP metrics ingested by Receive.").Default("").StringsVar(&rc.otlpResourceAttributes) + rc.featureList = cmd.Flag("enable-feature", "Comma separated experimental feature names to enable. The current list of features is "+metricNamesFilter+".").Default("").Strings() } diff --git a/docs/components/receive.md b/docs/components/receive.md index be9332ee46..0c18f22c0f 100644 --- a/docs/components/receive.md +++ b/docs/components/receive.md @@ -331,7 +331,7 @@ Please see the metric `thanos_receive_forward_delay_seconds` to see if you need The following formula is used for calculating quorum: -```go mdox-exec="sed -n '1015,1025p' pkg/receive/handler.go" +```go mdox-exec="sed -n '1029,1039p' pkg/receive/handler.go" // writeQuorum returns minimum number of replicas that has to confirm write success before claiming replication success. func (h *Handler) writeQuorum() int { // NOTE(GiedriusS): this is here because otherwise RF=2 doesn't make sense as all writes @@ -458,6 +458,13 @@ Flags: configuration. If it's empty AND hashring configuration was provided, it means that receive will run in RoutingOnly mode. + --receive.otlp-enable-target-info + Enables target information in OTLP metrics + ingested by Receive. If enabled, it converts + the resource to the target info metric + --receive.otlp-promote-resource-attributes= ... + (Repeatable) Resource attributes to include in + OTLP metrics ingested by Receive. --receive.relabel-config= Alternative to 'receive.relabel-config-file' flag (mutually exclusive). Content of YAML file diff --git a/go.mod b/go.mod index dd5cd1ce44..fa73b8b499 100644 --- a/go.mod +++ b/go.mod @@ -114,8 +114,55 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.32.3 // indirect +) + +require ( + go.opentelemetry.io/collector/pdata v1.14.1 + go.opentelemetry.io/collector/semconv v0.108.1 +) + +require github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect + +require ( github.com/HdrHistogram/hdrhistogram-go v1.1.2 // indirect + github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 // indirect + github.com/cilium/ebpf v0.11.0 // indirect + github.com/containerd/cgroups/v3 v3.0.3 // indirect + github.com/docker/go-units v0.5.0 // indirect + github.com/elastic/go-licenser v0.3.1 // indirect + github.com/go-ini/ini v1.67.0 // indirect + github.com/go-openapi/runtime v0.27.1 // indirect + github.com/goccy/go-json v0.10.3 // indirect + github.com/godbus/dbus/v5 v5.0.4 // indirect + github.com/golang-jwt/jwt/v5 v5.2.1 // indirect + github.com/google/s2a-go v0.1.8 // indirect + github.com/huaweicloud/huaweicloud-sdk-go-obs v3.23.3+incompatible // indirect + github.com/jcchavezs/porto v0.1.0 // indirect + github.com/leesper/go_rng v0.0.0-20190531154944-a612b043e353 // indirect + github.com/mdlayher/socket v0.4.1 // indirect + github.com/mdlayher/vsock v1.2.1 // indirect + github.com/metalmatze/signal v0.0.0-20210307161603-1c9aa721a97a // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/onsi/ginkgo v1.16.5 // indirect + github.com/opencontainers/runtime-spec v1.0.2 // indirect + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect + github.com/sercand/kuberesolver/v4 v4.0.0 // indirect + github.com/zhangyunhao116/umap v0.0.0-20221211160557-cb7705fafa39 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect + go.opentelemetry.io/contrib/propagators/ot v1.29.0 // indirect + go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2 // indirect + golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect + k8s.io/apimachinery v0.31.1 // indirect + k8s.io/client-go v0.31.0 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect + zenhack.net/go/util v0.0.0-20230414204917-531d38494cf5 // indirect +) + +require ( + github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.32.3 // indirect github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a // indirect github.com/aliyun/aliyun-oss-go-sdk v2.2.2+incompatible // indirect @@ -134,24 +181,17 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.16.1 // indirect github.com/aws/smithy-go v1.11.1 // indirect github.com/baidubce/bce-sdk-go v0.9.111 // indirect - github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/chromedp/sysutil v1.0.0 // indirect - github.com/cilium/ebpf v0.11.0 // indirect github.com/clbanning/mxj v1.8.4 // indirect - github.com/containerd/cgroups/v3 v3.0.3 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dennwc/varint v1.0.0 // indirect - github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect - github.com/docker/go-units v0.5.0 // indirect github.com/edsrzf/mmap-go v1.1.0 // indirect - github.com/elastic/go-licenser v0.3.1 // indirect github.com/elastic/go-sysinfo v1.8.1 // indirect github.com/elastic/go-windows v1.0.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/go-ini/ini v1.67.0 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -161,56 +201,41 @@ require ( github.com/go-openapi/jsonpointer v0.20.2 // indirect github.com/go-openapi/jsonreference v0.20.4 // indirect github.com/go-openapi/loads v0.21.5 // indirect - github.com/go-openapi/runtime v0.27.1 // indirect github.com/go-openapi/spec v0.20.14 // indirect github.com/go-openapi/swag v0.22.9 // indirect github.com/go-openapi/validate v0.23.0 // indirect github.com/gobwas/httphead v0.1.0 // indirect github.com/gobwas/pool v0.2.1 // indirect github.com/gobwas/ws v1.2.1 // indirect - github.com/goccy/go-json v0.10.3 // indirect - github.com/godbus/dbus/v5 v5.0.4 // indirect github.com/gofrs/flock v0.8.1 // indirect github.com/gogo/googleapis v1.4.0 // indirect - github.com/golang-jwt/jwt/v5 v5.2.1 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 // indirect - github.com/google/s2a-go v0.1.8 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/gorilla/mux v1.8.0 // indirect github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect - github.com/huaweicloud/huaweicloud-sdk-go-obs v3.23.3+incompatible // indirect - github.com/jcchavezs/porto v0.1.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/julienschmidt/httprouter v1.3.0 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/kylelemons/godebug v1.1.0 // indirect - github.com/leesper/go_rng v0.0.0-20190531154944-a612b043e353 // indirect github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20210210170715-a8dfcb80d3a7 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect - github.com/mdlayher/socket v0.4.1 // indirect - github.com/mdlayher/vsock v1.2.1 // indirect - github.com/metalmatze/signal v0.0.0-20210307161603-1c9aa721a97a // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/minio/minio-go/v7 v7.0.80 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mozillazg/go-httpheader v0.2.1 // indirect - github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/ncw/swift v1.0.53 // indirect - github.com/onsi/ginkgo v1.16.5 // indirect - github.com/opencontainers/runtime-spec v1.0.2 // indirect github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e // indirect github.com/opentracing-contrib/go-stdlib v1.0.0 // indirect github.com/oracle/oci-go-sdk/v65 v65.41.1 // indirect - github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect @@ -219,7 +244,6 @@ require ( github.com/rivo/uniseg v0.2.0 // indirect github.com/rs/xid v1.6.0 // indirect github.com/santhosh-tekuri/jsonschema v1.2.4 // indirect - github.com/sercand/kuberesolver/v4 v4.0.0 // indirect github.com/shirou/gopsutil/v3 v3.22.9 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/stretchr/objx v0.5.2 // indirect @@ -230,25 +254,18 @@ require ( github.com/weaveworks/promrus v1.2.0 // indirect github.com/yuin/gopher-lua v0.0.0-20210529063254-f4c35e4016d9 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - github.com/zhangyunhao116/umap v0.0.0-20221211160557-cb7705fafa39 // indirect go.elastic.co/apm/module/apmhttp v1.15.0 // indirect go.elastic.co/fastjson v1.1.0 // indirect go.mongodb.org/mongo-driver v1.14.0 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/collector/pdata v1.14.1 // indirect - go.opentelemetry.io/collector/semconv v0.108.1 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect go.opentelemetry.io/contrib/propagators/aws v1.29.0 // indirect go.opentelemetry.io/contrib/propagators/b3 v1.29.0 // indirect go.opentelemetry.io/contrib/propagators/jaeger v1.29.0 // indirect - go.opentelemetry.io/contrib/propagators/ot v1.29.0 // indirect go.opentelemetry.io/otel/metric v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect - go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2 // indirect golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect - golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect golang.org/x/mod v0.21.0 // indirect golang.org/x/oauth2 v0.23.0 // indirect golang.org/x/sys v0.28.0 // indirect @@ -256,14 +273,7 @@ require ( gonum.org/v1/gonum v0.15.0 // indirect google.golang.org/api v0.195.0 // indirect google.golang.org/genproto v0.0.0-20240823204242-4ba0660f739c // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect howett.net/plist v0.0.0-20181124034731-591f970eefbb // indirect - k8s.io/apimachinery v0.31.1 // indirect - k8s.io/client-go v0.31.0 // indirect - k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect - zenhack.net/go/util v0.0.0-20230414204917-531d38494cf5 // indirect ) replace ( diff --git a/pkg/errutil/multierror.go b/pkg/errutil/multierror.go index a99b714e27..600a557324 100644 --- a/pkg/errutil/multierror.go +++ b/pkg/errutil/multierror.go @@ -71,6 +71,15 @@ func (es NonNilMultiError) Cause() error { return es.getCause() } +func (es NonNilMultiError) Is(target error) bool { + for _, err := range es { + if errors.Is(err, target) { + return true + } + } + return false +} + func (es NonNilMultiError) getCause() NonNilMultiRootError { var causes []error for _, err := range es { diff --git a/pkg/receive/handler.go b/pkg/receive/handler.go index 7b28a7fd12..44b5fba367 100644 --- a/pkg/receive/handler.go +++ b/pkg/receive/handler.go @@ -116,6 +116,8 @@ type Options struct { Limiter *Limiter AsyncForwardWorkerCount uint ReplicationProtocol ReplicationProtocol + OtlpEnableTargetInfo bool + OtlpResourceAttributes []string } // Handler serves a Prometheus remote write receiving HTTP endpoint. @@ -275,6 +277,18 @@ func NewHandler(logger log.Logger, o *Options) *Handler { ), ) + h.router.Post( + "/api/v1/otlp", + instrf( + "otlp", + readyf( + middleware.RequestID( + http.HandlerFunc(h.receiveOTLPHTTP), + ), + ), + ), + ) + statusAPI := statusapi.New(statusapi.Options{ GetStats: h.getStats, Registry: h.options.Registry, diff --git a/pkg/receive/handler_otlp.go b/pkg/receive/handler_otlp.go new file mode 100644 index 0000000000..434433f75c --- /dev/null +++ b/pkg/receive/handler_otlp.go @@ -0,0 +1,178 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package receive + +import ( + "context" + "net/http" + "strconv" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/pkg/errors" + "github.com/prometheus/prometheus/storage/remote" + "github.com/thanos-io/thanos/pkg/receive/otlptranslator" + tprompb "github.com/thanos-io/thanos/pkg/store/storepb/prompb" + "github.com/thanos-io/thanos/pkg/tenancy" + "github.com/thanos-io/thanos/pkg/tracing" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +func (h *Handler) receiveOTLPHTTP(w http.ResponseWriter, r *http.Request) { + var err error + span, ctx := tracing.StartSpan(r.Context(), "receive_otlp_http") + span.SetTag("receiver.mode", string(h.receiverMode)) + defer span.Finish() + + tenant, err := tenancy.GetTenantFromHTTP(r, h.options.TenantHeader, h.options.DefaultTenantID, h.options.TenantField) + if err != nil { + level.Error(h.logger).Log("msg", "error getting tenant from HTTP", "err", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + tLogger := log.With(h.logger, "tenant", tenant) + span.SetTag("tenant", tenant) + + writeGate := h.Limiter.WriteGate() + tracing.DoInSpan(r.Context(), "receive_write_gate_ismyturn", func(ctx context.Context) { + err = writeGate.Start(r.Context()) + }) + + defer writeGate.Done() + if err != nil { + level.Error(tLogger).Log("err", err, "msg", "internal server error") + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + under, err := h.Limiter.HeadSeriesLimiter().isUnderLimit(tenant) + if err != nil { + level.Error(tLogger).Log("msg", "error while limiting", "err", err.Error()) + } + + // Fail request fully if tenant has exceeded set limit. + if !under { + http.Error(w, "tenant is above active series limit", http.StatusTooManyRequests) + return + } + + requestLimiter := h.Limiter.RequestLimiter() + if r.ContentLength >= 0 { + if !requestLimiter.AllowSizeBytes(tenant, r.ContentLength) { + http.Error(w, "write request too large", http.StatusRequestEntityTooLarge) + return + } + } + + req, err := remote.DecodeOTLPWriteRequest(r) + if err != nil { + level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err.Error()) + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + metrics, _, err := h.convertToPrometheusFormat(ctx, req.Metrics()) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + totalSamples := 0 + for _, ts := range metrics { + totalSamples += len(ts.Samples) + } + + if !requestLimiter.AllowSeries(tenant, int64(len(metrics))) { + http.Error(w, "too many timeseries", http.StatusRequestEntityTooLarge) + return + } + + if !requestLimiter.AllowSamples(tenant, int64(totalSamples)) { + http.Error(w, "too many samples", http.StatusRequestEntityTooLarge) + return + } + + rep := uint64(0) + // If the header is empty, we assume the request is not yet replicated. + if replicaRaw := r.Header.Get(h.options.ReplicaHeader); replicaRaw != "" { + if rep, err = strconv.ParseUint(replicaRaw, 10, 64); err != nil { + http.Error(w, "could not parse replica header", http.StatusBadRequest) + return + } + } + + //TODO: (nicolastakashi) Handle metadata in the future. + wreq := tprompb.WriteRequest{ + Timeseries: metrics, + } + + // Exit early if the request contained no data. We don't support metadata yet. We also cannot fail here, because + // this would mean lack of forward compatibility for remote write proto. + if len(wreq.Timeseries) == 0 { + // TODO(yeya24): Handle remote write metadata. + if len(wreq.Metadata) > 0 { + // TODO(bwplotka): Do we need this error message? + level.Debug(tLogger).Log("msg", "only metadata from client; metadata ingestion not supported; skipping") + return + } + level.Debug(tLogger).Log("msg", "empty remote write request; client bug or newer remote write protocol used?; skipping") + return + } + + // Apply relabeling configs. + h.relabel(&wreq) + if len(wreq.Timeseries) == 0 { + level.Debug(tLogger).Log("msg", "remote write request dropped due to relabeling.") + return + } + + responseStatusCode := http.StatusOK + tenantStats, err := h.handleRequest(ctx, rep, tenant, &wreq) + if err != nil { + level.Debug(tLogger).Log("msg", "failed to handle request", "err", err.Error()) + switch errors.Cause(err) { + case errNotReady: + responseStatusCode = http.StatusServiceUnavailable + case errUnavailable: + responseStatusCode = http.StatusServiceUnavailable + case errConflict: + responseStatusCode = http.StatusConflict + case errBadReplica: + responseStatusCode = http.StatusBadRequest + default: + level.Error(tLogger).Log("err", err, "msg", "internal server error") + responseStatusCode = http.StatusInternalServerError + } + http.Error(w, err.Error(), responseStatusCode) + } + + for tenant, stats := range tenantStats { + h.writeTimeseriesTotal.WithLabelValues(strconv.Itoa(responseStatusCode), tenant).Observe(float64(stats.timeseries)) + h.writeSamplesTotal.WithLabelValues(strconv.Itoa(responseStatusCode), tenant).Observe(float64(stats.totalSamples)) + } + +} + +func (h *Handler) convertToPrometheusFormat(ctx context.Context, pmetrics pmetric.Metrics) ([]tprompb.TimeSeries, []tprompb.MetricMetadata, error) { + converter := otlptranslator.NewPrometheusConverter() + settings := otlptranslator.Settings{ + AddMetricSuffixes: true, + DisableTargetInfo: !h.options.OtlpEnableTargetInfo, + PromoteResourceAttributes: h.options.OtlpResourceAttributes, + } + + annots, err := converter.FromMetrics(ctx, pmetrics, settings) + ws, _ := annots.AsStrings("", 0, 0) + if len(ws) > 0 { + level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) + } + + if err != nil { + level.Error(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) + return nil, nil, err.Err() + } + + return converter.TimeSeries(), converter.Metadata(), nil +} diff --git a/pkg/receive/handler_otlp_test.go b/pkg/receive/handler_otlp_test.go new file mode 100644 index 0000000000..58f73f0f33 --- /dev/null +++ b/pkg/receive/handler_otlp_test.go @@ -0,0 +1,129 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package receive + +import ( + "bytes" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/efficientgo/core/testutil" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" +) + +func TestOTLPWriteHandler(t *testing.T) { + exportRequest := generateOTLPWriteRequest() + + buf, err := exportRequest.MarshalProto() + require.NoError(t, err) + + appendables := []*fakeAppendable{ + { + appender: newFakeAppender(nil, nil, nil), + }, + } + + handlers, _, closeFunc, err := newTestHandlerHashring(appendables, 1, AlgorithmHashmod, false) + require.NoError(t, err) + defer func() { + testutil.Ok(t, closeFunc()) + // Wait a few milliseconds for peer workers to process the queue. + time.AfterFunc(50*time.Millisecond, func() { + for _, h := range handlers { + h.Close() + } + }) + }() + + for _, handler := range handlers { + req := httptest.NewRequest("POST", "/api/v1/otlp", bytes.NewReader(buf)) + + req.Header.Set("Content-Type", "application/x-protobuf") + req.Header.Set(handler.options.ReplicaHeader, "0") + req.Header.Set(handler.options.TenantHeader, "test") + + recorder := httptest.NewRecorder() + handler.receiveOTLPHTTP(recorder, req) + + result := recorder.Result() + defer result.Body.Close() + + require.Equal(t, http.StatusOK, result.StatusCode) + for _, app := range appendables { + require.Greater(t, len(app.appender.(*fakeAppender).samples), 0) + require.Greater(t, len(app.appender.(*fakeAppender).exemplars), 0) + } + } + +} + +func generateOTLPWriteRequest() pmetricotlp.ExportRequest { + d := pmetric.NewMetrics() + + // Generate One Counter, One Gauge, One Histogram + // with resource attributes: service.name="test-service", service.instance.id="test-instance", host.name="test-host" + // with metric attribute: foo.bar="baz" + + timestamp := time.Now() + + resourceMetric := d.ResourceMetrics().AppendEmpty() + resourceMetric.Resource().Attributes().PutStr("service.name", "test-service") + resourceMetric.Resource().Attributes().PutStr("service.instance.id", "test-instance") + resourceMetric.Resource().Attributes().PutStr("host.name", "test-host") + + scopeMetric := resourceMetric.ScopeMetrics().AppendEmpty() + + // Generate One Counter + counterMetric := scopeMetric.Metrics().AppendEmpty() + counterMetric.SetName("test-counter") + counterMetric.SetDescription("test-counter-description") + counterMetric.SetEmptySum() + counterMetric.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + counterMetric.Sum().SetIsMonotonic(true) + + counterDataPoint := counterMetric.Sum().DataPoints().AppendEmpty() + counterDataPoint.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) + counterDataPoint.SetDoubleValue(10.0) + counterDataPoint.Attributes().PutStr("foo.bar", "baz") + + counterExemplar := counterDataPoint.Exemplars().AppendEmpty() + + counterExemplar.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) + counterExemplar.SetDoubleValue(10.0) + counterExemplar.SetSpanID(pcommon.SpanID{0, 1, 2, 3, 4, 5, 6, 7}) + counterExemplar.SetTraceID(pcommon.TraceID{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) + + // Generate One Gauge + gaugeMetric := scopeMetric.Metrics().AppendEmpty() + gaugeMetric.SetName("test-gauge") + gaugeMetric.SetDescription("test-gauge-description") + gaugeMetric.SetEmptyGauge() + + gaugeDataPoint := gaugeMetric.Gauge().DataPoints().AppendEmpty() + gaugeDataPoint.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) + gaugeDataPoint.SetDoubleValue(10.0) + gaugeDataPoint.Attributes().PutStr("foo.bar", "baz") + + // Generate One Histogram + histogramMetric := scopeMetric.Metrics().AppendEmpty() + histogramMetric.SetName("test-histogram") + histogramMetric.SetDescription("test-histogram-description") + histogramMetric.SetEmptyHistogram() + histogramMetric.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + histogramDataPoint := histogramMetric.Histogram().DataPoints().AppendEmpty() + histogramDataPoint.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) + histogramDataPoint.ExplicitBounds().FromRaw([]float64{0.0, 1.0, 2.0, 3.0, 4.0, 5.0}) + histogramDataPoint.BucketCounts().FromRaw([]uint64{2, 2, 2, 2, 2, 2}) + histogramDataPoint.SetCount(10) + histogramDataPoint.SetSum(30.0) + histogramDataPoint.Attributes().PutStr("foo.bar", "baz") + + return pmetricotlp.NewExportRequestFromMetrics(d) +} diff --git a/pkg/receive/handler_test.go b/pkg/receive/handler_test.go index 5711c67be8..35db6f48b6 100644 --- a/pkg/receive/handler_test.go +++ b/pkg/receive/handler_test.go @@ -110,6 +110,7 @@ func newFakeAppender(appendErr, commitErr, rollbackErr func() error) *fakeAppend } return &fakeAppender{ samples: make(map[storage.SeriesRef][]prompb.Sample), + exemplars: make(map[storage.SeriesRef][]exemplar.Exemplar), appendErr: appendErr, commitErr: commitErr, rollbackErr: rollbackErr, diff --git a/pkg/receive/otlptranslator/context.go b/pkg/receive/otlptranslator/context.go new file mode 100644 index 0000000000..dda04c70d7 --- /dev/null +++ b/pkg/receive/otlptranslator/context.go @@ -0,0 +1,32 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/318d6bc4bfe2d81af88697940740a378b503335a/storage/remote/otlptranslator/prometheusremotewrite/context.go#L18 +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The Prometheus Authors. + +package otlptranslator + +import "context" + +// everyNTimes supports checking for context error every n times. +type everyNTimes struct { + n int + i int + err error +} + +// checkContext calls ctx.Err() every e.n times and returns an eventual error. +func (e *everyNTimes) checkContext(ctx context.Context) error { + if e.err != nil { + return e.err + } + + e.i++ + if e.i >= e.n { + e.i = 0 + e.err = ctx.Err() + } + + return e.err +} diff --git a/pkg/receive/otlptranslator/context_test.go b/pkg/receive/otlptranslator/context_test.go new file mode 100644 index 0000000000..9031c00277 --- /dev/null +++ b/pkg/receive/otlptranslator/context_test.go @@ -0,0 +1,31 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package otlptranslator + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestEveryNTimes(t *testing.T) { + const n = 128 + ctx, cancel := context.WithCancel(context.Background()) + e := &everyNTimes{ + n: n, + } + + for i := 0; i < n; i++ { + require.NoError(t, e.checkContext(ctx)) + } + + cancel() + for i := 0; i < n-1; i++ { + require.NoError(t, e.checkContext(ctx)) + } + require.EqualError(t, e.checkContext(ctx), context.Canceled.Error()) + // e should remember the error. + require.EqualError(t, e.checkContext(ctx), context.Canceled.Error()) +} diff --git a/pkg/receive/otlptranslator/helper.go b/pkg/receive/otlptranslator/helper.go new file mode 100644 index 0000000000..abb47e9131 --- /dev/null +++ b/pkg/receive/otlptranslator/helper.go @@ -0,0 +1,629 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/helper.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "encoding/hex" + "fmt" + "log" + "math" + "slices" + "sort" + "strconv" + "unicode/utf8" + + "github.com/cespare/xxhash/v2" + "github.com/prometheus/common/model" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + conventions "go.opentelemetry.io/collector/semconv/v1.6.1" + + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +const ( + sumStr = "_sum" + countStr = "_count" + bucketStr = "_bucket" + leStr = "le" + quantileStr = "quantile" + pInfStr = "+Inf" + createdSuffix = "_created" + // maxExemplarRunes is the maximum number of UTF-8 exemplar characters + // according to the prometheus specification + // https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#exemplars + maxExemplarRunes = 128 + // Trace and Span id keys are defined as part of the spec: + // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification%2Fmetrics%2Fdatamodel.md#exemplars-2 + traceIDKey = "trace_id" + spanIDKey = "span_id" + infoType = "info" + targetMetricName = "target_info" +) + +type bucketBoundsData struct { + ts *prompb.TimeSeries + bound float64 +} + +// byBucketBoundsData enables the usage of sort.Sort() with a slice of bucket bounds. +type byBucketBoundsData []bucketBoundsData + +func (m byBucketBoundsData) Len() int { return len(m) } +func (m byBucketBoundsData) Less(i, j int) bool { return m[i].bound < m[j].bound } +func (m byBucketBoundsData) Swap(i, j int) { m[i], m[j] = m[j], m[i] } + +// ByLabelName enables the usage of sort.Sort() with a slice of labels. +type ByLabelName []labelpb.ZLabel + +func (a ByLabelName) Len() int { return len(a) } +func (a ByLabelName) Less(i, j int) bool { return a[i].Name < a[j].Name } +func (a ByLabelName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +// timeSeriesSignature returns a hashed label set signature. +// The label slice should not contain duplicate label names; this method sorts the slice by label name before creating +// the signature. +// The algorithm is the same as in Prometheus' labels.StableHash function. +func timeSeriesSignature(labels []labelpb.ZLabel) uint64 { + sort.Sort(ByLabelName(labels)) + + // Use xxhash.Sum64(b) for fast path as it's faster. + b := make([]byte, 0, 1024) + for i, v := range labels { + if len(b)+len(v.Name)+len(v.Value)+2 >= cap(b) { + // If labels entry is 1KB+ do not allocate whole entry. + h := xxhash.New() + _, _ = h.Write(b) + for _, v := range labels[i:] { + _, _ = h.WriteString(v.Name) + _, _ = h.Write(seps) + _, _ = h.WriteString(v.Value) + _, _ = h.Write(seps) + } + return h.Sum64() + } + + b = append(b, v.Name...) + b = append(b, seps[0]) + b = append(b, v.Value...) + b = append(b, seps[0]) + } + return xxhash.Sum64(b) +} + +var seps = []byte{'\xff'} + +// createAttributes creates a slice of Prometheus Labels with OTLP attributes and pairs of string values. +// Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen and +// if logOnOverwrite is true, the overwrite is logged. Resulting label names are sanitized. +// If settings.PromoteResourceAttributes is not empty, it's a set of resource attributes that should be promoted to labels. +func createAttributes(resource pcommon.Resource, attributes pcommon.Map, settings Settings, + ignoreAttrs []string, logOnOverwrite bool, extras ...string) []labelpb.ZLabel { + resourceAttrs := resource.Attributes() + serviceName, haveServiceName := resourceAttrs.Get(conventions.AttributeServiceName) + instance, haveInstanceID := resourceAttrs.Get(conventions.AttributeServiceInstanceID) + + promotedAttrs := make([]labelpb.ZLabel, 0, len(settings.PromoteResourceAttributes)) + for _, name := range settings.PromoteResourceAttributes { + if value, exists := resourceAttrs.Get(name); exists { + promotedAttrs = append(promotedAttrs, labelpb.ZLabel{Name: name, Value: value.AsString()}) + } + } + sort.Stable(ByLabelName(promotedAttrs)) + + // Calculate the maximum possible number of labels we could return so we can preallocate l + maxLabelCount := attributes.Len() + len(settings.ExternalLabels) + len(promotedAttrs) + len(extras)/2 + + if haveServiceName { + maxLabelCount++ + } + + if haveInstanceID { + maxLabelCount++ + } + + // Ensure attributes are sorted by key for consistent merging of keys which + // collide when sanitized. + labels := make([]labelpb.ZLabel, 0, maxLabelCount) + // XXX: Should we always drop service namespace/service name/service instance ID from the labels + // (as they get mapped to other Prometheus labels)? + attributes.Range(func(key string, value pcommon.Value) bool { + if !slices.Contains(ignoreAttrs, key) { + labels = append(labels, labelpb.ZLabel{Name: key, Value: value.AsString()}) + } + return true + }) + sort.Stable(ByLabelName(labels)) + + // map ensures no duplicate label names. + l := make(map[string]string, maxLabelCount) + for _, label := range labels { + finalKey := label.Name + if !settings.AllowUTF8 { + finalKey = prometheustranslator.NormalizeLabel(finalKey) + } + if existingValue, alreadyExists := l[finalKey]; alreadyExists { + l[finalKey] = existingValue + ";" + label.Value + } else { + l[finalKey] = label.Value + } + } + + for _, lbl := range promotedAttrs { + normalized := lbl.Name + if !settings.AllowUTF8 { + normalized = prometheustranslator.NormalizeLabel(normalized) + } + if _, exists := l[normalized]; !exists { + l[normalized] = lbl.Value + } + } + + // Map service.name + service.namespace to job + if haveServiceName { + val := serviceName.AsString() + if serviceNamespace, ok := resourceAttrs.Get(conventions.AttributeServiceNamespace); ok { + val = fmt.Sprintf("%s/%s", serviceNamespace.AsString(), val) + } + l[model.JobLabel] = val + } + // Map service.instance.id to instance + if haveInstanceID { + l[model.InstanceLabel] = instance.AsString() + } + for key, value := range settings.ExternalLabels { + // External labels have already been sanitized + if _, alreadyExists := l[key]; alreadyExists { + // Skip external labels if they are overridden by metric attributes + continue + } + l[key] = value + } + + for i := 0; i < len(extras); i += 2 { + if i+1 >= len(extras) { + break + } + + name := extras[i] + _, found := l[name] + if found && logOnOverwrite { + log.Println("label " + name + " is overwritten. Check if Prometheus reserved labels are used.") + } + // internal labels should be maintained + if !settings.AllowUTF8 && !(len(name) > 4 && name[:2] == "__" && name[len(name)-2:] == "__") { + name = prometheustranslator.NormalizeLabel(name) + } + l[name] = extras[i+1] + } + + labels = labels[:0] + for k, v := range l { + labels = append(labels, labelpb.ZLabel{Name: k, Value: v}) + } + + return labels +} + +// isValidAggregationTemporality checks whether an OTel metric has a valid +// aggregation temporality for conversion to a Prometheus metric. +func isValidAggregationTemporality(metric pmetric.Metric) bool { + //exhaustive:enforce + switch metric.Type() { + case pmetric.MetricTypeGauge, pmetric.MetricTypeSummary: + return true + case pmetric.MetricTypeSum: + return metric.Sum().AggregationTemporality() == pmetric.AggregationTemporalityCumulative + case pmetric.MetricTypeHistogram: + return metric.Histogram().AggregationTemporality() == pmetric.AggregationTemporalityCumulative + case pmetric.MetricTypeExponentialHistogram: + return metric.ExponentialHistogram().AggregationTemporality() == pmetric.AggregationTemporalityCumulative + } + return false +} + +// addHistogramDataPoints adds OTel histogram data points to the corresponding Prometheus time series +// as classical histogram samples. +// +// Note that we can't convert to native histograms, since these have exponential buckets and don't line up +// with the user defined bucket boundaries of non-exponential OTel histograms. +// However, work is under way to resolve this shortcoming through a feature called native histograms custom buckets: +// https://github.com/prometheus/prometheus/issues/13485. +func (c *PrometheusConverter) addHistogramDataPoints(ctx context.Context, dataPoints pmetric.HistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) error { + for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings, nil, false) + + // If the sum is unset, it indicates the _sum metric point should be + // omitted + if pt.HasSum() { + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + sum.Value = math.Float64frombits(value.StaleNaN) + } + + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) + + } + + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + count.Value = math.Float64frombits(value.StaleNaN) + } + + countlabels := createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) + + // cumulative count for conversion to cumulative histogram + var cumulativeCount uint64 + + var bucketBounds []bucketBoundsData + + // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 + for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + + bound := pt.ExplicitBounds().At(i) + cumulativeCount += pt.BucketCounts().At(i) + bucket := &prompb.Sample{ + Value: float64(cumulativeCount), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + bucket.Value = math.Float64frombits(value.StaleNaN) + } + boundStr := strconv.FormatFloat(bound, 'f', -1, 64) + labels := createLabels(baseName+bucketStr, baseLabels, leStr, boundStr) + ts := c.addSample(bucket, labels) + + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: bound}) + } + // add le=+Inf bucket + infBucket := &prompb.Sample{ + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + infBucket.Value = math.Float64frombits(value.StaleNaN) + } else { + infBucket.Value = float64(pt.Count()) + } + infLabels := createLabels(baseName+bucketStr, baseLabels, leStr, pInfStr) + ts := c.addSample(infBucket, infLabels) + + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: math.Inf(1)}) + if err := c.addExemplars(ctx, pt, bucketBounds); err != nil { + return err + } + + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + labels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(labels, startTimestamp, pt.Timestamp()) + } + } + + return nil +} + +type exemplarType interface { + pmetric.ExponentialHistogramDataPoint | pmetric.HistogramDataPoint | pmetric.NumberDataPoint + Exemplars() pmetric.ExemplarSlice +} + +func getPromExemplars[T exemplarType](ctx context.Context, everyN *everyNTimes, pt T) ([]prompb.Exemplar, error) { + promExemplars := make([]prompb.Exemplar, 0, pt.Exemplars().Len()) + for i := 0; i < pt.Exemplars().Len(); i++ { + if err := everyN.checkContext(ctx); err != nil { + return nil, err + } + + exemplar := pt.Exemplars().At(i) + exemplarRunes := 0 + + promExemplar := prompb.Exemplar{ + Timestamp: timestamp.FromTime(exemplar.Timestamp().AsTime()), + } + switch exemplar.ValueType() { + case pmetric.ExemplarValueTypeInt: + promExemplar.Value = float64(exemplar.IntValue()) + case pmetric.ExemplarValueTypeDouble: + promExemplar.Value = exemplar.DoubleValue() + default: + return nil, fmt.Errorf("unsupported exemplar value type: %v", exemplar.ValueType()) + } + + if traceID := exemplar.TraceID(); !traceID.IsEmpty() { + val := hex.EncodeToString(traceID[:]) + exemplarRunes += utf8.RuneCountInString(traceIDKey) + utf8.RuneCountInString(val) + promLabel := labelpb.ZLabel{ + Name: traceIDKey, + Value: val, + } + promExemplar.Labels = append(promExemplar.Labels, promLabel) + } + if spanID := exemplar.SpanID(); !spanID.IsEmpty() { + val := hex.EncodeToString(spanID[:]) + exemplarRunes += utf8.RuneCountInString(spanIDKey) + utf8.RuneCountInString(val) + promLabel := labelpb.ZLabel{ + Name: spanIDKey, + Value: val, + } + promExemplar.Labels = append(promExemplar.Labels, promLabel) + } + + attrs := exemplar.FilteredAttributes() + labelsFromAttributes := make([]labelpb.ZLabel, 0, attrs.Len()) + attrs.Range(func(key string, value pcommon.Value) bool { + val := value.AsString() + exemplarRunes += utf8.RuneCountInString(key) + utf8.RuneCountInString(val) + promLabel := labelpb.ZLabel{ + Name: key, + Value: val, + } + + labelsFromAttributes = append(labelsFromAttributes, promLabel) + + return true + }) + if exemplarRunes <= maxExemplarRunes { + // only append filtered attributes if it does not cause exemplar + // labels to exceed the max number of runes + promExemplar.Labels = append(promExemplar.Labels, labelsFromAttributes...) + } + + promExemplars = append(promExemplars, promExemplar) + } + + return promExemplars, nil +} + +// mostRecentTimestampInMetric returns the latest timestamp in a batch of metrics. +func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { + var ts pcommon.Timestamp + // handle individual metric based on type + //exhaustive:enforce + switch metric.Type() { + case pmetric.MetricTypeGauge: + dataPoints := metric.Gauge().DataPoints() + for x := 0; x < dataPoints.Len(); x++ { + ts = max(ts, dataPoints.At(x).Timestamp()) + } + case pmetric.MetricTypeSum: + dataPoints := metric.Sum().DataPoints() + for x := 0; x < dataPoints.Len(); x++ { + ts = max(ts, dataPoints.At(x).Timestamp()) + } + case pmetric.MetricTypeHistogram: + dataPoints := metric.Histogram().DataPoints() + for x := 0; x < dataPoints.Len(); x++ { + ts = max(ts, dataPoints.At(x).Timestamp()) + } + case pmetric.MetricTypeExponentialHistogram: + dataPoints := metric.ExponentialHistogram().DataPoints() + for x := 0; x < dataPoints.Len(); x++ { + ts = max(ts, dataPoints.At(x).Timestamp()) + } + case pmetric.MetricTypeSummary: + dataPoints := metric.Summary().DataPoints() + for x := 0; x < dataPoints.Len(); x++ { + ts = max(ts, dataPoints.At(x).Timestamp()) + } + } + return ts +} + +func (c *PrometheusConverter) addSummaryDataPoints(ctx context.Context, dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, + settings Settings, baseName string) error { + for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings, nil, false) + + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + sum.Value = math.Float64frombits(value.StaleNaN) + } + // sum and count of the summary should append suffix to baseName + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) + + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + count.Value = math.Float64frombits(value.StaleNaN) + } + countlabels := createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) + + // process each percentile/quantile + for i := 0; i < pt.QuantileValues().Len(); i++ { + qt := pt.QuantileValues().At(i) + quantile := &prompb.Sample{ + Value: qt.Value(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + quantile.Value = math.Float64frombits(value.StaleNaN) + } + percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) + qtlabels := createLabels(baseName, baseLabels, quantileStr, percentileStr) + c.addSample(quantile, qtlabels) + } + + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + createdLabels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) + } + } + + return nil +} + +// createLabels returns a copy of baseLabels, adding to it the pair model.MetricNameLabel=name. +// If extras are provided, corresponding label pairs are also added to the returned slice. +// If extras is uneven length, the last (unpaired) extra will be ignored. +func createLabels(name string, baseLabels []labelpb.ZLabel, extras ...string) []labelpb.ZLabel { + extraLabelCount := len(extras) / 2 + labels := make([]labelpb.ZLabel, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name + copy(labels, baseLabels) + + n := len(extras) + n -= n % 2 + for extrasIdx := 0; extrasIdx < n; extrasIdx += 2 { + labels = append(labels, labelpb.ZLabel{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) + } + + labels = append(labels, labelpb.ZLabel{Name: model.MetricNameLabel, Value: name}) + return labels +} + +// getOrCreateTimeSeries returns the time series corresponding to the label set if existent, and false. +// Otherwise it creates a new one and returns that, and true. +func (c *PrometheusConverter) getOrCreateTimeSeries(lbls []labelpb.ZLabel) (*prompb.TimeSeries, bool) { + h := timeSeriesSignature(lbls) + ts := c.unique[h] + if ts != nil { + if isSameMetric(ts, lbls) { + // We already have this metric + return ts, false + } + + // Look for a matching conflict + for _, cTS := range c.conflicts[h] { + if isSameMetric(cTS, lbls) { + // We already have this metric + return cTS, false + } + } + + // New conflict + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.conflicts[h] = append(c.conflicts[h], ts) + return ts, true + } + + // This metric is new + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.unique[h] = ts + return ts, true +} + +// addTimeSeriesIfNeeded adds a corresponding time series if it doesn't already exist. +// If the time series doesn't already exist, it gets added with startTimestamp for its value and timestamp for its timestamp, +// both converted to milliseconds. +func (c *PrometheusConverter) addTimeSeriesIfNeeded(lbls []labelpb.ZLabel, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { + ts, created := c.getOrCreateTimeSeries(lbls) + if created { + ts.Samples = []prompb.Sample{ + { + // convert ns to ms + Value: float64(convertTimeStamp(startTimestamp)), + Timestamp: convertTimeStamp(timestamp), + }, + } + } +} + +// addResourceTargetInfo converts the resource to the target info metric. +func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *PrometheusConverter) { + if settings.DisableTargetInfo || timestamp == 0 { + return + } + + attributes := resource.Attributes() + identifyingAttrs := []string{ + conventions.AttributeServiceNamespace, + conventions.AttributeServiceName, + conventions.AttributeServiceInstanceID, + } + nonIdentifyingAttrsCount := attributes.Len() + for _, a := range identifyingAttrs { + _, haveAttr := attributes.Get(a) + if haveAttr { + nonIdentifyingAttrsCount-- + } + } + if nonIdentifyingAttrsCount == 0 { + // If we only have job + instance, then target_info isn't useful, so don't add it. + return + } + + name := targetMetricName + if len(settings.Namespace) > 0 { + name = settings.Namespace + "_" + name + } + + settings.PromoteResourceAttributes = nil + if settings.KeepIdentifyingResourceAttributes { + // Do not pass identifying attributes as ignoreAttrs below. + identifyingAttrs = nil + } + labels := createAttributes(resource, attributes, settings, identifyingAttrs, false, model.MetricNameLabel, name) + haveIdentifier := false + for _, l := range labels { + if l.Name == model.JobLabel || l.Name == model.InstanceLabel { + haveIdentifier = true + break + } + } + + if !haveIdentifier { + // We need at least one identifying label to generate target_info. + return + } + + sample := &prompb.Sample{ + Value: float64(1), + // convert ns to ms + Timestamp: convertTimeStamp(timestamp), + } + converter.addSample(sample, labels) +} + +// convertTimeStamp converts OTLP timestamp in ns to timestamp in ms. +func convertTimeStamp(timestamp pcommon.Timestamp) int64 { + return int64(timestamp) / 1_000_000 +} diff --git a/pkg/receive/otlptranslator/helper_test.go b/pkg/receive/otlptranslator/helper_test.go new file mode 100644 index 0000000000..a4d4b1c8ea --- /dev/null +++ b/pkg/receive/otlptranslator/helper_test.go @@ -0,0 +1,476 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/debbf30360b8d3a0ded8db09c4419d2a9c99b94a/pkg/translator/prometheusremotewrite/helper_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/common/model" + + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +func TestCreateAttributes(t *testing.T) { + resourceAttrs := map[string]string{ + "service.name": "service name", + "service.instance.id": "service ID", + "existent-attr": "resource value", + // This one is for testing conflict with metric attribute. + "metric-attr": "resource value", + // This one is for testing conflict with auto-generated job attribute. + "job": "resource value", + // This one is for testing conflict with auto-generated instance attribute. + "instance": "resource value", + } + + resource := pcommon.NewResource() + for k, v := range resourceAttrs { + resource.Attributes().PutStr(k, v) + } + attrs := pcommon.NewMap() + attrs.PutStr("metric-attr", "metric value") + attrs.PutStr("metric-attr-other", "metric value other") + + testCases := []struct { + name string + promoteResourceAttributes []string + ignoreAttrs []string + expectedLabels []labelpb.ZLabel + }{ + { + name: "Successful conversion without resource attribute promotion", + promoteResourceAttributes: nil, + expectedLabels: []labelpb.ZLabel{ + { + Name: "__name__", + Value: "test_metric", + }, + { + Name: "instance", + Value: "service ID", + }, + { + Name: "job", + Value: "service name", + }, + { + Name: "metric_attr", + Value: "metric value", + }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, + }, + }, + { + name: "Successful conversion with some attributes ignored", + promoteResourceAttributes: nil, + ignoreAttrs: []string{"metric-attr-other"}, + expectedLabels: []labelpb.ZLabel{ + { + Name: "__name__", + Value: "test_metric", + }, + { + Name: "instance", + Value: "service ID", + }, + { + Name: "job", + Value: "service name", + }, + { + Name: "metric_attr", + Value: "metric value", + }, + }, + }, + { + name: "Successful conversion with resource attribute promotion", + promoteResourceAttributes: []string{"non-existent-attr", "existent-attr"}, + expectedLabels: []labelpb.ZLabel{ + { + Name: "__name__", + Value: "test_metric", + }, + { + Name: "instance", + Value: "service ID", + }, + { + Name: "job", + Value: "service name", + }, + { + Name: "metric_attr", + Value: "metric value", + }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, + { + Name: "existent_attr", + Value: "resource value", + }, + }, + }, + { + name: "Successful conversion with resource attribute promotion, conflicting resource attributes are ignored", + promoteResourceAttributes: []string{"non-existent-attr", "existent-attr", "metric-attr", "job", "instance"}, + expectedLabels: []labelpb.ZLabel{ + { + Name: "__name__", + Value: "test_metric", + }, + { + Name: "instance", + Value: "service ID", + }, + { + Name: "job", + Value: "service name", + }, + { + Name: "existent_attr", + Value: "resource value", + }, + { + Name: "metric_attr", + Value: "metric value", + }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, + }, + }, + { + name: "Successful conversion with resource attribute promotion, attributes are only promoted once", + promoteResourceAttributes: []string{"existent-attr", "existent-attr"}, + expectedLabels: []labelpb.ZLabel{ + { + Name: "__name__", + Value: "test_metric", + }, + { + Name: "instance", + Value: "service ID", + }, + { + Name: "job", + Value: "service name", + }, + { + Name: "existent_attr", + Value: "resource value", + }, + { + Name: "metric_attr", + Value: "metric value", + }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + settings := Settings{ + PromoteResourceAttributes: tc.promoteResourceAttributes, + } + lbls := createAttributes(resource, attrs, settings, tc.ignoreAttrs, false, model.MetricNameLabel, "test_metric") + + assert.ElementsMatch(t, lbls, tc.expectedLabels) + }) + } +} + +func Test_convertTimeStamp(t *testing.T) { + tests := []struct { + name string + arg pcommon.Timestamp + want int64 + }{ + {"zero", 0, 0}, + {"1ms", 1_000_000, 1}, + {"1s", pcommon.Timestamp(time.Unix(1, 0).UnixNano()), 1000}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := convertTimeStamp(tt.arg) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestPrometheusConverter_AddSummaryDataPoints(t *testing.T) { + ts := pcommon.Timestamp(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "summary with start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_summary") + metric.SetEmptySummary() + + dp := metric.Summary().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + dp.SetStartTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_summary" + countStr}, + } + createdLabels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_summary" + createdSuffix}, + } + sumLabels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_summary" + sumStr}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(sumLabels): { + Labels: sumLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(createdLabels): { + Labels: createdLabels, + Samples: []prompb.Sample{ + {Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "summary without start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_summary") + metric.SetEmptySummary() + + dp := metric.Summary().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_summary" + countStr}, + } + sumLabels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_summary" + sumStr}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(sumLabels): { + Labels: sumLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + _ = converter.addSummaryDataPoints( + context.Background(), + metric.Summary().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} + +func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { + ts := pcommon.Timestamp(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "histogram with start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.Histogram().DataPoints().AppendEmpty() + pt.SetTimestamp(ts) + pt.SetStartTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist" + countStr}, + } + createdLabels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist" + createdSuffix}, + } + infLabels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist_bucket"}, + {Name: model.BucketLabel, Value: "+Inf"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(infLabels): { + Labels: infLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(createdLabels): { + Labels: createdLabels, + Samples: []prompb.Sample{ + {Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "histogram without start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.Histogram().DataPoints().AppendEmpty() + pt.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist" + countStr}, + } + infLabels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist_bucket"}, + {Name: model.BucketLabel, Value: "+Inf"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(infLabels): { + Labels: infLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + _ = converter.addHistogramDataPoints( + context.Background(), + metric.Histogram().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} + +func TestGetPromExemplars(t *testing.T) { + ctx := context.Background() + everyN := &everyNTimes{n: 1} + + t.Run("Exemplars with int value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetIntValue(42) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, float64(42), exemplars[0].Value) + }) + + t.Run("Exemplars with double value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetDoubleValue(69.420) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, 69.420, exemplars[0].Value) + }) + + t.Run("Exemplars with unsupported value type", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + _, err := getPromExemplars(ctx, everyN, pt) + assert.Error(t, err) + }) +} diff --git a/pkg/receive/otlptranslator/helpers_from_stdlib.go b/pkg/receive/otlptranslator/helpers_from_stdlib.go new file mode 100644 index 0000000000..3627c0ed6b --- /dev/null +++ b/pkg/receive/otlptranslator/helpers_from_stdlib.go @@ -0,0 +1,97 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/golang/go/blob/f2d118fd5f7e872804a5825ce29797f81a28b0fa/src/strings/strings.go +// Provenance-includes-license: BSD-3-Clause +// Provenance-includes-copyright: Copyright The Go Authors. + +package otlptranslator + +import "strings" + +// fieldsFunc is a copy of strings.FieldsFunc from the Go standard library, +// but it also returns the separators as part of the result. +func fieldsFunc(s string, f func(rune) bool) ([]string, []string) { + // A span is used to record a slice of s of the form s[start:end]. + // The start index is inclusive and the end index is exclusive. + type span struct { + start int + end int + } + spans := make([]span, 0, 32) + separators := make([]string, 0, 32) + + // Find the field start and end indices. + // Doing this in a separate pass (rather than slicing the string s + // and collecting the result substrings right away) is significantly + // more efficient, possibly due to cache effects. + start := -1 // valid span start if >= 0 + for end, rune := range s { + if f(rune) { + if start >= 0 { + spans = append(spans, span{start, end}) + // Set start to a negative value. + // Note: using -1 here consistently and reproducibly + // slows down this code by a several percent on amd64. + start = ^start + separators = append(separators, string(s[end])) + } + } else { + if start < 0 { + start = end + } + } + } + + // Last field might end at EOF. + if start >= 0 { + spans = append(spans, span{start, len(s)}) + } + + // Create strings from recorded field indices. + a := make([]string, len(spans)) + for i, span := range spans { + a[i] = s[span.start:span.end] + } + + return a, separators +} + +// join is a copy of strings.Join from the Go standard library, +// but it also accepts a slice of separators to join the elements with. +// If the slice of separators is shorter than the slice of elements, use a default value. +// We also don't check for integer overflow. +func join(elems []string, separators []string, def string) string { + switch len(elems) { + case 0: + return "" + case 1: + return elems[0] + } + + var n int + var sep string + sepLen := len(separators) + for i, elem := range elems { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + n += len(sep) + len(elem) + } + + var b strings.Builder + b.Grow(n) + b.WriteString(elems[0]) + for i, s := range elems[1:] { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + b.WriteString(sep) + b.WriteString(s) + } + return b.String() +} diff --git a/pkg/receive/otlptranslator/histograms.go b/pkg/receive/otlptranslator/histograms.go new file mode 100644 index 0000000000..238ce5a48b --- /dev/null +++ b/pkg/receive/otlptranslator/histograms.go @@ -0,0 +1,219 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/histograms.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "fmt" + "math" + + "github.com/prometheus/common/model" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/util/annotations" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +const defaultZeroThreshold = 1e-128 + +// addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series +// as native histogram samples. +func (c *PrometheusConverter) addExponentialHistogramDataPoints(ctx context.Context, dataPoints pmetric.ExponentialHistogramDataPointSlice, + resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) { + var annots annotations.Annotations + for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return annots, err + } + + pt := dataPoints.At(x) + + histogram, ws, err := exponentialToNativeHistogram(pt) + annots.Merge(ws) + if err != nil { + return annots, err + } + + lbls := createAttributes( + resource, + pt.Attributes(), + settings, + nil, + true, + model.MetricNameLabel, + promName, + ) + ts, _ := c.getOrCreateTimeSeries(lbls) + ts.Histograms = append(ts.Histograms, histogram) + + exemplars, err := getPromExemplars[pmetric.ExponentialHistogramDataPoint](ctx, &c.everyN, pt) + if err != nil { + return annots, err + } + ts.Exemplars = append(ts.Exemplars, exemplars...) + } + + return annots, nil +} + +// exponentialToNativeHistogram translates an OTel Exponential Histogram data point +// to a Prometheus Native Histogram. +func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) { + var annots annotations.Annotations + scale := p.Scale() + if scale < -4 { + return prompb.Histogram{}, annots, + fmt.Errorf("cannot convert exponential to native histogram."+ + " Scale must be >= -4, was %d", scale) + } + + var scaleDown int32 + if scale > 8 { + scaleDown = scale - 8 + scale = 8 + } + + pSpans, pDeltas := convertBucketsLayout(p.Positive(), scaleDown) + nSpans, nDeltas := convertBucketsLayout(p.Negative(), scaleDown) + + h := prompb.Histogram{ + // The counter reset detection must be compatible with Prometheus to + // safely set ResetHint to NO. This is not ensured currently. + // Sending a sample that triggers counter reset but with ResetHint==NO + // would lead to Prometheus panic as it does not double check the hint. + // Thus we're explicitly saying UNKNOWN here, which is always safe. + // TODO: using created time stamp should be accurate, but we + // need to know here if it was used for the detection. + // Ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/28663#issuecomment-1810577303 + // Counter reset detection in Prometheus: https://github.com/prometheus/prometheus/blob/f997c72f294c0f18ca13fa06d51889af04135195/tsdb/chunkenc/histogram.go#L232 + ResetHint: prompb.Histogram_UNKNOWN, + Schema: scale, + + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: p.ZeroCount()}, + // TODO use zero_threshold, if set, see + // https://github.com/open-telemetry/opentelemetry-proto/pull/441 + ZeroThreshold: defaultZeroThreshold, + + PositiveSpans: pSpans, + PositiveDeltas: pDeltas, + NegativeSpans: nSpans, + NegativeDeltas: nDeltas, + + Timestamp: convertTimeStamp(p.Timestamp()), + } + + if p.Flags().NoRecordedValue() { + h.Sum = math.Float64frombits(value.StaleNaN) + h.Count = &prompb.Histogram_CountInt{CountInt: value.StaleNaN} + } else { + if p.HasSum() { + h.Sum = p.Sum() + } + h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()} + if p.Count() == 0 && h.Sum != 0 { + annots.Add(fmt.Errorf("exponential histogram data point has zero count, but non-zero sum: %f", h.Sum)) + } + } + return h, annots, nil +} + +// convertBucketsLayout translates OTel Exponential Histogram dense buckets +// representation to Prometheus Native Histogram sparse bucket representation. +// +// The translation logic is taken from the client_golang `histogram.go#makeBuckets` +// function, see `makeBuckets` https://github.com/prometheus/client_golang/blob/main/prometheus/histogram.go +// The bucket indexes conversion was adjusted, since OTel exp. histogram bucket +// index 0 corresponds to the range (1, base] while Prometheus bucket index 0 +// to the range (base 1]. +// +// scaleDown is the factor by which the buckets are scaled down. In other words 2^scaleDown buckets will be merged into one. +func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets, scaleDown int32) ([]prompb.BucketSpan, []int64) { + bucketCounts := buckets.BucketCounts() + if bucketCounts.Len() == 0 { + return nil, nil + } + + var ( + spans []prompb.BucketSpan + deltas []int64 + count int64 + prevCount int64 + ) + + appendDelta := func(count int64) { + spans[len(spans)-1].Length++ + deltas = append(deltas, count-prevCount) + prevCount = count + } + + // Let the compiler figure out that this is const during this function by + // moving it into a local variable. + numBuckets := bucketCounts.Len() + + // The offset is scaled and adjusted by 1 as described above. + bucketIdx := buckets.Offset()>>scaleDown + 1 + spans = append(spans, prompb.BucketSpan{ + Offset: bucketIdx, + Length: 0, + }) + + for i := 0; i < numBuckets; i++ { + // The offset is scaled and adjusted by 1 as described above. + nextBucketIdx := (int32(i)+buckets.Offset())>>scaleDown + 1 + if bucketIdx == nextBucketIdx { // We have not collected enough buckets to merge yet. + count += int64(bucketCounts.At(i)) + continue + } + if count == 0 { + count = int64(bucketCounts.At(i)) + continue + } + + gap := nextBucketIdx - bucketIdx - 1 + if gap > 2 { + // We have to create a new span, because we have found a gap + // of more than two buckets. The constant 2 is copied from the logic in + // https://github.com/prometheus/client_golang/blob/27f0506d6ebbb117b6b697d0552ee5be2502c5f2/prometheus/histogram.go#L1296 + spans = append(spans, prompb.BucketSpan{ + Offset: gap, + Length: 0, + }) + } else { + // We have found a small gap (or no gap at all). + // Insert empty buckets as needed. + for j := int32(0); j < gap; j++ { + appendDelta(0) + } + } + appendDelta(count) + count = int64(bucketCounts.At(i)) + bucketIdx = nextBucketIdx + } + // Need to use the last item's index. The offset is scaled and adjusted by 1 as described above. + gap := (int32(numBuckets)+buckets.Offset()-1)>>scaleDown + 1 - bucketIdx + if gap > 2 { + // We have to create a new span, because we have found a gap + // of more than two buckets. The constant 2 is copied from the logic in + // https://github.com/prometheus/client_golang/blob/27f0506d6ebbb117b6b697d0552ee5be2502c5f2/prometheus/histogram.go#L1296 + spans = append(spans, prompb.BucketSpan{ + Offset: gap, + Length: 0, + }) + } else { + // We have found a small gap (or no gap at all). + // Insert empty buckets as needed. + for j := int32(0); j < gap; j++ { + appendDelta(0) + } + } + appendDelta(count) + + return spans, deltas +} diff --git a/pkg/receive/otlptranslator/histograms_test.go b/pkg/receive/otlptranslator/histograms_test.go new file mode 100644 index 0000000000..1bf8376305 --- /dev/null +++ b/pkg/receive/otlptranslator/histograms_test.go @@ -0,0 +1,764 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/histograms_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +type expectedBucketLayout struct { + wantSpans []prompb.BucketSpan + wantDeltas []int64 +} + +func TestConvertBucketsLayout(t *testing.T) { + tests := []struct { + name string + buckets func() pmetric.ExponentialHistogramDataPointBuckets + wantLayout map[int32]expectedBucketLayout + }{ + { + name: "zero offset", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(0) + b.BucketCounts().FromRaw([]uint64{4, 3, 2, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 4, + }, + }, + wantDeltas: []int64{4, -1, -1, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 2, + }, + }, + // 4+3, 2+1 = 7, 3 =delta= 7, -4 + wantDeltas: []int64{7, -4}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 1, + }, + }, + // 4+3+2+1 = 10 =delta= 10 + wantDeltas: []int64{10}, + }, + }, + }, + { + name: "offset 1", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(1) + b.BucketCounts().FromRaw([]uint64{4, 3, 2, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 2, + Length: 4, + }, + }, + wantDeltas: []int64{4, -1, -1, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 3, + }, + }, + wantDeltas: []int64{4, 1, -4}, // 0+4, 3+2, 1+0 = 4, 5, 1 + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 2, + }, + }, + wantDeltas: []int64{9, -8}, // 0+4+3+2, 1+0+0+0 = 9, 1 + }, + }, + }, + { + name: "positive offset", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(4) + b.BucketCounts().FromRaw([]uint64{4, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 5, + Length: 4, + }, + { + Offset: 12, + Length: 1, + }, + }, + wantDeltas: []int64{4, -2, -2, 2, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 3, + Length: 2, + }, + { + Offset: 6, + Length: 1, + }, + }, + // Downscale: + // 4+2, 0+2, 0+0, 0+0, 0+0, 0+0, 0+0, 0+0, 1+0 = 6, 2, 0, 0, 0, 0, 0, 0, 1 + wantDeltas: []int64{6, -4, -1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 2, + Length: 1, + }, + { + Offset: 3, + Length: 1, + }, + }, + // Downscale: + // 4+2+0+2, 0+0+0+0, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 + wantDeltas: []int64{8, -7}, + }, + }, + }, + { + name: "scaledown merges spans", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(4) + b.BucketCounts().FromRaw([]uint64{4, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 5, + Length: 4, + }, + { + Offset: 8, + Length: 1, + }, + }, + wantDeltas: []int64{4, -2, -2, 2, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 3, + Length: 2, + }, + { + Offset: 4, + Length: 1, + }, + }, + // Downscale: + // 4+2, 0+2, 0+0, 0+0, 0+0, 0+0, 1+0 = 6, 2, 0, 0, 0, 0, 1 + wantDeltas: []int64{6, -4, -1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 2, + Length: 4, + }, + }, + // Downscale: + // 4+2+0+2, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 + wantDeltas: []int64{8, -8, 0, 1}, + }, + }, + }, + { + name: "negative offset", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(-2) + b.BucketCounts().FromRaw([]uint64{3, 1, 0, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: -1, + Length: 2, + }, + { + Offset: 3, + Length: 1, + }, + }, + wantDeltas: []int64{3, -2, 0}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 3, + }, + }, + // Downscale: + // 3+1, 0+0, 0+1 = 4, 0, 1 + wantDeltas: []int64{4, -4, 1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 2, + }, + }, + // Downscale: + // 0+0+3+1, 0+0+0+0 = 4, 1 + wantDeltas: []int64{4, -3}, + }, + }, + }, + { + name: "buckets with gaps of size 1", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(-2) + b.BucketCounts().FromRaw([]uint64{3, 1, 0, 1, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: -1, + Length: 6, + }, + }, + wantDeltas: []int64{3, -2, -1, 1, -1, 1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 3, + }, + }, + // Downscale: + // 3+1, 0+1, 0+1 = 4, 1, 1 + wantDeltas: []int64{4, -3, 0}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 2, + }, + }, + // Downscale: + // 0+0+3+1, 0+1+0+1 = 4, 2 + wantDeltas: []int64{4, -2}, + }, + }, + }, + { + name: "buckets with gaps of size 2", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(-2) + b.BucketCounts().FromRaw([]uint64{3, 0, 0, 1, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: -1, + Length: 7, + }, + }, + wantDeltas: []int64{3, -3, 0, 1, -1, 0, 1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 4, + }, + }, + // Downscale: + // 3+0, 0+1, 0+0, 0+1 = 3, 1, 0, 1 + wantDeltas: []int64{3, -2, -1, 1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 3, + }, + }, + // Downscale: + // 0+0+3+0, 0+1+0+0, 1+0+0+0 = 3, 1, 1 + wantDeltas: []int64{3, -2, 0}, + }, + }, + }, + { + name: "zero buckets", + buckets: pmetric.NewExponentialHistogramDataPointBuckets, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: nil, + wantDeltas: nil, + }, + 1: { + wantSpans: nil, + wantDeltas: nil, + }, + 2: { + wantSpans: nil, + wantDeltas: nil, + }, + }, + }, + } + for _, tt := range tests { + for scaleDown, wantLayout := range tt.wantLayout { + t.Run(fmt.Sprintf("%s-scaleby-%d", tt.name, scaleDown), func(t *testing.T) { + gotSpans, gotDeltas := convertBucketsLayout(tt.buckets(), scaleDown) + assert.Equal(t, wantLayout.wantSpans, gotSpans) + assert.Equal(t, wantLayout.wantDeltas, gotDeltas) + }) + } + } +} + +func BenchmarkConvertBucketLayout(b *testing.B) { + scenarios := []struct { + gap int + }{ + {gap: 0}, + {gap: 1}, + {gap: 2}, + {gap: 3}, + } + + for _, scenario := range scenarios { + buckets := pmetric.NewExponentialHistogramDataPointBuckets() + buckets.SetOffset(0) + for i := 0; i < 1000; i++ { + if i%(scenario.gap+1) == 0 { + buckets.BucketCounts().Append(10) + } else { + buckets.BucketCounts().Append(0) + } + } + b.Run(fmt.Sprintf("gap %d", scenario.gap), func(b *testing.B) { + for i := 0; i < b.N; i++ { + convertBucketsLayout(buckets, 0) + } + }) + } +} + +func TestExponentialToNativeHistogram(t *testing.T) { + tests := []struct { + name string + exponentialHist func() pmetric.ExponentialHistogramDataPoint + wantNativeHist func() prompb.Histogram + wantErrMessage string + }{ + { + name: "convert exp. to native histogram", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetStartTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(100))) + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + pt.SetCount(4) + pt.SetSum(10.1) + pt.SetScale(1) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Negative().SetOffset(1) + + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Sum: 10.1, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + NegativeSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + NegativeDeltas: []int64{1, 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + PositiveDeltas: []int64{1, 0}, + Timestamp: 500, + } + }, + }, + { + name: "convert exp. to native histogram with no sum", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetStartTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(100))) + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + + pt.SetCount(4) + pt.SetScale(1) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Negative().SetOffset(1) + + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + NegativeSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + NegativeDeltas: []int64{1, 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + PositiveDeltas: []int64{1, 0}, + Timestamp: 500, + } + }, + }, + { + name: "invalid negative scale", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetScale(-10) + return pt + }, + wantErrMessage: "cannot convert exponential to native histogram." + + " Scale must be >= -4, was -10", + }, + { + name: "no downscaling at scale 8", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + pt.SetCount(6) + pt.SetSum(10.1) + pt.SetScale(8) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Negative().SetOffset(2) + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 6}, + Sum: 10.1, + Schema: 8, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + PositiveSpans: []prompb.BucketSpan{{Offset: 2, Length: 3}}, + PositiveDeltas: []int64{1, 0, 0}, // 1, 1, 1 + NegativeSpans: []prompb.BucketSpan{{Offset: 3, Length: 3}}, + NegativeDeltas: []int64{1, 0, 0}, // 1, 1, 1 + Timestamp: 500, + } + }, + }, + { + name: "downsample if scale is more than 8", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + pt.SetCount(6) + pt.SetSum(10.1) + pt.SetScale(9) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Negative().SetOffset(2) + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 6}, + Sum: 10.1, + Schema: 8, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + PositiveSpans: []prompb.BucketSpan{{Offset: 1, Length: 2}}, + PositiveDeltas: []int64{1, 1}, // 0+1, 1+1 = 1, 2 + NegativeSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + NegativeDeltas: []int64{2, -1}, // 1+1, 1+0 = 2, 1 + Timestamp: 500, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + validateExponentialHistogramCount(t, tt.exponentialHist()) // Sanity check. + got, annots, err := exponentialToNativeHistogram(tt.exponentialHist()) + if tt.wantErrMessage != "" { + assert.ErrorContains(t, err, tt.wantErrMessage) + return + } + + require.NoError(t, err) + require.Empty(t, annots) + assert.Equal(t, tt.wantNativeHist(), got) + validateNativeHistogramCount(t, got) + }) + } +} + +func validateExponentialHistogramCount(t *testing.T, h pmetric.ExponentialHistogramDataPoint) { + actualCount := uint64(0) + for _, bucket := range h.Positive().BucketCounts().AsRaw() { + actualCount += bucket + } + for _, bucket := range h.Negative().BucketCounts().AsRaw() { + actualCount += bucket + } + require.Equal(t, h.Count(), actualCount, "exponential histogram count mismatch") +} + +func validateNativeHistogramCount(t *testing.T, h prompb.Histogram) { + require.NotNil(t, h.Count) + require.IsType(t, &prompb.Histogram_CountInt{}, h.Count) + want := h.Count.(*prompb.Histogram_CountInt).CountInt + var ( + actualCount uint64 + prevBucket int64 + ) + for _, delta := range h.PositiveDeltas { + prevBucket += delta + actualCount += uint64(prevBucket) + } + prevBucket = 0 + for _, delta := range h.NegativeDeltas { + prevBucket += delta + actualCount += uint64(prevBucket) + } + assert.Equal(t, want, actualCount, "native histogram count mismatch") +} + +func TestPrometheusConverter_addExponentialHistogramDataPoints(t *testing.T) { + tests := []struct { + name string + metric func() pmetric.Metric + wantSeries func() map[uint64]*prompb.TimeSeries + }{ + { + name: "histogram data points with same labels", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(7) + pt.SetScale(1) + pt.Positive().SetOffset(-1) + pt.Positive().BucketCounts().FromRaw([]uint64{4, 2}) + pt.Exemplars().AppendEmpty().SetDoubleValue(1) + pt.Attributes().PutStr("attr", "test_attr") + + pt = metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(4) + pt.SetScale(1) + pt.Positive().SetOffset(-1) + pt.Positive().BucketCounts().FromRaw([]uint64{4, 2, 1}) + pt.Exemplars().AppendEmpty().SetDoubleValue(2) + pt.Attributes().PutStr("attr", "test_attr") + + return metric + }, + wantSeries: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist"}, + {Name: "attr", Value: "test_attr"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Histograms: []prompb.Histogram{ + { + Count: &prompb.Histogram_CountInt{CountInt: 7}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 0, Length: 2}}, + PositiveDeltas: []int64{4, -2}, + }, + { + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 0, Length: 3}}, + PositiveDeltas: []int64{4, -2, -1}, + }, + }, + Exemplars: []prompb.Exemplar{ + {Value: 1}, + {Value: 2}, + }, + }, + } + }, + }, + { + name: "histogram data points with different labels", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(7) + pt.SetScale(1) + pt.Positive().SetOffset(-1) + pt.Positive().BucketCounts().FromRaw([]uint64{4, 2}) + pt.Exemplars().AppendEmpty().SetDoubleValue(1) + pt.Attributes().PutStr("attr", "test_attr") + + pt = metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(4) + pt.SetScale(1) + pt.Negative().SetOffset(-1) + pt.Negative().BucketCounts().FromRaw([]uint64{4, 2, 1}) + pt.Exemplars().AppendEmpty().SetDoubleValue(2) + pt.Attributes().PutStr("attr", "test_attr_two") + + return metric + }, + wantSeries: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist"}, + {Name: "attr", Value: "test_attr"}, + } + labelsAnother := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_hist"}, + {Name: "attr", Value: "test_attr_two"}, + } + + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Histograms: []prompb.Histogram{ + { + Count: &prompb.Histogram_CountInt{CountInt: 7}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 0, Length: 2}}, + PositiveDeltas: []int64{4, -2}, + }, + }, + Exemplars: []prompb.Exemplar{ + {Value: 1}, + }, + }, + timeSeriesSignature(labelsAnother): { + Labels: labelsAnother, + Histograms: []prompb.Histogram{ + { + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + NegativeSpans: []prompb.BucketSpan{{Offset: 0, Length: 3}}, + NegativeDeltas: []int64{4, -2, -1}, + }, + }, + Exemplars: []prompb.Exemplar{ + {Value: 2}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + + converter := NewPrometheusConverter() + annots, err := converter.addExponentialHistogramDataPoints( + context.Background(), + metric.ExponentialHistogram().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + BuildCompliantName(metric, "", true, true), + ) + require.NoError(t, err) + require.Empty(t, annots) + + assert.Equal(t, tt.wantSeries(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} diff --git a/pkg/receive/otlptranslator/metrics_to_prw.go b/pkg/receive/otlptranslator/metrics_to_prw.go new file mode 100644 index 0000000000..19ca05ff6c --- /dev/null +++ b/pkg/receive/otlptranslator/metrics_to_prw.go @@ -0,0 +1,236 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/helper.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "errors" + "fmt" + "sort" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/util/annotations" + "github.com/thanos-io/thanos/pkg/errutil" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +type Settings struct { + Namespace string + ExternalLabels map[string]string + DisableTargetInfo bool + ExportCreatedMetric bool + AddMetricSuffixes bool + AllowUTF8 bool + PromoteResourceAttributes []string + KeepIdentifyingResourceAttributes bool +} + +// PrometheusConverter converts from OTel write format to Prometheus remote write format. +type PrometheusConverter struct { + unique map[uint64]*prompb.TimeSeries + conflicts map[uint64][]*prompb.TimeSeries + everyN everyNTimes + metadata []prompb.MetricMetadata +} + +func NewPrometheusConverter() *PrometheusConverter { + return &PrometheusConverter{ + unique: map[uint64]*prompb.TimeSeries{}, + conflicts: map[uint64][]*prompb.TimeSeries{}, + } +} + +// FromMetrics converts pmetric.Metrics to Prometheus remote write format. +func (c *PrometheusConverter) FromMetrics(ctx context.Context, md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs errutil.MultiError) { + c.everyN = everyNTimes{n: 128} + resourceMetricsSlice := md.ResourceMetrics() + + numMetrics := 0 + for i := 0; i < resourceMetricsSlice.Len(); i++ { + scopeMetricsSlice := resourceMetricsSlice.At(i).ScopeMetrics() + for j := 0; j < scopeMetricsSlice.Len(); j++ { + numMetrics += scopeMetricsSlice.At(j).Metrics().Len() + } + } + c.metadata = make([]prompb.MetricMetadata, 0, numMetrics) + + for i := 0; i < resourceMetricsSlice.Len(); i++ { + resourceMetrics := resourceMetricsSlice.At(i) + resource := resourceMetrics.Resource() + scopeMetricsSlice := resourceMetrics.ScopeMetrics() + // keep track of the most recent timestamp in the ResourceMetrics for + // use with the "target" info metric + var mostRecentTimestamp pcommon.Timestamp + for j := 0; j < scopeMetricsSlice.Len(); j++ { + metricSlice := scopeMetricsSlice.At(j).Metrics() + + // TODO: decide if instrumentation library information should be exported as labels + for k := 0; k < metricSlice.Len(); k++ { + if err := c.everyN.checkContext(ctx); err != nil { + errs.Add(err) + return + } + + metric := metricSlice.At(k) + mostRecentTimestamp = max(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) + + if !isValidAggregationTemporality(metric) { + errs.Add(fmt.Errorf("invalid temporality for metric %q", metric.Name())) + continue + } + + promName := BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes, settings.AllowUTF8) + c.metadata = append(c.metadata, prompb.MetricMetadata{ + Type: otelMetricTypeToPromMetricType(metric), + MetricFamilyName: promName, + Help: metric.Description(), + Unit: metric.Unit(), + }) + + // handle individual metrics based on type + //exhaustive:enforce + switch metric.Type() { + case pmetric.MetricTypeGauge: + dataPoints := metric.Gauge().DataPoints() + if dataPoints.Len() == 0 { + errs.Add(fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break + } + if err := c.addGaugeNumberDataPoints(ctx, dataPoints, resource, settings, promName); err != nil { + errs.Add(err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } + case pmetric.MetricTypeSum: + dataPoints := metric.Sum().DataPoints() + if dataPoints.Len() == 0 { + errs.Add(fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break + } + if err := c.addSumNumberDataPoints(ctx, dataPoints, resource, metric, settings, promName); err != nil { + errs.Add(err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } + case pmetric.MetricTypeHistogram: + dataPoints := metric.Histogram().DataPoints() + if dataPoints.Len() == 0 { + errs.Add(fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break + } + if err := c.addHistogramDataPoints(ctx, dataPoints, resource, settings, promName); err != nil { + errs.Add(err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } + case pmetric.MetricTypeExponentialHistogram: + dataPoints := metric.ExponentialHistogram().DataPoints() + if dataPoints.Len() == 0 { + errs.Add(fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break + } + ws, err := c.addExponentialHistogramDataPoints( + ctx, + dataPoints, + resource, + settings, + promName, + ) + annots.Merge(ws) + if err != nil { + errs.Add(err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } + case pmetric.MetricTypeSummary: + dataPoints := metric.Summary().DataPoints() + if dataPoints.Len() == 0 { + errs.Add(fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break + } + if err := c.addSummaryDataPoints(ctx, dataPoints, resource, settings, promName); err != nil { + errs.Add(err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } + default: + errs.Add(errors.New("unsupported metric type")) + } + } + } + addResourceTargetInfo(resource, settings, mostRecentTimestamp, c) + } + + return annots, errs +} + +func isSameMetric(ts *prompb.TimeSeries, lbls []labelpb.ZLabel) bool { + if len(ts.Labels) != len(lbls) { + return false + } + for i, l := range ts.Labels { + if l.Name != ts.Labels[i].Name || l.Value != ts.Labels[i].Value { + return false + } + } + return true +} + +// addExemplars adds exemplars for the dataPoint. For each exemplar, if it can find a bucket bound corresponding to its value, +// the exemplar is added to the bucket bound's time series, provided that the time series' has samples. +func (c *PrometheusConverter) addExemplars(ctx context.Context, dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) error { + if len(bucketBounds) == 0 { + return nil + } + + exemplars, err := getPromExemplars(ctx, &c.everyN, dataPoint) + if err != nil { + return err + } + if len(exemplars) == 0 { + return nil + } + + sort.Sort(byBucketBoundsData(bucketBounds)) + for _, exemplar := range exemplars { + for _, bound := range bucketBounds { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + if len(bound.ts.Samples) > 0 && exemplar.Value <= bound.bound { + bound.ts.Exemplars = append(bound.ts.Exemplars, exemplar) + break + } + } + } + + return nil +} + +// addSample finds a TimeSeries that corresponds to lbls, and adds sample to it. +// If there is no corresponding TimeSeries already, it's created. +// The corresponding TimeSeries is returned. +// If either lbls is nil/empty or sample is nil, nothing is done. +func (c *PrometheusConverter) addSample(sample *prompb.Sample, lbls []labelpb.ZLabel) *prompb.TimeSeries { + if sample == nil || len(lbls) == 0 { + // This shouldn't happen + return nil + } + + ts, _ := c.getOrCreateTimeSeries(lbls) + ts.Samples = append(ts.Samples, *sample) + return ts +} diff --git a/pkg/receive/otlptranslator/metrics_to_prw_test.go b/pkg/receive/otlptranslator/metrics_to_prw_test.go new file mode 100644 index 0000000000..e604332d43 --- /dev/null +++ b/pkg/receive/otlptranslator/metrics_to_prw_test.go @@ -0,0 +1,279 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" + + "github.com/prometheus/prometheus/model/labels" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +func labelProtosToLabels(b *labels.ScratchBuilder, labelPairs []labelpb.ZLabel) labels.Labels { + b.Reset() + for _, l := range labelPairs { + b.Add(l.Name, l.Value) + } + b.Sort() + return b.Labels() +} + +func TestFromMetrics(t *testing.T) { + for _, keepIdentifyingResourceAttributes := range []bool{false, true} { + t.Run(fmt.Sprintf("successful/keepIdentifyingAttributes=%v", keepIdentifyingResourceAttributes), func(t *testing.T) { + converter := NewPrometheusConverter() + payload := createExportRequest(5, 128, 128, 2, 0) + var expMetadata []prompb.MetricMetadata + resourceMetricsSlice := payload.Metrics().ResourceMetrics() + for i := 0; i < resourceMetricsSlice.Len(); i++ { + scopeMetricsSlice := resourceMetricsSlice.At(i).ScopeMetrics() + for j := 0; j < scopeMetricsSlice.Len(); j++ { + metricSlice := scopeMetricsSlice.At(j).Metrics() + for k := 0; k < metricSlice.Len(); k++ { + metric := metricSlice.At(k) + promName := BuildCompliantName(metric, "", false, false) + expMetadata = append(expMetadata, prompb.MetricMetadata{ + Type: otelMetricTypeToPromMetricType(metric), + MetricFamilyName: promName, + Help: metric.Description(), + Unit: metric.Unit(), + }) + } + } + } + + annots, err := converter.FromMetrics( + context.Background(), + payload.Metrics(), + Settings{KeepIdentifyingResourceAttributes: keepIdentifyingResourceAttributes}, + ) + require.NoError(t, err.Err()) + require.Empty(t, annots) + + if diff := cmp.Diff(expMetadata, converter.Metadata()); diff != "" { + t.Errorf("mismatch (-want +got):\n%s", diff) + } + + ts := converter.TimeSeries() + require.Len(t, ts, 1408+1) // +1 for the target_info. + + target_info_count := 0 + for _, s := range ts { + b := labels.NewScratchBuilder(2) + lbls := labelProtosToLabels(&b, s.Labels) + if lbls.Get(labels.MetricName) == "target_info" { + target_info_count++ + require.Equal(t, "test-namespace/test-service", lbls.Get("job")) + require.Equal(t, "id1234", lbls.Get("instance")) + if keepIdentifyingResourceAttributes { + require.Equal(t, "test-service", lbls.Get("service_name")) + require.Equal(t, "test-namespace", lbls.Get("service_namespace")) + require.Equal(t, "id1234", lbls.Get("service_instance_id")) + } else { + require.False(t, lbls.Has("service_name")) + require.False(t, lbls.Has("service_namespace")) + require.False(t, lbls.Has("service_instance_id")) + } + } + } + require.Equal(t, 1, target_info_count) + }) + } + + t.Run("context cancellation", func(t *testing.T) { + converter := NewPrometheusConverter() + ctx, cancel := context.WithCancel(context.Background()) + // Verify that converter.FromMetrics respects cancellation. + cancel() + payload := createExportRequest(5, 128, 128, 2, 0) + + annots, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}) + require.ErrorIs(t, err.Err(), context.Canceled) + require.Empty(t, annots) + }) + + t.Run("context timeout", func(t *testing.T) { + converter := NewPrometheusConverter() + // Verify that converter.FromMetrics respects timeout. + ctx, cancel := context.WithTimeout(context.Background(), 0) + t.Cleanup(cancel) + payload := createExportRequest(5, 128, 128, 2, 0) + + annots, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}) + require.ErrorIs(t, err.Err(), context.DeadlineExceeded) + require.Empty(t, annots) + }) + + t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) { + request := pmetricotlp.NewExportRequest() + rm := request.Metrics().ResourceMetrics().AppendEmpty() + generateAttributes(rm.Resource().Attributes(), "resource", 10) + + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() + ts := pcommon.NewTimestampFromTime(time.Now()) + + for i := 1; i <= 10; i++ { + m := metrics.AppendEmpty() + m.SetEmptyExponentialHistogram() + m.SetName(fmt.Sprintf("histogram-%d", i)) + m.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + h := m.ExponentialHistogram().DataPoints().AppendEmpty() + h.SetTimestamp(ts) + + h.SetCount(0) + h.SetSum(155) + + generateAttributes(h.Attributes(), "series", 10) + } + + converter := NewPrometheusConverter() + annots, err := converter.FromMetrics(context.Background(), request.Metrics(), Settings{}) + require.NoError(t, err.Err()) + require.NotEmpty(t, annots) + ws, infos := annots.AsStrings("", 0, 0) + require.Empty(t, infos) + require.Equal(t, []string{ + "exponential histogram data point has zero count, but non-zero sum: 155.000000", + }, ws) + }) +} + +func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { + for _, resourceAttributeCount := range []int{0, 5, 50} { + b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) { + for _, histogramCount := range []int{0, 1000} { + b.Run(fmt.Sprintf("histogram count: %v", histogramCount), func(b *testing.B) { + nonHistogramCounts := []int{0, 1000} + + if resourceAttributeCount == 0 && histogramCount == 0 { + // Don't bother running a scenario where we'll generate no series. + nonHistogramCounts = []int{1000} + } + + for _, nonHistogramCount := range nonHistogramCounts { + b.Run(fmt.Sprintf("non-histogram count: %v", nonHistogramCount), func(b *testing.B) { + for _, labelsPerMetric := range []int{2, 20} { + b.Run(fmt.Sprintf("labels per metric: %v", labelsPerMetric), func(b *testing.B) { + for _, exemplarsPerSeries := range []int{0, 5, 10} { + b.Run(fmt.Sprintf("exemplars per series: %v", exemplarsPerSeries), func(b *testing.B) { + payload := createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries) + b.ResetTimer() + + for range b.N { + converter := NewPrometheusConverter() + annots, err := converter.FromMetrics(context.Background(), payload.Metrics(), Settings{}) + require.NoError(b, err.Err()) + require.Empty(b, annots) + require.NotNil(b, converter.TimeSeries()) + require.NotNil(b, converter.Metadata()) + } + }) + } + }) + } + }) + } + }) + } + }) + } +} + +func createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries int) pmetricotlp.ExportRequest { + request := pmetricotlp.NewExportRequest() + + rm := request.Metrics().ResourceMetrics().AppendEmpty() + generateAttributes(rm.Resource().Attributes(), "resource", resourceAttributeCount) + + // Fake some resource attributes. + for k, v := range map[string]string{ + "service.name": "test-service", + "service.namespace": "test-namespace", + "service.instance.id": "id1234", + } { + rm.Resource().Attributes().PutStr(k, v) + } + + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() + ts := pcommon.NewTimestampFromTime(time.Now()) + + for i := 1; i <= histogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyHistogram() + m.SetName(fmt.Sprintf("histogram-%v", i)) + m.SetDescription("histogram") + m.SetUnit("unit") + m.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + h := m.Histogram().DataPoints().AppendEmpty() + h.SetTimestamp(ts) + + // Set 50 samples, 10 each with values 0.5, 1, 2, 4, and 8 + h.SetCount(50) + h.SetSum(155) + h.BucketCounts().FromRaw([]uint64{10, 10, 10, 10, 10, 0}) + h.ExplicitBounds().FromRaw([]float64{.5, 1, 2, 4, 8, 16}) // Bucket boundaries include the upper limit (ie. each sample is on the upper limit of its bucket) + + generateAttributes(h.Attributes(), "series", labelsPerMetric) + generateExemplars(h.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptySum() + m.SetName(fmt.Sprintf("sum-%v", i)) + m.SetDescription("sum") + m.SetUnit("unit") + m.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + point := m.Sum().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyGauge() + m.SetName(fmt.Sprintf("gauge-%v", i)) + m.SetDescription("gauge") + m.SetUnit("unit") + point := m.Gauge().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + return request +} + +func generateAttributes(m pcommon.Map, prefix string, count int) { + for i := 1; i <= count; i++ { + m.PutStr(fmt.Sprintf("%v-name-%v", prefix, i), fmt.Sprintf("value-%v", i)) + } +} + +func generateExemplars(exemplars pmetric.ExemplarSlice, count int, ts pcommon.Timestamp) { + for i := 1; i <= count; i++ { + e := exemplars.AppendEmpty() + e.SetTimestamp(ts) + e.SetDoubleValue(2.22) + e.SetSpanID(pcommon.SpanID{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}) + e.SetTraceID(pcommon.TraceID{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}) + } +} diff --git a/pkg/receive/otlptranslator/normalize_name.go b/pkg/receive/otlptranslator/normalize_name.go new file mode 100644 index 0000000000..4e45791295 --- /dev/null +++ b/pkg/receive/otlptranslator/normalize_name.go @@ -0,0 +1,315 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/normalize_name.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "regexp" + "slices" + "strings" + "unicode" + + "github.com/prometheus/prometheus/util/strutil" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +// The map to translate OTLP units to Prometheus units +// OTLP metrics use the c/s notation as specified at https://ucum.org/ucum.html +// (See also https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/README.md#instrument-units) +// Prometheus best practices for units: https://prometheus.io/docs/practices/naming/#base-units +// OpenMetrics specification for units: https://github.com/prometheus/OpenMetrics/blob/v1.0.0/specification/OpenMetrics.md#units-and-base-units +var unitMap = map[string]string{ + // Time + "d": "days", + "h": "hours", + "min": "minutes", + "s": "seconds", + "ms": "milliseconds", + "us": "microseconds", + "ns": "nanoseconds", + + // Bytes + "By": "bytes", + "KiBy": "kibibytes", + "MiBy": "mebibytes", + "GiBy": "gibibytes", + "TiBy": "tibibytes", + "KBy": "kilobytes", + "MBy": "megabytes", + "GBy": "gigabytes", + "TBy": "terabytes", + + // SI + "m": "meters", + "V": "volts", + "A": "amperes", + "J": "joules", + "W": "watts", + "g": "grams", + + // Misc + "Cel": "celsius", + "Hz": "hertz", + "1": "", + "%": "percent", +} + +// The map that translates the "per" unit +// Example: s => per second (singular). +var perUnitMap = map[string]string{ + "s": "second", + "m": "minute", + "h": "hour", + "d": "day", + "w": "week", + "mo": "month", + "y": "year", +} + +// BuildCompliantName builds a Prometheus-compliant metric name for the specified metric. +// +// Metric name is prefixed with specified namespace and underscore (if any). +// Namespace is not cleaned up. Make sure specified namespace follows Prometheus +// naming convention. +// +// See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels, +// https://prometheus.io/docs/practices/naming/#metric-and-label-naming +// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. +func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes, allowUTF8 bool) string { + // Full normalization following standard Prometheus naming conventions + if addMetricSuffixes { + return normalizeName(metric, namespace, allowUTF8) + } + + var metricName string + if !allowUTF8 { + // Regexp for metric name characters that should be replaced with _. + invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`) + + // Simple case (no full normalization, no units, etc.). + metricName = strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool { + return invalidMetricCharRE.MatchString(string(r)) + }), "_") + } else { + metricName = metric.Name() + } + + // Namespace? + if namespace != "" { + return namespace + "_" + metricName + } + + // Metric name starts with a digit and utf8 not allowed? Prefix it with an underscore. + if metricName != "" && unicode.IsDigit(rune(metricName[0])) && !allowUTF8 { + metricName = "_" + metricName + } + + return metricName +} + +// Build a normalized name for the specified metric. +func normalizeName(metric pmetric.Metric, namespace string, allowUTF8 bool) string { + var nameTokens []string + var separators []string + if !allowUTF8 { + nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`) + // Split metric name into "tokens" (of supported metric name runes). + // Note that this has the side effect of replacing multiple consecutive underscores with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. + nameTokens = strings.FieldsFunc( + metric.Name(), + func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) }, + ) + } else { + translationFunc := func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ':' } + // Split metric name into "tokens" (of supported metric name runes). + nameTokens, separators = fieldsFunc(metric.Name(), translationFunc) + } + + // Split unit at the '/' if any + unitTokens := strings.SplitN(metric.Unit(), "/", 2) + + // Main unit + // Append if not blank, doesn't contain '{}', and is not present in metric name already + if len(unitTokens) > 0 { + var mainUnitProm, perUnitProm string + mainUnitOTel := strings.TrimSpace(unitTokens[0]) + if mainUnitOTel != "" && !strings.ContainsAny(mainUnitOTel, "{}") { + mainUnitProm = unitMapGetOrDefault(mainUnitOTel) + if !allowUTF8 { + mainUnitProm = cleanUpUnit(mainUnitProm) + } + if slices.Contains(nameTokens, mainUnitProm) { + mainUnitProm = "" + } + } + + // Per unit + // Append if not blank, doesn't contain '{}', and is not present in metric name already + if len(unitTokens) > 1 && unitTokens[1] != "" { + perUnitOTel := strings.TrimSpace(unitTokens[1]) + if perUnitOTel != "" && !strings.ContainsAny(perUnitOTel, "{}") { + perUnitProm = perUnitMapGetOrDefault(perUnitOTel) + if !allowUTF8 { + perUnitProm = cleanUpUnit(perUnitProm) + } + } + if perUnitProm != "" { + perUnitProm = "per_" + perUnitProm + if slices.Contains(nameTokens, perUnitProm) { + perUnitProm = "" + } + } + } + + if perUnitProm != "" { + mainUnitProm = strings.TrimSuffix(mainUnitProm, "_") + } + + if mainUnitProm != "" { + nameTokens = append(nameTokens, mainUnitProm) + } + if perUnitProm != "" { + nameTokens = append(nameTokens, perUnitProm) + } + } + + // Append _total for Counters + if metric.Type() == pmetric.MetricTypeSum && metric.Sum().IsMonotonic() { + nameTokens = append(removeItem(nameTokens, "total"), "total") + } + + // Append _ratio for metrics with unit "1" + // Some OTel receivers improperly use unit "1" for counters of objects + // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aissue+some+metric+units+don%27t+follow+otel+semantic+conventions + // Until these issues have been fixed, we're appending `_ratio` for gauges ONLY + // Theoretically, counters could be ratios as well, but it's absurd (for mathematical reasons) + if metric.Unit() == "1" && metric.Type() == pmetric.MetricTypeGauge { + nameTokens = append(removeItem(nameTokens, "ratio"), "ratio") + } + + // Namespace? + if namespace != "" { + nameTokens = append([]string{namespace}, nameTokens...) + } + + var normalizedName string + if !allowUTF8 { + // Build the string from the tokens, separated with underscores + normalizedName = strings.Join(nameTokens, "_") + } else { + // Build the string from the tokens + separators. + normalizedName = join(nameTokens, separators, "_") + } + + // Metric name cannot start with a digit, so prefix it with "_" in this case + if normalizedName != "" && unicode.IsDigit(rune(normalizedName[0])) { + normalizedName = "_" + normalizedName + } + + return normalizedName +} + +// TrimPromSuffixes trims type and unit prometheus suffixes from a metric name. +// Following the [OpenTelemetry specs] for converting Prometheus Metric points to OTLP. +// +// [OpenTelemetry specs]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#metric-metadata +func TrimPromSuffixes(promName string, metricType pmetric.MetricType, unit string) string { + nameTokens := strings.Split(promName, "_") + if len(nameTokens) == 1 { + return promName + } + + nameTokens = removeTypeSuffixes(nameTokens, metricType) + nameTokens = removeUnitSuffixes(nameTokens, unit) + + return strings.Join(nameTokens, "_") +} + +func removeTypeSuffixes(tokens []string, metricType pmetric.MetricType) []string { + switch metricType { + case pmetric.MetricTypeSum: + // Only counters are expected to have a type suffix at this point. + // for other types, suffixes are removed during scrape. + return removeSuffix(tokens, "total") + default: + return tokens + } +} + +func removeUnitSuffixes(nameTokens []string, unit string) []string { + l := len(nameTokens) + unitTokens := strings.Split(unit, "_") + lu := len(unitTokens) + + if lu == 0 || l <= lu { + return nameTokens + } + + suffixed := true + for i := range unitTokens { + if nameTokens[l-i-1] != unitTokens[lu-i-1] { + suffixed = false + break + } + } + + if suffixed { + return nameTokens[:l-lu] + } + + return nameTokens +} + +func removeSuffix(tokens []string, suffix string) []string { + l := len(tokens) + if tokens[l-1] == suffix { + return tokens[:l-1] + } + + return tokens +} + +// cleanUpUnit cleans up unit so it matches model.LabelNameRE. +func cleanUpUnit(unit string) string { + // Multiple consecutive underscores are replaced with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. + multipleUnderscoresRE := regexp.MustCompile(`__+`) + return strings.TrimPrefix(multipleUnderscoresRE.ReplaceAllString( + strutil.SanitizeLabelName(unit), + "_", + ), "_") +} + +// Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit +// Returns the specified unit if not found in unitMap. +func unitMapGetOrDefault(unit string) string { + if promUnit, ok := unitMap[unit]; ok { + return promUnit + } + return unit +} + +// Retrieve the Prometheus "per" unit corresponding to the specified "per" unit +// Returns the specified unit if not found in perUnitMap. +func perUnitMapGetOrDefault(perUnit string) string { + if promPerUnit, ok := perUnitMap[perUnit]; ok { + return promPerUnit + } + return perUnit +} + +// Remove the specified value from the slice. +func removeItem(slice []string, value string) []string { + newSlice := make([]string, 0, len(slice)) + for _, sliceEntry := range slice { + if sliceEntry != value { + newSlice = append(newSlice, sliceEntry) + } + } + return newSlice +} diff --git a/pkg/receive/otlptranslator/number_data_points.go b/pkg/receive/otlptranslator/number_data_points.go new file mode 100644 index 0000000000..23366fda67 --- /dev/null +++ b/pkg/receive/otlptranslator/number_data_points.go @@ -0,0 +1,118 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/number_data_points.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "math" + + "github.com/prometheus/common/model" + "github.com/thanos-io/thanos/pkg/store/labelpb" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +func (c *PrometheusConverter) addGaugeNumberDataPoints(ctx context.Context, dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, settings Settings, name string) error { + for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + + pt := dataPoints.At(x) + labels := createAttributes( + resource, + pt.Attributes(), + settings, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + c.addSample(sample, labels) + } + + return nil +} + +func (c *PrometheusConverter) addSumNumberDataPoints(ctx context.Context, dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) error { + for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + + pt := dataPoints.At(x) + lbls := createAttributes( + resource, + pt.Attributes(), + settings, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + ts := c.addSample(sample, lbls) + if ts != nil { + exemplars, err := getPromExemplars[pmetric.NumberDataPoint](ctx, &c.everyN, pt) + if err != nil { + return err + } + ts.Exemplars = append(ts.Exemplars, exemplars...) + } + + // add created time series if needed + if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { + startTimestamp := pt.StartTimestamp() + if startTimestamp == 0 { + return nil + } + + createdLabels := make([]labelpb.ZLabel, len(lbls)) + copy(createdLabels, lbls) + for i, l := range createdLabels { + if l.Name == model.MetricNameLabel { + createdLabels[i].Value = name + createdSuffix + break + } + } + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) + } + } + + return nil +} diff --git a/pkg/receive/otlptranslator/number_data_points_test.go b/pkg/receive/otlptranslator/number_data_points_test.go new file mode 100644 index 0000000000..aad0af1839 --- /dev/null +++ b/pkg/receive/otlptranslator/number_data_points_test.go @@ -0,0 +1,254 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/number_data_points_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "context" + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/thanos-io/thanos/pkg/store/labelpb" + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" +) + +func TestPrometheusConverter_addGaugeNumberDataPoints(t *testing.T) { + ts := uint64(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "gauge", + metric: func() pmetric.Metric { + return getIntGaugeMetric( + "test", + pcommon.NewMap(), + 1, ts, + ) + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + { + Value: 1, + Timestamp: convertTimeStamp(pcommon.Timestamp(ts)), + }}, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + _ = converter.addGaugeNumberDataPoints( + context.Background(), + metric.Gauge().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} + +func TestPrometheusConverter_addSumNumberDataPoints(t *testing.T) { + ts := pcommon.Timestamp(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "sum", + metric: func() pmetric.Metric { + return getIntSumMetric( + "test", + pcommon.NewMap(), + 1, + uint64(ts.AsTime().UnixNano()), + ) + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + { + Value: 1, + Timestamp: convertTimeStamp(ts), + }}, + }, + } + }, + }, + { + name: "sum with exemplars", + metric: func() pmetric.Metric { + m := getIntSumMetric( + "test", + pcommon.NewMap(), + 1, + uint64(ts.AsTime().UnixNano()), + ) + m.Sum().DataPoints().At(0).Exemplars().AppendEmpty().SetDoubleValue(2) + return m + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{{ + Value: 1, + Timestamp: convertTimeStamp(ts), + }}, + Exemplars: []prompb.Exemplar{ + {Value: 2}, + }, + }, + } + }, + }, + { + name: "monotonic cumulative sum with start timestamp", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_sum") + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + metric.SetEmptySum().SetIsMonotonic(true) + + dp := metric.Sum().DataPoints().AppendEmpty() + dp.SetDoubleValue(1) + dp.SetTimestamp(ts) + dp.SetStartTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_sum"}, + } + createdLabels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_sum" + createdSuffix}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 1, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(createdLabels): { + Labels: createdLabels, + Samples: []prompb.Sample{ + {Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "monotonic cumulative sum with no start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_sum") + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + metric.SetEmptySum().SetIsMonotonic(true) + + dp := metric.Sum().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_sum"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "non-monotonic cumulative sum with start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_sum") + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + metric.SetEmptySum().SetIsMonotonic(false) + + dp := metric.Sum().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []labelpb.ZLabel{ + {Name: model.MetricNameLabel, Value: "test_sum"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + _ = converter.addSumNumberDataPoints( + context.Background(), + metric.Sum().DataPoints(), + pcommon.NewResource(), + metric, + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} diff --git a/pkg/receive/otlptranslator/otlp_to_openmetrics_metadata.go b/pkg/receive/otlptranslator/otlp_to_openmetrics_metadata.go new file mode 100644 index 0000000000..eb20a16478 --- /dev/null +++ b/pkg/receive/otlptranslator/otlp_to_openmetrics_metadata.go @@ -0,0 +1,33 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "github.com/thanos-io/thanos/pkg/store/storepb/prompb" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +func otelMetricTypeToPromMetricType(otelMetric pmetric.Metric) prompb.MetricMetadata_MetricType { + switch otelMetric.Type() { + case pmetric.MetricTypeGauge: + return prompb.MetricMetadata_GAUGE + case pmetric.MetricTypeSum: + metricType := prompb.MetricMetadata_GAUGE + if otelMetric.Sum().IsMonotonic() { + metricType = prompb.MetricMetadata_COUNTER + } + return metricType + case pmetric.MetricTypeHistogram: + return prompb.MetricMetadata_HISTOGRAM + case pmetric.MetricTypeSummary: + return prompb.MetricMetadata_SUMMARY + case pmetric.MetricTypeExponentialHistogram: + return prompb.MetricMetadata_HISTOGRAM + } + return prompb.MetricMetadata_UNKNOWN +} diff --git a/pkg/receive/otlptranslator/testutil_test.go b/pkg/receive/otlptranslator/testutil_test.go new file mode 100644 index 0000000000..c04542e37c --- /dev/null +++ b/pkg/receive/otlptranslator/testutil_test.go @@ -0,0 +1,46 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/testutil_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import ( + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +func getIntGaugeMetric(name string, attributes pcommon.Map, value int64, ts uint64) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + dp := metric.SetEmptyGauge().DataPoints().AppendEmpty() + if strings.HasPrefix(name, "staleNaN") { + dp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + } + dp.SetIntValue(value) + attributes.CopyTo(dp.Attributes()) + + dp.SetStartTimestamp(pcommon.Timestamp(0)) + dp.SetTimestamp(pcommon.Timestamp(ts)) + return metric +} + +func getIntSumMetric(name string, attributes pcommon.Map, value int64, ts uint64) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + dp := metric.Sum().DataPoints().AppendEmpty() + if strings.HasPrefix(name, "staleNaN") { + dp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + } + dp.SetIntValue(value) + attributes.CopyTo(dp.Attributes()) + + dp.SetStartTimestamp(pcommon.Timestamp(0)) + dp.SetTimestamp(pcommon.Timestamp(ts)) + return metric +} diff --git a/pkg/receive/otlptranslator/timeseries.go b/pkg/receive/otlptranslator/timeseries.go new file mode 100644 index 0000000000..2cc5394e45 --- /dev/null +++ b/pkg/receive/otlptranslator/timeseries.go @@ -0,0 +1,34 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package otlptranslator + +import "github.com/thanos-io/thanos/pkg/store/storepb/prompb" + +// TimeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. +func (c *PrometheusConverter) TimeSeries() []prompb.TimeSeries { + conflicts := 0 + for _, ts := range c.conflicts { + conflicts += len(ts) + } + allTS := make([]prompb.TimeSeries, 0, len(c.unique)+conflicts) + for _, ts := range c.unique { + allTS = append(allTS, *ts) + } + for _, cTS := range c.conflicts { + for _, ts := range cTS { + allTS = append(allTS, *ts) + } + } + + return allTS +} + +// Metadata returns a slice of the prompb.Metadata that were converted from OTel format. +func (c *PrometheusConverter) Metadata() []prompb.MetricMetadata { + return c.metadata +} diff --git a/test/e2e/e2ethanos/services.go b/test/e2e/e2ethanos/services.go index 5e0a005c48..70ef331260 100644 --- a/test/e2e/e2ethanos/services.go +++ b/test/e2e/e2ethanos/services.go @@ -67,6 +67,11 @@ func DefaultPrometheusImage() string { return "quay.io/prometheus/prometheus:v2.41.0" } +// DefaultOtelImage sets default Otel image used in e2e service. +func DefaultOtelImage() string { + return "otel/opentelemetry-collector-contrib:0.116.1" +} + // DefaultAlertmanagerImage sets default Alertmanager image used in e2e service. func DefaultAlertmanagerImage() string { return "quay.io/prometheus/alertmanager:v0.20.0" @@ -130,6 +135,30 @@ func NewPrometheus(e e2e.Environment, name, promConfig, webConfig, promImage str })), "http") } +func NewOtel(e e2e.Environment, name, otelConfig, otelImage string) *e2eobs.Observable { + f := e.Runnable(name).WithPorts(map[string]int{"http": 9090}).Future() + if err := os.MkdirAll(f.Dir(), 0750); err != nil { + return &e2eobs.Observable{Runnable: e2e.NewFailedRunnable(name, errors.Wrap(err, "create otel dir"))} + } + + if err := os.WriteFile(filepath.Join(f.Dir(), "otel.yaml"), []byte(otelConfig), 0600); err != nil { + return &e2eobs.Observable{Runnable: e2e.NewFailedRunnable(name, errors.Wrap(err, "creating otel config"))} + } + + //probe := e2e.NewHTTPReadinessProbe("http", "/-/ready", 200, 200) + args := e2e.BuildArgs(map[string]string{ + "--config": filepath.Join(f.InternalDir(), "otel.yaml"), + //"--log.level": infoLogLevel, + //"--web.listen-address": ":9090", + }) + + return e2eobs.AsObservable(f.Init(wrapWithDefaults(e2e.StartOptions{ + Image: otelImage, + Command: e2e.NewCommandWithoutEntrypoint("/otelcol-contrib", args...), + //Readiness: probe, + })), "http") +} + func NewPrometheusWithSidecar(e e2e.Environment, name, promConfig, webConfig, promImage, minTime string, enableFeatures ...string) (*e2eobs.Observable, *e2eobs.Observable) { return NewPrometheusWithSidecarCustomImage(e, name, promConfig, webConfig, promImage, minTime, DefaultImage(), enableFeatures...) } @@ -531,6 +560,8 @@ func (q *QuerierBuilder) collectArgs() ([]string, error) { return args, nil } +func OTLPEndpoint(addr string) string { return fmt.Sprintf("http://%s", addr) } + func RemoteWriteEndpoint(addr string) string { return fmt.Sprintf("http://%s/api/v1/receive", addr) } func RemoteWriteEndpoints(addrs ...string) string { @@ -1258,6 +1289,53 @@ rule_files: return config } +// DefaultOtelConfig returns Otel config that sets Otel to: +// * expose 2 external labels, source and replica. +// * optionally scrape self. This will produce up == 0 metric which we can assert on. +// * optionally remote write endpoint to write into. +func DefaultOtelConfig(remoteWriteEndpoint string) string { + config := fmt.Sprintf(` +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + prometheus: + config: + scrape_configs: + - job_name: otel-collector + scrape_interval: 5s + static_configs: + - targets: [localhost:8888] +exporters: + otlphttp/thanos: + metrics_endpoint: "%s/api/v1/otlp" + tls: + insecure: true + debug: + verbosity: detailed +extensions: + health_check: + pprof: +service: + telemetry: + logs: + level: "debug" + extensions: [pprof, health_check] + pipelines: + metrics: + receivers: + - prometheus + - otlp + exporters: + - otlphttp/thanos +`, remoteWriteEndpoint) + + return config +} + func NewRedis(e e2e.Environment, name string) e2e.Runnable { return e.Runnable(fmt.Sprintf("redis-%s", name)).WithPorts(map[string]int{"redis": 6379}).Init( e2e.StartOptions{ diff --git a/test/e2e/receive_test.go b/test/e2e/receive_test.go index 7d841e9a7e..3e099fcaff 100644 --- a/test/e2e/receive_test.go +++ b/test/e2e/receive_test.go @@ -109,6 +109,56 @@ func TestReceive(t *testing.T) { }) }) + t.Run("ingestor_otlp", func(t *testing.T) { + /* + The single_ingestor suite represents the simplest possible configuration of Thanos Receive. + ┌──────────┐ + │ Prom │ + └────┬─────┘ + │ + ┌────▼─────┐ + │ Ingestor │ + └────┬─────┘ + │ + ┌────▼─────┐ + │ Query │ + └──────────┘ + NB: Made with asciiflow.com - you can copy & paste the above there to modify. + */ + + t.Parallel() + e, err := e2e.NewDockerEnvironment("ingestor-otlp") + testutil.Ok(t, err) + t.Cleanup(e2ethanos.CleanScenario(t, e)) + + // Setup Router Ingestor. + i := e2ethanos.NewReceiveBuilder(e, "ingestor").WithIngestionEnabled().Init() + testutil.Ok(t, e2e.StartAndWaitReady(i)) + + // Setup Otel + otel := e2ethanos.NewOtel(e, "1", e2ethanos.DefaultOtelConfig(e2ethanos.OTLPEndpoint(i.InternalEndpoint("remote-write"))), e2ethanos.DefaultOtelImage()) + testutil.Ok(t, e2e.StartAndWaitReady(otel)) + + q := e2ethanos.NewQuerierBuilder(e, "1", i.InternalEndpoint("grpc")).Init() + testutil.Ok(t, e2e.StartAndWaitReady(q)) + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + t.Cleanup(cancel) + + testutil.Ok(t, q.WaitSumMetricsWithOptions(e2emon.Equals(1), []string{"thanos_store_nodes_grpc_connections"}, e2emon.WaitMissingMetrics())) + + // We expect the data from each Prometheus instance to be replicated twice across our ingesting instances + queryAndAssertSeries(t, ctx, q.Endpoint("http"), e2ethanos.QueryUpWithoutInstance, time.Now, promclient.QueryOptions{ + Deduplicate: false, + }, []model.Metric{ + { + "job": "otel-collector", + "receive": "receive-ingestor", + "tenant_id": "default-tenant", + }, + }) + }) + t.Run("ha_ingestor_with_ha_prom", func(t *testing.T) { /* The ha_ingestor_with_ha_prom suite represents a configuration of a