From 749e2d85c2ae7fd36bd313ac6ce340974cc8fcfa Mon Sep 17 00:00:00 2001 From: Pavel <177363085+pkcll@users.noreply.github.com> Date: Fri, 24 Jan 2025 10:00:56 -0500 Subject: [PATCH] Move promreceiver, scraper code to promotel --- go.mod | 61 +- go.sum | 18 +- pkg/promotel/cmd/example_test.go | 60 + pkg/promotel/config.go | 6 +- pkg/promotel/config_test.go | 10 +- pkg/promotel/prometheusreceiver/config.go | 158 + .../prometheusreceiver/config_test.go | 335 ++ pkg/promotel/prometheusreceiver/factory.go | 82 + .../prometheusreceiver/factory_test.go | 61 + .../prometheusreceiver/internal/appendable.go | 68 + .../prometheusreceiver/internal/logger.go | 139 + .../internal/logger_test.go | 287 ++ .../prometheusreceiver/internal/metadata.go | 65 + .../internal/metadata/generated_status.go | 16 + .../internal/metricfamily.go | 578 +++ .../internal/metricfamily_test.go | 904 ++++ .../internal/metrics_adjuster.go | 488 +++ .../internal/metrics_adjuster_test.go | 741 ++++ .../internal/metricsutil_test.go | 273 ++ .../internal/prom_to_otlp.go | 110 + .../internal/prom_to_otlp_test.go | 369 ++ .../internal/staleness_end_to_end_test.go | 233 + .../internal/starttimemetricadjuster.go | 128 + .../internal/starttimemetricadjuster_test.go | 154 + .../internal/transaction.go | 536 +++ .../internal/transaction_test.go | 2003 +++++++++ .../prometheusreceiver/internal/util.go | 128 + .../prometheusreceiver/internal/util_test.go | 182 + .../prometheusreceiver/metrics_receiver.go | 243 ++ .../metrics_receiver_test.go | 71 + .../scrape/clientprotobuf.go | 41 + .../prometheusreceiver/scrape/helpers_test.go | 223 + .../prometheusreceiver/scrape/manager.go | 314 ++ .../prometheusreceiver/scrape/manager_test.go | 888 ++++ .../prometheusreceiver/scrape/metrics.go | 331 ++ .../prometheusreceiver/scrape/promotel.go | 140 + .../scrape/promotel_test.go | 250 ++ .../prometheusreceiver/scrape/scrape.go | 2112 +++++++++ .../prometheusreceiver/scrape/scrape_test.go | 3764 +++++++++++++++++ .../prometheusreceiver/scrape/target.go | 585 +++ .../prometheusreceiver/scrape/target_test.go | 632 +++ .../scrape/testdata/bearertoken.txt | 1 + .../prometheusreceiver/scrape/testdata/ca.cer | 22 + .../prometheusreceiver/scrape/testdata/ca.key | 27 + .../scrape/testdata/client.cer | 28 + .../scrape/testdata/client.key | 51 + .../scrape/testdata/server.cer | 22 + .../scrape/testdata/server.key | 27 + .../scrape/testdata/servername.cer | 72 + .../scrape/testdata/servername.key | 27 + pkg/promotel/promotel_test.go | 249 ++ pkg/promotel/receiver.go | 4 +- pkg/promotel/receiver_test.go | 3 +- 53 files changed, 18292 insertions(+), 28 deletions(-) create mode 100644 pkg/promotel/cmd/example_test.go create mode 100644 pkg/promotel/prometheusreceiver/config.go create mode 100644 pkg/promotel/prometheusreceiver/config_test.go create mode 100644 pkg/promotel/prometheusreceiver/factory.go create mode 100644 pkg/promotel/prometheusreceiver/factory_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/appendable.go create mode 100644 pkg/promotel/prometheusreceiver/internal/logger.go create mode 100644 pkg/promotel/prometheusreceiver/internal/logger_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/metadata.go create mode 100644 pkg/promotel/prometheusreceiver/internal/metadata/generated_status.go create mode 100644 pkg/promotel/prometheusreceiver/internal/metricfamily.go create mode 100644 pkg/promotel/prometheusreceiver/internal/metricfamily_test.go create mode 100644 
pkg/promotel/prometheusreceiver/internal/metrics_adjuster.go create mode 100644 pkg/promotel/prometheusreceiver/internal/metrics_adjuster_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/metricsutil_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/prom_to_otlp.go create mode 100644 pkg/promotel/prometheusreceiver/internal/prom_to_otlp_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/staleness_end_to_end_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster.go create mode 100644 pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/transaction.go create mode 100644 pkg/promotel/prometheusreceiver/internal/transaction_test.go create mode 100644 pkg/promotel/prometheusreceiver/internal/util.go create mode 100644 pkg/promotel/prometheusreceiver/internal/util_test.go create mode 100644 pkg/promotel/prometheusreceiver/metrics_receiver.go create mode 100644 pkg/promotel/prometheusreceiver/metrics_receiver_test.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/clientprotobuf.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/helpers_test.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/manager.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/manager_test.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/metrics.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/promotel.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/promotel_test.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/scrape.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/scrape_test.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/target.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/target_test.go create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/bearertoken.txt create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/ca.cer create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/ca.key create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/client.cer create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/client.key create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/server.cer create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/server.key create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/servername.cer create mode 100644 pkg/promotel/prometheusreceiver/scrape/testdata/servername.key create mode 100644 pkg/promotel/promotel_test.go diff --git a/go.mod b/go.mod index 2b9df4226..40aa820ab 100644 --- a/go.mod +++ b/go.mod @@ -10,8 +10,11 @@ require ( github.com/dominikbraun/graph v0.23.0 github.com/fxamacker/cbor/v2 v2.5.0 github.com/go-json-experiment/json v0.0.0-20231102232822-2e55bd4e08b0 + github.com/go-kit/log v0.2.1 github.com/go-playground/validator/v10 v10.4.1 github.com/go-viper/mapstructure/v2 v2.2.1 + github.com/gogo/protobuf v1.3.2 + github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb github.com/google/go-cmp v0.6.0 github.com/google/uuid v1.6.0 github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 @@ -24,9 +27,13 @@ require ( github.com/jmoiron/sqlx v1.4.0 github.com/jonboulle/clockwork v0.4.0 github.com/jpillora/backoff v1.0.0 + github.com/klauspost/compress v1.17.11 github.com/lib/pq v1.10.9 github.com/linkedin/goavro/v2 v2.12.0 github.com/marcboeker/go-duckdb v1.8.3 + 
github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusremotewriteexporter v0.115.0 + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.115.0 + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus v0.115.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.115.0 github.com/pelletier/go-toml/v2 v2.2.0 github.com/prometheus/client_golang v1.20.5 @@ -50,14 +57,20 @@ require ( go.opentelemetry.io/collector/config/configtelemetry v0.115.0 go.opentelemetry.io/collector/config/configtls v1.21.0 go.opentelemetry.io/collector/confmap v1.21.0 + go.opentelemetry.io/collector/confmap/provider/fileprovider v1.21.0 go.opentelemetry.io/collector/consumer v1.21.0 go.opentelemetry.io/collector/consumer/consumertest v0.115.0 go.opentelemetry.io/collector/exporter v0.115.0 go.opentelemetry.io/collector/exporter/exportertest v0.115.0 go.opentelemetry.io/collector/exporter/otlpexporter v0.115.0 + go.opentelemetry.io/collector/featuregate v1.21.0 + go.opentelemetry.io/collector/otelcol v0.115.0 go.opentelemetry.io/collector/pdata v1.21.0 + go.opentelemetry.io/collector/processor v0.115.0 + go.opentelemetry.io/collector/processor/batchprocessor v0.115.0 go.opentelemetry.io/collector/receiver v0.115.0 go.opentelemetry.io/collector/receiver/receivertest v0.115.0 + go.opentelemetry.io/collector/semconv v0.115.0 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.56.0 go.opentelemetry.io/otel v1.32.0 go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.0.0-20240823153156-2a54df7bffb9 @@ -84,6 +97,7 @@ require ( gonum.org/v1/gonum v0.15.1 google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.35.2 + gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 sigs.k8s.io/yaml v1.4.0 ) @@ -105,6 +119,7 @@ require ( github.com/armon/go-metrics v0.4.1 // indirect github.com/aws/aws-sdk-go v1.54.19 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect + github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect @@ -117,16 +132,17 @@ require ( github.com/docker/docker v27.3.1+incompatible // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.5.0 // indirect + github.com/ebitengine/purego v0.8.1 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/envoyproxy/go-control-plane v0.13.0 // indirect github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect github.com/fatih/color v1.17.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.8.0 // indirect - github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/jsonpointer v0.20.2 // indirect github.com/go-openapi/jsonreference v0.20.4 // indirect github.com/go-openapi/swag v0.22.9 // indirect @@ -136,11 +152,9 @@ require ( github.com/go-zookeeper/zk v1.0.3 // indirect github.com/goccy/go-json v0.10.3 // indirect github.com/goccy/go-yaml v1.12.0 // indirect - github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt/v5 v5.2.1 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect - 
github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-querystring v1.1.0 // indirect @@ -168,6 +182,7 @@ require ( github.com/hashicorp/yamux v0.1.1 // indirect github.com/hetznercloud/hcloud-go/v2 v2.10.2 // indirect github.com/imdario/mergo v0.3.16 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/ionos-cloud/sdk-go/v6 v6.1.11 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect github.com/jackc/pgconn v1.14.3 // indirect @@ -179,7 +194,6 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect github.com/knadh/koanf/providers/confmap v0.1.0 // indirect @@ -188,6 +202,7 @@ require ( github.com/kylelemons/godebug v1.1.0 // indirect github.com/leodido/go-urn v1.2.0 // indirect github.com/linode/linodego v1.37.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect @@ -209,8 +224,9 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/oklog/run v1.1.0 // indirect - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.115.0 // indirect - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus v0.115.0 // indirect + github.com/oklog/ulid v1.3.1 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.115.0 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheusremotewrite v0.115.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect github.com/ovh/go-ovh v1.6.0 // indirect @@ -219,16 +235,27 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/common/sigv4 v0.1.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/rs/cors v1.11.1 // indirect github.com/sanity-io/litter v1.5.5 // indirect github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 // indirect + github.com/shirou/gopsutil/v4 v4.24.11 // indirect + github.com/spf13/cobra v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.5.2 // indirect + github.com/tidwall/gjson v1.14.2 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.0 // indirect + github.com/tidwall/tinylru v1.1.0 // indirect + github.com/tidwall/wal v1.1.8 // indirect + github.com/tklauser/go-sysconf v0.3.12 // indirect + github.com/tklauser/numcpus v0.6.1 // indirect github.com/vultr/govultr/v2 v2.17.2 // indirect github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect github.com/x448/float16 v0.8.4 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect 
github.com/zeebo/xxh3 v1.0.2 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/collector/client v1.21.0 // indirect @@ -236,6 +263,9 @@ require ( go.opentelemetry.io/collector/config/confighttp v0.115.0 // indirect go.opentelemetry.io/collector/config/confignet v1.21.0 // indirect go.opentelemetry.io/collector/config/internal v0.115.0 // indirect + go.opentelemetry.io/collector/connector v0.115.0 // indirect + go.opentelemetry.io/collector/connector/connectorprofiles v0.115.0 // indirect + go.opentelemetry.io/collector/connector/connectortest v0.115.0 // indirect go.opentelemetry.io/collector/consumer/consumererror v0.115.0 // indirect go.opentelemetry.io/collector/consumer/consumererror/consumererrorprofiles v0.115.0 // indirect go.opentelemetry.io/collector/consumer/consumerprofiles v0.115.0 // indirect @@ -244,14 +274,23 @@ require ( go.opentelemetry.io/collector/extension v0.115.0 // indirect go.opentelemetry.io/collector/extension/auth v0.115.0 // indirect go.opentelemetry.io/collector/extension/experimental/storage v0.115.0 // indirect - go.opentelemetry.io/collector/featuregate v1.21.0 // indirect + go.opentelemetry.io/collector/extension/extensioncapabilities v0.115.0 // indirect + go.opentelemetry.io/collector/extension/extensiontest v0.115.0 // indirect + go.opentelemetry.io/collector/internal/fanoutconsumer v0.115.0 // indirect go.opentelemetry.io/collector/pdata/pprofile v0.115.0 // indirect + go.opentelemetry.io/collector/pdata/testdata v0.115.0 // indirect go.opentelemetry.io/collector/pipeline v0.115.0 // indirect go.opentelemetry.io/collector/pipeline/pipelineprofiles v0.115.0 // indirect + go.opentelemetry.io/collector/processor/processorprofiles v0.115.0 // indirect + go.opentelemetry.io/collector/processor/processortest v0.115.0 // indirect go.opentelemetry.io/collector/receiver/receiverprofiles v0.115.0 // indirect - go.opentelemetry.io/collector/semconv v0.115.0 // indirect + go.opentelemetry.io/collector/service v0.115.0 // indirect + go.opentelemetry.io/contrib/bridges/otelzap v0.6.0 // indirect + go.opentelemetry.io/contrib/config v0.10.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect + go.opentelemetry.io/contrib/propagators/b3 v1.31.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 // indirect + go.opentelemetry.io/otel/exporters/prometheus v0.54.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/atomic v1.11.0 // indirect golang.org/x/mod v0.21.0 // indirect @@ -268,7 +307,6 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect k8s.io/api v0.29.3 // indirect k8s.io/apimachinery v0.29.3 // indirect k8s.io/client-go v0.29.3 // indirect @@ -279,4 +317,7 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) -replace github.com/prometheus/prometheus v0.54.1 => github.com/pkcll/prometheus v0.54.1-promotel +replace github.com/prometheus/prometheus v0.54.1 => github.com/pkcll/prometheus v0.54.1-promotel-protobufparse + +// replace github.com/prometheus/prometheus v0.54.1 => /Users/pk/repos/external/prometheus +// replace github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.115.0 => /Users/pk/repos/opentelemetry-collector-contrib/receiver/prometheusreceiver diff --git a/go.sum b/go.sum index f14247909..1a92bc558 100644 --- a/go.sum +++ b/go.sum @@ -131,6 
+131,7 @@ github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/davecgh/go-spew v0.0.0-20161028175848-04cdfd42973b/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -291,6 +292,7 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -616,6 +618,8 @@ github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusrem github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusremotewriteexporter v0.115.0/go.mod h1:r3iS2mDYu+cnGjgNc8TgvuUUAN6A6/1BvR1e1YJBrqM= github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.115.0 h1:vRQQFD4YpasQFUAdF030UWtaflSYFXK542bfWMGhOK0= github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.115.0/go.mod h1:BZ7DT+0VkKR7P3I9PGEDfVa0GdB0ty41eEcejIUXF9A= +github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.115.0 h1:a36EJz/mb83f6ieX0v4fNDJ1jXqpeaM6DVQXeFDvdhw= +github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.115.0/go.mod h1:r5/40YO1eSP5ZreOmRzVOUtDr7YG39ZIUcVjHd+9Izc= github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.115.0 h1:WEqcnWSy9dNSlGb8pYRBX7zhaz2ReyaeImlenbzNTB4= github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.115.0/go.mod h1:6Mk71CakHUA3I6oM9hARDiyQypYyOolvb+4PFYyVEFg= github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.115.0 h1:eoapW0JBablApkdv4C1RUuOKfz0U6SwuKMYYSAJH6fE= @@ -639,8 +643,8 @@ github.com/pelletier/go-toml/v2 v2.2.0 h1:QLgLl2yMN7N+ruc31VynXs1vhMZa7CeHHejIeB github.com/pelletier/go-toml/v2 v2.2.0/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkcll/prometheus v0.54.1-promotel h1:f7BcAIHS0IolaC4c0ckq3FaPKvoNO4Df/ASie7GVUF0= -github.com/pkcll/prometheus v0.54.1-promotel/go.mod h1:xlLByHhk2g3ycakQGrMaU8K7OySZx98BzeCR99991NY= +github.com/pkcll/prometheus v0.54.1-promotel-protobufparse h1:F14CwqmK8I8EQfOv4Ybu0ifS/WD0cum172lb4dKyg2w= +github.com/pkcll/prometheus v0.54.1-promotel-protobufparse/go.mod h1:xlLByHhk2g3ycakQGrMaU8K7OySZx98BzeCR99991NY= github.com/pkg/browser 
v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -696,6 +700,7 @@ github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/sanity-io/litter v1.5.5 h1:iE+sBxPBzoK6uaEP5Lt3fHNgpKcHXc/A2HGETy0uJQo= github.com/sanity-io/litter v1.5.5/go.mod h1:9gzJgR2i4ZpjZHsKvUXIRQVk7P+yM3e+jAF7bU2UI5U= @@ -709,7 +714,6 @@ github.com/scylladb/go-reflectx v1.0.1 h1:b917wZM7189pZdlND9PbIJ6NQxfDPfBvUaQ7cj github.com/scylladb/go-reflectx v1.0.1/go.mod h1:rWnOfDIRWBGN0miMLIcoPt/Dhi2doCMZqwMCJ3KupFc= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible h1:Bn1aCHHRnjv4Bl16T8rcaFjYSrGrIZvpiGO6P3Q4GpU= github.com/shirou/gopsutil/v4 v4.24.11 h1:WaU9xqGFKvFfsUv94SXcUPD7rCkU0vr/asVdQOBZNj8= github.com/shirou/gopsutil/v4 v4.24.11/go.mod h1:s4D/wg+ag4rG0WO7AiTj2BeYCRhym0vM7DHbZRxnIT8= github.com/shoenig/test v1.7.1 h1:UJcjSAI3aUKx52kfcfhblgyhZceouhvvs3OYdWgn+PY= @@ -756,6 +760,7 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/testcontainers/testcontainers-go v0.14.0 h1:h0D5GaYG9mhOWr2qHdEKDXpkce/VlvaYOCzTRi6UBi8= github.com/testcontainers/testcontainers-go v0.14.0/go.mod h1:hSRGJ1G8Q5Bw2gXgPulJOLlEBaYJHeBSOkQM5JLG+JQ= +github.com/tidwall/gjson v1.10.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/gjson v1.14.2 h1:6BBkirS0rAHjumnjHF6qgy5d2YAJ1TLIaFE2lzfOLqo= github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -870,6 +875,8 @@ go.opentelemetry.io/collector/extension/extensioncapabilities v0.115.0 h1:/g25Hp go.opentelemetry.io/collector/extension/extensioncapabilities v0.115.0/go.mod h1:EQx7ETiy330O6q05S2KRZsRNDg0aQEeJmVl7Ipx+Fcw= go.opentelemetry.io/collector/extension/extensiontest v0.115.0 h1:GBVFxFEskR8jSdu9uaQh2qpXnN5VNXhXjpJ2UjxtE8I= go.opentelemetry.io/collector/extension/extensiontest v0.115.0/go.mod h1:eu1ecbz5mT+cHoH2H3GmD/rOO0WsicSJD2RLrYuOmRA= +go.opentelemetry.io/collector/extension/zpagesextension v0.115.0 h1:zYrZZocc7n0ZuDyXNkIaX0P0qk2fjMQj7NegwBJZA4k= +go.opentelemetry.io/collector/extension/zpagesextension v0.115.0/go.mod h1:OaXwNHF3MAcInBzCXrhXbTNHfIi9b7YGhXjtCFZqxNY= go.opentelemetry.io/collector/featuregate v1.21.0 h1:+EULHPJDLMipcwAGZVp9Nm8NriRvoBBMxp7MSiIZVMI= go.opentelemetry.io/collector/featuregate v1.21.0/go.mod h1:3GaXqflNDVwWndNGBJ1+XJFy3Fv/XrFgjMN60N3z7yg= go.opentelemetry.io/collector/internal/fanoutconsumer v0.115.0 
h1:6DRiSECeApFq6Jj5ug77rG53R6FzJEZBfygkyMEXdpg= @@ -914,6 +921,8 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuH go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= go.opentelemetry.io/contrib/propagators/b3 v1.31.0 h1:PQPXYscmwbCp76QDvO4hMngF2j8Bx/OTV86laEl8uqo= go.opentelemetry.io/contrib/propagators/b3 v1.31.0/go.mod h1:jbqfV8wDdqSDrAYxVpXQnpM0XFMq2FtDesblJ7blOwQ= +go.opentelemetry.io/contrib/zpages v0.56.0 h1:W7vP6s3juzL5KiHpr41zLNmsJ0QAZudYu8ay0zGAoko= +go.opentelemetry.io/contrib/zpages v0.56.0/go.mod h1:IxPRP4TYHw9jLeaEOSDIiA9zmyJNZNO6sbW55iMvSXs= go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.0.0-20240823153156-2a54df7bffb9 h1:UiRNKd1OgqsLbFwE+wkAWTdiAxXtCBqKIHeBIse4FUA= @@ -1110,6 +1119,7 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1135,6 +1145,7 @@ golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1156,6 +1167,7 @@ golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= diff --git a/pkg/promotel/cmd/example_test.go b/pkg/promotel/cmd/example_test.go new file mode 100644 index 000000000..12ed43223 --- /dev/null +++ b/pkg/promotel/cmd/example_test.go @@ -0,0 +1,60 @@ +package main + 
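The TestExample test that follows depends on package-level helpers that are not shown in this hunk (testCounterMetricName, reportMetrics, startMetricReceiver). As an illustrative sketch only (the names, signature, and interval are assumptions, not the actual implementation), the metric-producing side could be as small as the counter below, using the prometheus and zap packages this file already imports:

const testCounterMetricName = "promotel_example_counter" // assumed name

// reportMetrics registers a counter on the supplied registerer and increments it
// forever so the receiver in TestExample has a non-zero value to convert.
func reportMetrics(reg prometheus.Registerer, logger *zap.Logger) {
	c := prometheus.NewCounter(prometheus.CounterOpts{
		Name: testCounterMetricName,
		Help: "Example counter converted to OTel by the promotel receiver.",
	})
	reg.MustRegister(c)
	for {
		c.Inc()
		logger.Debug("incremented example counter")
		time.Sleep(100 * time.Millisecond)
	}
}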
+import ( + "context" + "testing" + "time" + + "go.opentelemetry.io/collector/pdata/pmetric" + "go.uber.org/zap" + + "github.com/prometheus/client_golang/prometheus" +) + +func TestExample(t *testing.T) { + logger, _ := zap.NewDevelopment() + + go reportMetrics(prometheus.DefaultRegisterer, logger) + + // Fetches metrics from in memory prometheus.Gatherer and converts to OTel format + foundCh := make(chan struct{}) + receiver := startMetricReceiver(prometheus.DefaultGatherer, logger, func(ctx context.Context, md pmetric.Metrics) error { + // Logs the converted OTel metric + rms := md.ResourceMetrics() + for i := 0; i < rms.Len(); i++ { + rm := rms.At(i) + ilms := rm.ScopeMetrics() + for j := 0; j < ilms.Len(); j++ { + ilm := ilms.At(j) + metrics := ilm.Metrics() + for k := 0; k < metrics.Len(); k++ { + metric := metrics.At(k) + if metric.Name() == testCounterMetricName { + v := metric.Sum().DataPoints().At(0).DoubleValue() + logger.Info("Exporting OTel metric ", zap.Any("name", metric.Name()), zap.Any("value", v)) + if v > 0 { + foundCh <- struct{}{} + return nil + } + } + } + } + } + return nil + }) + defer receiver.Close() + + timeout := 10 * time.Second + if deadline, ok := t.Deadline(); ok { + timeout = time.Until(deadline) + } + timer := time.NewTimer(timeout) + defer timer.Stop() + + select { + case <-timer.C: + t.Fatal("Timed out waiting for metric") + case <-foundCh: + t.Log("Found metric") + } +} diff --git a/pkg/promotel/config.go b/pkg/promotel/config.go index 10268f506..afe5d70ea 100644 --- a/pkg/promotel/config.go +++ b/pkg/promotel/config.go @@ -5,13 +5,13 @@ import ( "os" "path/filepath" - "gopkg.in/yaml.v3" - - "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver" "github.com/prometheus/prometheus/discovery" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/exporter/otlpexporter" + "gopkg.in/yaml.v3" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver" ) type ReceiverConfig = component.Config diff --git a/pkg/promotel/config_test.go b/pkg/promotel/config_test.go index 9c8c7ddaa..b73e1750b 100644 --- a/pkg/promotel/config_test.go +++ b/pkg/promotel/config_test.go @@ -5,7 +5,6 @@ import ( "testing" "time" - "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver" promModel "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -22,6 +21,7 @@ import ( "go.opentelemetry.io/collector/exporter/otlpexporter" "github.com/smartcontractkit/chainlink-common/pkg/promotel" + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver" ) func TestConfig(t *testing.T) { @@ -31,11 +31,6 @@ func TestConfig(t *testing.T) { c0 := cfg.(*prometheusreceiver.Config) assert.NotNil(t, c0.PrometheusConfig) - assert.Equal(t, "http://localhost:8080", c0.TargetAllocator.Endpoint) - assert.Equal(t, 5*time.Second, c0.TargetAllocator.Timeout) - assert.Equal(t, "client.crt", c0.TargetAllocator.TLSSetting.CertFile) - assert.Equal(t, 30*time.Second, c0.TargetAllocator.Interval) - assert.Equal(t, "collector-1", c0.TargetAllocator.CollectorID) assert.NotNil(t, c0.PrometheusConfig) cfg, err = promotel.LoadTestConfig(configFileName, "withScrape") @@ -43,9 +38,6 @@ func TestConfig(t *testing.T) { c1 := cfg.(*prometheusreceiver.Config) assert.NotNil(t, c0.PrometheusConfig) - assert.Equal(t, "http://localhost:8080", c0.TargetAllocator.Endpoint) - assert.Equal(t, 30*time.Second,
c0.TargetAllocator.Interval) - assert.Equal(t, "collector-1", c0.TargetAllocator.CollectorID) assert.Len(t, c1.PrometheusConfig.ScrapeConfigs, 1) assert.Equal(t, "demo", c1.PrometheusConfig.ScrapeConfigs[0].JobName) diff --git a/pkg/promotel/prometheusreceiver/config.go b/pkg/promotel/prometheusreceiver/config.go new file mode 100644 index 000000000..d0e0c1c21 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/config.go @@ -0,0 +1,158 @@ +package prometheusreceiver + +import ( + "fmt" + "os" + "sort" + "strings" + + "github.com/prometheus/client_golang/prometheus" + commonconfig "github.com/prometheus/common/config" + promconfig "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery/kubernetes" + "go.opentelemetry.io/collector/confmap" + "gopkg.in/yaml.v2" +) + +// Config defines configuration for Prometheus receiver. +type Config struct { + PrometheusConfig *PromConfig `mapstructure:"config"` + TrimMetricSuffixes bool `mapstructure:"trim_metric_suffixes"` + // UseStartTimeMetric enables retrieving the start time of all counter metrics + // from the process_start_time_seconds metric. This is only correct if all counters on that endpoint + // started after the process start time, and the process is the only actor exporting the metric after + // the process started. It should not be used in "exporters" which export counters that may have + // started before the process itself. Use only if you know what you are doing, as this may result + // in incorrect rate calculations. + UseStartTimeMetric bool `mapstructure:"use_start_time_metric"` + StartTimeMetricRegex string `mapstructure:"start_time_metric_regex"` + + // ReportExtraScrapeMetrics - enables reporting of additional metrics for Prometheus client like scrape_body_size_bytes + ReportExtraScrapeMetrics bool `mapstructure:"report_extra_scrape_metrics"` + + Registry *prometheus.Registry `mapstructure:"-"` +} + +// Validate checks the receiver configuration is valid. +func (cfg *Config) Validate() error { + return nil +} + +func containsScrapeConfig(cfg *Config) bool { + if cfg.PrometheusConfig == nil { + return false + } + scrapeConfigs, err := (*promconfig.Config)(cfg.PrometheusConfig).GetScrapeConfigs() + if err != nil { + return false + } + + return len(scrapeConfigs) > 0 +} + +// PromConfig is a redeclaration of promconfig.Config because we need custom unmarshaling +// as prometheus "config" uses `yaml` tags. 
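Because promconfig.Config carries `yaml` struct tags rather than mapstructure tags, the Unmarshal method below round-trips the collector's confmap contents through YAML. A usage sketch with illustrative scrape-config values, relying on the confmap and promconfig imports already in this file:

raw := confmap.NewFromStringMap(map[string]any{
	"global": map[string]any{"scrape_interval": "15s"},
	"scrape_configs": []any{
		map[string]any{
			"job_name": "demo",
			"static_configs": []any{
				map[string]any{"targets": []any{"localhost:9090"}},
			},
		},
	},
})
var pc PromConfig
if err := pc.Unmarshal(raw); err != nil {
	// the YAML round-trip uses UnmarshalStrict, so unknown keys fail here
}
scrapeConfigs, _ := (*promconfig.Config)(&pc).GetScrapeConfigs() // same accessor Validate uses
_ = scrapeConfigs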
+type PromConfig promconfig.Config + +var _ confmap.Unmarshaler = (*PromConfig)(nil) + +func (cfg *PromConfig) Unmarshal(componentParser *confmap.Conf) error { + cfgMap := componentParser.ToStringMap() + if len(cfgMap) == 0 { + return nil + } + return unmarshalYAML(cfgMap, (*promconfig.Config)(cfg)) +} + +func (cfg *PromConfig) Validate() error { + // Reject features that Prometheus supports but that the receiver doesn't support: + // See: + // * https://github.com/open-telemetry/opentelemetry-collector/issues/3863 + // * https://github.com/open-telemetry/wg-prometheus/issues/3 + unsupportedFeatures := make([]string, 0, 4) + if len(cfg.RemoteWriteConfigs) != 0 { + unsupportedFeatures = append(unsupportedFeatures, "remote_write") + } + if len(cfg.RemoteReadConfigs) != 0 { + unsupportedFeatures = append(unsupportedFeatures, "remote_read") + } + if len(cfg.RuleFiles) != 0 { + unsupportedFeatures = append(unsupportedFeatures, "rule_files") + } + if len(cfg.AlertingConfig.AlertRelabelConfigs) != 0 { + unsupportedFeatures = append(unsupportedFeatures, "alert_config.relabel_configs") + } + if len(cfg.AlertingConfig.AlertmanagerConfigs) != 0 { + unsupportedFeatures = append(unsupportedFeatures, "alert_config.alertmanagers") + } + if len(unsupportedFeatures) != 0 { + // Sort the values for deterministic error messages. + sort.Strings(unsupportedFeatures) + return fmt.Errorf("unsupported features:\n\t%s", strings.Join(unsupportedFeatures, "\n\t")) + } + + scrapeConfigs, err := (*promconfig.Config)(cfg).GetScrapeConfigs() + if err != nil { + return err + } + + for _, sc := range scrapeConfigs { + if err := validateHTTPClientConfig(&sc.HTTPClientConfig); err != nil { + return err + } + + for _, c := range sc.ServiceDiscoveryConfigs { + if c, ok := c.(*kubernetes.SDConfig); ok { + if err := validateHTTPClientConfig(&c.HTTPClientConfig); err != nil { + return err + } + } + } + } + return nil +} + +func unmarshalYAML(in map[string]any, out any) error { + yamlOut, err := yaml.Marshal(in) + if err != nil { + return fmt.Errorf("prometheus receiver: failed to marshal config to yaml: %w", err) + } + + err = yaml.UnmarshalStrict(yamlOut, out) + if err != nil { + return fmt.Errorf("prometheus receiver: failed to unmarshal yaml to prometheus config object: %w", err) + } + return nil +} + +func validateHTTPClientConfig(cfg *commonconfig.HTTPClientConfig) error { + if cfg.Authorization != nil { + if err := checkFile(cfg.Authorization.CredentialsFile); err != nil { + return fmt.Errorf("error checking authorization credentials file %q: %w", cfg.Authorization.CredentialsFile, err) + } + } + + if err := checkTLSConfig(cfg.TLSConfig); err != nil { + return err + } + return nil +} + +func checkFile(fn string) error { + // Nothing set, nothing to error on. 
+ if fn == "" { + return nil + } + _, err := os.Stat(fn) + return err +} + +func checkTLSConfig(tlsConfig commonconfig.TLSConfig) error { + if err := checkFile(tlsConfig.CertFile); err != nil { + return fmt.Errorf("error checking client cert file %q: %w", tlsConfig.CertFile, err) + } + if err := checkFile(tlsConfig.KeyFile); err != nil { + return fmt.Errorf("error checking client key file %q: %w", tlsConfig.KeyFile, err) + } + return nil +} diff --git a/pkg/promotel/prometheusreceiver/config_test.go b/pkg/promotel/prometheusreceiver/config_test.go new file mode 100644 index 000000000..e0bb5c291 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/config_test.go @@ -0,0 +1,335 @@ +package prometheusreceiver + +import ( + "context" + "path/filepath" + "strings" + "testing" + "time" + + promModel "github.com/prometheus/common/model" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/confmap/confmaptest" + "go.opentelemetry.io/collector/receiver/receivertest" + "go.uber.org/zap" + "go.uber.org/zap/zaptest/observer" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/internal/metadata" +) + +func TestLoadConfig(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "config.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + r0 := cfg.(*Config) + assert.Equal(t, r0, factory.CreateDefaultConfig()) + + sub, err = cm.Sub(component.NewIDWithName(metadata.Type, "customname").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + r1 := cfg.(*Config) + assert.Equal(t, "demo", r1.PrometheusConfig.ScrapeConfigs[0].JobName) + assert.Equal(t, 5*time.Second, time.Duration(r1.PrometheusConfig.ScrapeConfigs[0].ScrapeInterval)) + assert.True(t, r1.UseStartTimeMetric) + assert.True(t, r1.TrimMetricSuffixes) + assert.Equal(t, "^(.+_)*process_start_time_seconds$", r1.StartTimeMetricRegex) + assert.True(t, r1.ReportExtraScrapeMetrics) +} + +func TestLoadTargetAllocatorConfig(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "config_target_allocator.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + require.NoError(t, component.ValidateConfig(cfg)) + + r0 := cfg.(*Config) + assert.NotNil(t, r0.PrometheusConfig) + assert.NotNil(t, r0.PrometheusConfig) + + sub, err = cm.Sub(component.NewIDWithName(metadata.Type, "withScrape").String()) + require.NoError(t, err) + cfg = factory.CreateDefaultConfig() + require.NoError(t, sub.Unmarshal(cfg)) + require.NoError(t, component.ValidateConfig(cfg)) + + r1 := cfg.(*Config) + assert.NotNil(t, r0.PrometheusConfig) + + assert.Len(t, r1.PrometheusConfig.ScrapeConfigs, 1) + assert.Equal(t, "demo", r1.PrometheusConfig.ScrapeConfigs[0].JobName) + assert.Equal(t, promModel.Duration(5*time.Second), r1.PrometheusConfig.ScrapeConfigs[0].ScrapeInterval) + + sub, err = cm.Sub(component.NewIDWithName(metadata.Type, "withOnlyScrape").String()) + require.NoError(t, err) + cfg = factory.CreateDefaultConfig() + require.NoError(t, sub.Unmarshal(cfg)) + require.NoError(t, component.ValidateConfig(cfg)) + + r2 := 
cfg.(*Config) + assert.Len(t, r2.PrometheusConfig.ScrapeConfigs, 1) + assert.Equal(t, "demo", r2.PrometheusConfig.ScrapeConfigs[0].JobName) + assert.Equal(t, promModel.Duration(5*time.Second), r2.PrometheusConfig.ScrapeConfigs[0].ScrapeInterval) +} + +func TestValidateConfigWithScrapeConfigFiles(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "config_scrape_config_files.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + require.NoError(t, component.ValidateConfig(cfg)) +} + +func TestLoadConfigFailsOnUnknownSection(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-section.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.Error(t, sub.Unmarshal(cfg)) +} + +func TestLoadConfigFailsOnNoPrometheusOrTAConfig(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-non-existent-scrape-config.yaml")) + require.NoError(t, err) + factory := NewFactory() + + cfg := factory.CreateDefaultConfig() + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + require.ErrorContains(t, component.ValidateConfig(cfg), "no Prometheus scrape_configs or target_allocator set") + + cfg = factory.CreateDefaultConfig() + sub, err = cm.Sub(component.NewIDWithName(metadata.Type, "withConfigAndTA").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + require.NoError(t, component.ValidateConfig(cfg)) + + cfg = factory.CreateDefaultConfig() + sub, err = cm.Sub(component.NewIDWithName(metadata.Type, "withOnlyTA").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + require.NoError(t, component.ValidateConfig(cfg)) + + cfg = factory.CreateDefaultConfig() + sub, err = cm.Sub(component.NewIDWithName(metadata.Type, "withOnlyScrape").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + require.NoError(t, component.ValidateConfig(cfg)) +} + +// As one of the config parameters is consuming prometheus +// configuration as a subkey, ensure that invalid configuration +// within the subkey will also raise an error. 
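Every test in this file follows the same load-and-validate pattern; condensed here as a reference helper (not part of the patch, and the testdata file name is whatever the individual test loads):

func loadReceiverConfig(t *testing.T, file string) *Config {
	cm, err := confmaptest.LoadConf(filepath.Join("testdata", file))
	require.NoError(t, err)
	cfg := NewFactory().CreateDefaultConfig()
	sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String())
	require.NoError(t, err)
	require.NoError(t, sub.Unmarshal(cfg))            // decode the receiver section onto *Config
	require.NoError(t, component.ValidateConfig(cfg)) // runs Config.Validate and PromConfig.Validate
	return cfg.(*Config)
}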
+func TestLoadConfigFailsOnUnknownPrometheusSection(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-section.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.Error(t, sub.Unmarshal(cfg)) +} + +// Renaming emits a warning +func TestConfigWarningsOnRenameDisallowed(t *testing.T) { + // Construct the config that should emit a warning + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "warning-config-prometheus-relabel.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + // Use a fake logger + creationSet := receivertest.NewNopSettings() + observedZapCore, observedLogs := observer.New(zap.WarnLevel) + creationSet.Logger = zap.New(observedZapCore) + _, err = createMetricsReceiver(context.Background(), creationSet, cfg, nil) + require.NoError(t, err) + // We should have received a warning + assert.Equal(t, 1, observedLogs.Len()) +} + +func TestRejectUnsupportedPrometheusFeatures(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-unsupported-features.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + err = component.ValidateConfig(cfg) + require.Error(t, err) + + wantErrMsg := `unsupported features: + alert_config.alertmanagers + alert_config.relabel_configs + remote_read + remote_write + rule_files` + + gotErrMsg := strings.ReplaceAll(err.Error(), "\t", strings.Repeat(" ", 8)) + require.Equal(t, wantErrMsg, gotErrMsg) +} + +func TestNonExistentAuthCredentialsFile(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-non-existent-auth-credentials-file.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + assert.ErrorContains(t, + component.ValidateConfig(cfg), + `error checking authorization credentials file "/nonexistentauthcredentialsfile"`) +} + +func TestTLSConfigNonExistentCertFile(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-non-existent-cert-file.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + assert.ErrorContains(t, + component.ValidateConfig(cfg), + `error checking client cert file "/nonexistentcertfile"`) +} + +func TestTLSConfigNonExistentKeyFile(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-non-existent-key-file.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + assert.ErrorContains(t, + 
component.ValidateConfig(cfg), + `error checking client key file "/nonexistentkeyfile"`) +} + +func TestTLSConfigCertFileWithoutKeyFile(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-cert-file-without-key-file.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + + assert.ErrorContains(t, + sub.Unmarshal(cfg), + "exactly one of key or key_file must be configured when a client certificate is configured") +} + +func TestTLSConfigKeyFileWithoutCertFile(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-key-file-without-cert-file.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + assert.ErrorContains(t, + sub.Unmarshal(cfg), + "exactly one of cert or cert_file must be configured when a client key is configured") +} + +func TestKubernetesSDConfigWithoutKeyFile(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-kubernetes-sd-config.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + + assert.ErrorContains(t, + sub.Unmarshal(cfg), + "exactly one of key or key_file must be configured when a client certificate is configured") +} + +func TestFileSDConfigJsonNilTargetGroup(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-file-sd-config-json.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + require.NoError(t, component.ValidateConfig(cfg)) +} + +func TestFileSDConfigYamlNilTargetGroup(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-file-sd-config-yaml.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + require.NoError(t, component.ValidateConfig(cfg)) +} + +func TestTargetAllocatorInvalidHTTPScrape(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "invalid-config-prometheus-target-allocator.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.Error(t, sub.Unmarshal(cfg)) +} + +func TestFileSDConfigWithoutSDFile(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "non-existent-prometheus-sd-file-config.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + require.NoError(t, sub.Unmarshal(cfg)) + + require.NoError(t, component.ValidateConfig(cfg)) +} diff --git a/pkg/promotel/prometheusreceiver/factory.go b/pkg/promotel/prometheusreceiver/factory.go new file mode 
100644 index 000000000..964d9bf95 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/factory.go @@ -0,0 +1,82 @@ +package prometheusreceiver + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + promconfig "github.com/prometheus/prometheus/config" + _ "github.com/prometheus/prometheus/discovery/install" // init() of this package registers service discovery impl. + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/featuregate" + "go.opentelemetry.io/collector/receiver" + "go.uber.org/zap" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/internal/metadata" +) + +// This file implements config for Prometheus receiver. +var useCreatedMetricGate = featuregate.GlobalRegistry().MustRegister( + "receiver.prometheusreceiver.UseCreatedMetric", + featuregate.StageAlpha, + featuregate.WithRegisterDescription("When enabled, the Prometheus receiver will"+ + " retrieve the start time for Summary, Histogram and Sum metrics from _created metric"), +) + +var enableNativeHistogramsGate = featuregate.GlobalRegistry().MustRegister( + "receiver.prometheusreceiver.EnableNativeHistograms", + featuregate.StageAlpha, + featuregate.WithRegisterDescription("When enabled, the Prometheus receiver will convert"+ + " Prometheus native histograms to OTEL exponential histograms and ignore"+ + " those Prometheus classic histograms that have a native histogram alternative"), +) + +// NewFactory creates a new Prometheus receiver factory. +func NewFactory() receiver.Factory { + return receiver.NewFactory( + metadata.Type, + createDefaultConfig, + receiver.WithMetrics(createMetricsReceiver, metadata.MetricsStability)) +} + +func NewFactoryWithRegistry(reg *prometheus.Registry) receiver.Factory { + return receiver.NewFactory( + metadata.Type, + createDefaultConfigWithRegistry(reg), + receiver.WithMetrics(createMetricsReceiver, metadata.MetricsStability)) +} + +func createDefaultConfig() component.Config { + return &Config{ + PrometheusConfig: &PromConfig{ + GlobalConfig: promconfig.DefaultGlobalConfig, + }, + } +} +func createDefaultConfigWithRegistry(reg *prometheus.Registry) func() component.Config { + return func() component.Config { + c := createDefaultConfig().(*Config) + c.Registry = reg + return c + } +} + +func createMetricsReceiver( + _ context.Context, + set receiver.Settings, + cfg component.Config, + nextConsumer consumer.Metrics, +) (receiver.Metrics, error) { + configWarnings(set.Logger, cfg.(*Config)) + return newPrometheusReceiver(set, cfg.(*Config), nextConsumer), nil +} + +func configWarnings(logger *zap.Logger, cfg *Config) { + for _, sc := range cfg.PrometheusConfig.ScrapeConfigs { + for _, rc := range sc.MetricRelabelConfigs { + if rc.TargetLabel == "__name__" { + logger.Warn("metric renaming using metric_relabel_configs will result in unknown-typed metrics without a unit or description", zap.String("job", sc.JobName)) + } + } + } +} diff --git a/pkg/promotel/prometheusreceiver/factory_test.go b/pkg/promotel/prometheusreceiver/factory_test.go new file mode 100644 index 000000000..bab718009 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/factory_test.go @@ -0,0 +1,61 @@ +package prometheusreceiver + +import ( + "context" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/component/componenttest" + 
"go.opentelemetry.io/collector/confmap/confmaptest" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/receiver/receivertest" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/internal/metadata" +) + +func TestCreateDefaultConfig(t *testing.T) { + cfg := createDefaultConfig() + assert.NotNil(t, cfg, "failed to create default config") + assert.NoError(t, componenttest.CheckConfigStruct(cfg)) +} + +func TestCreateReceiver(t *testing.T) { + cfg := createDefaultConfig() + + // The default config does not provide scrape_config so we expect that metrics receiver + // creation must also fail. + creationSet := receivertest.NewNopSettings() + mReceiver, _ := createMetricsReceiver(context.Background(), creationSet, cfg, consumertest.NewNop()) + assert.NotNil(t, mReceiver) + assert.NotNil(t, mReceiver.(*pReceiver).cfg.PrometheusConfig.GlobalConfig) +} + +func TestFactoryCanParseServiceDiscoveryConfigs(t *testing.T) { + cm, err := confmaptest.LoadConf(filepath.Join("testdata", "config_sd.yaml")) + require.NoError(t, err) + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + + sub, err := cm.Sub(component.NewIDWithName(metadata.Type, "").String()) + require.NoError(t, err) + assert.NoError(t, sub.Unmarshal(cfg)) +} + +func TestMultipleCreate(t *testing.T) { + factory := NewFactory() + cfg := factory.CreateDefaultConfig() + set := receivertest.NewNopSettings() + firstRcvr, err := factory.CreateMetrics(context.Background(), set, cfg, consumertest.NewNop()) + require.NoError(t, err) + host := componenttest.NewNopHost() + require.NoError(t, err) + require.NoError(t, firstRcvr.Start(context.Background(), host)) + require.NoError(t, firstRcvr.Shutdown(context.Background())) + secondRcvr, err := factory.CreateMetrics(context.Background(), set, cfg, consumertest.NewNop()) + require.NoError(t, err) + require.NoError(t, secondRcvr.Start(context.Background(), host)) + require.NoError(t, secondRcvr.Shutdown(context.Background())) +} diff --git a/pkg/promotel/prometheusreceiver/internal/appendable.go b/pkg/promotel/prometheusreceiver/internal/appendable.go new file mode 100644 index 000000000..adc1f306f --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/appendable.go @@ -0,0 +1,68 @@ +package internal + +import ( + "context" + "regexp" + "time" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/receiver" + "go.opentelemetry.io/collector/receiver/receiverhelper" +) + +// appendable translates Prometheus scraping diffs into OpenTelemetry format. +type appendable struct { + sink consumer.Metrics + metricAdjuster MetricsAdjuster + useStartTimeMetric bool + enableNativeHistograms bool + trimSuffixes bool + startTimeMetricRegex *regexp.Regexp + externalLabels labels.Labels + + settings receiver.Settings + obsrecv *receiverhelper.ObsReport +} + +// NewAppendable returns a storage.Appendable instance that emits metrics to the sink. 
+func NewAppendable( + sink consumer.Metrics, + set receiver.Settings, + gcInterval time.Duration, + useStartTimeMetric bool, + startTimeMetricRegex *regexp.Regexp, + useCreatedMetric bool, + enableNativeHistograms bool, + externalLabels labels.Labels, + trimSuffixes bool, +) (storage.Appendable, error) { + var metricAdjuster MetricsAdjuster + if !useStartTimeMetric { + metricAdjuster = NewInitialPointAdjuster(set.Logger, gcInterval, useCreatedMetric) + } else { + metricAdjuster = NewStartTimeMetricAdjuster(set.Logger, startTimeMetricRegex) + } + + obsrecv, err := receiverhelper.NewObsReport(receiverhelper.ObsReportSettings{ReceiverID: set.ID, Transport: transport, ReceiverCreateSettings: set}) + if err != nil { + return nil, err + } + + return &appendable{ + sink: sink, + settings: set, + metricAdjuster: metricAdjuster, + useStartTimeMetric: useStartTimeMetric, + enableNativeHistograms: enableNativeHistograms, + startTimeMetricRegex: startTimeMetricRegex, + externalLabels: externalLabels, + obsrecv: obsrecv, + trimSuffixes: trimSuffixes, + }, nil +} + +func (o *appendable) Appender(ctx context.Context) storage.Appender { + return newTransaction(ctx, o.metricAdjuster, o.sink, o.externalLabels, o.settings, o.obsrecv, o.trimSuffixes, o.enableNativeHistograms) +} diff --git a/pkg/promotel/prometheusreceiver/internal/logger.go b/pkg/promotel/prometheusreceiver/internal/logger.go new file mode 100644 index 000000000..862e5496f --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/logger.go @@ -0,0 +1,139 @@ +package internal + +import ( + gokitLog "github.com/go-kit/log" + "github.com/go-kit/log/level" + "go.uber.org/zap" +) + +const ( + levelKey = "level" + msgKey = "msg" + errKey = "err" +) + +// NewZapToGokitLogAdapter create an adapter for zap.Logger to gokitLog.Logger +func NewZapToGokitLogAdapter(logger *zap.Logger) gokitLog.Logger { + // need to skip two levels in order to get the correct caller + // one for this method, the other for gokitLog + logger = logger.WithOptions(zap.AddCallerSkip(2)) + return &zapToGokitLogAdapter{l: logger.Sugar()} +} + +type zapToGokitLogAdapter struct { + l *zap.SugaredLogger +} + +type logData struct { + level level.Value + msg string + otherFields []any +} + +func (w *zapToGokitLogAdapter) Log(keyvals ...any) error { + // expecting key value pairs, the number of items need to be even + if len(keyvals)%2 == 0 { + // Extract log level and message and log them using corresponding zap function + ld := extractLogData(keyvals) + logFunc := levelToFunc(w.l, ld.level) + logFunc(ld.msg, ld.otherFields...) + } else { + // in case something goes wrong + w.l.Info(keyvals...) 
+ } + return nil +} + +func extractLogData(keyvals []any) logData { + ld := logData{ + level: level.InfoValue(), // default + } + + for i := 0; i < len(keyvals); i += 2 { + key := keyvals[i] + val := keyvals[i+1] + + if l, ok := matchLogLevel(key, val); ok { + ld.level = l + continue + } + + if m, ok := matchLogMessage(key, val); ok { + ld.msg = m + continue + } + + if err, ok := matchError(key, val); ok { + ld.otherFields = append(ld.otherFields, zap.Error(err)) + continue + } + + ld.otherFields = append(ld.otherFields, key, val) + } + + return ld +} + +// check if a given key-value pair represents go-kit log message and return it +func matchLogMessage(key any, val any) (string, bool) { + if strKey, ok := key.(string); !ok || strKey != msgKey { + return "", false + } + + msg, ok := val.(string) + if !ok { + return "", false + } + return msg, true +} + +// check if a given key-value pair represents go-kit log level and return it +func matchLogLevel(key any, val any) (level.Value, bool) { + strKey, ok := key.(string) + if !ok || strKey != levelKey { + return nil, false + } + + levelVal, ok := val.(level.Value) + if !ok { + return nil, false + } + return levelVal, true +} + +//revive:disable:error-return + +// check if a given key-value pair represents an error and return it +func matchError(key any, val any) (error, bool) { + strKey, ok := key.(string) + if !ok || strKey != errKey { + return nil, false + } + + err, ok := val.(error) + if !ok { + return nil, false + } + return err, true +} + +//revive:enable:error-return + +// find a matching zap logging function to be used for a given level +func levelToFunc(logger *zap.SugaredLogger, lvl level.Value) func(string, ...any) { + switch lvl { + case level.DebugValue(): + return logger.Debugw + case level.InfoValue(): + return logger.Infow + case level.WarnValue(): + return logger.Warnw + case level.ErrorValue(): + return logger.Errorw + } + + // default + return logger.Infow +} + +var _ gokitLog.Logger = (*zapToGokitLogAdapter)(nil) diff --git a/pkg/promotel/prometheusreceiver/internal/logger_test.go b/pkg/promotel/prometheusreceiver/internal/logger_test.go new file mode 100644 index 000000000..1a5bd88dd --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/logger_test.go @@ -0,0 +1,287 @@ +package internal + +import ( + "fmt" + "net/http" + "testing" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" +) + +func TestLog(t *testing.T) { + tcs := []struct { + name string + input []any + wantLevel zapcore.Level + wantMessage string + }{ + { + name: "Starting provider", + input: []any{ + "level", + level.DebugValue(), + "msg", + "Starting provider", + "provider", + "string/0", + "subs", + "[target1]", + }, + wantLevel: zapcore.DebugLevel, + wantMessage: "Starting provider", + }, + { + name: "Scrape failed", + input: []any{ + "level", + level.ErrorValue(), + "scrape_pool", + "target1", + "msg", + "Scrape failed", + "err", + "server returned HTTP status 500 Internal Server Error", + }, + wantLevel: zapcore.ErrorLevel, + wantMessage: "Scrape failed", + }, + } + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + conf := zap.NewProductionConfig() + conf.Level.SetLevel(zapcore.DebugLevel) + + // capture zap log entry + var entry zapcore.Entry + h := func(e zapcore.Entry) error { + entry = e + return nil + } + + logger, err := conf.Build(zap.Hooks(h)) + 
require.NoError(t, err) + + adapter := NewZapToGokitLogAdapter(logger) + err = adapter.Log(tc.input...) + require.NoError(t, err) + + assert.Equal(t, tc.wantLevel, entry.Level) + assert.Equal(t, tc.wantMessage, entry.Message) + }) + } +} + +func TestExtractLogData(t *testing.T) { + tcs := []struct { + name string + input []any + wantLevel level.Value + wantMessage string + wantOutput []any + }{ + { + name: "nil fields", + input: nil, + wantLevel: level.InfoValue(), // Default + wantMessage: "", + wantOutput: nil, + }, + { + name: "empty fields", + input: []any{}, + wantLevel: level.InfoValue(), // Default + wantMessage: "", + wantOutput: nil, + }, + { + name: "info level", + input: []any{ + "level", + level.InfoValue(), + }, + wantLevel: level.InfoValue(), + wantMessage: "", + wantOutput: nil, + }, + { + name: "warn level", + input: []any{ + "level", + level.WarnValue(), + }, + wantLevel: level.WarnValue(), + wantMessage: "", + wantOutput: nil, + }, + { + name: "error level", + input: []any{ + "level", + level.ErrorValue(), + }, + wantLevel: level.ErrorValue(), + wantMessage: "", + wantOutput: nil, + }, + { + name: "debug level + extra fields", + input: []any{ + "timestamp", + 1596604719, + "level", + level.DebugValue(), + "msg", + "http client error", + }, + wantLevel: level.DebugValue(), + wantMessage: "http client error", + wantOutput: []any{ + "timestamp", 1596604719, + }, + }, + { + name: "missing level field", + input: []any{ + "timestamp", + 1596604719, + "msg", + "http client error", + }, + wantLevel: level.InfoValue(), // Default + wantMessage: "http client error", + wantOutput: []any{ + "timestamp", 1596604719, + }, + }, + { + name: "invalid level type", + input: []any{ + "level", + "warn", // String is not recognized + }, + wantLevel: level.InfoValue(), // Default + wantOutput: []any{ + "level", "warn", // Field is preserved + }, + }, + } + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + ld := extractLogData(tc.input) + assert.Equal(t, tc.wantLevel, ld.level) + assert.Equal(t, tc.wantMessage, ld.msg) + assert.Equal(t, tc.wantOutput, ld.otherFields) + }) + } +} + +func TestE2E(t *testing.T) { + logger, observed := observer.New(zap.DebugLevel) + gLogger := NewZapToGokitLogAdapter(zap.New(logger)) + + const targetStr = "https://host.docker.internal:5000/prometheus" + + tcs := []struct { + name string + log func() error + wantLevel zapcore.Level + wantMessage string + wantOutput []zapcore.Field + }{ + { + name: "debug level", + log: func() error { + return level.Debug(gLogger).Log() + }, + wantLevel: zapcore.DebugLevel, + wantMessage: "", + wantOutput: []zapcore.Field{}, + }, + { + name: "info level", + log: func() error { + return level.Info(gLogger).Log() + }, + wantLevel: zapcore.InfoLevel, + wantMessage: "", + wantOutput: []zapcore.Field{}, + }, + { + name: "warn level", + log: func() error { + return level.Warn(gLogger).Log() + }, + wantLevel: zapcore.WarnLevel, + wantMessage: "", + wantOutput: []zapcore.Field{}, + }, + { + name: "error level", + log: func() error { + return level.Error(gLogger).Log() + }, + wantLevel: zapcore.ErrorLevel, + wantMessage: "", + wantOutput: []zapcore.Field{}, + }, + { + name: "logger with and msg", + log: func() error { + ngLogger := log.With(gLogger, "scrape_pool", "scrape_pool") + ngLogger = log.With(ngLogger, "target", targetStr) + return level.Debug(ngLogger).Log("msg", "http client error", "err", fmt.Errorf("%s %q: dial tcp 192.168.65.2:5000: connect: connection refused", http.MethodGet, targetStr)) + }, + wantLevel: 
zapcore.DebugLevel, + wantMessage: "http client error", + wantOutput: []zapcore.Field{ + zap.String("scrape_pool", "scrape_pool"), + zap.String("target", "https://host.docker.internal:5000/prometheus"), + zap.Error(fmt.Errorf("%s %q: dial tcp 192.168.65.2:5000: connect: connection refused", http.MethodGet, targetStr)), + }, + }, + { + name: "missing level", + log: func() error { + ngLogger := log.With(gLogger, "target", "foo") + return ngLogger.Log("msg", "http client error") + }, + wantLevel: zapcore.InfoLevel, // Default + wantMessage: "http client error", + wantOutput: []zapcore.Field{ + zap.String("target", "foo"), + }, + }, + { + name: "invalid level type", + log: func() error { + ngLogger := log.With(gLogger, "target", "foo") + return ngLogger.Log("msg", "http client error", "level", "warn") + }, + wantLevel: zapcore.InfoLevel, // Default + wantMessage: "http client error", + wantOutput: []zapcore.Field{ + zap.String("target", "foo"), + zap.String("level", "warn"), // Field is preserved + }, + }, + } + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + assert.NoError(t, tc.log()) + entries := observed.TakeAll() + require.Len(t, entries, 1) + assert.Equal(t, tc.wantLevel, entries[0].Level) + assert.Equal(t, tc.wantMessage, entries[0].Message) + assert.Equal(t, tc.wantOutput, entries[0].Context) + }) + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/metadata.go b/pkg/promotel/prometheusreceiver/internal/metadata.go new file mode 100644 index 000000000..f843c8d22 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/metadata.go @@ -0,0 +1,65 @@ +package internal + +import ( + "github.com/prometheus/common/model" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +type dataPoint struct { + value float64 + boundary float64 +} + +// internalMetricMetadata allows looking up metadata for internal scrape metrics +var internalMetricMetadata = map[string]*scrape.MetricMetadata{ + scrapeUpMetricName: { + Metric: scrapeUpMetricName, + Type: model.MetricTypeGauge, + Help: "The scraping was successful", + }, + "scrape_duration_seconds": { + Metric: "scrape_duration_seconds", + Unit: "seconds", + Type: model.MetricTypeGauge, + Help: "Duration of the scrape", + }, + "scrape_samples_scraped": { + Metric: "scrape_samples_scraped", + Type: model.MetricTypeGauge, + Help: "The number of samples the target exposed", + }, + "scrape_series_added": { + Metric: "scrape_series_added", + Type: model.MetricTypeGauge, + Help: "The approximate number of new series in this scrape", + }, + "scrape_samples_post_metric_relabeling": { + Metric: "scrape_samples_post_metric_relabeling", + Type: model.MetricTypeGauge, + Help: "The number of samples remaining after metric relabeling was applied", + }, +} + +func metadataForMetric(metricName string, mc scrape.MetricMetadataStore) (*scrape.MetricMetadata, string) { + if metadata, ok := internalMetricMetadata[metricName]; ok { + return metadata, metricName + } + if metadata, ok := mc.GetMetadata(metricName); ok { + return &metadata, metricName + } + // If we didn't find metadata with the original name, + // try with suffixes trimmed, in-case it is a "merged" metric type. 
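+ // Editor's note (illustrative): for a histogram exposed as, say,
+ // "http_request_duration_seconds", the scraped series arrive as
+ // "..._bucket", "..._sum" and "..._count"; normalizeMetricName trims those
+ // suffixes so all of them resolve to the same family metadata.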
+ normalizedName := normalizeMetricName(metricName) + if metadata, ok := mc.GetMetadata(normalizedName); ok { + if metadata.Type == model.MetricTypeCounter { + return &metadata, metricName + } + return &metadata, normalizedName + } + // Otherwise, the metric is unknown + return &scrape.MetricMetadata{ + Metric: metricName, + Type: model.MetricTypeUnknown, + }, metricName +} diff --git a/pkg/promotel/prometheusreceiver/internal/metadata/generated_status.go b/pkg/promotel/prometheusreceiver/internal/metadata/generated_status.go new file mode 100644 index 000000000..32dcd383c --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/metadata/generated_status.go @@ -0,0 +1,16 @@ +// Code generated by mdatagen. DO NOT EDIT. + +package metadata + +import ( + "go.opentelemetry.io/collector/component" +) + +var ( + Type = component.MustNewType("prometheus") + ScopeName = "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver" +) + +const ( + MetricsStability = component.StabilityLevelBeta +) diff --git a/pkg/promotel/prometheusreceiver/internal/metricfamily.go b/pkg/promotel/prometheusreceiver/internal/metricfamily.go new file mode 100644 index 000000000..265f670de --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/metricfamily.go @@ -0,0 +1,578 @@ +package internal + +import ( + "encoding/hex" + "fmt" + "math" + "sort" + "strings" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.uber.org/zap" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +type metricFamily struct { + mtype pmetric.MetricType + // isMonotonic only applies to sums + isMonotonic bool + groups map[uint64]*metricGroup + name string + metadata *scrape.MetricMetadata + groupOrders []*metricGroup +} + +// metricGroup, represents a single metric of a metric family. for example a histogram metric is usually represent by +// a couple data complexValue (buckets and count/sum), a group of a metric family always share a same set of tags. for +// simple types like counter and gauge, each data point is a group of itself +type metricGroup struct { + mtype pmetric.MetricType + ts int64 + ls labels.Labels + count float64 + hasCount bool + sum float64 + hasSum bool + created float64 + value float64 + hValue *histogram.Histogram + fhValue *histogram.FloatHistogram + complexValue []*dataPoint + exemplars pmetric.ExemplarSlice +} + +func newMetricFamily(metricName string, mc scrape.MetricMetadataStore, logger *zap.Logger) *metricFamily { + metadata, familyName := metadataForMetric(metricName, mc) + mtype, isMonotonic := convToMetricType(metadata.Type) + if mtype == pmetric.MetricTypeEmpty { + logger.Debug(fmt.Sprintf("Unknown-typed metric : %s %+v", metricName, metadata)) + } + + return &metricFamily{ + mtype: mtype, + isMonotonic: isMonotonic, + groups: make(map[uint64]*metricGroup), + name: familyName, + metadata: metadata, + } +} + +// includesMetric returns true if the metric is part of the family +func (mf *metricFamily) includesMetric(metricName string) bool { + if mf.mtype != pmetric.MetricTypeGauge { + // If it is a merged family type, then it should match the + // family name when suffixes are trimmed. 
+ return normalizeMetricName(metricName) == mf.name + } + // If it isn't a merged type, the metricName and family name should match + return metricName == mf.name +} + +func (mg *metricGroup) sortPoints() { + sort.Slice(mg.complexValue, func(i, j int) bool { + return mg.complexValue[i].boundary < mg.complexValue[j].boundary + }) +} + +func (mg *metricGroup) toDistributionPoint(dest pmetric.HistogramDataPointSlice) { + if !mg.hasCount { + return + } + + mg.sortPoints() + + bucketCount := len(mg.complexValue) + 1 + // if the final bucket is +Inf, we ignore it + if bucketCount > 1 && mg.complexValue[bucketCount-2].boundary == math.Inf(1) { + bucketCount-- + } + + // for OTLP the bounds won't include +inf + bounds := make([]float64, bucketCount-1) + bucketCounts := make([]uint64, bucketCount) + var adjustedCount float64 + + pointIsStale := value.IsStaleNaN(mg.sum) || value.IsStaleNaN(mg.count) + for i := 0; i < bucketCount-1; i++ { + bounds[i] = mg.complexValue[i].boundary + adjustedCount = mg.complexValue[i].value + + // Buckets still need to be sent to know to set them as stale, + // but a staleness NaN converted to uint64 would be an extremely large number. + // Setting to 0 instead. + if pointIsStale { + adjustedCount = 0 + } else if i != 0 { + adjustedCount -= mg.complexValue[i-1].value + } + bucketCounts[i] = uint64(adjustedCount) + } + + // Add the final bucket based on the total count + adjustedCount = mg.count + if pointIsStale { + adjustedCount = 0 + } else if bucketCount > 1 { + adjustedCount -= mg.complexValue[bucketCount-2].value + } + bucketCounts[bucketCount-1] = uint64(adjustedCount) + + point := dest.AppendEmpty() + + if pointIsStale { + point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + } else { + point.SetCount(uint64(mg.count)) + if mg.hasSum { + point.SetSum(mg.sum) + } + } + + point.ExplicitBounds().FromRaw(bounds) + point.BucketCounts().FromRaw(bucketCounts) + + // The timestamp MUST be in retrieved from milliseconds and converted to nanoseconds. + tsNanos := timestampFromMs(mg.ts) + if mg.created != 0 { + point.SetStartTimestamp(timestampFromFloat64(mg.created)) + } else { + // metrics_adjuster adjusts the startTimestamp to the initial scrape timestamp + point.SetStartTimestamp(tsNanos) + } + point.SetTimestamp(tsNanos) + populateAttributes(pmetric.MetricTypeHistogram, mg.ls, point.Attributes()) + mg.setExemplars(point.Exemplars()) +} + +// toExponentialHistogramDataPoints is based on +// https://opentelemetry.io/docs/specs/otel/compatibility/prometheus_and_openmetrics/#exponential-histograms +func (mg *metricGroup) toExponentialHistogramDataPoints(dest pmetric.ExponentialHistogramDataPointSlice) { + if !mg.hasCount { + return + } + point := dest.AppendEmpty() + point.SetTimestamp(timestampFromMs(mg.ts)) + + // We do not set Min or Max as native histograms don't have that information. + switch { + case mg.fhValue != nil: + fh := mg.fhValue + + if value.IsStaleNaN(fh.Sum) { + point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + // The count and sum are initialized to 0, so we don't need to set them. + } else { + point.SetScale(fh.Schema) + // Input is a float native histogram. This conversion will lose + // precision,but we don't actually expect float histograms in scrape, + // since these are typically the result of operations on integer + // native histograms in the database. 
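+ // Editor's note: the float count is truncated toward zero here, e.g. a
+ // (hypothetical) fh.Count of 12.7 becomes 12 in the OTLP data point.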
+ point.SetCount(uint64(fh.Count)) + point.SetSum(fh.Sum) + point.SetZeroThreshold(fh.ZeroThreshold) + point.SetZeroCount(uint64(fh.ZeroCount)) + + if len(fh.PositiveSpans) > 0 { + point.Positive().SetOffset(fh.PositiveSpans[0].Offset - 1) // -1 because OTEL offset are for the lower bound, not the upper bound + convertAbsoluteBuckets(fh.PositiveSpans, fh.PositiveBuckets, point.Positive().BucketCounts()) + } + if len(fh.NegativeSpans) > 0 { + point.Negative().SetOffset(fh.NegativeSpans[0].Offset - 1) // -1 because OTEL offset are for the lower bound, not the upper bound + convertAbsoluteBuckets(fh.NegativeSpans, fh.NegativeBuckets, point.Negative().BucketCounts()) + } + } + + case mg.hValue != nil: + h := mg.hValue + + if value.IsStaleNaN(h.Sum) { + point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + // The count and sum are initialized to 0, so we don't need to set them. + } else { + point.SetScale(h.Schema) + point.SetCount(h.Count) + point.SetSum(h.Sum) + point.SetZeroThreshold(h.ZeroThreshold) + point.SetZeroCount(h.ZeroCount) + + if len(h.PositiveSpans) > 0 { + point.Positive().SetOffset(h.PositiveSpans[0].Offset - 1) // -1 because OTEL offset are for the lower bound, not the upper bound + convertDeltaBuckets(h.PositiveSpans, h.PositiveBuckets, point.Positive().BucketCounts()) + } + if len(h.NegativeSpans) > 0 { + point.Negative().SetOffset(h.NegativeSpans[0].Offset - 1) // -1 because OTEL offset are for the lower bound, not the upper bound + convertDeltaBuckets(h.NegativeSpans, h.NegativeBuckets, point.Negative().BucketCounts()) + } + } + + default: + // This should never happen. + return + } + + tsNanos := timestampFromMs(mg.ts) + if mg.created != 0 { + point.SetStartTimestamp(timestampFromFloat64(mg.created)) + } else { + // metrics_adjuster adjusts the startTimestamp to the initial scrape timestamp + point.SetStartTimestamp(tsNanos) + } + point.SetTimestamp(tsNanos) + populateAttributes(pmetric.MetricTypeHistogram, mg.ls, point.Attributes()) + mg.setExemplars(point.Exemplars()) +} + +func convertDeltaBuckets(spans []histogram.Span, deltas []int64, buckets pcommon.UInt64Slice) { + buckets.EnsureCapacity(len(deltas)) + bucketIdx := 0 + bucketCount := int64(0) + for spanIdx, span := range spans { + if spanIdx > 0 { + for i := int32(0); i < span.Offset; i++ { + buckets.Append(uint64(0)) + } + } + for i := uint32(0); i < span.Length; i++ { + bucketCount += deltas[bucketIdx] + bucketIdx++ + buckets.Append(uint64(bucketCount)) + } + } +} + +func convertAbsoluteBuckets(spans []histogram.Span, counts []float64, buckets pcommon.UInt64Slice) { + buckets.EnsureCapacity(len(counts)) + bucketIdx := 0 + for spanIdx, span := range spans { + if spanIdx > 0 { + for i := int32(0); i < span.Offset; i++ { + buckets.Append(uint64(0)) + } + } + for i := uint32(0); i < span.Length; i++ { + buckets.Append(uint64(counts[bucketIdx])) + bucketIdx++ + } + } +} + +func (mg *metricGroup) setExemplars(exemplars pmetric.ExemplarSlice) { + if mg == nil { + return + } + if mg.exemplars.Len() > 0 { + mg.exemplars.MoveAndAppendTo(exemplars) + } +} + +func (mg *metricGroup) toSummaryPoint(dest pmetric.SummaryDataPointSlice) { + // expecting count to be provided, however, in the following two cases, they can be missed. + // 1. data is corrupted + // 2. 
ignored by startValue evaluation
+ if !mg.hasCount {
+ return
+ }
+
+ mg.sortPoints()
+
+ point := dest.AppendEmpty()
+ pointIsStale := value.IsStaleNaN(mg.sum) || value.IsStaleNaN(mg.count)
+ if pointIsStale {
+ point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true))
+ } else {
+ if mg.hasSum {
+ point.SetSum(mg.sum)
+ }
+ point.SetCount(uint64(mg.count))
+ }
+
+ quantileValues := point.QuantileValues()
+ for _, p := range mg.complexValue {
+ quantile := quantileValues.AppendEmpty()
+ // Quantiles still need to be sent so that they can be marked as stale,
+ // but a staleness NaN converted to uint64 would be an extremely large number.
+ // By not setting the quantile value, it will default to 0.
+ if !pointIsStale {
+ quantile.SetValue(p.value)
+ }
+ quantile.SetQuantile(p.boundary)
+ }
+
+ // Based on the summary description from https://prometheus.io/docs/concepts/metric_types/#summary,
+ // the quantiles are calculated over a sliding time window, while the count is the total count of
+ // observations and the corresponding sum is the sum of all observed values; the sum and count are
+ // therefore used at the global level of the metricspb.SummaryValue.
+ // The timestamp is in milliseconds and MUST be converted to nanoseconds.
+ tsNanos := timestampFromMs(mg.ts)
+ point.SetTimestamp(tsNanos)
+ if mg.created != 0 {
+ point.SetStartTimestamp(timestampFromFloat64(mg.created))
+ } else {
+ // metrics_adjuster adjusts the startTimestamp to the initial scrape timestamp
+ point.SetStartTimestamp(tsNanos)
+ }
+ populateAttributes(pmetric.MetricTypeSummary, mg.ls, point.Attributes())
+}
+
+func (mg *metricGroup) toNumberDataPoint(dest pmetric.NumberDataPointSlice) {
+ tsNanos := timestampFromMs(mg.ts)
+ point := dest.AppendEmpty()
+ // gauge/undefined types have no start time.
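+ // Editor's note (illustrative): cumulative sums such as a scraped
+ // "http_requests_total" need a start timestamp so rates can be computed, whereas a
+ // gauge like "go_goroutines" is just a point-in-time value, which is why the start
+ // timestamp is only set in the Sum branch below.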
+ if mg.mtype == pmetric.MetricTypeSum { + if mg.created != 0 { + point.SetStartTimestamp(timestampFromFloat64(mg.created)) + } else { + // metrics_adjuster adjusts the startTimestamp to the initial scrape timestamp + point.SetStartTimestamp(tsNanos) + } + } + point.SetTimestamp(tsNanos) + if value.IsStaleNaN(mg.value) { + point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + } else { + point.SetDoubleValue(mg.value) + } + populateAttributes(pmetric.MetricTypeGauge, mg.ls, point.Attributes()) + mg.setExemplars(point.Exemplars()) +} + +func populateAttributes(mType pmetric.MetricType, ls labels.Labels, dest pcommon.Map) { + dest.EnsureCapacity(ls.Len()) + names := getSortedNotUsefulLabels(mType) + j := 0 + ls.Range(func(l labels.Label) { + for j < len(names) && names[j] < l.Name { + j++ + } + if j < len(names) && l.Name == names[j] { + return + } + if l.Value == "" { + // empty label values should be omitted + return + } + dest.PutStr(l.Name, l.Value) + }) +} + +func (mf *metricFamily) loadMetricGroupOrCreate(groupKey uint64, ls labels.Labels, ts int64) *metricGroup { + mg, ok := mf.groups[groupKey] + if !ok { + mg = &metricGroup{ + mtype: mf.mtype, + ts: ts, + ls: ls, + exemplars: pmetric.NewExemplarSlice(), + } + mf.groups[groupKey] = mg + // maintaining data insertion order is helpful to generate stable/reproducible metric output + mf.groupOrders = append(mf.groupOrders, mg) + } + return mg +} + +func (mf *metricFamily) addSeries(seriesRef uint64, metricName string, ls labels.Labels, t int64, v float64) error { + mg := mf.loadMetricGroupOrCreate(seriesRef, ls, t) + if mg.ts != t { + return fmt.Errorf("inconsistent timestamps on metric points for metric %v", metricName) + } + switch mf.mtype { + case pmetric.MetricTypeHistogram, pmetric.MetricTypeSummary: + switch { + case strings.HasSuffix(metricName, metricsSuffixSum): + mg.sum = v + mg.hasSum = true + case strings.HasSuffix(metricName, metricsSuffixCount): + // always use the timestamp from count, because is the only required field for histograms and summaries. 
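+ // Editor's note: a histogram/summary family is assembled from several scraped
+ // series (e.g. "x_sum", "x_count", "x_bucket"/quantiles); since only the count is
+ // guaranteed to be present, its timestamp anchors the whole group.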
+ mg.ts = t + mg.count = v + mg.hasCount = true + case metricName == mf.metadata.Metric+metricSuffixCreated: + mg.created = v + default: + boundary, err := getBoundary(mf.mtype, ls) + if err != nil { + return err + } + mg.complexValue = append(mg.complexValue, &dataPoint{value: v, boundary: boundary}) + } + case pmetric.MetricTypeExponentialHistogram: + if metricName == mf.metadata.Metric+metricSuffixCreated { + mg.created = v + } + case pmetric.MetricTypeSum: + if metricName == mf.metadata.Metric+metricSuffixCreated { + mg.created = v + } else { + mg.value = v + } + case pmetric.MetricTypeEmpty, pmetric.MetricTypeGauge: + fallthrough + default: + mg.value = v + } + + return nil +} + +func (mf *metricFamily) addExponentialHistogramSeries(seriesRef uint64, metricName string, ls labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) error { + mg := mf.loadMetricGroupOrCreate(seriesRef, ls, t) + if mg.ts != t { + return fmt.Errorf("inconsistent timestamps on metric points for metric %v", metricName) + } + if mg.mtype != pmetric.MetricTypeExponentialHistogram { + return fmt.Errorf("metric type mismatch for exponential histogram metric %v type %s", metricName, mg.mtype.String()) + } + switch { + case fh != nil: + if mg.hValue != nil { + return fmt.Errorf("exponential histogram %v already has float counts", metricName) + } + mg.count = fh.Count + mg.sum = fh.Sum + mg.hasCount = true + mg.hasSum = true + mg.fhValue = fh + case h != nil: + if mg.fhValue != nil { + return fmt.Errorf("exponential histogram %v already has integer counts", metricName) + } + mg.count = float64(h.Count) + mg.sum = h.Sum + mg.hasCount = true + mg.hasSum = true + mg.hValue = h + } + return nil +} + +func (mf *metricFamily) appendMetric(metrics pmetric.MetricSlice, trimSuffixes bool) { + metric := pmetric.NewMetric() + // Trims type and unit suffixes from metric name + name := mf.name + if trimSuffixes { + name = prometheus.TrimPromSuffixes(name, mf.mtype, mf.metadata.Unit) + } + metric.SetName(name) + metric.SetDescription(mf.metadata.Help) + metric.SetUnit(prometheus.UnitWordToUCUM(mf.metadata.Unit)) + metric.Metadata().PutStr(prometheus.MetricMetadataTypeKey, string(mf.metadata.Type)) + + var pointCount int + + switch mf.mtype { + case pmetric.MetricTypeHistogram: + histogram := metric.SetEmptyHistogram() + histogram.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + hdpL := histogram.DataPoints() + for _, mg := range mf.groupOrders { + mg.toDistributionPoint(hdpL) + } + pointCount = hdpL.Len() + + case pmetric.MetricTypeSummary: + summary := metric.SetEmptySummary() + sdpL := summary.DataPoints() + for _, mg := range mf.groupOrders { + mg.toSummaryPoint(sdpL) + } + pointCount = sdpL.Len() + + case pmetric.MetricTypeSum: + sum := metric.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + sum.SetIsMonotonic(mf.isMonotonic) + sdpL := sum.DataPoints() + for _, mg := range mf.groupOrders { + mg.toNumberDataPoint(sdpL) + } + pointCount = sdpL.Len() + + case pmetric.MetricTypeExponentialHistogram: + histogram := metric.SetEmptyExponentialHistogram() + histogram.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + hdpL := histogram.DataPoints() + for _, mg := range mf.groupOrders { + mg.toExponentialHistogramDataPoints(hdpL) + } + pointCount = hdpL.Len() + + case pmetric.MetricTypeEmpty, pmetric.MetricTypeGauge: + fallthrough + default: // Everything else should be set to a Gauge. 
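+ // Editor's note: untyped/unknown Prometheus metrics (for example a series exposed
+ // without a "# TYPE" line) take this branch and are emitted as OTLP gauges.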
+ gauge := metric.SetEmptyGauge() + gdpL := gauge.DataPoints() + for _, mg := range mf.groupOrders { + mg.toNumberDataPoint(gdpL) + } + pointCount = gdpL.Len() + } + + if pointCount == 0 { + return + } + + metric.MoveTo(metrics.AppendEmpty()) +} + +func (mf *metricFamily) addExemplar(seriesRef uint64, e exemplar.Exemplar) { + mg := mf.groups[seriesRef] + if mg == nil { + return + } + es := mg.exemplars + convertExemplar(e, es.AppendEmpty()) +} + +func convertExemplar(pe exemplar.Exemplar, e pmetric.Exemplar) { + e.SetTimestamp(timestampFromMs(pe.Ts)) + e.SetDoubleValue(pe.Value) + e.FilteredAttributes().EnsureCapacity(pe.Labels.Len()) + pe.Labels.Range(func(lb labels.Label) { + switch strings.ToLower(lb.Name) { + case prometheus.ExemplarTraceIDKey: + var tid [16]byte + err := decodeAndCopyToLowerBytes(tid[:], []byte(lb.Value)) + if err == nil { + e.SetTraceID(tid) + } else { + e.FilteredAttributes().PutStr(lb.Name, lb.Value) + } + case prometheus.ExemplarSpanIDKey: + var sid [8]byte + err := decodeAndCopyToLowerBytes(sid[:], []byte(lb.Value)) + if err == nil { + e.SetSpanID(sid) + } else { + e.FilteredAttributes().PutStr(lb.Name, lb.Value) + } + default: + e.FilteredAttributes().PutStr(lb.Name, lb.Value) + } + }) +} + +/* + decodeAndCopyToLowerBytes copies src to dst on lower bytes instead of higher + +1. If len(src) > len(dst) -> copy first len(dst) bytes as it is. Example -> src = []byte{0xab,0xcd,0xef,0xgh,0xij}, dst = [2]byte, result dst = [2]byte{0xab, 0xcd} +2. If len(src) = len(dst) -> copy src to dst as it is +3. If len(src) < len(dst) -> prepend required 0s and then add src to dst. Example -> src = []byte{0xab, 0xcd}, dst = [8]byte, result dst = [8]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xab, 0xcd} +*/ +func decodeAndCopyToLowerBytes(dst []byte, src []byte) error { + var err error + decodedLen := hex.DecodedLen(len(src)) + if decodedLen >= len(dst) { + _, err = hex.Decode(dst, src[:hex.EncodedLen(len(dst))]) + } else { + _, err = hex.Decode(dst[len(dst)-decodedLen:], src) + } + return err +} diff --git a/pkg/promotel/prometheusreceiver/internal/metricfamily_test.go b/pkg/promotel/prometheusreceiver/internal/metricfamily_test.go new file mode 100644 index 000000000..a15599173 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/metricfamily_test.go @@ -0,0 +1,904 @@ +package internal + +import ( + "math" + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.uber.org/zap" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +type testMetadataStore map[string]scrape.MetricMetadata + +func (tmc testMetadataStore) GetMetadata(familyName string) (scrape.MetricMetadata, bool) { + lookup, ok := tmc[familyName] + return lookup, ok +} + +func (tmc testMetadataStore) ListMetadata() []scrape.MetricMetadata { return nil } + +func (tmc testMetadataStore) SizeMetadata() int { return 0 } + +func (tmc testMetadataStore) LengthMetadata() int { + return len(tmc) +} + +var mc = testMetadataStore{ + "counter": scrape.MetricMetadata{ + Metric: "cr", + Type: model.MetricTypeCounter, + Help: "This is some help for a counter", + Unit: "By", + }, + "counter_created": scrape.MetricMetadata{ + Metric: "counter", + Type: model.MetricTypeCounter, + Help: "This is 
some help for a counter", + Unit: "By", + }, + "gauge": scrape.MetricMetadata{ + Metric: "ge", + Type: model.MetricTypeGauge, + Help: "This is some help for a gauge", + Unit: "1", + }, + "gaugehistogram": scrape.MetricMetadata{ + Metric: "gh", + Type: model.MetricTypeGaugeHistogram, + Help: "This is some help for a gauge histogram", + Unit: "?", + }, + "histogram": scrape.MetricMetadata{ + Metric: "hg", + Type: model.MetricTypeHistogram, + Help: "This is some help for a histogram", + Unit: "ms", + }, + "histogram_with_created": scrape.MetricMetadata{ + Metric: "histogram_with_created", + Type: model.MetricTypeHistogram, + Help: "This is some help for a histogram", + Unit: "ms", + }, + "histogram_stale": scrape.MetricMetadata{ + Metric: "hg_stale", + Type: model.MetricTypeHistogram, + Help: "This is some help for a histogram", + Unit: "ms", + }, + "summary": scrape.MetricMetadata{ + Metric: "s", + Type: model.MetricTypeSummary, + Help: "This is some help for a summary", + Unit: "ms", + }, + "summary_with_created": scrape.MetricMetadata{ + Metric: "summary_with_created", + Type: model.MetricTypeSummary, + Help: "This is some help for a summary", + Unit: "ms", + }, + "summary_stale": scrape.MetricMetadata{ + Metric: "s_stale", + Type: model.MetricTypeSummary, + Help: "This is some help for a summary", + Unit: "ms", + }, + "unknown": scrape.MetricMetadata{ + Metric: "u", + Type: model.MetricTypeUnknown, + Help: "This is some help for an unknown metric", + Unit: "?", + }, +} + +func TestMetricGroupData_toDistributionUnitTest(t *testing.T) { + type scrape struct { + at int64 + value float64 + metric string + extraLabel labels.Label + } + tests := []struct { + name string + metricName string + labels labels.Labels + scrapes []*scrape + want func() pmetric.HistogramDataPoint + wantErr bool + intervalStartTimeMs int64 + }{ + { + name: "histogram with startTimestamp", + metricName: "histogram", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 11, value: 66, metric: "histogram_count"}, + {at: 11, value: 1004.78, metric: "histogram_sum"}, + {at: 11, value: 33, metric: "histogram_bucket", extraLabel: labels.Label{Name: "le", Value: "0.75"}}, + {at: 11, value: 55, metric: "histogram_bucket", extraLabel: labels.Label{Name: "le", Value: "2.75"}}, + {at: 11, value: 66, metric: "histogram_bucket", extraLabel: labels.Label{Name: "le", Value: "+Inf"}}, + }, + want: func() pmetric.HistogramDataPoint { + point := pmetric.NewHistogramDataPoint() + point.SetCount(66) + point.SetSum(1004.78) + point.SetTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.ExplicitBounds().FromRaw([]float64{0.75, 2.75}) + point.BucketCounts().FromRaw([]uint64{33, 22, 11}) + point.SetStartTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. 
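+ // Editor's note: {33, 22, 11} are the de-cumulated buckets of the scraped
+ // cumulative values {33, 55, 66}: 55-33=22 and 66-55=11, with the +Inf bucket
+ // folded into the final count.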
+ attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + name: "histogram with startTimestamp from _created", + metricName: "histogram_with_created", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A"}), + scrapes: []*scrape{ + {at: 11, value: 66, metric: "histogram_with_created_count"}, + {at: 11, value: 1004.78, metric: "histogram_with_created_sum"}, + {at: 11, value: 600.78, metric: "histogram_with_created_created"}, + { + at: 11, + value: 33, + metric: "histogram_with_created_bucket", + extraLabel: labels.Label{Name: "le", Value: "0.75"}, + }, + { + at: 11, + value: 55, + metric: "histogram_with_created_bucket", + extraLabel: labels.Label{Name: "le", Value: "2.75"}, + }, + { + at: 11, + value: 66, + metric: "histogram_with_created_bucket", + extraLabel: labels.Label{Name: "le", Value: "+Inf"}, + }, + }, + want: func() pmetric.HistogramDataPoint { + point := pmetric.NewHistogramDataPoint() + point.SetCount(66) + point.SetSum(1004.78) + + // the time in milliseconds -> nanoseconds. + point.SetTimestamp(pcommon.Timestamp(11 * time.Millisecond)) + point.SetStartTimestamp(timestampFromFloat64(600.78)) + + point.ExplicitBounds().FromRaw([]float64{0.75, 2.75}) + point.BucketCounts().FromRaw([]uint64{33, 22, 11}) + attributes := point.Attributes() + attributes.PutStr("a", "A") + return point + }, + }, + { + name: "histogram that is stale", + metricName: "histogram_stale", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 11, value: math.Float64frombits(value.StaleNaN), metric: "histogram_stale_count"}, + {at: 11, value: math.Float64frombits(value.StaleNaN), metric: "histogram_stale_sum"}, + {at: 11, value: math.Float64frombits(value.StaleNaN), metric: "histogram_bucket", extraLabel: labels.Label{Name: "le", Value: "0.75"}}, + {at: 11, value: math.Float64frombits(value.StaleNaN), metric: "histogram_bucket", extraLabel: labels.Label{Name: "le", Value: "2.75"}}, + {at: 11, value: math.Float64frombits(value.StaleNaN), metric: "histogram_bucket", extraLabel: labels.Label{Name: "le", Value: "+Inf"}}, + }, + want: func() pmetric.HistogramDataPoint { + point := pmetric.NewHistogramDataPoint() + point.SetTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + point.ExplicitBounds().FromRaw([]float64{0.75, 2.75}) + point.BucketCounts().FromRaw([]uint64{0, 0, 0}) + point.SetStartTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. 
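+ // Editor's note: the staleness NaNs do not survive as counts; the point is
+ // flagged NoRecordedValue and the bucket counts are zeroed instead.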
+ attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + name: "histogram with inconsistent timestamps", + metricName: "histogram_inconsistent_ts", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "le": "0.75", "b": "B"}), + scrapes: []*scrape{ + {at: 11, value: math.Float64frombits(value.StaleNaN), metric: "histogram_stale_count"}, + {at: 12, value: math.Float64frombits(value.StaleNaN), metric: "histogram_stale_sum"}, + {at: 13, value: math.Float64frombits(value.StaleNaN), metric: "value"}, + }, + wantErr: true, + }, + { + name: "histogram without buckets", + metricName: "histogram", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 11, value: 66, metric: "histogram_count"}, + {at: 11, value: 1004.78, metric: "histogram_sum"}, + }, + want: func() pmetric.HistogramDataPoint { + point := pmetric.NewHistogramDataPoint() + point.SetCount(66) + point.SetSum(1004.78) + point.SetTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetStartTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.BucketCounts().FromRaw([]uint64{66}) + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + mp := newMetricFamily(tt.metricName, mc, zap.NewNop()) + for i, tv := range tt.scrapes { + var lbls labels.Labels + if tv.extraLabel.Name != "" { + lbls = labels.NewBuilder(tt.labels).Set(tv.extraLabel.Name, tv.extraLabel.Value).Labels() + } else { + lbls = tt.labels.Copy() + } + sRef, _ := getSeriesRef(nil, lbls, mp.mtype) + err := mp.addSeries(sRef, tv.metric, lbls, tv.at, tv.value) + if tt.wantErr { + if i != 0 { + require.Error(t, err) + } + } else { + require.NoError(t, err) + } + } + if tt.wantErr { + // Don't check the result if we got an error + return + } + + require.Len(t, mp.groups, 1) + + sl := pmetric.NewMetricSlice() + mp.appendMetric(sl, false) + + require.Equal(t, 1, sl.Len(), "Exactly one metric expected") + metric := sl.At(0) + require.Equal(t, mc[tt.metricName].Help, metric.Description(), "Expected help metadata in metric description") + require.Equal(t, mc[tt.metricName].Unit, metric.Unit(), "Expected unit metadata in metric") + + hdpL := metric.Histogram().DataPoints() + require.Equal(t, 1, hdpL.Len(), "Exactly one point expected") + got := hdpL.At(0) + want := tt.want() + require.Equal(t, want, got, "Expected the points to be equal") + }) + } +} + +func TestMetricGroupData_toExponentialDistributionUnitTest(t *testing.T) { + type scrape struct { + at int64 + metric string + extraLabel labels.Label + + // Only one kind of value should be set. + value float64 + integerHistogram *histogram.Histogram + floatHistogram *histogram.FloatHistogram // TODO: add tests for float histograms. 
+ } + tests := []struct { + name string + metricName string + labels labels.Labels + scrapes []*scrape + want func() pmetric.ExponentialHistogramDataPoint + wantErr bool + intervalStartTimeMs int64 + }{ + { + name: "integer histogram with startTimestamp", + metricName: "request_duration_seconds", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + { + at: 11, + metric: "request_duration_seconds", + integerHistogram: &histogram.Histogram{ + CounterResetHint: histogram.UnknownCounterReset, + Schema: 1, + ZeroThreshold: 0.42, + ZeroCount: 1, + Count: 66, + Sum: 1004.78, + PositiveSpans: []histogram.Span{{Offset: 1, Length: 2}, {Offset: 3, Length: 1}}, + PositiveBuckets: []int64{33, -30, 26}, // Delta encoded counts: 33, 3=(33-30), 30=(3+27) -> 65 + NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{1}, // Delta encoded counts: 1 + }, + }, + }, + want: func() pmetric.ExponentialHistogramDataPoint { + point := pmetric.NewExponentialHistogramDataPoint() + point.SetCount(66) + point.SetSum(1004.78) + point.SetTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetStartTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetScale(1) + point.SetZeroThreshold(0.42) + point.SetZeroCount(1) + point.Positive().SetOffset(0) + point.Positive().BucketCounts().FromRaw([]uint64{33, 3, 0, 0, 0, 29}) + point.Negative().SetOffset(-1) + point.Negative().BucketCounts().FromRaw([]uint64{1}) + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + name: "integer histogram with startTimestamp from _created", + metricName: "request_duration_seconds", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A"}), + scrapes: []*scrape{ + { + at: 11, + metric: "request_duration_seconds", + integerHistogram: &histogram.Histogram{ + CounterResetHint: histogram.UnknownCounterReset, + Schema: 1, + ZeroThreshold: 0.42, + ZeroCount: 1, + Count: 66, + Sum: 1004.78, + PositiveSpans: []histogram.Span{{Offset: 1, Length: 2}, {Offset: 3, Length: 1}}, + PositiveBuckets: []int64{33, -30, 26}, // Delta encoded counts: 33, 3=(33-30), 30=(3+27) -> 65 + NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{1}, // Delta encoded counts: 1 + }, + }, + { + at: 11, + metric: "request_duration_seconds_created", + value: 600.78, + }, + }, + want: func() pmetric.ExponentialHistogramDataPoint { + point := pmetric.NewExponentialHistogramDataPoint() + point.SetCount(66) + point.SetSum(1004.78) + point.SetTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetStartTimestamp(timestampFromFloat64(600.78)) // the time in milliseconds -> nanoseconds. 
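+ // Editor's note: the positive buckets set below, {33, 3, 0, 0, 0, 29}, are the
+ // delta-encoded input {33, -30, 26} expanded to absolute counts {33, 3, 29} and
+ // laid out according to the spans (a gap of three empty buckets between them).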
+ point.SetScale(1) + point.SetZeroThreshold(0.42) + point.SetZeroCount(1) + point.Positive().SetOffset(0) + point.Positive().BucketCounts().FromRaw([]uint64{33, 3, 0, 0, 0, 29}) + point.Negative().SetOffset(-1) + point.Negative().BucketCounts().FromRaw([]uint64{1}) + attributes := point.Attributes() + attributes.PutStr("a", "A") + return point + }, + }, + { + name: "integer histogram that is stale", + metricName: "request_duration_seconds", + intervalStartTimeMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + { + at: 11, + metric: "request_duration_seconds", + integerHistogram: &histogram.Histogram{ + Sum: math.Float64frombits(value.StaleNaN), + }, + }, + }, + want: func() pmetric.ExponentialHistogramDataPoint { + point := pmetric.NewExponentialHistogramDataPoint() + point.SetTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + point.SetStartTimestamp(pcommon.Timestamp(11 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + mp := newMetricFamily(tt.metricName, mc, zap.NewNop()) + for i, tv := range tt.scrapes { + var lbls labels.Labels + if tv.extraLabel.Name != "" { + lbls = labels.NewBuilder(tt.labels).Set(tv.extraLabel.Name, tv.extraLabel.Value).Labels() + } else { + lbls = tt.labels.Copy() + } + + var err error + switch { + case tv.integerHistogram != nil: + mp.mtype = pmetric.MetricTypeExponentialHistogram + sRef, _ := getSeriesRef(nil, lbls, mp.mtype) + err = mp.addExponentialHistogramSeries(sRef, tv.metric, lbls, tv.at, tv.integerHistogram, nil) + case tv.floatHistogram != nil: + mp.mtype = pmetric.MetricTypeExponentialHistogram + sRef, _ := getSeriesRef(nil, lbls, mp.mtype) + err = mp.addExponentialHistogramSeries(sRef, tv.metric, lbls, tv.at, nil, tv.floatHistogram) + default: + sRef, _ := getSeriesRef(nil, lbls, mp.mtype) + err = mp.addSeries(sRef, tv.metric, lbls, tv.at, tv.value) + } + if tt.wantErr { + if i != 0 { + require.Error(t, err) + } + } else { + require.NoError(t, err) + } + } + if tt.wantErr { + // Don't check the result if we got an error + return + } + + require.Len(t, mp.groups, 1) + + sl := pmetric.NewMetricSlice() + mp.appendMetric(sl, false) + + require.Equal(t, 1, sl.Len(), "Exactly one metric expected") + metric := sl.At(0) + require.Equal(t, mc[tt.metricName].Help, metric.Description(), "Expected help metadata in metric description") + require.Equal(t, mc[tt.metricName].Unit, metric.Unit(), "Expected unit metadata in metric") + + hdpL := metric.ExponentialHistogram().DataPoints() + require.Equal(t, 1, hdpL.Len(), "Exactly one point expected") + got := hdpL.At(0) + want := tt.want() + require.Equal(t, want, got, "Expected the points to be equal") + }) + } +} + +func TestMetricGroupData_toSummaryUnitTest(t *testing.T) { + type scrape struct { + at int64 + value float64 + metric string + } + + type labelsScrapes struct { + labels labels.Labels + scrapes []*scrape + } + tests := []struct { + name string + labelsScrapes []*labelsScrapes + want func() pmetric.SummaryDataPoint + wantErr bool + }{ + { + name: "summary", + labelsScrapes: []*labelsScrapes{ + { + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 10, metric: 
"summary_count"}, + {at: 14, value: 15, metric: "summary_sum"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.0", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 8, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.75", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 33.7, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.50", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 27, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.90", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 56, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.99", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 82, metric: "value"}, + }, + }, + }, + want: func() pmetric.SummaryDataPoint { + point := pmetric.NewSummaryDataPoint() + point.SetCount(10) + point.SetSum(15) + qtL := point.QuantileValues() + qn0 := qtL.AppendEmpty() + qn0.SetQuantile(0) + qn0.SetValue(8) + qn50 := qtL.AppendEmpty() + qn50.SetQuantile(.5) + qn50.SetValue(27) + qn75 := qtL.AppendEmpty() + qn75.SetQuantile(.75) + qn75.SetValue(33.7) + qn90 := qtL.AppendEmpty() + qn90.SetQuantile(.9) + qn90.SetValue(56) + qn99 := qtL.AppendEmpty() + qn99.SetQuantile(.99) + qn99.SetValue(82) + point.SetTimestamp(pcommon.Timestamp(14 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetStartTimestamp(pcommon.Timestamp(14 * time.Millisecond)) // the time in milliseconds -> nanoseconds + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + name: "summary_with_created", + labelsScrapes: []*labelsScrapes{ + { + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 10, metric: "summary_with_created_count"}, + {at: 14, value: 15, metric: "summary_with_created_sum"}, + {at: 14, value: 150, metric: "summary_with_created_created"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.0", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 8, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.75", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 33.7, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.50", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 27, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.90", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 56, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.99", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 82, metric: "value"}, + }, + }, + }, + want: func() pmetric.SummaryDataPoint { + point := pmetric.NewSummaryDataPoint() + point.SetCount(10) + point.SetSum(15) + qtL := point.QuantileValues() + qn0 := qtL.AppendEmpty() + qn0.SetQuantile(0) + qn0.SetValue(8) + qn50 := qtL.AppendEmpty() + qn50.SetQuantile(.5) + qn50.SetValue(27) + qn75 := qtL.AppendEmpty() + qn75.SetQuantile(.75) + qn75.SetValue(33.7) + qn90 := qtL.AppendEmpty() + qn90.SetQuantile(.9) + qn90.SetValue(56) + qn99 := qtL.AppendEmpty() + qn99.SetQuantile(.99) + qn99.SetValue(82) + + // the time in milliseconds -> nanoseconds. 
+ point.SetTimestamp(pcommon.Timestamp(14 * time.Millisecond)) + point.SetStartTimestamp(timestampFromFloat64(150)) + + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + name: "summary_stale", + labelsScrapes: []*labelsScrapes{ + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.0", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 10, metric: "summary_stale_count"}, + {at: 14, value: 12, metric: "summary_stale_sum"}, + {at: 14, value: 8, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.75", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 10, metric: "summary_stale_count"}, + {at: 14, value: 1004.78, metric: "summary_stale_sum"}, + {at: 14, value: 33.7, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.50", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 10, metric: "summary_stale_count"}, + {at: 14, value: 13, metric: "summary_stale_sum"}, + {at: 14, value: 27, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.90", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: 10, metric: "summary_stale_count"}, + {at: 14, value: 14, metric: "summary_stale_sum"}, + {at: 14, value: 56, metric: "value"}, + }, + }, + { + labels: labels.FromMap(map[string]string{"a": "A", "quantile": "0.99", "b": "B"}), + scrapes: []*scrape{ + {at: 14, value: math.Float64frombits(value.StaleNaN), metric: "summary_stale_count"}, + {at: 14, value: math.Float64frombits(value.StaleNaN), metric: "summary_stale_sum"}, + {at: 14, value: math.Float64frombits(value.StaleNaN), metric: "value"}, + }, + }, + }, + want: func() pmetric.SummaryDataPoint { + point := pmetric.NewSummaryDataPoint() + qtL := point.QuantileValues() + qn0 := qtL.AppendEmpty() + point.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + qn0.SetQuantile(0) + qn0.SetValue(0) + qn50 := qtL.AppendEmpty() + qn50.SetQuantile(.5) + qn50.SetValue(0) + qn75 := qtL.AppendEmpty() + qn75.SetQuantile(.75) + qn75.SetValue(0) + qn90 := qtL.AppendEmpty() + qn90.SetQuantile(.9) + qn90.SetValue(0) + qn99 := qtL.AppendEmpty() + qn99.SetQuantile(.99) + qn99.SetValue(0) + point.SetTimestamp(pcommon.Timestamp(14 * time.Millisecond)) // the time in milliseconds -> nanoseconds. 
+ point.SetStartTimestamp(pcommon.Timestamp(14 * time.Millisecond)) // the time in milliseconds -> nanoseconds + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + name: "summary with inconsistent timestamps", + labelsScrapes: []*labelsScrapes{ + { + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 11, value: 10, metric: "summary_count"}, + {at: 14, value: 15, metric: "summary_sum"}, + }, + }, + }, + wantErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + mp := newMetricFamily(tt.name, mc, zap.NewNop()) + for _, lbs := range tt.labelsScrapes { + for i, scrape := range lbs.scrapes { + lb := lbs.labels.Copy() + sRef, _ := getSeriesRef(nil, lb, mp.mtype) + err := mp.addSeries(sRef, scrape.metric, lb, scrape.at, scrape.value) + if tt.wantErr { + // The first scrape won't have an error + if i != 0 { + require.Error(t, err) + } + } else { + require.NoError(t, err) + } + } + } + if tt.wantErr { + // Don't check the result if we got an error + return + } + + require.Len(t, mp.groups, 1) + + sl := pmetric.NewMetricSlice() + mp.appendMetric(sl, false) + + require.Equal(t, 1, sl.Len(), "Exactly one metric expected") + metric := sl.At(0) + require.Equal(t, mc[tt.name].Help, metric.Description(), "Expected help metadata in metric description") + require.Equal(t, mc[tt.name].Unit, metric.Unit(), "Expected unit metadata in metric") + + sdpL := metric.Summary().DataPoints() + require.Equal(t, 1, sdpL.Len(), "Exactly one point expected") + got := sdpL.At(0) + want := tt.want() + require.Equal(t, want, got, "Expected the points to be equal") + }) + } +} + +func TestMetricGroupData_toNumberDataUnitTest(t *testing.T) { + type scrape struct { + at int64 + value float64 + metric string + } + tests := []struct { + name string + metricKind string + labels labels.Labels + scrapes []*scrape + intervalStartTimestampMs int64 + want func() pmetric.NumberDataPoint + }{ + { + metricKind: "counter", + name: "counter:: startTimestampMs from _created", + intervalStartTimestampMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 13, value: 33.7, metric: "value"}, + {at: 13, value: 150, metric: "value_created"}, + }, + want: func() pmetric.NumberDataPoint { + point := pmetric.NewNumberDataPoint() + point.SetDoubleValue(150) + + // the time in milliseconds -> nanoseconds. + point.SetTimestamp(pcommon.Timestamp(13 * time.Millisecond)) + point.SetStartTimestamp(pcommon.Timestamp(13 * time.Millisecond)) + + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + metricKind: "counter_created", + name: "counter:: startTimestampMs from _created", + intervalStartTimestampMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 13, value: 33.7, metric: "counter"}, + {at: 13, value: 150, metric: "counter_created"}, + }, + want: func() pmetric.NumberDataPoint { + point := pmetric.NewNumberDataPoint() + point.SetDoubleValue(33.7) + + // the time in milliseconds -> nanoseconds. 
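+ // Editor's note: the "counter_created" sample (150) is converted by
+ // timestampFromFloat64 into the start timestamp set below, while the counter
+ // value itself stays 33.7.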
+ point.SetTimestamp(pcommon.Timestamp(13 * time.Millisecond)) + point.SetStartTimestamp(timestampFromFloat64(150)) + + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + metricKind: "counter", + name: "counter:: startTimestampMs of 11", + intervalStartTimestampMs: 11, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 13, value: 33.7, metric: "value"}, + }, + want: func() pmetric.NumberDataPoint { + point := pmetric.NewNumberDataPoint() + point.SetDoubleValue(33.7) + point.SetTimestamp(pcommon.Timestamp(13 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetStartTimestamp(pcommon.Timestamp(13 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + { + name: "counter:: startTimestampMs of 0", + metricKind: "counter", + intervalStartTimestampMs: 0, + labels: labels.FromMap(map[string]string{"a": "A", "b": "B"}), + scrapes: []*scrape{ + {at: 28, value: 99.9, metric: "value"}, + }, + want: func() pmetric.NumberDataPoint { + point := pmetric.NewNumberDataPoint() + point.SetDoubleValue(99.9) + point.SetTimestamp(pcommon.Timestamp(28 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + point.SetStartTimestamp(pcommon.Timestamp(28 * time.Millisecond)) // the time in milliseconds -> nanoseconds. + attributes := point.Attributes() + attributes.PutStr("a", "A") + attributes.PutStr("b", "B") + return point + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + mp := newMetricFamily(tt.metricKind, mc, zap.NewNop()) + for _, tv := range tt.scrapes { + lb := tt.labels.Copy() + sRef, _ := getSeriesRef(nil, lb, mp.mtype) + require.NoError(t, mp.addSeries(sRef, tv.metric, lb, tv.at, tv.value)) + } + + require.Len(t, mp.groups, 1) + + sl := pmetric.NewMetricSlice() + mp.appendMetric(sl, false) + + require.Equal(t, 1, sl.Len(), "Exactly one metric expected") + metric := sl.At(0) + require.Equal(t, mc[tt.metricKind].Help, metric.Description(), "Expected help metadata in metric description") + require.Equal(t, mc[tt.metricKind].Unit, metric.Unit(), "Expected unit metadata in metric") + + ndpL := metric.Sum().DataPoints() + require.Equal(t, 1, ndpL.Len(), "Exactly one point expected") + got := ndpL.At(0) + want := tt.want() + require.Equal(t, want, got, "Expected the points to be equal") + }) + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/metrics_adjuster.go b/pkg/promotel/prometheusreceiver/internal/metrics_adjuster.go new file mode 100644 index 000000000..5c67e8dbf --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/metrics_adjuster.go @@ -0,0 +1,488 @@ +package internal + +import ( + "errors" + "sync" + "time" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + semconv "go.opentelemetry.io/collector/semconv/v1.27.0" + "go.uber.org/zap" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil" +) + +// Notes on garbage collection (gc): +// +// Job-level gc: +// The Prometheus receiver will likely execute in a long running service whose lifetime may exceed +// the lifetimes of many of the jobs that it is collecting from. 
In order to keep the JobsMap from +// leaking memory for entries of no-longer existing jobs, the JobsMap needs to remove entries that +// haven't been accessed for a long period of time. +// +// Timeseries-level gc: +// Some jobs that the Prometheus receiver is collecting from may export timeseries based on metrics +// from other jobs (e.g. cAdvisor). In order to keep the timeseriesMap from leaking memory for entries +// of no-longer existing jobs, the timeseriesMap for each job needs to remove entries that haven't +// been accessed for a long period of time. +// +// The gc strategy uses a standard mark-and-sweep approach - each time a timeseriesMap is accessed, +// it is marked. Similarly, each time a timeseriesInfo is accessed, it is also marked. +// +// At the end of each JobsMap.get(), if the last time the JobsMap was gc'd exceeds the 'gcInterval', +// the JobsMap is locked and any timeseriesMaps that are unmarked are removed from the JobsMap +// otherwise the timeseriesMap is gc'd +// +// The gc for the timeseriesMap is straightforward - the map is locked and, for each timeseriesInfo +// in the map, if it has not been marked, it is removed otherwise it is unmarked. +// +// Alternative Strategies +// 1. If the job-level gc doesn't run often enough, or runs too often, a separate go routine can +// be spawned at JobMap creation time that gc's at periodic intervals. This approach potentially +// adds more contention and latency to each scrape so the current approach is used. Note that +// the go routine will need to be cancelled upon Shutdown(). +// 2. If the gc of each timeseriesMap during the gc of the JobsMap causes too much contention, +// the gc of timeseriesMaps can be moved to the end of MetricsAdjuster().AdjustMetricSlice(). This +// approach requires adding 'lastGC' Time and (potentially) a gcInterval duration to +// timeseriesMap so the current approach is used instead. + +// timeseriesInfo contains the information necessary to adjust from the initial point and to detect resets. +type timeseriesInfo struct { + mark bool + + number numberInfo + histogram histogramInfo + summary summaryInfo +} + +type numberInfo struct { + startTime pcommon.Timestamp + previousValue float64 +} + +type histogramInfo struct { + startTime pcommon.Timestamp + previousCount uint64 + previousSum float64 +} + +type summaryInfo struct { + startTime pcommon.Timestamp + previousCount uint64 + previousSum float64 +} + +type timeseriesKey struct { + name string + attributes [16]byte + aggTemporality pmetric.AggregationTemporality +} + +// timeseriesMap maps from a timeseries instance (metric * label values) to the timeseries info for +// the instance. +type timeseriesMap struct { + sync.RWMutex + // The mutex is used to protect access to the member fields. It is acquired for the entirety of + // AdjustMetricSlice() and also acquired by gc(). + + mark bool + tsiMap map[timeseriesKey]*timeseriesInfo +} + +// Get the timeseriesInfo for the timeseries associated with the metric and label values. +func (tsm *timeseriesMap) get(metric pmetric.Metric, kv pcommon.Map) (*timeseriesInfo, bool) { + // This should only be invoked be functions called (directly or indirectly) by AdjustMetricSlice(). + // The lock protecting tsm.tsiMap is acquired there. 
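+	// The lookup key combines the metric name, a hash of the attribute values and, for
+	// (exponential) histograms, the aggregation temporality.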
+	name := metric.Name()
+	key := timeseriesKey{
+		name:       name,
+		attributes: getAttributesSignature(kv),
+	}
+	switch metric.Type() {
+	case pmetric.MetricTypeHistogram:
+		// There are 2 types of Histograms whose aggregation temporality needs distinguishing:
+		// * CumulativeHistogram
+		// * GaugeHistogram
+		key.aggTemporality = metric.Histogram().AggregationTemporality()
+	case pmetric.MetricTypeExponentialHistogram:
+		// There are 2 types of ExponentialHistograms whose aggregation temporality needs distinguishing:
+		// * CumulativeHistogram
+		// * GaugeHistogram
+		key.aggTemporality = metric.ExponentialHistogram().AggregationTemporality()
+	}
+
+	tsm.mark = true
+	tsi, ok := tsm.tsiMap[key]
+	if !ok {
+		tsi = &timeseriesInfo{}
+		tsm.tsiMap[key] = tsi
+	}
+	tsi.mark = true
+	return tsi, ok
+}
+
+// Create a unique string signature for attributes values sorted by attribute keys.
+func getAttributesSignature(m pcommon.Map) [16]byte {
+	clearedMap := pcommon.NewMap()
+	m.Range(func(k string, attrValue pcommon.Value) bool {
+		value := attrValue.Str()
+		if value != "" {
+			clearedMap.PutStr(k, value)
+		}
+		return true
+	})
+	return pdatautil.MapHash(clearedMap)
+}
+
+// Remove timeseries that have aged out.
+func (tsm *timeseriesMap) gc() {
+	tsm.Lock()
+	defer tsm.Unlock()
+	// this shouldn't happen under the current gc() strategy
+	if !tsm.mark {
+		return
+	}
+	for ts, tsi := range tsm.tsiMap {
+		if !tsi.mark {
+			delete(tsm.tsiMap, ts)
+		} else {
+			tsi.mark = false
+		}
+	}
+	tsm.mark = false
+}
+
+func newTimeseriesMap() *timeseriesMap {
+	return &timeseriesMap{mark: true, tsiMap: map[timeseriesKey]*timeseriesInfo{}}
+}
+
+// JobsMap maps from a job instance to a map of timeseries instances for the job.
+type JobsMap struct {
+	sync.RWMutex
+	// The mutex is used to protect access to the member fields. It is acquired for most of
+	// get() and also acquired by gc().
+
+	gcInterval time.Duration
+	lastGC     time.Time
+	jobsMap    map[string]*timeseriesMap
+}
+
+// NewJobsMap creates a new (empty) JobsMap.
+func NewJobsMap(gcInterval time.Duration) *JobsMap {
+	return &JobsMap{gcInterval: gcInterval, lastGC: time.Now(), jobsMap: make(map[string]*timeseriesMap)}
+}
+
+// Remove jobs and timeseries that have aged out.
+func (jm *JobsMap) gc() {
+	jm.Lock()
+	defer jm.Unlock()
+	// once the structure is locked, confirm that gc() is still necessary
+	if time.Since(jm.lastGC) > jm.gcInterval {
+		for sig, tsm := range jm.jobsMap {
+			tsm.RLock()
+			tsmNotMarked := !tsm.mark
+			// take a read lock here, no need to get a full lock as we have a lock on the JobsMap
+			tsm.RUnlock()
+			if tsmNotMarked {
+				delete(jm.jobsMap, sig)
+			} else {
+				// a full lock will be obtained in here, if required.
+				tsm.gc()
+			}
+		}
+		jm.lastGC = time.Now()
+	}
+}
+
+func (jm *JobsMap) maybeGC() {
+	// speculatively check if gc() is necessary, recheck once the structure is locked
+	jm.RLock()
+	defer jm.RUnlock()
+	if time.Since(jm.lastGC) > jm.gcInterval {
+		go jm.gc()
+	}
+}
+
+func (jm *JobsMap) get(job, instance string) *timeseriesMap {
+	sig := job + ":" + instance
+	// a read lock is taken here as we will not need to modify jobsMap if the target timeseriesMap is available.
+	jm.RLock()
+	tsm, ok := jm.jobsMap[sig]
+	jm.RUnlock()
+	defer jm.maybeGC()
+	if ok {
+		return tsm
+	}
+	jm.Lock()
+	defer jm.Unlock()
+	// Now that we've got an exclusive lock, check once more to ensure an entry wasn't created in the interim
+	// and then create a new timeseriesMap if required.
+ tsm2, ok2 := jm.jobsMap[sig] + if ok2 { + return tsm2 + } + tsm2 = newTimeseriesMap() + jm.jobsMap[sig] = tsm2 + return tsm2 +} + +type MetricsAdjuster interface { + AdjustMetrics(metrics pmetric.Metrics) error +} + +// initialPointAdjuster takes a map from a metric instance to the initial point in the metrics instance +// and provides AdjustMetricSlice, which takes a sequence of metrics and adjust their start times based on +// the initial points. +type initialPointAdjuster struct { + jobsMap *JobsMap + logger *zap.Logger + useCreatedMetric bool +} + +// NewInitialPointAdjuster returns a new MetricsAdjuster that adjust metrics' start times based on the initial received points. +func NewInitialPointAdjuster(logger *zap.Logger, gcInterval time.Duration, useCreatedMetric bool) MetricsAdjuster { + return &initialPointAdjuster{ + jobsMap: NewJobsMap(gcInterval), + logger: logger, + useCreatedMetric: useCreatedMetric, + } +} + +// AdjustMetrics takes a sequence of metrics and adjust their start times based on the initial and +// previous points in the timeseriesMap. +func (a *initialPointAdjuster) AdjustMetrics(metrics pmetric.Metrics) error { + // By contract metrics will have at least 1 data point, so for sure will have at least one ResourceMetrics. + + job, found := metrics.ResourceMetrics().At(0).Resource().Attributes().Get(semconv.AttributeServiceName) + if !found { + return errors.New("adjusting metrics without job") + } + + instance, found := metrics.ResourceMetrics().At(0).Resource().Attributes().Get(semconv.AttributeServiceInstanceID) + if !found { + return errors.New("adjusting metrics without instance") + } + tsm := a.jobsMap.get(job.Str(), instance.Str()) + + // The lock on the relevant timeseriesMap is held throughout the adjustment process to ensure that + // nothing else can modify the data used for adjustment. + tsm.Lock() + defer tsm.Unlock() + for i := 0; i < metrics.ResourceMetrics().Len(); i++ { + rm := metrics.ResourceMetrics().At(i) + for j := 0; j < rm.ScopeMetrics().Len(); j++ { + ilm := rm.ScopeMetrics().At(j) + for k := 0; k < ilm.Metrics().Len(); k++ { + metric := ilm.Metrics().At(k) + switch dataType := metric.Type(); dataType { + case pmetric.MetricTypeGauge: + // gauges don't need to be adjusted so no additional processing is necessary + + case pmetric.MetricTypeHistogram: + a.adjustMetricHistogram(tsm, metric) + + case pmetric.MetricTypeSummary: + a.adjustMetricSummary(tsm, metric) + + case pmetric.MetricTypeSum: + a.adjustMetricSum(tsm, metric) + + case pmetric.MetricTypeExponentialHistogram: + a.adjustMetricExponentialHistogram(tsm, metric) + + case pmetric.MetricTypeEmpty: + fallthrough + + default: + // this shouldn't happen + a.logger.Info("Adjust - skipping unexpected point", zap.String("type", dataType.String())) + } + } + } + } + return nil +} + +func (a *initialPointAdjuster) adjustMetricHistogram(tsm *timeseriesMap, current pmetric.Metric) { + histogram := current.Histogram() + if histogram.AggregationTemporality() != pmetric.AggregationTemporalityCumulative { + // Only dealing with CumulativeDistributions. + return + } + + currentPoints := histogram.DataPoints() + for i := 0; i < currentPoints.Len(); i++ { + currentDist := currentPoints.At(i) + + // start timestamp was set from _created + if a.useCreatedMetric && + !currentDist.Flags().NoRecordedValue() && + currentDist.StartTimestamp() < currentDist.Timestamp() { + continue + } + + tsi, found := tsm.get(current, currentDist.Attributes()) + if !found { + // initialize everything. 
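+			// First observation of this series: record its start time, count and sum as the baseline for later adjustments.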
+ tsi.histogram.startTime = currentDist.StartTimestamp() + tsi.histogram.previousCount = currentDist.Count() + tsi.histogram.previousSum = currentDist.Sum() + continue + } + + if currentDist.Flags().NoRecordedValue() { + // TODO: Investigate why this does not reset. + currentDist.SetStartTimestamp(tsi.histogram.startTime) + continue + } + + if currentDist.Count() < tsi.histogram.previousCount || currentDist.Sum() < tsi.histogram.previousSum { + // reset re-initialize everything. + tsi.histogram.startTime = currentDist.StartTimestamp() + tsi.histogram.previousCount = currentDist.Count() + tsi.histogram.previousSum = currentDist.Sum() + continue + } + + // Update only previous values. + tsi.histogram.previousCount = currentDist.Count() + tsi.histogram.previousSum = currentDist.Sum() + currentDist.SetStartTimestamp(tsi.histogram.startTime) + } +} + +func (a *initialPointAdjuster) adjustMetricExponentialHistogram(tsm *timeseriesMap, current pmetric.Metric) { + histogram := current.ExponentialHistogram() + if histogram.AggregationTemporality() != pmetric.AggregationTemporalityCumulative { + // Only dealing with CumulativeDistributions. + return + } + + currentPoints := histogram.DataPoints() + for i := 0; i < currentPoints.Len(); i++ { + currentDist := currentPoints.At(i) + + // start timestamp was set from _created + if a.useCreatedMetric && + !currentDist.Flags().NoRecordedValue() && + currentDist.StartTimestamp() < currentDist.Timestamp() { + continue + } + + tsi, found := tsm.get(current, currentDist.Attributes()) + if !found { + // initialize everything. + tsi.histogram.startTime = currentDist.StartTimestamp() + tsi.histogram.previousCount = currentDist.Count() + tsi.histogram.previousSum = currentDist.Sum() + continue + } + + if currentDist.Flags().NoRecordedValue() { + // TODO: Investigate why this does not reset. + currentDist.SetStartTimestamp(tsi.histogram.startTime) + continue + } + + if currentDist.Count() < tsi.histogram.previousCount || currentDist.Sum() < tsi.histogram.previousSum { + // reset re-initialize everything. + tsi.histogram.startTime = currentDist.StartTimestamp() + tsi.histogram.previousCount = currentDist.Count() + tsi.histogram.previousSum = currentDist.Sum() + continue + } + + // Update only previous values. + tsi.histogram.previousCount = currentDist.Count() + tsi.histogram.previousSum = currentDist.Sum() + currentDist.SetStartTimestamp(tsi.histogram.startTime) + } +} + +func (a *initialPointAdjuster) adjustMetricSum(tsm *timeseriesMap, current pmetric.Metric) { + currentPoints := current.Sum().DataPoints() + for i := 0; i < currentPoints.Len(); i++ { + currentSum := currentPoints.At(i) + + // start timestamp was set from _created + if a.useCreatedMetric && + !currentSum.Flags().NoRecordedValue() && + currentSum.StartTimestamp() < currentSum.Timestamp() { + continue + } + + tsi, found := tsm.get(current, currentSum.Attributes()) + if !found { + // initialize everything. + tsi.number.startTime = currentSum.StartTimestamp() + tsi.number.previousValue = currentSum.DoubleValue() + continue + } + + if currentSum.Flags().NoRecordedValue() { + // TODO: Investigate why this does not reset. + currentSum.SetStartTimestamp(tsi.number.startTime) + continue + } + + if currentSum.DoubleValue() < tsi.number.previousValue { + // reset re-initialize everything. + tsi.number.startTime = currentSum.StartTimestamp() + tsi.number.previousValue = currentSum.DoubleValue() + continue + } + + // Update only previous values. 
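+		// No reset detected: keep the established start time and remember the latest value for the next reset check.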
+ tsi.number.previousValue = currentSum.DoubleValue() + currentSum.SetStartTimestamp(tsi.number.startTime) + } +} + +func (a *initialPointAdjuster) adjustMetricSummary(tsm *timeseriesMap, current pmetric.Metric) { + currentPoints := current.Summary().DataPoints() + + for i := 0; i < currentPoints.Len(); i++ { + currentSummary := currentPoints.At(i) + + // start timestamp was set from _created + if a.useCreatedMetric && + !currentSummary.Flags().NoRecordedValue() && + currentSummary.StartTimestamp() < currentSummary.Timestamp() { + continue + } + + tsi, found := tsm.get(current, currentSummary.Attributes()) + if !found { + // initialize everything. + tsi.summary.startTime = currentSummary.StartTimestamp() + tsi.summary.previousCount = currentSummary.Count() + tsi.summary.previousSum = currentSummary.Sum() + continue + } + + if currentSummary.Flags().NoRecordedValue() { + // TODO: Investigate why this does not reset. + currentSummary.SetStartTimestamp(tsi.summary.startTime) + continue + } + + if (currentSummary.Count() != 0 && + tsi.summary.previousCount != 0 && + currentSummary.Count() < tsi.summary.previousCount) || + (currentSummary.Sum() != 0 && + tsi.summary.previousSum != 0 && + currentSummary.Sum() < tsi.summary.previousSum) { + // reset re-initialize everything. + tsi.summary.startTime = currentSummary.StartTimestamp() + tsi.summary.previousCount = currentSummary.Count() + tsi.summary.previousSum = currentSummary.Sum() + continue + } + + // Update only previous values. + tsi.summary.previousCount = currentSummary.Count() + tsi.summary.previousSum = currentSummary.Sum() + currentSummary.SetStartTimestamp(tsi.summary.startTime) + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/metrics_adjuster_test.go b/pkg/promotel/prometheusreceiver/internal/metrics_adjuster_test.go new file mode 100644 index 000000000..7f0020df1 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/metrics_adjuster_test.go @@ -0,0 +1,741 @@ + +package internal + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + semconv "go.opentelemetry.io/collector/semconv/v1.27.0" + "go.uber.org/zap" +) + +var ( + tUnknown = timestampFromMs(0) + t1 = timestampFromMs(1) + t2 = timestampFromMs(2) + t3 = timestampFromMs(3) + t4 = timestampFromMs(4) + t5 = timestampFromMs(5) + + bounds0 = []float64{1, 2, 4} + percent0 = []float64{10, 50, 90} + + sum1 = "sum1" + gauge1 = "gauge1" + histogram1 = "histogram1" + summary1 = "summary1" + exponentialHistogram1 = "exponentialHistogram1" + + k1v1k2v2 = []*kv{ + {"k1", "v1"}, + {"k2", "v2"}, + } + + k1v10k2v20 = []*kv{ + {"k1", "v10"}, + {"k2", "v20"}, + } + + k1v100k2v200 = []*kv{ + {"k1", "v100"}, + {"k2", "v200"}, + } + + emptyLabels []*kv + k1vEmpty = []*kv{{"k1", ""}} + k1vEmptyk2vEmptyk3vEmpty = []*kv{{"k1", ""}, {"k2", ""}, {"k3", ""}} +) + +func TestGauge(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Gauge: round 1 - gauge not adjusted", + metrics: metrics(gaugeMetric(gauge1, doublePoint(k1v1k2v2, t1, t1, 44))), + adjusted: metrics(gaugeMetric(gauge1, doublePoint(k1v1k2v2, t1, t1, 44))), + }, + { + description: "Gauge: round 2 - gauge not adjusted", + metrics: metrics(gaugeMetric(gauge1, doublePoint(k1v1k2v2, t2, t2, 66))), + adjusted: metrics(gaugeMetric(gauge1, doublePoint(k1v1k2v2, t2, t2, 66))), + }, + { + description: "Gauge: round 3 - value less than previous value - gauge is not adjusted", + metrics: 
metrics(gaugeMetric(gauge1, doublePoint(k1v1k2v2, t3, t3, 55))), + adjusted: metrics(gaugeMetric(gauge1, doublePoint(k1v1k2v2, t3, t3, 55))), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestSum(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Sum: round 1 - initial instance, start time is established", + metrics: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44))), + }, + { + description: "Sum: round 2 - instance adjusted based on round 1", + metrics: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t2, t2, 66))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t1, t2, 66))), + }, + { + description: "Sum: round 3 - instance reset (value less than previous value), start time is reset", + metrics: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 55))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 55))), + }, + { + description: "Sum: round 4 - instance adjusted based on round 3", + metrics: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t4, t4, 72))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t3, t4, 72))), + }, + { + description: "Sum: round 5 - instance adjusted based on round 4", + metrics: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t5, t5, 72))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t3, t5, 72))), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestSummaryNoCount(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Summary No Count: round 1 - initial instance, start time is established", + metrics: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 0, 40, percent0, []float64{1, 5, 8}))), + adjusted: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 0, 40, percent0, []float64{1, 5, 8}))), + }, + { + description: "Summary No Count: round 2 - instance adjusted based on round 1", + metrics: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t2, t2, 0, 70, percent0, []float64{7, 44, 9}))), + adjusted: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t2, 0, 70, percent0, []float64{7, 44, 9}))), + }, + { + description: "Summary No Count: round 3 - instance reset (count less than previous), start time is reset", + metrics: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t3, 0, 66, percent0, []float64{3, 22, 5}))), + adjusted: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t3, 0, 66, percent0, []float64{3, 22, 5}))), + }, + { + description: "Summary No Count: round 4 - instance adjusted based on round 3", + metrics: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t4, t4, 0, 96, percent0, []float64{9, 47, 8}))), + adjusted: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t4, 0, 96, percent0, []float64{9, 47, 8}))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestSummaryFlagNoRecordedValue(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Summary No Count: round 1 - initial instance, start time is established", + metrics: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 0, 40, percent0, []float64{1, 5, 8}))), + adjusted: metrics(summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 0, 40, percent0, []float64{1, 5, 8}))), + }, + { + description: "Summary Flag NoRecordedValue: round 2 - 
instance adjusted based on round 1", + metrics: metrics(summaryMetric(summary1, summaryPointNoValue(k1v1k2v2, t2, t2))), + adjusted: metrics(summaryMetric(summary1, summaryPointNoValue(k1v1k2v2, t1, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestSummary(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Summary: round 1 - initial instance, start time is established", + metrics: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 10, 40, percent0, []float64{1, 5, 8})), + ), + adjusted: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 10, 40, percent0, []float64{1, 5, 8})), + ), + }, + { + description: "Summary: round 2 - instance adjusted based on round 1", + metrics: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t2, t2, 15, 70, percent0, []float64{7, 44, 9})), + ), + adjusted: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t2, 15, 70, percent0, []float64{7, 44, 9})), + ), + }, + { + description: "Summary: round 3 - instance reset (count less than previous), start time is reset", + metrics: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t3, 12, 66, percent0, []float64{3, 22, 5})), + ), + adjusted: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t3, 12, 66, percent0, []float64{3, 22, 5})), + ), + }, + { + description: "Summary: round 4 - instance adjusted based on round 3", + metrics: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t4, t4, 14, 96, percent0, []float64{9, 47, 8})), + ), + adjusted: metrics( + summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t4, 14, 96, percent0, []float64{9, 47, 8})), + ), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestHistogram(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Histogram: round 1 - initial instance, start time is established", + metrics: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7}))), + adjusted: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7}))), + }, { + description: "Histogram: round 2 - instance adjusted based on round 1", + metrics: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t2, t2, bounds0, []uint64{6, 3, 4, 8}))), + adjusted: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t2, bounds0, []uint64{6, 3, 4, 8}))), + }, { + description: "Histogram: round 3 - instance reset (value less than previous value), start time is reset", + metrics: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t3, t3, bounds0, []uint64{5, 3, 2, 7}))), + adjusted: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t3, t3, bounds0, []uint64{5, 3, 2, 7}))), + }, { + description: "Histogram: round 4 - instance adjusted based on round 3", + metrics: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t4, t4, bounds0, []uint64{7, 4, 2, 12}))), + adjusted: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t3, t4, bounds0, []uint64{7, 4, 2, 12}))), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestHistogramFlagNoRecordedValue(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Histogram: round 1 - initial instance, start time is established", + metrics: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, 
bounds0, []uint64{7, 4, 2, 12}))), + adjusted: metrics(histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{7, 4, 2, 12}))), + }, + { + description: "Histogram: round 2 - instance adjusted based on round 1", + metrics: metrics(histogramMetric(histogram1, histogramPointNoValue(k1v1k2v2, tUnknown, t2))), + adjusted: metrics(histogramMetric(histogram1, histogramPointNoValue(k1v1k2v2, t1, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestHistogramFlagNoRecordedValueFirstObservation(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Histogram: round 1 - initial instance, start time is unknown", + metrics: metrics(histogramMetric(histogram1, histogramPointNoValue(k1v1k2v2, tUnknown, t1))), + adjusted: metrics(histogramMetric(histogram1, histogramPointNoValue(k1v1k2v2, tUnknown, t1))), + }, + { + description: "Histogram: round 2 - instance unchanged", + metrics: metrics(histogramMetric(histogram1, histogramPointNoValue(k1v1k2v2, tUnknown, t2))), + adjusted: metrics(histogramMetric(histogram1, histogramPointNoValue(k1v1k2v2, tUnknown, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +// In TestExponentHistogram we exclude negative buckets on purpose as they are +// not considered the main use case - response times that are most commonly +// observed are never negative. Negative buckets would make the Sum() non +// monotonic and cause unexpected resets. +func TestExponentialHistogram(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Exponential Histogram: round 1 - initial instance, start time is established", + metrics: metrics(exponentialHistogramMetric(exponentialHistogram1, exponentialHistogramPoint(k1v1k2v2, t1, t1, 3, 1, 0, []uint64{}, -2, []uint64{4, 2, 3, 7}))), + adjusted: metrics(exponentialHistogramMetric(exponentialHistogram1, exponentialHistogramPoint(k1v1k2v2, t1, t1, 3, 1, 0, []uint64{}, -2, []uint64{4, 2, 3, 7}))), + }, { + description: "Exponential Histogram: round 2 - instance adjusted based on round 1", + metrics: metrics(exponentialHistogramMetric(exponentialHistogram1, exponentialHistogramPoint(k1v1k2v2, t2, t2, 3, 1, 0, []uint64{}, -2, []uint64{6, 2, 3, 7}))), + adjusted: metrics(exponentialHistogramMetric(exponentialHistogram1, exponentialHistogramPoint(k1v1k2v2, t1, t2, 3, 1, 0, []uint64{}, -2, []uint64{6, 2, 3, 7}))), + }, { + description: "Exponential Histogram: round 3 - instance reset (value less than previous value), start time is reset", + metrics: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPoint(k1v1k2v2, t3, t3, 3, 1, 0, []uint64{}, -2, []uint64{5, 3, 2, 7}))), + adjusted: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPoint(k1v1k2v2, t3, t3, 3, 1, 0, []uint64{}, -2, []uint64{5, 3, 2, 7}))), + }, { + description: "Exponential Histogram: round 4 - instance adjusted based on round 3", + metrics: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPoint(k1v1k2v2, t4, t4, 3, 1, 0, []uint64{}, -2, []uint64{7, 4, 2, 12}))), + adjusted: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPoint(k1v1k2v2, t3, t4, 3, 1, 0, []uint64{}, -2, []uint64{7, 4, 2, 12}))), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestExponentialHistogramFlagNoRecordedValue(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: 
"Histogram: round 1 - initial instance, start time is established", + metrics: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPoint(k1v1k2v2, t1, t1, 0, 2, 2, []uint64{7, 4, 2, 12}, 3, []uint64{}))), + adjusted: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPoint(k1v1k2v2, t1, t1, 0, 2, 2, []uint64{7, 4, 2, 12}, 3, []uint64{}))), + }, + { + description: "Histogram: round 2 - instance adjusted based on round 1", + metrics: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPointNoValue(k1v1k2v2, tUnknown, t2))), + adjusted: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPointNoValue(k1v1k2v2, t1, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestExponentialHistogramFlagNoRecordedValueFirstObservation(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Histogram: round 1 - initial instance, start time is unknown", + metrics: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPointNoValue(k1v1k2v2, tUnknown, t1))), + adjusted: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPointNoValue(k1v1k2v2, tUnknown, t1))), + }, + { + description: "Histogram: round 2 - instance unchanged", + metrics: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPointNoValue(k1v1k2v2, tUnknown, t2))), + adjusted: metrics(exponentialHistogramMetric(histogram1, exponentialHistogramPointNoValue(k1v1k2v2, tUnknown, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestSummaryFlagNoRecordedValueFirstObservation(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Summary: round 1 - initial instance, start time is unknown", + metrics: metrics(summaryMetric(summary1, summaryPointNoValue(k1v1k2v2, tUnknown, t1))), + adjusted: metrics(summaryMetric(summary1, summaryPointNoValue(k1v1k2v2, tUnknown, t1))), + }, + { + description: "Summary: round 2 - instance unchanged", + metrics: metrics(summaryMetric(summary1, summaryPointNoValue(k1v1k2v2, tUnknown, t2))), + adjusted: metrics(summaryMetric(summary1, summaryPointNoValue(k1v1k2v2, tUnknown, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestGaugeFlagNoRecordedValueFirstObservation(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Gauge: round 1 - initial instance, start time is unknown", + metrics: metrics(gaugeMetric(gauge1, doublePointNoValue(k1v1k2v2, tUnknown, t1))), + adjusted: metrics(gaugeMetric(gauge1, doublePointNoValue(k1v1k2v2, tUnknown, t1))), + }, + { + description: "Gauge: round 2 - instance unchanged", + metrics: metrics(gaugeMetric(gauge1, doublePointNoValue(k1v1k2v2, tUnknown, t2))), + adjusted: metrics(gaugeMetric(gauge1, doublePointNoValue(k1v1k2v2, tUnknown, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestSumFlagNoRecordedValueFirstObservation(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "Sum: round 1 - initial instance, start time is unknown", + metrics: metrics(sumMetric("sum1", doublePointNoValue(k1v1k2v2, tUnknown, t1))), + adjusted: metrics(sumMetric("sum1", doublePointNoValue(k1v1k2v2, tUnknown, t1))), + }, + { + description: "Sum: round 2 - instance unchanged", + metrics: metrics(sumMetric("sum1", doublePointNoValue(k1v1k2v2, tUnknown, t2))), 
+ adjusted: metrics(sumMetric("sum1", doublePointNoValue(k1v1k2v2, tUnknown, t2))), + }, + } + + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestMultiMetrics(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "MultiMetrics: round 1 - combined round 1 of individual metrics", + metrics: metrics( + gaugeMetric(gauge1, doublePoint(k1v1k2v2, t1, t1, 44)), + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 10, 40, percent0, []float64{1, 5, 8})), + ), + adjusted: metrics( + gaugeMetric(gauge1, doublePoint(k1v1k2v2, t1, t1, 44)), + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t1, 10, 40, percent0, []float64{1, 5, 8})), + ), + }, + { + description: "MultiMetrics: round 2 - combined round 2 of individual metrics", + metrics: metrics( + gaugeMetric(gauge1, doublePoint(k1v1k2v2, t2, t2, 66)), + sumMetric(sum1, doublePoint(k1v1k2v2, t2, t2, 66)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t2, t2, bounds0, []uint64{6, 3, 4, 8})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t2, t2, 15, 70, percent0, []float64{7, 44, 9})), + ), + adjusted: metrics( + gaugeMetric(gauge1, doublePoint(k1v1k2v2, t2, t2, 66)), + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t2, 66)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t2, bounds0, []uint64{6, 3, 4, 8})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t1, t2, 15, 70, percent0, []float64{7, 44, 9})), + ), + }, + { + description: "MultiMetrics: round 3 - combined round 3 of individual metrics", + metrics: metrics( + gaugeMetric(gauge1, doublePoint(k1v1k2v2, t3, t3, 55)), + sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 55)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t3, t3, bounds0, []uint64{5, 3, 2, 7})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t3, 12, 66, percent0, []float64{3, 22, 5})), + ), + adjusted: metrics( + gaugeMetric(gauge1, doublePoint(k1v1k2v2, t3, t3, 55)), + sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 55)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t3, t3, bounds0, []uint64{5, 3, 2, 7})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t3, 12, 66, percent0, []float64{3, 22, 5})), + ), + }, + { + description: "MultiMetrics: round 4 - combined round 4 of individual metrics", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t4, t4, 72)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t4, t4, bounds0, []uint64{7, 4, 2, 12})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t4, t4, 14, 96, percent0, []float64{9, 47, 8})), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t3, t4, 72)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t3, t4, bounds0, []uint64{7, 4, 2, 12})), + summaryMetric(summary1, summaryPoint(k1v1k2v2, t3, t4, 14, 96, percent0, []float64{9, 47, 8})), + ), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestNewDataPointsAdded(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "New Datapoints: round 1 - two datapoints each", + metrics: metrics( + sumMetric(sum1, + doublePoint(k1v1k2v2, t1, t1, 44), + doublePoint(k1v100k2v200, t1, t1, 44)), + 
histogramMetric(histogram1, + histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7}), + histogramPoint(k1v100k2v200, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + summaryMetric(summary1, + summaryPoint(k1v1k2v2, t1, t1, 10, 40, percent0, []float64{1, 5, 8}), + summaryPoint(k1v100k2v200, t1, t1, 10, 40, percent0, []float64{1, 5, 8})), + ), + adjusted: metrics( + sumMetric(sum1, + doublePoint(k1v1k2v2, t1, t1, 44), + doublePoint(k1v100k2v200, t1, t1, 44)), + histogramMetric(histogram1, + histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7}), + histogramPoint(k1v100k2v200, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + summaryMetric(summary1, + summaryPoint(k1v1k2v2, t1, t1, 10, 40, percent0, []float64{1, 5, 8}), + summaryPoint(k1v100k2v200, t1, t1, 10, 40, percent0, []float64{1, 5, 8})), + ), + }, + { + description: "New Datapoints: round 2 - new datapoints unchanged, old datapoints adjusted", + metrics: metrics( + sumMetric(sum1, + doublePoint(k1v1k2v2, t2, t2, 44), + doublePoint(k1v10k2v20, t2, t2, 44), + doublePoint(k1v100k2v200, t2, t2, 44)), + histogramMetric(histogram1, + histogramPoint(k1v1k2v2, t2, t2, bounds0, []uint64{4, 2, 3, 7}), + histogramPoint(k1v10k2v20, t2, t2, bounds0, []uint64{4, 2, 3, 7}), + histogramPoint(k1v100k2v200, t2, t2, bounds0, []uint64{4, 2, 3, 7})), + summaryMetric(summary1, + summaryPoint(k1v1k2v2, t2, t2, 10, 40, percent0, []float64{1, 5, 8}), + summaryPoint(k1v10k2v20, t2, t2, 10, 40, percent0, []float64{1, 5, 8}), + summaryPoint(k1v100k2v200, t2, t2, 10, 40, percent0, []float64{1, 5, 8})), + ), + adjusted: metrics( + sumMetric(sum1, + doublePoint(k1v1k2v2, t1, t2, 44), + doublePoint(k1v10k2v20, t2, t2, 44), + doublePoint(k1v100k2v200, t1, t2, 44)), + histogramMetric(histogram1, + histogramPoint(k1v1k2v2, t1, t2, bounds0, []uint64{4, 2, 3, 7}), + histogramPoint(k1v10k2v20, t2, t2, bounds0, []uint64{4, 2, 3, 7}), + histogramPoint(k1v100k2v200, t1, t2, bounds0, []uint64{4, 2, 3, 7})), + summaryMetric(summary1, + summaryPoint(k1v1k2v2, t1, t2, 10, 40, percent0, []float64{1, 5, 8}), + summaryPoint(k1v10k2v20, t2, t2, 10, 40, percent0, []float64{1, 5, 8}), + summaryPoint(k1v100k2v200, t1, t2, 10, 40, percent0, []float64{1, 5, 8})), + ), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestMultiTimeseries(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "MultiTimeseries: round 1 - initial first instance, start time is established", + metrics: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44))), + }, + { + description: "MultiTimeseries: round 2 - first instance adjusted based on round 1, initial second instance", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t2, t2, 66)), + sumMetric(sum1, doublePoint(k1v10k2v20, t2, t2, 20.0)), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t2, 66)), + sumMetric(sum1, doublePoint(k1v10k2v20, t2, t2, 20.0)), + ), + }, + { + description: "MultiTimeseries: round 3 - first instance adjusted based on round 1, second based on round 2", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 88.0)), + sumMetric(sum1, doublePoint(k1v10k2v20, t3, t3, 49.0)), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t3, 88.0)), + sumMetric(sum1, doublePoint(k1v10k2v20, t2, t3, 49.0)), + ), + }, + { + description: "MultiTimeseries: round 4 - first instance reset, second instance adjusted based on round 2, 
initial third instance", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t4, t4, 87.0)), + sumMetric(sum1, doublePoint(k1v10k2v20, t4, t4, 57.0)), + sumMetric(sum1, doublePoint(k1v100k2v200, t4, t4, 10.0)), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t4, t4, 87.0)), + sumMetric(sum1, doublePoint(k1v10k2v20, t2, t4, 57.0)), + sumMetric(sum1, doublePoint(k1v100k2v200, t4, t4, 10.0)), + ), + }, + { + description: "MultiTimeseries: round 5 - first instance adjusted based on round 4, second on round 2, third on round 4", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t5, t5, 90.0)), + sumMetric(sum1, doublePoint(k1v10k2v20, t5, t5, 65.0)), + sumMetric(sum1, doublePoint(k1v100k2v200, t5, t5, 22.0)), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t4, t5, 90.0)), + sumMetric(sum1, doublePoint(k1v10k2v20, t2, t5, 65.0)), + sumMetric(sum1, doublePoint(k1v100k2v200, t4, t5, 22.0)), + ), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestEmptyLabels(t *testing.T) { + script := []*metricsAdjusterTest{ + { + description: "EmptyLabels: round 1 - initial instance, implicitly empty labels, start time is established", + metrics: metrics(sumMetric(sum1, doublePoint(emptyLabels, t1, t1, 44))), + adjusted: metrics(sumMetric(sum1, doublePoint(emptyLabels, t1, t1, 44))), + }, + { + description: "EmptyLabels: round 2 - instance adjusted based on round 1", + metrics: metrics(sumMetric(sum1, doublePoint(emptyLabels, t2, t2, 66))), + adjusted: metrics(sumMetric(sum1, doublePoint(emptyLabels, t1, t2, 66))), + }, + { + description: "EmptyLabels: round 3 - one explicitly empty label, instance adjusted based on round 1", + metrics: metrics(sumMetric(sum1, doublePoint(k1vEmpty, t3, t3, 77))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1vEmpty, t1, t3, 77))), + }, + { + description: "EmptyLabels: round 4 - three explicitly empty labels, instance adjusted based on round 1", + metrics: metrics(sumMetric(sum1, doublePoint(k1vEmptyk2vEmptyk3vEmpty, t3, t3, 88))), + adjusted: metrics(sumMetric(sum1, doublePoint(k1vEmptyk2vEmptyk3vEmpty, t1, t3, 88))), + }, + } + runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script) +} + +func TestTsGC(t *testing.T) { + script1 := []*metricsAdjusterTest{ + { + description: "TsGC: round 1 - initial instances, start time is established", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44)), + sumMetric(sum1, doublePoint(k1v10k2v20, t1, t1, 20)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t1, t1, bounds0, []uint64{40, 20, 30, 70})), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44)), + sumMetric(sum1, doublePoint(k1v10k2v20, t1, t1, 20)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t1, t1, bounds0, []uint64{40, 20, 30, 70})), + ), + }, + } + + script2 := []*metricsAdjusterTest{ + { + description: "TsGC: round 2 - metrics first timeseries adjusted based on round 2, second timeseries not updated", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t2, t2, 88)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t2, t2, bounds0, []uint64{8, 7, 9, 14})), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t2, 88)), + 
histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t2, bounds0, []uint64{8, 7, 9, 14})), + ), + }, + } + + script3 := []*metricsAdjusterTest{ + { + description: "TsGC: round 3 - metrics first timeseries adjusted based on round 2, second timeseries empty due to timeseries gc()", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 99)), + sumMetric(sum1, doublePoint(k1v10k2v20, t3, t3, 80)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t3, t3, bounds0, []uint64{9, 8, 10, 15})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t3, t3, bounds0, []uint64{55, 66, 33, 77})), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t3, 99)), + sumMetric(sum1, doublePoint(k1v10k2v20, t3, t3, 80)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t3, bounds0, []uint64{9, 8, 10, 15})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t3, t3, bounds0, []uint64{55, 66, 33, 77})), + ), + }, + } + + ma := NewInitialPointAdjuster(zap.NewNop(), time.Minute, true) + + // run round 1 + runScript(t, ma, "job", "0", script1) + // gc the tsmap, unmarking all entries + ma.(*initialPointAdjuster).jobsMap.get("job", "0").gc() + // run round 2 - update metrics first timeseries only + runScript(t, ma, "job", "0", script2) + // gc the tsmap, collecting umarked entries + ma.(*initialPointAdjuster).jobsMap.get("job", "0").gc() + // run round 3 - verify that metrics second timeseries have been gc'd + runScript(t, ma, "job", "0", script3) +} + +func TestJobGC(t *testing.T) { + job1Script1 := []*metricsAdjusterTest{ + { + description: "JobGC: job 1, round 1 - initial instances, adjusted should be empty", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44)), + sumMetric(sum1, doublePoint(k1v10k2v20, t1, t1, 20)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t1, t1, bounds0, []uint64{40, 20, 30, 70})), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44)), + sumMetric(sum1, doublePoint(k1v10k2v20, t1, t1, 20)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t1, t1, bounds0, []uint64{4, 2, 3, 7})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t1, t1, bounds0, []uint64{40, 20, 30, 70})), + ), + }, + } + + job2Script1 := []*metricsAdjusterTest{ + { + description: "JobGC: job2, round 1 - no metrics adjusted, just trigger gc", + metrics: metrics(), + adjusted: metrics(), + }, + } + + job1Script2 := []*metricsAdjusterTest{ + { + description: "JobGC: job 1, round 2 - metrics timeseries empty due to job-level gc", + metrics: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t4, t4, 99)), + sumMetric(sum1, doublePoint(k1v10k2v20, t4, t4, 80)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t4, t4, bounds0, []uint64{9, 8, 10, 15})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t4, t4, bounds0, []uint64{55, 66, 33, 77})), + ), + adjusted: metrics( + sumMetric(sum1, doublePoint(k1v1k2v2, t4, t4, 99)), + sumMetric(sum1, doublePoint(k1v10k2v20, t4, t4, 80)), + histogramMetric(histogram1, histogramPoint(k1v1k2v2, t4, t4, bounds0, []uint64{9, 8, 10, 15})), + histogramMetric(histogram1, histogramPoint(k1v10k2v20, t4, t4, bounds0, []uint64{55, 66, 33, 77})), + ), + }, + } + + gcInterval := 10 * time.Millisecond + ma := NewInitialPointAdjuster(zap.NewNop(), gcInterval, true) + + // run job 1, round 1 - all entries marked + runScript(t, ma, "job1", "0", job1Script1) + // sleep longer 
than gcInterval to enable job gc in the next run + time.Sleep(2 * gcInterval) + // run job 2, round1 - trigger job gc, unmarking all entries + runScript(t, ma, "job1", "1", job2Script1) + // sleep longer than gcInterval to enable job gc in the next run + time.Sleep(2 * gcInterval) + // re-run job 2, round1 - trigger job gc, removing unmarked entries + runScript(t, ma, "job1", "1", job2Script1) + // ensure that at least one jobsMap.gc() completed + ma.(*initialPointAdjuster).jobsMap.gc() + // run job 1, round 2 - verify that all job 1 timeseries have been gc'd + runScript(t, ma, "job1", "0", job1Script2) +} + +type metricsAdjusterTest struct { + description string + metrics pmetric.Metrics + adjusted pmetric.Metrics +} + +func runScript(t *testing.T, ma MetricsAdjuster, job, instance string, tests []*metricsAdjusterTest) { + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + adjusted := pmetric.NewMetrics() + test.metrics.CopyTo(adjusted) + // Add the instance/job to the input metrics. + adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceInstanceID, instance) + adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceName, job) + assert.NoError(t, ma.AdjustMetrics(adjusted)) + + // Add the instance/job to the expected metrics as well. + test.adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceInstanceID, instance) + test.adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceName, job) + assert.EqualValues(t, test.adjusted, adjusted) + }) + } +} + +func BenchmarkGetAttributesSignature(b *testing.B) { + attrs := pcommon.NewMap() + attrs.PutStr("key1", "some-random-test-value-1") + attrs.PutStr("key2", "some-random-test-value-2") + attrs.PutStr("key6", "some-random-test-value-6") + attrs.PutStr("key3", "some-random-test-value-3") + attrs.PutStr("key4", "some-random-test-value-4") + attrs.PutStr("key5", "some-random-test-value-5") + attrs.PutStr("key7", "some-random-test-value-7") + attrs.PutStr("key8", "some-random-test-value-8") + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + getAttributesSignature(attrs) + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/metricsutil_test.go b/pkg/promotel/prometheusreceiver/internal/metricsutil_test.go new file mode 100644 index 000000000..7582a42ed --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/metricsutil_test.go @@ -0,0 +1,273 @@ + +package internal + +import ( + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +type kv struct { + Key, Value string +} + +func metrics(metrics ...pmetric.Metric) pmetric.Metrics { + md := pmetric.NewMetrics() + ms := md.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + for _, metric := range metrics { + destMetric := ms.AppendEmpty() + metric.CopyTo(destMetric) + } + + return md +} + +func histogramPointRaw(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.HistogramDataPoint { + hdp := pmetric.NewHistogramDataPoint() + hdp.SetStartTimestamp(startTimestamp) + hdp.SetTimestamp(timestamp) + + attrs := hdp.Attributes() + for _, kv := range attributes { + attrs.PutStr(kv.Key, kv.Value) + } + + return hdp +} + +func histogramPoint(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp, bounds []float64, counts []uint64) pmetric.HistogramDataPoint { + hdp := histogramPointRaw(attributes, startTimestamp, timestamp) + 
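+	// Populate the explicit bounds and bucket counts, then derive the total count and an approximate sum from them.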
hdp.ExplicitBounds().FromRaw(bounds) + hdp.BucketCounts().FromRaw(counts) + + var sum float64 + var count uint64 + for i, bcount := range counts { + count += bcount + if i > 0 { + sum += float64(bcount) * bounds[i-1] + } + } + hdp.SetCount(count) + hdp.SetSum(sum) + + return hdp +} + +func histogramPointNoValue(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.HistogramDataPoint { + hdp := histogramPointRaw(attributes, startTimestamp, timestamp) + hdp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + + return hdp +} + +func histogramMetric(name string, points ...pmetric.HistogramDataPoint) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + metric.Metadata().PutStr("prometheus.type", "histogram") + histogram := metric.SetEmptyHistogram() + histogram.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + destPointL := histogram.DataPoints() + // By default the AggregationTemporality is Cumulative until it'll be changed by the caller. + for _, point := range points { + destPoint := destPointL.AppendEmpty() + point.CopyTo(destPoint) + } + + return metric +} + +func exponentialHistogramMetric(name string, points ...pmetric.ExponentialHistogramDataPoint) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + metric.Metadata().PutStr("prometheus.type", "histogram") + histogram := metric.SetEmptyExponentialHistogram() + histogram.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + destPointL := histogram.DataPoints() + // By default the AggregationTemporality is Cumulative until it'll be changed by the caller. + for _, point := range points { + destPoint := destPointL.AppendEmpty() + point.CopyTo(destPoint) + } + + return metric +} + +func exponentialHistogramPointRaw(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.ExponentialHistogramDataPoint { + hdp := pmetric.NewExponentialHistogramDataPoint() + hdp.SetStartTimestamp(startTimestamp) + hdp.SetTimestamp(timestamp) + + attrs := hdp.Attributes() + for _, kv := range attributes { + attrs.PutStr(kv.Key, kv.Value) + } + + return hdp +} + +func exponentialHistogramPoint(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp, scale int32, zeroCount uint64, negativeOffset int32, negativeBuckets []uint64, positiveOffset int32, positiveBuckets []uint64) pmetric.ExponentialHistogramDataPoint { + hdp := exponentialHistogramPointRaw(attributes, startTimestamp, timestamp) + hdp.SetScale(scale) + hdp.SetZeroCount(zeroCount) + hdp.Negative().SetOffset(negativeOffset) + hdp.Negative().BucketCounts().FromRaw(negativeBuckets) + hdp.Positive().SetOffset(positiveOffset) + hdp.Positive().BucketCounts().FromRaw(positiveBuckets) + + count := uint64(0) + sum := float64(0) + for i, bCount := range positiveBuckets { + count += bCount + sum += float64(bCount) * float64(i) + } + for i, bCount := range negativeBuckets { + count += bCount + sum -= float64(bCount) * float64(i) + } + hdp.SetCount(count) + hdp.SetSum(sum) + return hdp +} + +func exponentialHistogramPointNoValue(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.ExponentialHistogramDataPoint { + hdp := exponentialHistogramPointRaw(attributes, startTimestamp, timestamp) + hdp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + + return hdp +} + +// exponentialHistogramPointSimplified let's you define an exponential +// histogram with just a few parameters. +// Scale and ZeroCount are set to the provided values. 
+// Positive and negative buckets are generated using the offset and bucketCount +// parameters by adding buckets from offset in both positive and negative +// directions. Bucket counts start from 1 and increase by 1 for each bucket. +// Sum and Count will be proportional to the bucket count. +func exponentialHistogramPointSimplified(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp, scale int32, zeroCount uint64, offset int32, bucketCount int) pmetric.ExponentialHistogramDataPoint { + hdp := exponentialHistogramPointRaw(attributes, startTimestamp, timestamp) + hdp.SetScale(scale) + hdp.SetZeroCount(zeroCount) + + positive := hdp.Positive() + positive.SetOffset(offset) + positive.BucketCounts().EnsureCapacity(bucketCount) + negative := hdp.Negative() + negative.SetOffset(offset) + negative.BucketCounts().EnsureCapacity(bucketCount) + + var sum float64 + var count uint64 + for i := 0; i < bucketCount; i++ { + positive.BucketCounts().Append(uint64(i + 1)) + negative.BucketCounts().Append(uint64(i + 1)) + count += uint64(i+1) + uint64(i+1) + sum += float64(i+1)*10 + float64(i+1)*10.0 + } + hdp.SetCount(count) + hdp.SetSum(sum) + + return hdp +} + +func doublePointRaw(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.NumberDataPoint { + ndp := pmetric.NewNumberDataPoint() + ndp.SetStartTimestamp(startTimestamp) + ndp.SetTimestamp(timestamp) + + for _, kv := range attributes { + ndp.Attributes().PutStr(kv.Key, kv.Value) + } + + return ndp +} + +func doublePoint(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp, value float64) pmetric.NumberDataPoint { + ndp := doublePointRaw(attributes, startTimestamp, timestamp) + ndp.SetDoubleValue(value) + return ndp +} + +func doublePointNoValue(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.NumberDataPoint { + ndp := doublePointRaw(attributes, startTimestamp, timestamp) + ndp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + return ndp +} + +func gaugeMetric(name string, points ...pmetric.NumberDataPoint) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + metric.Metadata().PutStr("prometheus.type", "gauge") + destPointL := metric.SetEmptyGauge().DataPoints() + for _, point := range points { + destPoint := destPointL.AppendEmpty() + point.CopyTo(destPoint) + } + + return metric +} + +func sumMetric(name string, points ...pmetric.NumberDataPoint) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + metric.Metadata().PutStr("prometheus.type", "counter") + sum := metric.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + sum.SetIsMonotonic(true) + + destPointL := sum.DataPoints() + for _, point := range points { + destPoint := destPointL.AppendEmpty() + point.CopyTo(destPoint) + } + + return metric +} + +func summaryPointRaw(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.SummaryDataPoint { + sdp := pmetric.NewSummaryDataPoint() + sdp.SetStartTimestamp(startTimestamp) + sdp.SetTimestamp(timestamp) + + for _, kv := range attributes { + sdp.Attributes().PutStr(kv.Key, kv.Value) + } + + return sdp +} + +func summaryPoint(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp, count uint64, sum float64, quantiles, values []float64) pmetric.SummaryDataPoint { + sdp := summaryPointRaw(attributes, startTimestamp, timestamp) + sdp.SetCount(count) + sdp.SetSum(sum) + + qvL := sdp.QuantileValues() + for i := 0; i < len(quantiles); i++ { + qvi := qvL.AppendEmpty() + 
qvi.SetQuantile(quantiles[i]) + qvi.SetValue(values[i]) + } + + return sdp +} + +func summaryPointNoValue(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.SummaryDataPoint { + sdp := summaryPointRaw(attributes, startTimestamp, timestamp) + sdp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + + return sdp +} + +func summaryMetric(name string, points ...pmetric.SummaryDataPoint) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + metric.Metadata().PutStr("prometheus.type", "summary") + destPointL := metric.SetEmptySummary().DataPoints() + for _, point := range points { + destPoint := destPointL.AppendEmpty() + point.CopyTo(destPoint) + } + + return metric +} diff --git a/pkg/promotel/prometheusreceiver/internal/prom_to_otlp.go b/pkg/promotel/prometheusreceiver/internal/prom_to_otlp.go new file mode 100644 index 000000000..453d2fa0b --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/prom_to_otlp.go @@ -0,0 +1,110 @@ + +package internal + +import ( + "net" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "go.opentelemetry.io/collector/featuregate" + "go.opentelemetry.io/collector/pdata/pcommon" + conventions "go.opentelemetry.io/collector/semconv/v1.25.0" + oldconventions "go.opentelemetry.io/collector/semconv/v1.6.1" +) + +const removeOldSemconvFeatureGateID = "receiver.prometheusreceiver.RemoveLegacyResourceAttributes" + +var removeOldSemconvFeatureGate = featuregate.GlobalRegistry().MustRegister( + removeOldSemconvFeatureGateID, + featuregate.StageAlpha, + featuregate.WithRegisterFromVersion("v0.101.0"), + featuregate.WithRegisterDescription("When enabled, the net.host.name, net.host.port, and http.scheme resource attributes are no longer added to metrics. Use server.address, server.port, and url.scheme instead."), + featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32814"), +) + +// isDiscernibleHost checks if a host can be used as a value for the 'host.name' key. +// localhost-like hosts and unspecified (0.0.0.0) hosts are not discernible. +func isDiscernibleHost(host string) bool { + ip := net.ParseIP(host) + if ip != nil { + // An IP is discernible if + // - it's not local (e.g. belongs to 127.0.0.0/8 or ::1/128) and + // - it's not unspecified (e.g. the 0.0.0.0 address). + return !ip.IsLoopback() && !ip.IsUnspecified() + } + + if host == "localhost" { + return false + } + + // not an IP, not 'localhost', assume it is discernible. + return true +} + +// CreateResource creates the resource data added to OTLP payloads. 
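+// For example, job "node" and instance "10.1.2.3:9100" produce service.name="node",
+// service.instance.id="10.1.2.3:9100", server.address="10.1.2.3" and server.port="9100",
+// plus the legacy net.host.*/http.scheme attributes while the removal feature gate is disabled.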
+func CreateResource(job, instance string, serviceDiscoveryLabels labels.Labels) pcommon.Resource { + host, port, err := net.SplitHostPort(instance) + if err != nil { + host = instance + } + resource := pcommon.NewResource() + attrs := resource.Attributes() + attrs.PutStr(conventions.AttributeServiceName, job) + if isDiscernibleHost(host) { + if !removeOldSemconvFeatureGate.IsEnabled() { + attrs.PutStr(oldconventions.AttributeNetHostName, host) + } + attrs.PutStr(conventions.AttributeServerAddress, host) + } + attrs.PutStr(conventions.AttributeServiceInstanceID, instance) + if !removeOldSemconvFeatureGate.IsEnabled() { + attrs.PutStr(conventions.AttributeNetHostPort, port) + attrs.PutStr(conventions.AttributeHTTPScheme, serviceDiscoveryLabels.Get(model.SchemeLabel)) + } + attrs.PutStr(conventions.AttributeServerPort, port) + attrs.PutStr(conventions.AttributeURLScheme, serviceDiscoveryLabels.Get(model.SchemeLabel)) + + addKubernetesResource(attrs, serviceDiscoveryLabels) + + return resource +} + +// kubernetesDiscoveryToResourceAttributes maps from metadata labels discovered +// through the kubernetes implementation of service discovery to opentelemetry +// resource attribute keys. +var kubernetesDiscoveryToResourceAttributes = map[string]string{ + "__meta_kubernetes_pod_name": conventions.AttributeK8SPodName, + "__meta_kubernetes_pod_uid": conventions.AttributeK8SPodUID, + "__meta_kubernetes_pod_container_name": conventions.AttributeK8SContainerName, + "__meta_kubernetes_namespace": conventions.AttributeK8SNamespaceName, + // Only one of the node name service discovery labels will be present + "__meta_kubernetes_pod_node_name": conventions.AttributeK8SNodeName, + "__meta_kubernetes_node_name": conventions.AttributeK8SNodeName, + "__meta_kubernetes_endpoint_node_name": conventions.AttributeK8SNodeName, +} + +// addKubernetesResource adds resource information detected by prometheus' +// kubernetes service discovery. 
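+// Pod, namespace and node discovery labels map directly to the corresponding
+// k8s.* attributes, and the pod controller kind/name pair is mapped to the
+// matching workload name attribute (ReplicaSet, DaemonSet, StatefulSet, Job or
+// CronJob).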
+func addKubernetesResource(attrs pcommon.Map, serviceDiscoveryLabels labels.Labels) { + for sdKey, attributeKey := range kubernetesDiscoveryToResourceAttributes { + if attr := serviceDiscoveryLabels.Get(sdKey); attr != "" { + attrs.PutStr(attributeKey, attr) + } + } + controllerName := serviceDiscoveryLabels.Get("__meta_kubernetes_pod_controller_name") + controllerKind := serviceDiscoveryLabels.Get("__meta_kubernetes_pod_controller_kind") + if controllerKind != "" && controllerName != "" { + switch controllerKind { + case "ReplicaSet": + attrs.PutStr(conventions.AttributeK8SReplicaSetName, controllerName) + case "DaemonSet": + attrs.PutStr(conventions.AttributeK8SDaemonSetName, controllerName) + case "StatefulSet": + attrs.PutStr(conventions.AttributeK8SStatefulSetName, controllerName) + case "Job": + attrs.PutStr(conventions.AttributeK8SJobName, controllerName) + case "CronJob": + attrs.PutStr(conventions.AttributeK8SCronJobName, controllerName) + } + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/prom_to_otlp_test.go b/pkg/promotel/prometheusreceiver/internal/prom_to_otlp_test.go new file mode 100644 index 000000000..7a2fce71f --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/prom_to_otlp_test.go @@ -0,0 +1,369 @@ +package internal + +import ( + "testing" + + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/featuregate" + "go.opentelemetry.io/collector/pdata/pcommon" + conventions "go.opentelemetry.io/collector/semconv/v1.27.0" +) + +type jobInstanceDefinition struct { + job, instance, host, scheme, port string +} + +type k8sResourceDefinition struct { + podName, podUID, container, node, rs, ds, ss, job, cronjob, ns string +} + +func makeK8sResource(jobInstance *jobInstanceDefinition, def *k8sResourceDefinition) pcommon.Resource { + resource := makeResourceWithJobInstanceScheme(jobInstance, true) + attrs := resource.Attributes() + if def.podName != "" { + attrs.PutStr(conventions.AttributeK8SPodName, def.podName) + } + if def.podUID != "" { + attrs.PutStr(conventions.AttributeK8SPodUID, def.podUID) + } + if def.container != "" { + attrs.PutStr(conventions.AttributeK8SContainerName, def.container) + } + if def.node != "" { + attrs.PutStr(conventions.AttributeK8SNodeName, def.node) + } + if def.rs != "" { + attrs.PutStr(conventions.AttributeK8SReplicaSetName, def.rs) + } + if def.ds != "" { + attrs.PutStr(conventions.AttributeK8SDaemonSetName, def.ds) + } + if def.ss != "" { + attrs.PutStr(conventions.AttributeK8SStatefulSetName, def.ss) + } + if def.job != "" { + attrs.PutStr(conventions.AttributeK8SJobName, def.job) + } + if def.cronjob != "" { + attrs.PutStr(conventions.AttributeK8SCronJobName, def.cronjob) + } + if def.ns != "" { + attrs.PutStr(conventions.AttributeK8SNamespaceName, def.ns) + } + return resource +} + +func makeResourceWithJobInstanceScheme(def *jobInstanceDefinition, hasHost bool) pcommon.Resource { + resource := pcommon.NewResource() + attrs := resource.Attributes() + // Using hardcoded values to assert on outward expectations so that + // when variables change, these tests will fail and we'll have reports. 
+ attrs.PutStr("service.name", def.job) + if hasHost { + attrs.PutStr("server.address", def.host) + } + attrs.PutStr("service.instance.id", def.instance) + attrs.PutStr("server.port", def.port) + attrs.PutStr("url.scheme", def.scheme) + return resource +} + +func makeResourceWithJobInstanceSchemeDuplicate(def *jobInstanceDefinition, hasHost bool) pcommon.Resource { + resource := pcommon.NewResource() + attrs := resource.Attributes() + // Using hardcoded values to assert on outward expectations so that + // when variables change, these tests will fail and we'll have reports. + attrs.PutStr("service.name", def.job) + if hasHost { + attrs.PutStr("net.host.name", def.host) + attrs.PutStr("server.address", def.host) + } + attrs.PutStr("service.instance.id", def.instance) + attrs.PutStr("net.host.port", def.port) + attrs.PutStr("http.scheme", def.scheme) + attrs.PutStr("server.port", def.port) + attrs.PutStr("url.scheme", def.scheme) + return resource +} + +func TestCreateNodeAndResourcePromToOTLP(t *testing.T) { + tests := []struct { + name, job string + instance string + sdLabels labels.Labels + removeOldSemconvFeatureGate bool + want pcommon.Resource + }{ + { + name: "all attributes proper", + job: "job", instance: "hostname:8888", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + removeOldSemconvFeatureGate: true, + want: makeResourceWithJobInstanceScheme(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, true), + }, + { + name: "missing port", + job: "job", instance: "myinstance", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "https"}), + removeOldSemconvFeatureGate: true, + want: makeResourceWithJobInstanceScheme(&jobInstanceDefinition{ + "job", "myinstance", "myinstance", "https", "", + }, true), + }, + { + name: "blank scheme", + job: "job", instance: "myinstance:443", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: ""}), + removeOldSemconvFeatureGate: true, + want: makeResourceWithJobInstanceScheme(&jobInstanceDefinition{ + "job", "myinstance:443", "myinstance", "", "443", + }, true), + }, + { + name: "blank instance, blank scheme", + job: "job", instance: "", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: ""}), + removeOldSemconvFeatureGate: true, + want: makeResourceWithJobInstanceScheme(&jobInstanceDefinition{ + "job", "", "", "", "", + }, true), + }, + { + name: "blank instance, non-blank scheme", + job: "job", instance: "", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + removeOldSemconvFeatureGate: true, + want: makeResourceWithJobInstanceScheme(&jobInstanceDefinition{ + "job", "", "", "http", "", + }, true), + }, + { + name: "0.0.0.0 address", + job: "job", instance: "0.0.0.0:8888", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + removeOldSemconvFeatureGate: true, + want: makeResourceWithJobInstanceScheme(&jobInstanceDefinition{ + "job", "0.0.0.0:8888", "", "http", "8888", + }, false), + }, + { + name: "localhost", + job: "job", instance: "localhost:8888", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + removeOldSemconvFeatureGate: true, + want: makeResourceWithJobInstanceScheme(&jobInstanceDefinition{ + "job", "localhost:8888", "", "http", "8888", + }, false), + }, + { + name: "all attributes proper with duplicates", + job: "job", instance: "hostname:8888", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + want: makeResourceWithJobInstanceSchemeDuplicate(&jobInstanceDefinition{ + "job", 
"hostname:8888", "hostname", "http", "8888", + }, true), + }, + { + name: "missing port with duplicates", + job: "job", instance: "myinstance", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "https"}), + want: makeResourceWithJobInstanceSchemeDuplicate(&jobInstanceDefinition{ + "job", "myinstance", "myinstance", "https", "", + }, true), + }, + { + name: "blank scheme with duplicates", + job: "job", instance: "myinstance:443", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: ""}), + want: makeResourceWithJobInstanceSchemeDuplicate(&jobInstanceDefinition{ + "job", "myinstance:443", "myinstance", "", "443", + }, true), + }, + { + name: "blank instance, blank scheme with duplicates", + job: "job", instance: "", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: ""}), + want: makeResourceWithJobInstanceSchemeDuplicate(&jobInstanceDefinition{ + "job", "", "", "", "", + }, true), + }, + { + name: "blank instance, non-blank scheme with duplicates", + job: "job", instance: "", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + want: makeResourceWithJobInstanceSchemeDuplicate(&jobInstanceDefinition{ + "job", "", "", "http", "", + }, true), + }, + { + name: "0.0.0.0 address with duplicates", + job: "job", instance: "0.0.0.0:8888", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + want: makeResourceWithJobInstanceSchemeDuplicate(&jobInstanceDefinition{ + "job", "0.0.0.0:8888", "", "http", "8888", + }, false), + }, + { + name: "localhost with duplicates", + job: "job", instance: "localhost:8888", sdLabels: labels.New(labels.Label{Name: "__scheme__", Value: "http"}), + want: makeResourceWithJobInstanceSchemeDuplicate(&jobInstanceDefinition{ + "job", "localhost:8888", "", "http", "8888", + }, false), + }, + { + name: "kubernetes daemonset pod", + job: "job", instance: "hostname:8888", sdLabels: labels.New( + labels.Label{Name: "__scheme__", Value: "http"}, + labels.Label{Name: "__meta_kubernetes_pod_name", Value: "my-pod-23491"}, + labels.Label{Name: "__meta_kubernetes_pod_uid", Value: "84279wretgu89dg489q2"}, + labels.Label{Name: "__meta_kubernetes_pod_container_name", Value: "my-container"}, + labels.Label{Name: "__meta_kubernetes_pod_node_name", Value: "k8s-node-123"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_name", Value: "my-pod"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_kind", Value: "DaemonSet"}, + labels.Label{Name: "__meta_kubernetes_namespace", Value: "kube-system"}, + ), + removeOldSemconvFeatureGate: true, + want: makeK8sResource(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, &k8sResourceDefinition{ + podName: "my-pod-23491", + podUID: "84279wretgu89dg489q2", + container: "my-container", + node: "k8s-node-123", + ds: "my-pod", + ns: "kube-system", + }), + }, + { + name: "kubernetes replicaset pod", + job: "job", instance: "hostname:8888", sdLabels: labels.New( + labels.Label{Name: "__scheme__", Value: "http"}, + labels.Label{Name: "__meta_kubernetes_pod_name", Value: "my-pod-23491"}, + labels.Label{Name: "__meta_kubernetes_pod_uid", Value: "84279wretgu89dg489q2"}, + labels.Label{Name: "__meta_kubernetes_pod_container_name", Value: "my-container"}, + labels.Label{Name: "__meta_kubernetes_pod_node_name", Value: "k8s-node-123"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_name", Value: "my-pod"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_kind", Value: "ReplicaSet"}, + labels.Label{Name: "__meta_kubernetes_namespace", Value: 
"kube-system"}, + ), + removeOldSemconvFeatureGate: true, + want: makeK8sResource(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, &k8sResourceDefinition{ + podName: "my-pod-23491", + podUID: "84279wretgu89dg489q2", + container: "my-container", + node: "k8s-node-123", + rs: "my-pod", + ns: "kube-system", + }), + }, + { + name: "kubernetes statefulset pod", + job: "job", instance: "hostname:8888", sdLabels: labels.New( + labels.Label{Name: "__scheme__", Value: "http"}, + labels.Label{Name: "__meta_kubernetes_pod_name", Value: "my-pod-23491"}, + labels.Label{Name: "__meta_kubernetes_pod_uid", Value: "84279wretgu89dg489q2"}, + labels.Label{Name: "__meta_kubernetes_pod_container_name", Value: "my-container"}, + labels.Label{Name: "__meta_kubernetes_pod_node_name", Value: "k8s-node-123"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_name", Value: "my-pod"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_kind", Value: "StatefulSet"}, + labels.Label{Name: "__meta_kubernetes_namespace", Value: "kube-system"}, + ), + removeOldSemconvFeatureGate: true, + want: makeK8sResource(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, &k8sResourceDefinition{ + podName: "my-pod-23491", + podUID: "84279wretgu89dg489q2", + container: "my-container", + node: "k8s-node-123", + ss: "my-pod", + ns: "kube-system", + }), + }, + { + name: "kubernetes job pod", + job: "job", instance: "hostname:8888", sdLabels: labels.New( + labels.Label{Name: "__scheme__", Value: "http"}, + labels.Label{Name: "__meta_kubernetes_pod_name", Value: "my-pod-23491"}, + labels.Label{Name: "__meta_kubernetes_pod_uid", Value: "84279wretgu89dg489q2"}, + labels.Label{Name: "__meta_kubernetes_pod_container_name", Value: "my-container"}, + labels.Label{Name: "__meta_kubernetes_pod_node_name", Value: "k8s-node-123"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_name", Value: "my-pod"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_kind", Value: "Job"}, + labels.Label{Name: "__meta_kubernetes_namespace", Value: "kube-system"}, + ), + removeOldSemconvFeatureGate: true, + want: makeK8sResource(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, &k8sResourceDefinition{ + podName: "my-pod-23491", + podUID: "84279wretgu89dg489q2", + container: "my-container", + node: "k8s-node-123", + job: "my-pod", + ns: "kube-system", + }), + }, + { + name: "kubernetes cronjob pod", + job: "job", instance: "hostname:8888", sdLabels: labels.New( + labels.Label{Name: "__scheme__", Value: "http"}, + labels.Label{Name: "__meta_kubernetes_pod_name", Value: "my-pod-23491"}, + labels.Label{Name: "__meta_kubernetes_pod_uid", Value: "84279wretgu89dg489q2"}, + labels.Label{Name: "__meta_kubernetes_pod_container_name", Value: "my-container"}, + labels.Label{Name: "__meta_kubernetes_pod_node_name", Value: "k8s-node-123"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_name", Value: "my-pod"}, + labels.Label{Name: "__meta_kubernetes_pod_controller_kind", Value: "CronJob"}, + labels.Label{Name: "__meta_kubernetes_namespace", Value: "kube-system"}, + ), + removeOldSemconvFeatureGate: true, + want: makeK8sResource(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, &k8sResourceDefinition{ + podName: "my-pod-23491", + podUID: "84279wretgu89dg489q2", + container: "my-container", + node: "k8s-node-123", + cronjob: "my-pod", + ns: "kube-system", + }), + }, + { + name: "kubernetes node (e.g. 
kubelet)", + job: "job", instance: "hostname:8888", sdLabels: labels.New( + labels.Label{Name: "__scheme__", Value: "http"}, + labels.Label{Name: "__meta_kubernetes_node_name", Value: "k8s-node-123"}, + ), + removeOldSemconvFeatureGate: true, + want: makeK8sResource(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, &k8sResourceDefinition{ + node: "k8s-node-123", + }), + }, + { + name: "kubernetes service endpoint", + job: "job", instance: "hostname:8888", sdLabels: labels.New( + labels.Label{Name: "__scheme__", Value: "http"}, + labels.Label{Name: "__meta_kubernetes_endpoint_node_name", Value: "k8s-node-123"}, + ), + removeOldSemconvFeatureGate: true, + want: makeK8sResource(&jobInstanceDefinition{ + "job", "hostname:8888", "hostname", "http", "8888", + }, &k8sResourceDefinition{ + node: "k8s-node-123", + }), + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + SetFeatureGateForTest(t, removeOldSemconvFeatureGate, tt.removeOldSemconvFeatureGate) + got := CreateResource(tt.job, tt.instance, tt.sdLabels) + require.Equal(t, tt.want.Attributes().AsRaw(), got.Attributes().AsRaw()) + }) + } +} + +// Force the state of feature gate for a test +// usage: defer SetFeatureGateForTest("gateName", true)() +func SetFeatureGateForTest(t testing.TB, gate *featuregate.Gate, enabled bool) func() { + originalValue := gate.IsEnabled() + require.NoError(t, featuregate.GlobalRegistry().Set(gate.ID(), enabled)) + return func() { + require.NoError(t, featuregate.GlobalRegistry().Set(gate.ID(), originalValue)) + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/staleness_end_to_end_test.go b/pkg/promotel/prometheusreceiver/internal/staleness_end_to_end_test.go new file mode 100644 index 000000000..12fa42724 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/staleness_end_to_end_test.go @@ -0,0 +1,233 @@ + +package internal_test + +import ( + "context" + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/url" + "os" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/confmap" + "go.opentelemetry.io/collector/confmap/provider/fileprovider" + "go.opentelemetry.io/collector/exporter" + "go.opentelemetry.io/collector/otelcol" + "go.opentelemetry.io/collector/processor" + "go.opentelemetry.io/collector/processor/batchprocessor" + "go.opentelemetry.io/collector/receiver" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + + "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusremotewriteexporter" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver" +) + +// Test that staleness markers are emitted for timeseries that intermittently disappear. +// This test runs the entire collector and end-to-end scrapes then checks with the +// Prometheus remotewrite exporter that staleness markers are emitted per timeseries. +// See https://github.com/open-telemetry/opentelemetry-collector/issues/3413 +func TestStalenessMarkersEndToEnd(t *testing.T) { + if testing.Short() { + t.Skip("This test can take a long time") + } + + ctx, cancel := context.WithCancel(context.Background()) + + // 1. Setup the server that sends series that intermittently appear and disappear. 
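+ // The handler below alternates between two jvm_* series on every scrape,
+ // so each series disappears on the following scrape and should be marked stale.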
+ n := &atomic.Uint64{} + scrapeServer := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) { + // Increment the scrape count atomically per scrape. + i := n.Add(1) + + select { + case <-ctx.Done(): + return + default: + } + + // Alternate metrics per scrape so that every one of + // them will be reported as stale. + if i%2 == 0 { + fmt.Fprintf(rw, ` +# HELP jvm_memory_bytes_used Used bytes of a given JVM memory area. +# TYPE jvm_memory_bytes_used gauge +jvm_memory_bytes_used{area="heap"} %.1f`, float64(i)) + } else { + fmt.Fprintf(rw, ` +# HELP jvm_memory_pool_bytes_used Used bytes of a given JVM memory pool. +# TYPE jvm_memory_pool_bytes_used gauge +jvm_memory_pool_bytes_used{pool="CodeHeap 'non-nmethods'"} %.1f`, float64(i)) + } + })) + defer scrapeServer.Close() + + serverURL, err := url.Parse(scrapeServer.URL) + require.NoError(t, err) + + // 2. Set up the Prometheus RemoteWrite endpoint. + prweUploads := make(chan *prompb.WriteRequest) + prweServer := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, req *http.Request) { + // Snappy decode the uploads. + payload, rerr := io.ReadAll(req.Body) + assert.NoError(t, rerr) + + recv := make([]byte, len(payload)) + decoded, derr := snappy.Decode(recv, payload) + assert.NoError(t, derr) + + writeReq := new(prompb.WriteRequest) + assert.NoError(t, proto.Unmarshal(decoded, writeReq)) + + select { + case <-ctx.Done(): + return + case prweUploads <- writeReq: + } + })) + defer prweServer.Close() + + // 3. Set the OpenTelemetry Prometheus receiver. + cfg := fmt.Sprintf(` +receivers: + prometheus: + config: + scrape_configs: + - job_name: 'test' + scrape_interval: 100ms + static_configs: + - targets: [%q] + +processors: + batch: +exporters: + prometheusremotewrite: + endpoint: %q + tls: + insecure: true + +service: + pipelines: + metrics: + receivers: [prometheus] + processors: [batch] + exporters: [prometheusremotewrite]`, serverURL.Host, prweServer.URL) + + confFile, err := os.CreateTemp(os.TempDir(), "conf-") + require.NoError(t, err) + defer os.Remove(confFile.Name()) + _, err = confFile.Write([]byte(cfg)) + require.NoError(t, err) + // 4. Run the OpenTelemetry Collector. + receivers, err := receiver.MakeFactoryMap(prometheusreceiver.NewFactory()) + require.NoError(t, err) + exporters, err := exporter.MakeFactoryMap(prometheusremotewriteexporter.NewFactory()) + require.NoError(t, err) + processors, err := processor.MakeFactoryMap(batchprocessor.NewFactory()) + require.NoError(t, err) + + factories := otelcol.Factories{ + Receivers: receivers, + Exporters: exporters, + Processors: processors, + } + + appSettings := otelcol.CollectorSettings{ + Factories: func() (otelcol.Factories, error) { return factories, nil }, + ConfigProviderSettings: otelcol.ConfigProviderSettings{ + ResolverSettings: confmap.ResolverSettings{ + URIs: []string{confFile.Name()}, + ProviderFactories: []confmap.ProviderFactory{fileprovider.NewFactory()}, + }, + }, + BuildInfo: component.BuildInfo{ + Command: "otelcol", + Description: "OpenTelemetry Collector", + Version: "tests", + }, + LoggingOptions: []zap.Option{ + // Turn off the verbose logging from the collector. + zap.WrapCore(func(zapcore.Core) zapcore.Core { + return zapcore.NewNopCore() + }), + }, + } + + app, err := otelcol.NewCollector(appSettings) + require.NoError(t, err) + + go func() { + assert.NoError(t, app.Run(context.Background())) + }() + defer app.Shutdown() + + // Wait until the collector has actually started. 
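+ // The collector reports StateStarting until its pipelines are up, so poll
+ // GetState until it is no longer starting.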
+ for notYetStarted := true; notYetStarted; {
+ state := app.GetState()
+ switch state {
+ case otelcol.StateRunning, otelcol.StateClosed, otelcol.StateClosing:
+ notYetStarted = false
+ case otelcol.StateStarting:
+ }
+ time.Sleep(10 * time.Millisecond)
+ }
+
+ // 5. Let's wait on 10 fetches.
+ var wReqL []*prompb.WriteRequest
+ for i := 0; i < 10; i++ {
+ wReqL = append(wReqL, <-prweUploads)
+ }
+ defer cancel()
+
+ // 6. Assert that we encounter the stale markers, i.e. the special NaN values, for the various time series.
+ staleMarkerCount := 0
+ totalSamples := 0
+ require.NotEmpty(t, wReqL, "Expecting at least one WriteRequest")
+ for i, wReq := range wReqL {
+ name := fmt.Sprintf("WriteRequest#%d", i)
+ require.NotEmpty(t, wReq.Timeseries, "Expecting at least 1 timeSeries for:: "+name)
+ for j, ts := range wReq.Timeseries {
+ fullName := fmt.Sprintf("%s/TimeSeries#%d", name, j)
+ assert.NotEmpty(t, ts.Samples, "Expected at least 1 Sample in:: "+fullName)
+
+ // We are strictly counting series directly included in the scrapes, and not
+ // internal timeseries like "up" or "scrape_seconds" etc.
+ metricName := ""
+ for _, label := range ts.Labels {
+ if label.Name == "__name__" {
+ metricName = label.Value
+ }
+ }
+ if !strings.HasPrefix(metricName, "jvm") {
+ continue
+ }
+
+ for _, sample := range ts.Samples {
+ totalSamples++
+ if value.IsStaleNaN(sample.Value) {
+ staleMarkerCount++
+ }
+ }
+ }
+ }
+
+ require.Positive(t, totalSamples, "Expected at least 1 sample")
+ // On every alternate scrape, the series from the prior scrape will be reported as stale.
+ // Expect at least:
+ // * The first scrape will NOT return stale markers
+ // * ((N-1) / alternatives) = ((10-1) / 2) = ~40% chance of stale markers being emitted.
+ chance := float64(staleMarkerCount) / float64(totalSamples)
+ require.GreaterOrEqualf(t, chance, 0.4, "Expected at least one stale marker: %.3f", chance)
+}
diff --git a/pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster.go b/pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster.go
new file mode 100644
index 000000000..8543da756
--- /dev/null
+++ b/pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster.go
@@ -0,0 +1,128 @@
+
+package internal
+
+import (
+ "errors"
+ "regexp"
+
+ "go.opentelemetry.io/collector/pdata/pmetric"
+ "go.uber.org/zap"
+)
+
+var (
+ errNoStartTimeMetrics = errors.New("start_time metric is missing")
+ errNoDataPointsStartTimeMetric = errors.New("start time metric with no data points")
+ errUnsupportedTypeStartTimeMetric = errors.New("unsupported data type for start time metric")
+)
+
+type startTimeMetricAdjuster struct {
+ startTimeMetricRegex *regexp.Regexp
+ logger *zap.Logger
+}
+
+// NewStartTimeMetricAdjuster returns a new MetricsAdjuster that adjusts metrics' start times based on a start time metric.
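+// The start time is taken from the first data point of a gauge or sum whose
+// name matches startTimeMetricRegex (or the default start time metric when no
+// regex is configured) and is applied to every sum, summary, histogram and
+// exponential histogram data point; gauge data points are left unchanged.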
+func NewStartTimeMetricAdjuster(logger *zap.Logger, startTimeMetricRegex *regexp.Regexp) MetricsAdjuster { + return &startTimeMetricAdjuster{ + startTimeMetricRegex: startTimeMetricRegex, + logger: logger, + } +} + +func (stma *startTimeMetricAdjuster) AdjustMetrics(metrics pmetric.Metrics) error { + startTime, err := stma.getStartTime(metrics) + if err != nil { + return err + } + + startTimeTs := timestampFromFloat64(startTime) + for i := 0; i < metrics.ResourceMetrics().Len(); i++ { + rm := metrics.ResourceMetrics().At(i) + for j := 0; j < rm.ScopeMetrics().Len(); j++ { + ilm := rm.ScopeMetrics().At(j) + for k := 0; k < ilm.Metrics().Len(); k++ { + metric := ilm.Metrics().At(k) + switch metric.Type() { + case pmetric.MetricTypeGauge: + continue + + case pmetric.MetricTypeSum: + dataPoints := metric.Sum().DataPoints() + for l := 0; l < dataPoints.Len(); l++ { + dp := dataPoints.At(l) + dp.SetStartTimestamp(startTimeTs) + } + + case pmetric.MetricTypeSummary: + dataPoints := metric.Summary().DataPoints() + for l := 0; l < dataPoints.Len(); l++ { + dp := dataPoints.At(l) + dp.SetStartTimestamp(startTimeTs) + } + + case pmetric.MetricTypeHistogram: + dataPoints := metric.Histogram().DataPoints() + for l := 0; l < dataPoints.Len(); l++ { + dp := dataPoints.At(l) + dp.SetStartTimestamp(startTimeTs) + } + + case pmetric.MetricTypeExponentialHistogram: + dataPoints := metric.ExponentialHistogram().DataPoints() + for l := 0; l < dataPoints.Len(); l++ { + dp := dataPoints.At(l) + dp.SetStartTimestamp(startTimeTs) + } + + case pmetric.MetricTypeEmpty: + fallthrough + + default: + stma.logger.Warn("Unknown metric type", zap.String("type", metric.Type().String())) + } + } + } + } + + return nil +} + +func (stma *startTimeMetricAdjuster) getStartTime(metrics pmetric.Metrics) (float64, error) { + for i := 0; i < metrics.ResourceMetrics().Len(); i++ { + rm := metrics.ResourceMetrics().At(i) + for j := 0; j < rm.ScopeMetrics().Len(); j++ { + ilm := rm.ScopeMetrics().At(j) + for k := 0; k < ilm.Metrics().Len(); k++ { + metric := ilm.Metrics().At(k) + if stma.matchStartTimeMetric(metric.Name()) { + switch metric.Type() { + case pmetric.MetricTypeGauge: + if metric.Gauge().DataPoints().Len() == 0 { + return 0.0, errNoDataPointsStartTimeMetric + } + return metric.Gauge().DataPoints().At(0).DoubleValue(), nil + + case pmetric.MetricTypeSum: + if metric.Sum().DataPoints().Len() == 0 { + return 0.0, errNoDataPointsStartTimeMetric + } + return metric.Sum().DataPoints().At(0).DoubleValue(), nil + + case pmetric.MetricTypeEmpty, pmetric.MetricTypeHistogram, pmetric.MetricTypeExponentialHistogram, pmetric.MetricTypeSummary: + fallthrough + default: + return 0, errUnsupportedTypeStartTimeMetric + } + } + } + } + } + return 0.0, errNoStartTimeMetrics +} + +func (stma *startTimeMetricAdjuster) matchStartTimeMetric(metricName string) bool { + if stma.startTimeMetricRegex != nil { + return stma.startTimeMetricRegex.MatchString(metricName) + } + + return metricName == startTimeMetricName +} diff --git a/pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster_test.go b/pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster_test.go new file mode 100644 index 000000000..4990eccc4 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/starttimemetricadjuster_test.go @@ -0,0 +1,154 @@ + +package internal + +import ( + "regexp" + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.uber.org/zap" +) + 
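+// TestStartTimeMetricMatch exercises both the regex-based and the default start
+// time metric lookups, as well as the error cases for start time metrics with
+// no data points and with unsupported metric types.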
+func TestStartTimeMetricMatch(t *testing.T) { + const startTime = pcommon.Timestamp(123 * 1e9) + const currentTime = pcommon.Timestamp(126 * 1e9) + const matchBuilderStartTime = 124 + + tests := []struct { + name string + inputs pmetric.Metrics + startTimeMetricRegex *regexp.Regexp + expectedStartTime pcommon.Timestamp + expectedErr error + }{ + { + name: "regexp_match_sum_metric", + inputs: metrics( + sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)), + histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})), + summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})), + sumMetric("example_process_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime)), + sumMetric("process_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime+1)), + exponentialHistogramMetric("test_exponential_histogram_metric", exponentialHistogramPointSimplified(nil, startTime, currentTime, 3, 1, -5, 3)), + ), + startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"), + expectedStartTime: timestampFromFloat64(matchBuilderStartTime), + }, + { + name: "match_default_sum_start_time_metric", + inputs: metrics( + sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)), + histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})), + summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})), + sumMetric("example_process_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime)), + sumMetric("process_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime+1)), + exponentialHistogramMetric("test_exponential_histogram_metric", exponentialHistogramPointSimplified(nil, startTime, currentTime, 3, 1, -5, 3)), + ), + expectedStartTime: timestampFromFloat64(matchBuilderStartTime + 1), + }, + { + name: "regexp_match_gauge_metric", + inputs: metrics( + sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)), + histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})), + summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})), + gaugeMetric("example_process_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime)), + gaugeMetric("process_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime+1)), + ), + startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"), + expectedStartTime: timestampFromFloat64(matchBuilderStartTime), + }, + { + name: "match_default_gauge_start_time_metric", + inputs: metrics( + sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)), + histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})), + summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})), + gaugeMetric("example_process_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime)), + gaugeMetric("process_start_time_seconds", doublePoint(nil, startTime, currentTime, 
matchBuilderStartTime+1)), + ), + expectedStartTime: timestampFromFloat64(matchBuilderStartTime + 1), + }, + { + name: "empty gauge start time metrics", + inputs: metrics( + gaugeMetric("process_start_time_seconds"), + ), + expectedErr: errNoDataPointsStartTimeMetric, + }, + { + name: "empty sum start time metrics", + inputs: metrics( + sumMetric("process_start_time_seconds"), + ), + expectedErr: errNoDataPointsStartTimeMetric, + }, + { + name: "unsupported type start time metric", + inputs: metrics( + histogramMetric("process_start_time_seconds"), + ), + expectedErr: errUnsupportedTypeStartTimeMetric, + }, + { + name: "regexp_nomatch", + inputs: metrics( + sumMetric("subprocess_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime)), + ), + startTimeMetricRegex: regexp.MustCompile("^.+_process_start_time_seconds$"), + expectedErr: errNoStartTimeMetrics, + }, + { + name: "nomatch_default_start_time_metric", + inputs: metrics( + gaugeMetric("subprocess_start_time_seconds", doublePoint(nil, startTime, currentTime, matchBuilderStartTime)), + ), + expectedErr: errNoStartTimeMetrics, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + stma := NewStartTimeMetricAdjuster(zap.NewNop(), tt.startTimeMetricRegex) + if tt.expectedErr != nil { + assert.ErrorIs(t, stma.AdjustMetrics(tt.inputs), tt.expectedErr) + return + } + assert.NoError(t, stma.AdjustMetrics(tt.inputs)) + for i := 0; i < tt.inputs.ResourceMetrics().Len(); i++ { + rm := tt.inputs.ResourceMetrics().At(i) + for j := 0; j < rm.ScopeMetrics().Len(); j++ { + ilm := rm.ScopeMetrics().At(j) + for k := 0; k < ilm.Metrics().Len(); k++ { + metric := ilm.Metrics().At(k) + switch metric.Type() { + case pmetric.MetricTypeSum: + dps := metric.Sum().DataPoints() + for l := 0; l < dps.Len(); l++ { + assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp()) + } + case pmetric.MetricTypeSummary: + dps := metric.Summary().DataPoints() + for l := 0; l < dps.Len(); l++ { + assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp()) + } + case pmetric.MetricTypeHistogram: + dps := metric.Histogram().DataPoints() + for l := 0; l < dps.Len(); l++ { + assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp()) + } + case pmetric.MetricTypeExponentialHistogram: + dps := metric.ExponentialHistogram().DataPoints() + for l := 0; l < dps.Len(); l++ { + assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp()) + } + case pmetric.MetricTypeEmpty, pmetric.MetricTypeGauge: + } + } + } + } + }) + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/transaction.go b/pkg/promotel/prometheusreceiver/internal/transaction.go new file mode 100644 index 000000000..f44c19cef --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/transaction.go @@ -0,0 +1,536 @@ +package internal + +import ( + "context" + "errors" + "fmt" + "math" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/storage" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/receiver" + "go.opentelemetry.io/collector/receiver/receiverhelper" + "go.uber.org/zap" + + 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus" + + mdata "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/internal/metadata" + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +type resourceKey struct { + job string + instance string +} +type transaction struct { + isNew bool + trimSuffixes bool + enableNativeHistograms bool + ctx context.Context + families map[resourceKey]map[scopeID]map[string]*metricFamily + mc scrape.MetricMetadataStore + sink consumer.Metrics + externalLabels labels.Labels + nodeResources map[resourceKey]pcommon.Resource + scopeAttributes map[resourceKey]map[scopeID]pcommon.Map + logger *zap.Logger + buildInfo component.BuildInfo + metricAdjuster MetricsAdjuster + obsrecv *receiverhelper.ObsReport + // Used as buffer to calculate series ref hash. + bufBytes []byte +} + +var emptyScopeID scopeID + +type scopeID struct { + name string + version string +} + +func NewTransaction( + ctx context.Context, + metricAdjuster MetricsAdjuster, + sink consumer.Metrics, + externalLabels labels.Labels, + settings receiver.Settings, + obsrecv *receiverhelper.ObsReport, + trimSuffixes bool, + enableNativeHistograms bool, +) *transaction { + return newTransaction( + ctx, + metricAdjuster, + sink, + externalLabels, + settings, + obsrecv, + trimSuffixes, + enableNativeHistograms, + ) +} + +func newTransaction( + ctx context.Context, + metricAdjuster MetricsAdjuster, + sink consumer.Metrics, + externalLabels labels.Labels, + settings receiver.Settings, + obsrecv *receiverhelper.ObsReport, + trimSuffixes bool, + enableNativeHistograms bool, +) *transaction { + return &transaction{ + ctx: ctx, + families: make(map[resourceKey]map[scopeID]map[string]*metricFamily), + isNew: true, + trimSuffixes: trimSuffixes, + enableNativeHistograms: enableNativeHistograms, + sink: sink, + metricAdjuster: metricAdjuster, + externalLabels: externalLabels, + logger: settings.Logger, + buildInfo: settings.BuildInfo, + obsrecv: obsrecv, + bufBytes: make([]byte, 0, 1024), + scopeAttributes: make(map[resourceKey]map[scopeID]pcommon.Map), + nodeResources: map[resourceKey]pcommon.Resource{}, + } +} + +// Append always returns 0 to disable label caching. +func (t *transaction) Append(_ storage.SeriesRef, ls labels.Labels, atMs int64, val float64) (storage.SeriesRef, error) { + select { + case <-t.ctx.Done(): + return 0, errTransactionAborted + default: + } + + if t.externalLabels.Len() != 0 { + b := labels.NewBuilder(ls) + t.externalLabels.Range(func(l labels.Label) { + b.Set(l.Name, l.Value) + }) + ls = b.Labels() + } + + rKey, err := t.initTransaction(ls) + if err != nil { + return 0, err + } + + // Any datapoint with duplicate labels MUST be rejected per: + // * https://github.com/open-telemetry/wg-prometheus/issues/44 + // * https://github.com/open-telemetry/opentelemetry-collector/issues/3407 + // as Prometheus rejects such too as of version 2.16.0, released on 2020-02-13. + if dupLabel, hasDup := ls.HasDuplicateLabelNames(); hasDup { + return 0, fmt.Errorf("invalid sample: non-unique label names: %q", dupLabel) + } + + metricName := ls.Get(model.MetricNameLabel) + if metricName == "" { + return 0, errMetricNameNotFound + } + + // See https://www.prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series + // up: 1 if the instance is healthy, i.e. reachable, or 0 if the scrape failed. + // But it can also be a staleNaN, which is inserted when the target goes away. 
+ if metricName == scrapeUpMetricName && val != 1.0 && !value.IsStaleNaN(val) { + if val == 0.0 { + t.logger.Warn("Failed to scrape Prometheus endpoint", + zap.Int64("scrape_timestamp", atMs), + zap.Stringer("target_labels", ls)) + } else { + t.logger.Warn("The 'up' metric contains invalid value", + zap.Float64("value", val), + zap.Int64("scrape_timestamp", atMs), + zap.Stringer("target_labels", ls)) + } + } + + // For the `target_info` metric we need to convert it to resource attributes. + if metricName == prometheus.TargetInfoMetricName { + t.AddTargetInfo(*rKey, ls) + return 0, nil + } + + // For the `otel_scope_info` metric we need to convert it to scope attributes. + if metricName == prometheus.ScopeInfoMetricName { + t.addScopeInfo(*rKey, ls) + return 0, nil + } + + curMF, existing := t.getOrCreateMetricFamily(*rKey, getScopeID(ls), metricName) + + if t.enableNativeHistograms && curMF.mtype == pmetric.MetricTypeExponentialHistogram { + // If a histogram has both classic and native version, the native histogram is scraped + // first. Getting a float sample for the same series means that `scrape_classic_histogram` + // is set to true in the scrape config. In this case, we should ignore the native histogram. + curMF.mtype = pmetric.MetricTypeHistogram + } + + seriesRef := t.getSeriesRef(ls, curMF.mtype) + err = curMF.addSeries(seriesRef, metricName, ls, atMs, val) + if err != nil { + // Handle special case of float sample indicating staleness of native + // histogram. This is similar to how Prometheus handles it, but we + // don't have access to the previous value so we're applying some + // heuristics to figure out if this is native histogram or not. + // The metric type will indicate histogram, but presumably there will be no + // _bucket, _count, _sum suffix or `le` label, which makes addSeries fail + // with errEmptyLeLabel. + if t.enableNativeHistograms && errors.Is(err, errEmptyLeLabel) && !existing && value.IsStaleNaN(val) && curMF.mtype == pmetric.MetricTypeHistogram { + mg := curMF.loadMetricGroupOrCreate(seriesRef, ls, atMs) + curMF.mtype = pmetric.MetricTypeExponentialHistogram + mg.mtype = pmetric.MetricTypeExponentialHistogram + _ = curMF.addExponentialHistogramSeries(seriesRef, metricName, ls, atMs, &histogram.Histogram{Sum: math.Float64frombits(value.StaleNaN)}, nil) + // ignore errors here, this is best effort. + } else { + t.logger.Warn("failed to add datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls)) + } + } + + return 0, nil // never return errors, as that fails the whole scrape +} + +// getOrCreateMetricFamily returns the metric family for the given metric name and scope, +// and true if an existing family was found. 
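+// When the metadata store has no entry for the exact name, the lookup falls
+// back to the normalized metric name so that component series (e.g. _sum,
+// _count, _bucket) are grouped under their parent family.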
+func (t *transaction) getOrCreateMetricFamily(key resourceKey, scope scopeID, mn string) (*metricFamily, bool) { + if _, ok := t.families[key]; !ok { + t.families[key] = make(map[scopeID]map[string]*metricFamily) + } + if _, ok := t.families[key][scope]; !ok { + t.families[key][scope] = make(map[string]*metricFamily) + } + + curMf, ok := t.families[key][scope][mn] + if !ok { + fn := mn + if _, ok := t.mc.GetMetadata(mn); !ok { + fn = normalizeMetricName(mn) + } + if mf, ok := t.families[key][scope][fn]; ok && mf.includesMetric(mn) { + curMf = mf + } else { + curMf = newMetricFamily(mn, t.mc, t.logger) + t.families[key][scope][curMf.name] = curMf + return curMf, false + } + } + return curMf, true +} + +func (t *transaction) AppendExemplar(_ storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { + select { + case <-t.ctx.Done(): + return 0, errTransactionAborted + default: + } + + rKey, err := t.initTransaction(l) + if err != nil { + return 0, err + } + + l = l.WithoutEmpty() + + if dupLabel, hasDup := l.HasDuplicateLabelNames(); hasDup { + return 0, fmt.Errorf("invalid sample: non-unique label names: %q", dupLabel) + } + + mn := l.Get(model.MetricNameLabel) + if mn == "" { + return 0, errMetricNameNotFound + } + + mf, _ := t.getOrCreateMetricFamily(*rKey, getScopeID(l), mn) + mf.addExemplar(t.getSeriesRef(l, mf.mtype), e) + + return 0, nil +} + +func (t *transaction) AppendHistogram(_ storage.SeriesRef, ls labels.Labels, atMs int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if !t.enableNativeHistograms { + return 0, nil + } + + select { + case <-t.ctx.Done(): + return 0, errTransactionAborted + default: + } + + if t.externalLabels.Len() != 0 { + b := labels.NewBuilder(ls) + t.externalLabels.Range(func(l labels.Label) { + b.Set(l.Name, l.Value) + }) + ls = b.Labels() + } + + rKey, err := t.initTransaction(ls) + if err != nil { + return 0, err + } + + // Any datapoint with duplicate labels MUST be rejected per: + // * https://github.com/open-telemetry/wg-prometheus/issues/44 + // * https://github.com/open-telemetry/opentelemetry-collector/issues/3407 + // as Prometheus rejects such too as of version 2.16.0, released on 2020-02-13. + if dupLabel, hasDup := ls.HasDuplicateLabelNames(); hasDup { + return 0, fmt.Errorf("invalid sample: non-unique label names: %q", dupLabel) + } + + metricName := ls.Get(model.MetricNameLabel) + if metricName == "" { + return 0, errMetricNameNotFound + } + + // The `up`, `target_info`, `otel_scope_info` metrics should never generate native histograms, + // thus we don't check for them here as opposed to the Append function. + + curMF, existing := t.getOrCreateMetricFamily(*rKey, getScopeID(ls), metricName) + if !existing { + curMF.mtype = pmetric.MetricTypeExponentialHistogram + } else if curMF.mtype != pmetric.MetricTypeExponentialHistogram { + // Already scraped as classic histogram. 
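+ // Drop the native sample so the series is not recorded twice.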
+ return 0, nil + } + + if h != nil && h.CounterResetHint == histogram.GaugeType || fh != nil && fh.CounterResetHint == histogram.GaugeType { + t.logger.Warn("dropping unsupported gauge histogram datapoint", zap.String("metric_name", metricName), zap.Any("labels", ls)) + } + + err = curMF.addExponentialHistogramSeries(t.getSeriesRef(ls, curMF.mtype), metricName, ls, atMs, h, fh) + if err != nil { + t.logger.Warn("failed to add histogram datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls)) + } + + return 0, nil // never return errors, as that fails the whole scrape +} + +func (t *transaction) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { + // TODO: implement this func + return 0, nil +} + +func (t *transaction) getSeriesRef(ls labels.Labels, mtype pmetric.MetricType) uint64 { + var hash uint64 + hash, t.bufBytes = getSeriesRef(t.bufBytes, ls, mtype) + return hash +} + +// getMetrics returns all metrics to the given slice. +// The only error returned by this function is errNoDataToBuild. +// TODO: USE THIS TO CONVERT PROM TO OTEL +func (t *transaction) getMetrics() (pmetric.Metrics, error) { + if len(t.families) == 0 { + return pmetric.Metrics{}, errNoDataToBuild + } + + md := pmetric.NewMetrics() + + for rKey, families := range t.families { + if len(families) == 0 { + continue + } + resource, ok := t.nodeResources[rKey] + if !ok { + continue + } + rms := md.ResourceMetrics().AppendEmpty() + resource.CopyTo(rms.Resource()) + + for scope, mfs := range families { + ils := rms.ScopeMetrics().AppendEmpty() + // If metrics don't include otel_scope_name or otel_scope_version + // labels, use the receiver name and version. + if scope == emptyScopeID { + ils.Scope().SetName(mdata.ScopeName) + ils.Scope().SetVersion(t.buildInfo.Version) + } else { + // Otherwise, use the scope that was provided with the metrics. + ils.Scope().SetName(scope.name) + ils.Scope().SetVersion(scope.version) + // If we got an otel_scope_info metric for that scope, get scope + // attributes from it. 
+ if scopeAttributes, ok := t.scopeAttributes[rKey]; ok { + if attributes, ok := scopeAttributes[scope]; ok { + attributes.CopyTo(ils.Scope().Attributes()) + } + } + } + metrics := ils.Metrics() + for _, mf := range mfs { + mf.appendMetric(metrics, t.trimSuffixes) + } + } + } + // remove the resource if no metrics were added to avoid returning resources with empty data points + md.ResourceMetrics().RemoveIf(func(metrics pmetric.ResourceMetrics) bool { + if metrics.ScopeMetrics().Len() == 0 { + return true + } + remove := true + for i := 0; i < metrics.ScopeMetrics().Len(); i++ { + if metrics.ScopeMetrics().At(i).Metrics().Len() > 0 { + remove = false + break + } + } + return remove + }) + + return md, nil +} + +func getScopeID(ls labels.Labels) scopeID { + var scope scopeID + ls.Range(func(lbl labels.Label) { + if lbl.Name == prometheus.ScopeNameLabelKey { + scope.name = lbl.Value + } + if lbl.Name == prometheus.ScopeVersionLabelKey { + scope.version = lbl.Value + } + }) + return scope +} + +func (t *transaction) initTransaction(labels labels.Labels) (*resourceKey, error) { + target, ok := scrape.TargetFromContext(t.ctx) + if !ok { + return nil, errors.New("unable to find target in context") + } + t.mc, ok = scrape.MetricMetadataStoreFromContext(t.ctx) + if !ok { + return nil, errors.New("unable to find MetricMetadataStore in context") + } + + rKey, err := t.getJobAndInstance(labels) + if err != nil { + return nil, err + } + if _, ok := t.nodeResources[*rKey]; !ok { + t.nodeResources[*rKey] = CreateResource(rKey.job, rKey.instance, target.DiscoveredLabels()) + } + + t.isNew = false + return rKey, nil +} + +func (t *transaction) getJobAndInstance(labels labels.Labels) (*resourceKey, error) { + // first, try to get job and instance from the labels + job, instance := labels.Get(model.JobLabel), labels.Get(model.InstanceLabel) + if job != "" && instance != "" { + return &resourceKey{ + job: job, + instance: instance, + }, nil + } + + // if not available in the labels, try to fall back to the scrape job associated + // with the transaction. + // this can be the case for, e.g., aggregated metrics coming from a federate endpoint + // that represent the whole cluster, rather than an individual workload. 
+ // See https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32555 for reference + if target, ok := scrape.TargetFromContext(t.ctx); ok { + if job == "" { + job = target.GetValue(model.JobLabel) + } + if instance == "" { + instance = target.GetValue(model.InstanceLabel) + } + if job != "" && instance != "" { + return &resourceKey{ + job: job, + instance: instance, + }, nil + } + } + return nil, errNoJobInstance +} + +func (t *transaction) Commit() error { + if t.isNew { + return nil + } + + ctx := t.obsrecv.StartMetricsOp(t.ctx) + md, err := t.getMetrics() + if err != nil { + t.obsrecv.EndMetricsOp(ctx, dataformat, 0, err) + return err + } + + numPoints := md.DataPointCount() + if numPoints == 0 { + return nil + } + + if err = t.metricAdjuster.AdjustMetrics(md); err != nil { + t.obsrecv.EndMetricsOp(ctx, dataformat, numPoints, err) + return err + } + + err = t.sink.ConsumeMetrics(ctx, md) + t.obsrecv.EndMetricsOp(ctx, dataformat, numPoints, err) + return err +} + +func (t *transaction) Rollback() error { + return nil +} + +func (t *transaction) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { + // TODO: implement this func + return 0, nil +} + +func (t *transaction) AddTargetInfo(key resourceKey, ls labels.Labels) { + if resource, ok := t.nodeResources[key]; ok { + attrs := resource.Attributes() + ls.Range(func(lbl labels.Label) { + if lbl.Name == model.JobLabel || lbl.Name == model.InstanceLabel || lbl.Name == model.MetricNameLabel { + return + } + attrs.PutStr(lbl.Name, lbl.Value) + }) + } +} + +func (t *transaction) addScopeInfo(key resourceKey, ls labels.Labels) { + attrs := pcommon.NewMap() + scope := scopeID{} + ls.Range(func(lbl labels.Label) { + if lbl.Name == model.JobLabel || lbl.Name == model.InstanceLabel || lbl.Name == model.MetricNameLabel { + return + } + if lbl.Name == prometheus.ScopeNameLabelKey { + scope.name = lbl.Value + return + } + if lbl.Name == prometheus.ScopeVersionLabelKey { + scope.version = lbl.Value + return + } + attrs.PutStr(lbl.Name, lbl.Value) + }) + if _, ok := t.scopeAttributes[key]; !ok { + t.scopeAttributes[key] = make(map[scopeID]pcommon.Map) + } + t.scopeAttributes[key][scope] = attrs +} + +func getSeriesRef(bytes []byte, ls labels.Labels, mtype pmetric.MetricType) (uint64, []byte) { + return ls.HashWithoutLabels(bytes, getSortedNotUsefulLabels(mtype)...) 
+} diff --git a/pkg/promotel/prometheusreceiver/internal/transaction_test.go b/pkg/promotel/prometheusreceiver/internal/transaction_test.go new file mode 100644 index 000000000..938ccde23 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/transaction_test.go @@ -0,0 +1,2003 @@ +package internal + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/tsdb/tsdbutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/receiver/receiverhelper" + "go.opentelemetry.io/collector/receiver/receivertest" + conventions "go.opentelemetry.io/collector/semconv/v1.27.0" + "go.uber.org/zap" + "go.uber.org/zap/zaptest/observer" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +const ( + startTimestamp = pcommon.Timestamp(1555366608340000000) + ts = int64(1555366610000) + interval = int64(15 * 1000) + tsNanos = pcommon.Timestamp(ts * 1e6) + tsPlusIntervalNanos = pcommon.Timestamp((ts + interval) * 1e6) +) + +var ( + target = scrape.NewTarget( + // processedLabels contain label values after processing (e.g. relabeling) + labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + }), + // discoveredLabels contain labels prior to any processing + labels.FromMap(map[string]string{ + model.AddressLabel: "address:8080", + model.SchemeLabel: "http", + }), + nil) + + scrapeCtx = scrape.ContextWithMetricMetadataStore( + scrape.ContextWithTarget(context.Background(), target), + testMetadataStore(testMetadata)) +) + +func TestTransactionCommitWithoutAdding(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionCommitWithoutAdding(t, enableNativeHistograms) + }) + } +} + +func testTransactionCommitWithoutAdding(t *testing.T, enableNativeHistograms bool) { + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, consumertest.NewNop(), labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + assert.NoError(t, tr.Commit()) +} + +func TestTransactionRollbackDoesNothing(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionRollbackDoesNothing(t, enableNativeHistograms) + }) + } +} + +func testTransactionRollbackDoesNothing(t *testing.T, enableNativeHistograms bool) { + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, consumertest.NewNop(), labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + assert.NoError(t, tr.Rollback()) +} + +func TestTransactionUpdateMetadataDoesNothing(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionUpdateMetadataDoesNothing(t, 
enableNativeHistograms) + }) + } +} + +func testTransactionUpdateMetadataDoesNothing(t *testing.T, enableNativeHistograms bool) { + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, consumertest.NewNop(), labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.UpdateMetadata(0, labels.New(), metadata.Metadata{}) + assert.NoError(t, err) +} + +func TestTransactionAppendNoTarget(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendNoTarget(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendNoTarget(t *testing.T, enableNativeHistograms bool) { + badLabels := labels.FromStrings(model.MetricNameLabel, "counter_test") + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, consumertest.NewNop(), labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.Append(0, badLabels, time.Now().Unix()*1000, 1.0) + assert.Error(t, err) +} + +func TestTransactionAppendNoMetricName(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendNoMetricName(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendNoMetricName(t *testing.T, enableNativeHistograms bool) { + jobNotFoundLb := labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test2", + }) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, consumertest.NewNop(), labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.Append(0, jobNotFoundLb, time.Now().Unix()*1000, 1.0) + assert.ErrorIs(t, err, errMetricNameNotFound) + assert.ErrorIs(t, tr.Commit(), errNoDataToBuild) +} + +func TestTransactionAppendEmptyMetricName(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendEmptyMetricName(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendEmptyMetricName(t *testing.T, enableNativeHistograms bool) { + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, consumertest.NewNop(), labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.Append(0, labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test2", + model.MetricNameLabel: "", + }), time.Now().Unix()*1000, 1.0) + assert.ErrorIs(t, err, errMetricNameNotFound) +} + +func TestTransactionAppendResource(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendResource(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendResource(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.Append(0, labels.FromMap(map[string]string{ + model.InstanceLabel: 
"localhost:8080", + model.JobLabel: "test", + model.MetricNameLabel: "counter_test", + }), time.Now().Unix()*1000, 1.0) + assert.NoError(t, err) + _, err = tr.Append(0, labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test", + model.MetricNameLabel: startTimeMetricName, + }), time.Now().UnixMilli(), 1.0) + assert.NoError(t, err) + assert.NoError(t, tr.Commit()) + expectedResource := CreateResource("test", "localhost:8080", labels.FromStrings(model.SchemeLabel, "http")) + mds := sink.AllMetrics() + require.Len(t, mds, 1) + gotResource := mds[0].ResourceMetrics().At(0).Resource() + require.Equal(t, expectedResource, gotResource) +} + +func TestTransactionAppendMultipleResources(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendMultipleResources(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendMultipleResources(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.Append(0, labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test-1", + model.MetricNameLabel: "counter_test", + }), time.Now().Unix()*1000, 1.0) + assert.NoError(t, err) + _, err = tr.Append(0, labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test-2", + model.MetricNameLabel: startTimeMetricName, + }), time.Now().UnixMilli(), 1.0) + assert.NoError(t, err) + assert.NoError(t, tr.Commit()) + + expectedResources := []pcommon.Resource{ + CreateResource("test-1", "localhost:8080", labels.FromStrings(model.SchemeLabel, "http")), + CreateResource("test-2", "localhost:8080", labels.FromStrings(model.SchemeLabel, "http")), + } + + mds := sink.AllMetrics() + require.Len(t, mds, 1) + require.Equal(t, 2, mds[0].ResourceMetrics().Len()) + + for _, expectedResource := range expectedResources { + foundResource := false + expectedServiceName, _ := expectedResource.Attributes().Get(conventions.AttributeServiceName) + for i := 0; i < mds[0].ResourceMetrics().Len(); i++ { + res := mds[0].ResourceMetrics().At(i).Resource() + if serviceName, ok := res.Attributes().Get(conventions.AttributeServiceName); ok { + if serviceName.AsString() == expectedServiceName.AsString() { + foundResource = true + require.Equal(t, expectedResource, res) + break + } + } + } + require.True(t, foundResource) + } +} + +func TestReceiverVersionAndNameAreAttached(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testReceiverVersionAndNameAreAttached(t, enableNativeHistograms) + }) + } +} + +func testReceiverVersionAndNameAreAttached(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.Append(0, labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test", + model.MetricNameLabel: "counter_test", + }), time.Now().Unix()*1000, 1.0) + assert.NoError(t, err) + 
assert.NoError(t, tr.Commit()) + + expectedResource := CreateResource("test", "localhost:8080", labels.FromStrings(model.SchemeLabel, "http")) + mds := sink.AllMetrics() + require.Len(t, mds, 1) + gotResource := mds[0].ResourceMetrics().At(0).Resource() + require.Equal(t, expectedResource, gotResource) + + gotScope := mds[0].ResourceMetrics().At(0).ScopeMetrics().At(0).Scope() + require.Equal(t, "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver", gotScope.Name()) + require.Equal(t, component.NewDefaultBuildInfo().Version, gotScope.Version()) +} + +func TestTransactionCommitErrorWhenAdjusterError(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionCommitErrorWhenAdjusterError(t, enableNativeHistograms) + }) + } +} + +func testTransactionCommitErrorWhenAdjusterError(t *testing.T, enableNativeHistograms bool) { + goodLabels := labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test", + model.MetricNameLabel: "counter_test", + }) + sink := new(consumertest.MetricsSink) + adjusterErr := errors.New("adjuster error") + tr := newTransaction(scrapeCtx, &errorAdjuster{err: adjusterErr}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + _, err := tr.Append(0, goodLabels, time.Now().Unix()*1000, 1.0) + assert.NoError(t, err) + assert.ErrorIs(t, tr.Commit(), adjusterErr) +} + +// Ensure that we reject duplicate label keys. See https://github.com/open-telemetry/wg-prometheus/issues/44. +func TestTransactionAppendDuplicateLabels(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendDuplicateLabels(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendDuplicateLabels(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + dupLabels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + model.MetricNameLabel, "counter_test", + "a", "1", + "a", "6", + "z", "9", + ) + + _, err := tr.Append(0, dupLabels, 1917, 1.0) + assert.ErrorContains(t, err, `invalid sample: non-unique label names: "a"`) +} + +func TestTransactionAppendHistogramNoLe(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendHistogramNoLe(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendHistogramNoLe(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + receiverSettings := receivertest.NewNopSettings() + core, observedLogs := observer.New(zap.InfoLevel) + receiverSettings.Logger = zap.New(core) + tr := newTransaction( + scrapeCtx, + &startTimeAdjuster{startTime: startTimestamp}, + sink, + labels.EmptyLabels(), + receiverSettings, + nopObsRecv(t), + false, + enableNativeHistograms, + ) + + goodLabels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + model.MetricNameLabel, "hist_test_bucket", + ) + + _, err := tr.Append(0, goodLabels, 1917, 
1.0) + require.NoError(t, err) + assert.Equal(t, 1, observedLogs.Len()) + assert.Equal(t, 1, observedLogs.FilterMessage("failed to add datapoint").Len()) + + assert.NoError(t, tr.Commit()) + assert.Empty(t, sink.AllMetrics()) +} + +func TestTransactionAppendSummaryNoQuantile(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendSummaryNoQuantile(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendSummaryNoQuantile(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + receiverSettings := receivertest.NewNopSettings() + core, observedLogs := observer.New(zap.InfoLevel) + receiverSettings.Logger = zap.New(core) + tr := newTransaction( + scrapeCtx, + &startTimeAdjuster{startTime: startTimestamp}, + sink, + labels.EmptyLabels(), + receiverSettings, + nopObsRecv(t), + false, + enableNativeHistograms, + ) + + goodLabels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + model.MetricNameLabel, "summary_test", + ) + + _, err := tr.Append(0, goodLabels, 1917, 1.0) + require.NoError(t, err) + assert.Equal(t, 1, observedLogs.Len()) + assert.Equal(t, 1, observedLogs.FilterMessage("failed to add datapoint").Len()) + + assert.NoError(t, tr.Commit()) + assert.Empty(t, sink.AllMetrics()) +} + +func TestTransactionAppendValidAndInvalid(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendValidAndInvalid(t, enableNativeHistograms) + }) + } +} + +func testTransactionAppendValidAndInvalid(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + receiverSettings := receivertest.NewNopSettings() + core, observedLogs := observer.New(zap.InfoLevel) + receiverSettings.Logger = zap.New(core) + tr := newTransaction( + scrapeCtx, + &startTimeAdjuster{startTime: startTimestamp}, + sink, + labels.EmptyLabels(), + receiverSettings, + nopObsRecv(t), + false, + enableNativeHistograms, + ) + + // a valid counter + _, err := tr.Append(0, labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "test", + model.MetricNameLabel: "counter_test", + }), time.Now().Unix()*1000, 1.0) + assert.NoError(t, err) + + // summary without quantiles, should be ignored + summarylabels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + model.MetricNameLabel, "summary_test", + ) + + _, err = tr.Append(0, summarylabels, 1917, 1.0) + require.NoError(t, err) + + assert.Equal(t, 1, observedLogs.Len()) + assert.Equal(t, 1, observedLogs.FilterMessage("failed to add datapoint").Len()) + + assert.NoError(t, tr.Commit()) + expectedResource := CreateResource("test", "localhost:8080", labels.FromStrings(model.SchemeLabel, "http")) + mds := sink.AllMetrics() + require.Len(t, mds, 1) + gotResource := mds[0].ResourceMetrics().At(0).Resource() + require.Equal(t, expectedResource, gotResource) + require.Equal(t, 1, mds[0].MetricCount()) +} + +func TestTransactionAppendWithEmptyLabelArrayFallbackToTargetLabels(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testTransactionAppendWithEmptyLabelArrayFallbackToTargetLabels(t, enableNativeHistograms) + }) + } +} + +func 
testTransactionAppendWithEmptyLabelArrayFallbackToTargetLabels(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + + scrapeTarget := scrape.NewTarget( + // processedLabels contain label values after processing (e.g. relabeling) + labels.FromMap(map[string]string{ + model.InstanceLabel: "localhost:8080", + model.JobLabel: "federate", + }), + // discoveredLabels contain labels prior to any processing + labels.FromMap(map[string]string{ + model.AddressLabel: "address:8080", + model.SchemeLabel: "http", + }), + nil) + + ctx := scrape.ContextWithMetricMetadataStore( + scrape.ContextWithTarget(context.Background(), scrapeTarget), + testMetadataStore(testMetadata)) + + tr := newTransaction(ctx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + _, err := tr.Append(0, labels.FromMap(map[string]string{ + model.MetricNameLabel: "counter_test", + }), time.Now().Unix()*1000, 1.0) + assert.NoError(t, err) +} + +func TestAppendExemplarWithNoMetricName(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testAppendExemplarWithNoMetricName(t, enableNativeHistograms) + }) + } +} + +func testAppendExemplarWithNoMetricName(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + labels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + ) + + _, err := tr.AppendExemplar(0, labels, exemplar.Exemplar{Value: 0}) + assert.Equal(t, errMetricNameNotFound, err) +} + +func TestAppendExemplarWithEmptyMetricName(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testAppendExemplarWithEmptyMetricName(t, enableNativeHistograms) + }) + } +} + +func testAppendExemplarWithEmptyMetricName(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + labels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + model.MetricNameLabel, "", + ) + _, err := tr.AppendExemplar(0, labels, exemplar.Exemplar{Value: 0}) + assert.Equal(t, errMetricNameNotFound, err) +} + +func TestAppendExemplarWithDuplicateLabels(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testAppendExemplarWithDuplicateLabels(t, enableNativeHistograms) + }) + } +} + +func testAppendExemplarWithDuplicateLabels(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + labels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + model.MetricNameLabel, "", + "a", "b", + "a", "c", + ) + _, err := tr.AppendExemplar(0, 
labels, exemplar.Exemplar{Value: 0}) + assert.ErrorContains(t, err, `invalid sample: non-unique label names: "a"`) +} + +func TestAppendExemplarWithoutAddingMetric(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testAppendExemplarWithoutAddingMetric(t, enableNativeHistograms) + }) + } +} + +func testAppendExemplarWithoutAddingMetric(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + labels := labels.FromStrings( + model.InstanceLabel, "0.0.0.0:8855", + model.JobLabel, "test", + model.MetricNameLabel, "counter_test", + "a", "b", + ) + _, err := tr.AppendExemplar(0, labels, exemplar.Exemplar{Value: 0}) + assert.NoError(t, err) +} + +func TestAppendExemplarWithNoLabels(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testAppendExemplarWithNoLabels(t, enableNativeHistograms) + }) + } +} + +func testAppendExemplarWithNoLabels(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + _, err := tr.AppendExemplar(0, labels.EmptyLabels(), exemplar.Exemplar{Value: 0}) + assert.Equal(t, errNoJobInstance, err) +} + +func TestAppendExemplarWithEmptyLabelArray(t *testing.T) { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) { + testAppendExemplarWithEmptyLabelArray(t, enableNativeHistograms) + }) + } +} + +func testAppendExemplarWithEmptyLabelArray(t *testing.T, enableNativeHistograms bool) { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + + _, err := tr.AppendExemplar(0, labels.FromStrings(), exemplar.Exemplar{Value: 0}) + assert.Equal(t, errNoJobInstance, err) +} + +func nopObsRecv(t *testing.T) *receiverhelper.ObsReport { + obsrecv, err := receiverhelper.NewObsReport(receiverhelper.ObsReportSettings{ + ReceiverID: component.MustNewID("prometheus"), + Transport: transport, + ReceiverCreateSettings: receivertest.NewNopSettings(), + }) + require.NoError(t, err) + return obsrecv +} + +func TestMetricBuilderCounters(t *testing.T) { + tests := []buildTestData{ + { + name: "single-item", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("counter_test", 100, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("counter_test") + m0.Metadata().PutStr("prometheus.type", "counter") + sum := m0.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + sum.SetIsMonotonic(true) + pt0 := sum.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + 
pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "single-item-with-exemplars", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint( + "counter_test", + 100, + []exemplar.Exemplar{ + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: model.MetricNameLabel, Value: "counter_test"}, {Name: model.JobLabel, Value: "job"}, {Name: model.InstanceLabel, Value: "instance"}, {Name: "foo", Value: "bar"}}...), + }, + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: ""}, {Name: "span_id", Value: ""}}...), + }, + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: "10a47365b8aa04e08291fab9deca84db6170"}, {Name: "span_id", Value: "719cee4a669fd7d109ff"}}...), + }, + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: "174137cab66dc880"}, {Name: "span_id", Value: "dfa4597a9d"}}...), + }, + }, + "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("counter_test") + m0.Metadata().PutStr("prometheus.type", "counter") + sum := m0.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + sum.SetIsMonotonic(true) + pt0 := sum.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + e0 := pt0.Exemplars().AppendEmpty() + e0.SetTimestamp(timestampFromMs(1663113420863)) + e0.SetDoubleValue(1) + e0.FilteredAttributes().PutStr(model.MetricNameLabel, "counter_test") + e0.FilteredAttributes().PutStr("foo", "bar") + e0.FilteredAttributes().PutStr(model.InstanceLabel, "instance") + e0.FilteredAttributes().PutStr(model.JobLabel, "job") + + e1 := pt0.Exemplars().AppendEmpty() + e1.SetTimestamp(timestampFromMs(1663113420863)) + e1.SetDoubleValue(1) + e1.FilteredAttributes().PutStr("foo", "bar") + + e2 := pt0.Exemplars().AppendEmpty() + e2.SetTimestamp(timestampFromMs(1663113420863)) + e2.SetDoubleValue(1) + e2.FilteredAttributes().PutStr("foo", "bar") + e2.SetTraceID([16]byte{0x10, 0xa4, 0x73, 0x65, 0xb8, 0xaa, 0x04, 0xe0, 0x82, 0x91, 0xfa, 0xb9, 0xde, 0xca, 0x84, 0xdb}) + e2.SetSpanID([8]byte{0x71, 0x9c, 0xee, 0x4a, 0x66, 0x9f, 0xd7, 0xd1}) + + e3 := pt0.Exemplars().AppendEmpty() + e3.SetTimestamp(timestampFromMs(1663113420863)) + e3.SetDoubleValue(1) + e3.FilteredAttributes().PutStr("foo", "bar") + e3.SetTraceID([16]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x41, 0x37, 0xca, 0xb6, 0x6d, 0xc8, 0x80}) + e3.SetSpanID([8]byte{0x00, 0x00, 0x00, 0xdf, 0xa4, 0x59, 0x7a, 0x9d}) + + return []pmetric.Metrics{md0} + }, + }, + { + name: "two-items", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("counter_test", 150, nil, "foo", "bar"), + createDataPoint("counter_test", 25, nil, "foo", "other"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("counter_test") + m0.Metadata().PutStr("prometheus.type", "counter") + sum := m0.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + 
sum.SetIsMonotonic(true) + pt0 := sum.DataPoints().AppendEmpty() + pt0.SetDoubleValue(150.0) + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + pt1 := sum.DataPoints().AppendEmpty() + pt1.SetDoubleValue(25.0) + pt1.SetStartTimestamp(startTimestamp) + pt1.SetTimestamp(tsNanos) + pt1.Attributes().PutStr("foo", "other") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "two-metrics", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("counter_test", 150, nil, "foo", "bar"), + createDataPoint("counter_test", 25, nil, "foo", "other"), + createDataPoint("counter_test2", 100, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("counter_test") + m0.Metadata().PutStr("prometheus.type", "counter") + sum0 := m0.SetEmptySum() + sum0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + sum0.SetIsMonotonic(true) + pt0 := sum0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(150.0) + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + pt1 := sum0.DataPoints().AppendEmpty() + pt1.SetDoubleValue(25.0) + pt1.SetStartTimestamp(startTimestamp) + pt1.SetTimestamp(tsNanos) + pt1.Attributes().PutStr("foo", "other") + + m1 := mL0.AppendEmpty() + m1.SetName("counter_test2") + m1.Metadata().PutStr("prometheus.type", "counter") + sum1 := m1.SetEmptySum() + sum1.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + sum1.SetIsMonotonic(true) + pt2 := sum1.DataPoints().AppendEmpty() + pt2.SetDoubleValue(100.0) + pt2.SetStartTimestamp(startTimestamp) + pt2.SetTimestamp(tsNanos) + pt2.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "metrics-with-poor-names", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("poor_name_count", 100, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("poor_name_count") + m0.Metadata().PutStr("prometheus.type", "counter") + sum := m0.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + sum.SetIsMonotonic(true) + pt0 := sum.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + } + + for _, tt := range tests { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("%s/enableNativeHistograms=%v", tt.name, enableNativeHistograms), func(t *testing.T) { + tt.run(t, enableNativeHistograms) + }) + } + } +} + +func TestMetricBuilderGauges(t *testing.T) { + tests := []buildTestData{ + { + name: "one-gauge", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("gauge_test", 100, nil, "foo", "bar"), + }, + }, + { + pts: []*testDataPoint{ + createDataPoint("gauge_test", 90, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("gauge_test") + m0.Metadata().PutStr("prometheus.type", "gauge") + gauge0 := 
m0.SetEmptyGauge() + pt0 := gauge0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(0) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + md1 := pmetric.NewMetrics() + mL1 := md1.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m1 := mL1.AppendEmpty() + m1.SetName("gauge_test") + m1.Metadata().PutStr("prometheus.type", "gauge") + gauge1 := m1.SetEmptyGauge() + pt1 := gauge1.DataPoints().AppendEmpty() + pt1.SetDoubleValue(90.0) + pt1.SetStartTimestamp(0) + pt1.SetTimestamp(tsPlusIntervalNanos) + pt1.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0, md1} + }, + }, + { + name: "one-gauge-with-exemplars", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint( + "gauge_test", + 100, + []exemplar.Exemplar{ + { + Value: 2, + Ts: 1663350815890, + Labels: labels.New([]labels.Label{{Name: model.MetricNameLabel, Value: "counter_test"}, {Name: model.JobLabel, Value: "job"}, {Name: model.InstanceLabel, Value: "instance"}, {Name: "foo", Value: "bar"}}...), + }, + { + Value: 2, + Ts: 1663350815890, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: ""}, {Name: "span_id", Value: ""}}...), + }, + { + Value: 2, + Ts: 1663350815890, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: "10a47365b8aa04e08291fab9deca84db6170"}, {Name: "span_id", Value: "719cee4a669fd7d109ff"}}...), + }, + { + Value: 2, + Ts: 1663350815890, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: "174137cab66dc880"}, {Name: "span_id", Value: "dfa4597a9d"}}...), + }, + }, + "foo", "bar"), + }, + }, + { + pts: []*testDataPoint{ + createDataPoint("gauge_test", 90, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("gauge_test") + m0.Metadata().PutStr("prometheus.type", "gauge") + gauge0 := m0.SetEmptyGauge() + pt0 := gauge0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(0) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + e0 := pt0.Exemplars().AppendEmpty() + e0.SetTimestamp(timestampFromMs(1663350815890)) + e0.SetDoubleValue(2) + e0.FilteredAttributes().PutStr(model.MetricNameLabel, "counter_test") + e0.FilteredAttributes().PutStr("foo", "bar") + e0.FilteredAttributes().PutStr(model.InstanceLabel, "instance") + e0.FilteredAttributes().PutStr(model.JobLabel, "job") + + e1 := pt0.Exemplars().AppendEmpty() + e1.SetTimestamp(timestampFromMs(1663350815890)) + e1.SetDoubleValue(2) + e1.FilteredAttributes().PutStr("foo", "bar") + + e2 := pt0.Exemplars().AppendEmpty() + e2.SetTimestamp(timestampFromMs(1663350815890)) + e2.SetDoubleValue(2) + e2.FilteredAttributes().PutStr("foo", "bar") + e2.SetTraceID([16]byte{0x10, 0xa4, 0x73, 0x65, 0xb8, 0xaa, 0x04, 0xe0, 0x82, 0x91, 0xfa, 0xb9, 0xde, 0xca, 0x84, 0xdb}) + e2.SetSpanID([8]byte{0x71, 0x9c, 0xee, 0x4a, 0x66, 0x9f, 0xd7, 0xd1}) + + e3 := pt0.Exemplars().AppendEmpty() + e3.SetTimestamp(timestampFromMs(1663350815890)) + e3.SetDoubleValue(2) + e3.FilteredAttributes().PutStr("foo", "bar") + e3.SetTraceID([16]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x41, 0x37, 0xca, 0xb6, 0x6d, 0xc8, 0x80}) + e3.SetSpanID([8]byte{0x00, 0x00, 0x00, 0xdf, 0xa4, 0x59, 0x7a, 0x9d}) + + md1 := pmetric.NewMetrics() + mL1 := 
md1.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m1 := mL1.AppendEmpty() + m1.SetName("gauge_test") + m1.Metadata().PutStr("prometheus.type", "gauge") + gauge1 := m1.SetEmptyGauge() + pt1 := gauge1.DataPoints().AppendEmpty() + pt1.SetDoubleValue(90.0) + pt1.SetStartTimestamp(0) + pt1.SetTimestamp(tsPlusIntervalNanos) + pt1.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0, md1} + }, + }, + { + name: "gauge-with-different-tags", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("gauge_test", 100, nil, "foo", "bar"), + createDataPoint("gauge_test", 200, nil, "bar", "foo"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("gauge_test") + m0.Metadata().PutStr("prometheus.type", "gauge") + gauge0 := m0.SetEmptyGauge() + pt0 := gauge0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(0) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + pt1 := gauge0.DataPoints().AppendEmpty() + pt1.SetDoubleValue(200.0) + pt1.SetStartTimestamp(0) + pt1.SetTimestamp(tsNanos) + pt1.Attributes().PutStr("bar", "foo") + + return []pmetric.Metrics{md0} + }, + }, + { + // TODO: A decision need to be made. If we want to have the behavior which can generate different tag key + // sets because metrics come and go + name: "gauge-comes-and-go-with-different-tagset", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("gauge_test", 100, nil, "foo", "bar"), + createDataPoint("gauge_test", 200, nil, "bar", "foo"), + }, + }, + { + pts: []*testDataPoint{ + createDataPoint("gauge_test", 20, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("gauge_test") + m0.Metadata().PutStr("prometheus.type", "gauge") + gauge0 := m0.SetEmptyGauge() + pt0 := gauge0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(0) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + pt1 := gauge0.DataPoints().AppendEmpty() + pt1.SetDoubleValue(200.0) + pt1.SetStartTimestamp(0) + pt1.SetTimestamp(tsNanos) + pt1.Attributes().PutStr("bar", "foo") + + md1 := pmetric.NewMetrics() + mL1 := md1.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m1 := mL1.AppendEmpty() + m1.SetName("gauge_test") + m1.Metadata().PutStr("prometheus.type", "gauge") + gauge1 := m1.SetEmptyGauge() + pt2 := gauge1.DataPoints().AppendEmpty() + pt2.SetDoubleValue(20.0) + pt2.SetStartTimestamp(0) + pt2.SetTimestamp(tsPlusIntervalNanos) + pt2.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0, md1} + }, + }, + } + + for _, tt := range tests { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("%s/enableNativeHistograms=%v", tt.name, enableNativeHistograms), func(t *testing.T) { + tt.run(t, enableNativeHistograms) + }) + } + } +} + +func TestMetricBuilderUntyped(t *testing.T) { + tests := []buildTestData{ + { + name: "one-unknown", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("unknown_test", 100, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + 
m0 := mL0.AppendEmpty() + m0.SetName("unknown_test") + m0.Metadata().PutStr("prometheus.type", "unknown") + gauge0 := m0.SetEmptyGauge() + pt0 := gauge0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetStartTimestamp(0) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "no-type-hint", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("something_not_exists", 100, nil, "foo", "bar"), + createDataPoint("theother_not_exists", 200, nil, "foo", "bar"), + createDataPoint("theother_not_exists", 300, nil, "bar", "foo"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("something_not_exists") + m0.Metadata().PutStr("prometheus.type", "unknown") + gauge0 := m0.SetEmptyGauge() + pt0 := gauge0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + m1 := mL0.AppendEmpty() + m1.SetName("theother_not_exists") + m1.Metadata().PutStr("prometheus.type", "unknown") + gauge1 := m1.SetEmptyGauge() + pt1 := gauge1.DataPoints().AppendEmpty() + pt1.SetDoubleValue(200.0) + pt1.SetTimestamp(tsNanos) + pt1.Attributes().PutStr("foo", "bar") + + pt2 := gauge1.DataPoints().AppendEmpty() + pt2.SetDoubleValue(300.0) + pt2.SetTimestamp(tsNanos) + pt2.Attributes().PutStr("bar", "foo") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "untype-metric-poor-names", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("some_count", 100, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("some_count") + m0.Metadata().PutStr("prometheus.type", "unknown") + gauge0 := m0.SetEmptyGauge() + pt0 := gauge0.DataPoints().AppendEmpty() + pt0.SetDoubleValue(100.0) + pt0.SetTimestamp(tsNanos) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + } + + for _, tt := range tests { + for _, enableNativeHistograms := range []bool{true, false} { + t.Run(fmt.Sprintf("%s/enableNativeHistograms=%v", tt.name, enableNativeHistograms), func(t *testing.T) { + tt.run(t, enableNativeHistograms) + }) + } + } +} + +func TestMetricBuilderHistogram(t *testing.T) { + tests := []buildTestData{ + { + name: "single item", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 1, nil, "foo", "bar", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_bucket", 10, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_sum", 99, nil, "foo", "bar"), + createDataPoint("hist_test_count", 10, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(10) + pt0.SetSum(99) + pt0.ExplicitBounds().FromRaw([]float64{10, 20}) + pt0.BucketCounts().FromRaw([]uint64{1, 1, 8}) + pt0.SetTimestamp(tsNanos) + 
pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "single item with exemplars", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint( + "hist_test_bucket", + 1, + []exemplar.Exemplar{ + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: model.MetricNameLabel, Value: "counter_test"}, {Name: model.JobLabel, Value: "job"}, {Name: model.InstanceLabel, Value: "instance"}, {Name: "foo", Value: "bar"}}...), + }, + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: ""}, {Name: "span_id", Value: ""}, {Name: "le", Value: "20"}}...), + }, + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: "10a47365b8aa04e08291fab9deca84db6170"}, {Name: "traceid", Value: "e3688e1aa2961786"}, {Name: "span_id", Value: "719cee4a669fd7d109ff"}}...), + }, + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: "174137cab66dc880"}, {Name: "span_id", Value: "dfa4597a9d"}}...), + }, + { + Value: 1, + Ts: 1663113420863, + Labels: labels.New([]labels.Label{{Name: "foo", Value: "bar"}, {Name: "trace_id", Value: "174137cab66dc88"}, {Name: "span_id", Value: "dfa4597a9"}}...), + }, + }, + "foo", "bar", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_bucket", 10, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_sum", 99, nil, "foo", "bar"), + createDataPoint("hist_test_count", 10, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(10) + pt0.SetSum(99) + pt0.ExplicitBounds().FromRaw([]float64{10, 20}) + pt0.BucketCounts().FromRaw([]uint64{1, 1, 8}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + e0 := pt0.Exemplars().AppendEmpty() + e0.SetTimestamp(timestampFromMs(1663113420863)) + e0.SetDoubleValue(1) + e0.FilteredAttributes().PutStr(model.MetricNameLabel, "counter_test") + e0.FilteredAttributes().PutStr("foo", "bar") + e0.FilteredAttributes().PutStr(model.InstanceLabel, "instance") + e0.FilteredAttributes().PutStr(model.JobLabel, "job") + + e1 := pt0.Exemplars().AppendEmpty() + e1.SetTimestamp(timestampFromMs(1663113420863)) + e1.SetDoubleValue(1) + e1.FilteredAttributes().PutStr("foo", "bar") + e1.FilteredAttributes().PutStr("le", "20") + + e2 := pt0.Exemplars().AppendEmpty() + e2.SetTimestamp(timestampFromMs(1663113420863)) + e2.SetDoubleValue(1) + e2.FilteredAttributes().PutStr("foo", "bar") + e2.FilteredAttributes().PutStr("traceid", "e3688e1aa2961786") + e2.SetTraceID([16]byte{0x10, 0xa4, 0x73, 0x65, 0xb8, 0xaa, 0x04, 0xe0, 0x82, 0x91, 0xfa, 0xb9, 0xde, 0xca, 0x84, 0xdb}) + e2.SetSpanID([8]byte{0x71, 0x9c, 0xee, 0x4a, 0x66, 0x9f, 0xd7, 0xd1}) + + e3 := pt0.Exemplars().AppendEmpty() + e3.SetTimestamp(timestampFromMs(1663113420863)) + e3.SetDoubleValue(1) + e3.FilteredAttributes().PutStr("foo", "bar") + e3.SetTraceID([16]byte{0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x41, 0x37, 0xca, 0xb6, 0x6d, 0xc8, 0x80}) + e3.SetSpanID([8]byte{0x00, 0x00, 0x00, 0xdf, 0xa4, 0x59, 0x7a, 0x9d}) + + e4 := pt0.Exemplars().AppendEmpty() + e4.SetTimestamp(timestampFromMs(1663113420863)) + e4.SetDoubleValue(1) + e4.FilteredAttributes().PutStr("foo", "bar") + e4.FilteredAttributes().PutStr("span_id", "dfa4597a9") + e4.FilteredAttributes().PutStr("trace_id", "174137cab66dc88") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "multi-groups", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 1, nil, "foo", "bar", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_bucket", 10, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_sum", 99, nil, "foo", "bar"), + createDataPoint("hist_test_count", 10, nil, "foo", "bar"), + createDataPoint("hist_test_bucket", 1, nil, "key2", "v2", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "key2", "v2", "le", "20"), + createDataPoint("hist_test_bucket", 3, nil, "key2", "v2", "le", "+inf"), + createDataPoint("hist_test_sum", 50, nil, "key2", "v2"), + createDataPoint("hist_test_count", 3, nil, "key2", "v2"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(10) + pt0.SetSum(99) + pt0.ExplicitBounds().FromRaw([]float64{10, 20}) + pt0.BucketCounts().FromRaw([]uint64{1, 1, 8}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + pt1 := hist0.DataPoints().AppendEmpty() + pt1.SetCount(3) + pt1.SetSum(50) + pt1.ExplicitBounds().FromRaw([]float64{10, 20}) + pt1.BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt1.SetTimestamp(tsNanos) + pt1.SetStartTimestamp(startTimestamp) + pt1.Attributes().PutStr("key2", "v2") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "multi-groups-and-families", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 1, nil, "foo", "bar", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_bucket", 10, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_sum", 99, nil, "foo", "bar"), + createDataPoint("hist_test_count", 10, nil, "foo", "bar"), + createDataPoint("hist_test_bucket", 1, nil, "key2", "v2", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "key2", "v2", "le", "20"), + createDataPoint("hist_test_bucket", 3, nil, "key2", "v2", "le", "+inf"), + createDataPoint("hist_test_sum", 50, nil, "key2", "v2"), + createDataPoint("hist_test_count", 3, nil, "key2", "v2"), + createDataPoint("hist_test2_bucket", 1, nil, "foo", "bar", "le", "10"), + createDataPoint("hist_test2_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test2_bucket", 3, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test2_sum", 50, nil, "foo", "bar"), + createDataPoint("hist_test2_count", 3, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := 
mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(10) + pt0.SetSum(99) + pt0.ExplicitBounds().FromRaw([]float64{10, 20}) + pt0.BucketCounts().FromRaw([]uint64{1, 1, 8}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + pt1 := hist0.DataPoints().AppendEmpty() + pt1.SetCount(3) + pt1.SetSum(50) + pt1.ExplicitBounds().FromRaw([]float64{10, 20}) + pt1.BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt1.SetTimestamp(tsNanos) + pt1.SetStartTimestamp(startTimestamp) + pt1.Attributes().PutStr("key2", "v2") + + m1 := mL0.AppendEmpty() + m1.SetName("hist_test2") + m1.Metadata().PutStr("prometheus.type", "histogram") + hist1 := m1.SetEmptyHistogram() + hist1.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt2 := hist1.DataPoints().AppendEmpty() + pt2.SetCount(3) + pt2.SetSum(50) + pt2.ExplicitBounds().FromRaw([]float64{10, 20}) + pt2.BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt2.SetTimestamp(tsNanos) + pt2.SetStartTimestamp(startTimestamp) + pt2.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "unordered-buckets", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 10, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_bucket", 1, nil, "foo", "bar", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_sum", 99, nil, "foo", "bar"), + createDataPoint("hist_test_count", 10, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(10) + pt0.SetSum(99) + pt0.ExplicitBounds().FromRaw([]float64{10, 20}) + pt0.BucketCounts().FromRaw([]uint64{1, 1, 8}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + // this won't likely happen in real env, as prometheus wont generate histogram with less than 3 buckets + name: "only-one-bucket", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 3, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_count", 3, nil, "foo", "bar"), + createDataPoint("hist_test_sum", 100, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(3) + pt0.SetSum(100) + pt0.BucketCounts().FromRaw([]uint64{3}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + // this won't likely happen in 
real env, as prometheus wont generate histogram with less than 3 buckets + name: "only-one-bucket-noninf", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 3, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_count", 3, nil, "foo", "bar"), + createDataPoint("hist_test_sum", 100, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(3) + pt0.SetSum(100) + pt0.BucketCounts().FromRaw([]uint64{3, 0}) + pt0.ExplicitBounds().FromRaw([]float64{20}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "no-sum", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 1, nil, "foo", "bar", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_bucket", 3, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_count", 3, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(3) + pt0.ExplicitBounds().FromRaw([]float64{10, 20}) + pt0.BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "corrupted-no-buckets", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_sum", 99, nil, "foo", "bar"), + createDataPoint("hist_test_count", 10, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + hist0 := m0.SetEmptyHistogram() + hist0.SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := hist0.DataPoints().AppendEmpty() + pt0.SetCount(10) + pt0.SetSum(99) + pt0.BucketCounts().FromRaw([]uint64{10}) + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "corrupted-no-count", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("hist_test_bucket", 1, nil, "foo", "bar", "le", "10"), + createDataPoint("hist_test_bucket", 2, nil, "foo", "bar", "le", "20"), + createDataPoint("hist_test_bucket", 3, nil, "foo", "bar", "le", "+inf"), + createDataPoint("hist_test_sum", 99, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + return []pmetric.Metrics{pmetric.NewMetrics()} + }, + }, + } + + for _, tt := range tests { + for _, enableNativeHistograms := range 
[]bool{true, false} { + // None of the histograms above have native histogram versions, so enabling native histograms has no effect. + t.Run(fmt.Sprintf("%s/enableNativeHistograms=%v", tt.name, enableNativeHistograms), func(t *testing.T) { + tt.run(t, enableNativeHistograms) + }) + } + } +} + +func TestMetricBuilderSummary(t *testing.T) { + tests := []buildTestData{ + { + name: "no-sum-and-count", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("summary_test", 5, nil, "foo", "bar", "quantile", "1"), + }, + }, + }, + wants: func() []pmetric.Metrics { + return []pmetric.Metrics{pmetric.NewMetrics()} + }, + }, + { + name: "no-count", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("summary_test", 1, nil, "foo", "bar", "quantile", "0.5"), + createDataPoint("summary_test", 2, nil, "foo", "bar", "quantile", "0.75"), + createDataPoint("summary_test", 5, nil, "foo", "bar", "quantile", "1"), + createDataPoint("summary_test_sum", 500, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + return []pmetric.Metrics{pmetric.NewMetrics()} + }, + }, + { + name: "no-sum", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("summary_test", 1, nil, "foo", "bar", "quantile", "0.5"), + createDataPoint("summary_test", 2, nil, "foo", "bar", "quantile", "0.75"), + createDataPoint("summary_test", 5, nil, "foo", "bar", "quantile", "1"), + createDataPoint("summary_test_count", 500, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("summary_test") + m0.Metadata().PutStr("prometheus.type", "summary") + sum0 := m0.SetEmptySummary() + pt0 := sum0.DataPoints().AppendEmpty() + pt0.SetTimestamp(tsNanos) + pt0.SetStartTimestamp(startTimestamp) + pt0.SetCount(500) + pt0.SetSum(0.0) + pt0.Attributes().PutStr("foo", "bar") + qvL := pt0.QuantileValues() + q50 := qvL.AppendEmpty() + q50.SetQuantile(.50) + q50.SetValue(1.0) + q75 := qvL.AppendEmpty() + q75.SetQuantile(.75) + q75.SetValue(2.0) + q100 := qvL.AppendEmpty() + q100.SetQuantile(1) + q100.SetValue(5.0) + return []pmetric.Metrics{md0} + }, + }, + { + name: "empty-quantiles", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("summary_test_sum", 100, nil, "foo", "bar"), + createDataPoint("summary_test_count", 500, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("summary_test") + m0.Metadata().PutStr("prometheus.type", "summary") + sum0 := m0.SetEmptySummary() + pt0 := sum0.DataPoints().AppendEmpty() + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.SetCount(500) + pt0.SetSum(100.0) + pt0.Attributes().PutStr("foo", "bar") + + return []pmetric.Metrics{md0} + }, + }, + { + name: "regular-summary", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createDataPoint("summary_test", 1, nil, "foo", "bar", "quantile", "0.5"), + createDataPoint("summary_test", 2, nil, "foo", "bar", "quantile", "0.75"), + createDataPoint("summary_test", 5, nil, "foo", "bar", "quantile", "1"), + createDataPoint("summary_test_sum", 100, nil, "foo", "bar"), + createDataPoint("summary_test_count", 500, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics()
+ mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("summary_test") + m0.Metadata().PutStr("prometheus.type", "summary") + sum0 := m0.SetEmptySummary() + pt0 := sum0.DataPoints().AppendEmpty() + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.SetCount(500) + pt0.SetSum(100.0) + pt0.Attributes().PutStr("foo", "bar") + qvL := pt0.QuantileValues() + q50 := qvL.AppendEmpty() + q50.SetQuantile(.50) + q50.SetValue(1.0) + q75 := qvL.AppendEmpty() + q75.SetQuantile(.75) + q75.SetValue(2.0) + q100 := qvL.AppendEmpty() + q100.SetQuantile(1) + q100.SetValue(5.0) + + return []pmetric.Metrics{md0} + }, + }, + } + + for _, tt := range tests { + for _, enableNativeHistograms := range []bool{false, true} { + t.Run(fmt.Sprintf("%s/enableNativeHistograms=%v", tt.name, enableNativeHistograms), func(t *testing.T) { + tt.run(t, enableNativeHistograms) + }) + } + } +} + +func TestMetricBuilderNativeHistogram(t *testing.T) { + for _, enableNativeHistograms := range []bool{false, true} { + emptyH := &histogram.Histogram{ + Schema: 1, + Count: 0, + Sum: 0, + ZeroThreshold: 0.001, + ZeroCount: 0, + } + h0 := tsdbutil.GenerateTestHistogram(0) + + tests := []buildTestData{ + { + name: "empty integer histogram", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createHistogramDataPoint("hist_test", emptyH, nil, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + if !enableNativeHistograms { + return []pmetric.Metrics{md0} + } + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + m0.SetEmptyExponentialHistogram() + m0.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := m0.ExponentialHistogram().DataPoints().AppendEmpty() + pt0.Attributes().PutStr("foo", "bar") + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.SetCount(0) + pt0.SetSum(0) + pt0.SetZeroThreshold(0.001) + pt0.SetScale(1) + + return []pmetric.Metrics{md0} + }, + }, + { + name: "integer histogram", + inputs: []*testScrapedPage{ + { + pts: []*testDataPoint{ + createHistogramDataPoint("hist_test", h0, nil, nil, "foo", "bar"), + }, + }, + }, + wants: func() []pmetric.Metrics { + md0 := pmetric.NewMetrics() + if !enableNativeHistograms { + return []pmetric.Metrics{md0} + } + mL0 := md0.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics() + m0 := mL0.AppendEmpty() + m0.SetName("hist_test") + m0.Metadata().PutStr("prometheus.type", "histogram") + m0.SetEmptyExponentialHistogram() + m0.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + pt0 := m0.ExponentialHistogram().DataPoints().AppendEmpty() + pt0.Attributes().PutStr("foo", "bar") + pt0.SetStartTimestamp(startTimestamp) + pt0.SetTimestamp(tsNanos) + pt0.SetCount(12) + pt0.SetSum(18.4) + pt0.SetScale(1) + pt0.SetZeroThreshold(0.001) + pt0.SetZeroCount(2) + pt0.Positive().SetOffset(-1) + pt0.Positive().BucketCounts().Append(1) + pt0.Positive().BucketCounts().Append(2) + pt0.Positive().BucketCounts().Append(0) + pt0.Positive().BucketCounts().Append(1) + pt0.Positive().BucketCounts().Append(1) + pt0.Negative().SetOffset(-1) + pt0.Negative().BucketCounts().Append(1) + pt0.Negative().BucketCounts().Append(2) + pt0.Negative().BucketCounts().Append(0) + pt0.Negative().BucketCounts().Append(1) + 
pt0.Negative().BucketCounts().Append(1) + + return []pmetric.Metrics{md0} + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.run(t, enableNativeHistograms) + }) + } + } +} + +type buildTestData struct { + name string + inputs []*testScrapedPage + wants func() []pmetric.Metrics +} + +func (tt buildTestData) run(t *testing.T, enableNativeHistograms bool) { + wants := tt.wants() + assert.EqualValues(t, len(wants), len(tt.inputs)) + st := ts + for i, page := range tt.inputs { + sink := new(consumertest.MetricsSink) + tr := newTransaction(scrapeCtx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms) + for _, pt := range page.pts { + // set ts for testing + pt.t = st + var err error + switch { + case pt.fh != nil: + _, err = tr.AppendHistogram(0, pt.lb, pt.t, nil, pt.fh) + case pt.h != nil: + _, err = tr.AppendHistogram(0, pt.lb, pt.t, pt.h, nil) + default: + _, err = tr.Append(0, pt.lb, pt.t, pt.v) + } + assert.NoError(t, err) + + for _, e := range pt.exemplars { + _, err := tr.AppendExemplar(0, pt.lb, e) + assert.NoError(t, err) + } + } + assert.NoError(t, tr.Commit()) + mds := sink.AllMetrics() + if wants[i].ResourceMetrics().Len() == 0 { + // Receiver does not emit empty metrics, so will not have anything in the sink. + require.Empty(t, mds) + st += interval + continue + } + require.Len(t, mds, 1) + assertEquivalentMetrics(t, wants[i], mds[0]) + st += interval + } +} + +type errorAdjuster struct { + err error +} + +func (ea *errorAdjuster) AdjustMetrics(pmetric.Metrics) error { + return ea.err +} + +type startTimeAdjuster struct { + startTime pcommon.Timestamp +} + +func (s *startTimeAdjuster) AdjustMetrics(metrics pmetric.Metrics) error { + for i := 0; i < metrics.ResourceMetrics().Len(); i++ { + rm := metrics.ResourceMetrics().At(i) + for j := 0; j < rm.ScopeMetrics().Len(); j++ { + ilm := rm.ScopeMetrics().At(j) + for k := 0; k < ilm.Metrics().Len(); k++ { + metric := ilm.Metrics().At(k) + switch metric.Type() { + case pmetric.MetricTypeSum: + dps := metric.Sum().DataPoints() + for l := 0; l < dps.Len(); l++ { + dps.At(l).SetStartTimestamp(s.startTime) + } + case pmetric.MetricTypeSummary: + dps := metric.Summary().DataPoints() + for l := 0; l < dps.Len(); l++ { + dps.At(l).SetStartTimestamp(s.startTime) + } + case pmetric.MetricTypeHistogram: + dps := metric.Histogram().DataPoints() + for l := 0; l < dps.Len(); l++ { + dps.At(l).SetStartTimestamp(s.startTime) + } + case pmetric.MetricTypeExponentialHistogram: + dps := metric.ExponentialHistogram().DataPoints() + for l := 0; l < dps.Len(); l++ { + dps.At(l).SetStartTimestamp(s.startTime) + } + case pmetric.MetricTypeEmpty, pmetric.MetricTypeGauge: + } + } + } + } + return nil +} + +type testDataPoint struct { + lb labels.Labels + t int64 + v float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + exemplars []exemplar.Exemplar +} + +type testScrapedPage struct { + pts []*testDataPoint +} + +func createDataPoint(mname string, value float64, es []exemplar.Exemplar, tagPairs ...string) *testDataPoint { + var lbls []string + lbls = append(lbls, tagPairs...) 
+ lbls = append(lbls, model.MetricNameLabel, mname) + lbls = append(lbls, model.JobLabel, "job") + lbls = append(lbls, model.InstanceLabel, "instance") + + return &testDataPoint{ + lb: labels.FromStrings(lbls...), + t: ts, + v: value, + exemplars: es, + } +} + +func createHistogramDataPoint(mname string, h *histogram.Histogram, fh *histogram.FloatHistogram, es []exemplar.Exemplar, tagPairs ...string) *testDataPoint { + dataPoint := createDataPoint(mname, 0, es, tagPairs...) + dataPoint.h = h + dataPoint.fh = fh + return dataPoint +} + +func assertEquivalentMetrics(t *testing.T, want, got pmetric.Metrics) { + require.Equal(t, want.ResourceMetrics().Len(), got.ResourceMetrics().Len()) + if want.ResourceMetrics().Len() == 0 { + return + } + for i := 0; i < want.ResourceMetrics().Len(); i++ { + wantSm := want.ResourceMetrics().At(i).ScopeMetrics() + gotSm := got.ResourceMetrics().At(i).ScopeMetrics() + require.Equal(t, wantSm.Len(), gotSm.Len()) + if wantSm.Len() == 0 { + return + } + + for j := 0; j < wantSm.Len(); j++ { + wantMs := wantSm.At(j).Metrics() + gotMs := gotSm.At(j).Metrics() + require.Equal(t, wantMs.Len(), gotMs.Len()) + + wmap := map[string]pmetric.Metric{} + gmap := map[string]pmetric.Metric{} + + for k := 0; k < wantMs.Len(); k++ { + wi := wantMs.At(k) + wmap[wi.Name()] = wi + gi := gotMs.At(k) + gmap[gi.Name()] = gi + } + assert.EqualValues(t, wmap, gmap) + } + } +} diff --git a/pkg/promotel/prometheusreceiver/internal/util.go b/pkg/promotel/prometheusreceiver/internal/util.go new file mode 100644 index 000000000..8e552e074 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/util.go @@ -0,0 +1,128 @@ +package internal + +import ( + "errors" + "sort" + "strconv" + "strings" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +const ( + metricsSuffixCount = "_count" + metricsSuffixBucket = "_bucket" + metricsSuffixSum = "_sum" + metricSuffixTotal = "_total" + metricSuffixInfo = "_info" + metricSuffixCreated = "_created" + startTimeMetricName = "process_start_time_seconds" + scrapeUpMetricName = "up" + + transport = "http" + dataformat = "prometheus" +) + +var ( + trimmableSuffixes = []string{metricsSuffixBucket, metricsSuffixCount, metricsSuffixSum, metricSuffixTotal, metricSuffixInfo, metricSuffixCreated} + errNoDataToBuild = errors.New("there's no data to build") + errNoBoundaryLabel = errors.New("given metricType has no 'le' or 'quantile' label") + errEmptyQuantileLabel = errors.New("'quantile' label on summary metric is missing or empty") + errEmptyLeLabel = errors.New("'le' label on histogram metric is missing or empty") + errMetricNameNotFound = errors.New("metricName not found from labels") + errTransactionAborted = errors.New("transaction aborted") + errNoJobInstance = errors.New("job or instance cannot be found from labels") + + notUsefulLabelsHistogram = sortString([]string{model.MetricNameLabel, model.InstanceLabel, model.SchemeLabel, model.MetricsPathLabel, model.JobLabel, model.BucketLabel}) + notUsefulLabelsSummary = sortString([]string{model.MetricNameLabel, model.InstanceLabel, model.SchemeLabel, model.MetricsPathLabel, model.JobLabel, model.QuantileLabel}) + notUsefulLabelsOther = sortString([]string{model.MetricNameLabel, model.InstanceLabel, model.SchemeLabel, model.MetricsPathLabel, model.JobLabel}) +) + +func sortString(strs []string) []string { + sort.Strings(strs) + return strs +} + +func 
getSortedNotUsefulLabels(mType pmetric.MetricType) []string { + switch mType { + case pmetric.MetricTypeHistogram: + return notUsefulLabelsHistogram + case pmetric.MetricTypeSummary: + return notUsefulLabelsSummary + case pmetric.MetricTypeEmpty, pmetric.MetricTypeGauge, pmetric.MetricTypeSum, pmetric.MetricTypeExponentialHistogram: + fallthrough + default: + return notUsefulLabelsOther + } +} + +func timestampFromFloat64(ts float64) pcommon.Timestamp { + secs := int64(ts) + nanos := int64((ts - float64(secs)) * 1e9) + return pcommon.Timestamp(secs*1e9 + nanos) +} + +func timestampFromMs(timeAtMs int64) pcommon.Timestamp { + return pcommon.Timestamp(timeAtMs * 1e6) +} + +func getBoundary(metricType pmetric.MetricType, labels labels.Labels) (float64, error) { + var val string + switch metricType { + case pmetric.MetricTypeHistogram: + val = labels.Get(model.BucketLabel) + if val == "" { + return 0, errEmptyLeLabel + } + case pmetric.MetricTypeSummary: + val = labels.Get(model.QuantileLabel) + if val == "" { + return 0, errEmptyQuantileLabel + } + case pmetric.MetricTypeEmpty, pmetric.MetricTypeGauge, pmetric.MetricTypeSum, pmetric.MetricTypeExponentialHistogram: + fallthrough + default: + return 0, errNoBoundaryLabel + } + + return strconv.ParseFloat(val, 64) +} + +// convToMetricType returns the data type and if it is monotonic +func convToMetricType(metricType model.MetricType) (pmetric.MetricType, bool) { + switch metricType { + case model.MetricTypeCounter: + // always use float64, as it's the internal data type used in prometheus + return pmetric.MetricTypeSum, true + // model.MetricTypeUnknown is converted to gauge by default to prevent Prometheus untyped metrics from being dropped + case model.MetricTypeGauge, model.MetricTypeUnknown: + return pmetric.MetricTypeGauge, false + case model.MetricTypeHistogram: + return pmetric.MetricTypeHistogram, true + // dropping support for gaugehistogram for now until we have an official spec of its implementation + // a draft can be found in: https://docs.google.com/document/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit#heading=h.1cvzqd4ksd23 + // case model.MetricTypeGaugeHistogram: + // return + case model.MetricTypeSummary: + return pmetric.MetricTypeSummary, true + case model.MetricTypeInfo, model.MetricTypeStateset: + return pmetric.MetricTypeSum, false + case model.MetricTypeGaugeHistogram: + fallthrough + default: + // including: model.MetricTypeGaugeHistogram + return pmetric.MetricTypeEmpty, false + } +} + +func normalizeMetricName(name string) string { + for _, s := range trimmableSuffixes { + if strings.HasSuffix(name, s) && name != s { + return strings.TrimSuffix(name, s) + } + } + return name +} diff --git a/pkg/promotel/prometheusreceiver/internal/util_test.go b/pkg/promotel/prometheusreceiver/internal/util_test.go new file mode 100644 index 000000000..5238ce255 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/internal/util_test.go @@ -0,0 +1,182 @@ +package internal + +import ( + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +var testMetadata = map[string]scrape.MetricMetadata{ + "counter_test": {Metric: "counter_test", Type: model.MetricTypeCounter, Help: "", Unit: ""}, + "counter_test2": 
{Metric: "counter_test2", Type: model.MetricTypeCounter, Help: "", Unit: ""}, + "gauge_test": {Metric: "gauge_test", Type: model.MetricTypeGauge, Help: "", Unit: ""}, + "gauge_test2": {Metric: "gauge_test2", Type: model.MetricTypeGauge, Help: "", Unit: ""}, + "hist_test": {Metric: "hist_test", Type: model.MetricTypeHistogram, Help: "", Unit: ""}, + "hist_test2": {Metric: "hist_test2", Type: model.MetricTypeHistogram, Help: "", Unit: ""}, + "ghist_test": {Metric: "ghist_test", Type: model.MetricTypeGaugeHistogram, Help: "", Unit: ""}, + "summary_test": {Metric: "summary_test", Type: model.MetricTypeSummary, Help: "", Unit: ""}, + "summary_test2": {Metric: "summary_test2", Type: model.MetricTypeSummary, Help: "", Unit: ""}, + "unknown_test": {Metric: "unknown_test", Type: model.MetricTypeUnknown, Help: "", Unit: ""}, + "poor_name": {Metric: "poor_name", Type: model.MetricTypeGauge, Help: "", Unit: ""}, + "poor_name_count": {Metric: "poor_name_count", Type: model.MetricTypeCounter, Help: "", Unit: ""}, + "scrape_foo": {Metric: "scrape_foo", Type: model.MetricTypeCounter, Help: "", Unit: ""}, + "example_process_start_time_seconds": { + Metric: "example_process_start_time_seconds", + Type: model.MetricTypeGauge, Help: "", Unit: "", + }, + "process_start_time_seconds": { + Metric: "process_start_time_seconds", + Type: model.MetricTypeGauge, Help: "", Unit: "", + }, + "subprocess_start_time_seconds": { + Metric: "subprocess_start_time_seconds", + Type: model.MetricTypeGauge, Help: "", Unit: "", + }, +} + +func TestTimestampFromMs(t *testing.T) { + assert.Equal(t, pcommon.Timestamp(0), timestampFromMs(0)) + assert.Equal(t, pcommon.NewTimestampFromTime(time.UnixMilli(1662679535432)), timestampFromMs(1662679535432)) +} + +func TestTimestampFromFloat64(t *testing.T) { + assert.Equal(t, pcommon.Timestamp(0), timestampFromFloat64(0)) + // Because of float64 conversion, we check only that we are within 100ns error. 
+ assert.InEpsilon(t, uint64(1662679535040000000), uint64(timestampFromFloat64(1662679535.040)), 100) +} + +func TestConvToMetricType(t *testing.T) { + tests := []struct { + name string + mtype model.MetricType + want pmetric.MetricType + wantMonotonic bool + }{ + { + name: "model.counter", + mtype: model.MetricTypeCounter, + want: pmetric.MetricTypeSum, + wantMonotonic: true, + }, + { + name: "model.gauge", + mtype: model.MetricTypeGauge, + want: pmetric.MetricTypeGauge, + wantMonotonic: false, + }, + { + name: "model.unknown", + mtype: model.MetricTypeUnknown, + want: pmetric.MetricTypeGauge, + wantMonotonic: false, + }, + { + name: "model.histogram", + mtype: model.MetricTypeHistogram, + want: pmetric.MetricTypeHistogram, + wantMonotonic: true, + }, + { + name: "model.summary", + mtype: model.MetricTypeSummary, + want: pmetric.MetricTypeSummary, + wantMonotonic: true, + }, + { + name: "model.metric_type_info", + mtype: model.MetricTypeInfo, + want: pmetric.MetricTypeSum, + wantMonotonic: false, + }, + { + name: "model.metric_state_set", + mtype: model.MetricTypeStateset, + want: pmetric.MetricTypeSum, + wantMonotonic: false, + }, + { + name: "model.metric_gauge_hostogram", + mtype: model.MetricTypeGaugeHistogram, + want: pmetric.MetricTypeEmpty, + wantMonotonic: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + got, monotonic := convToMetricType(tt.mtype) + require.Equal(t, got.String(), tt.want.String()) + require.Equal(t, tt.wantMonotonic, monotonic) + }) + } +} + +func TestGetBoundary(t *testing.T) { + tests := []struct { + name string + mtype pmetric.MetricType + labels labels.Labels + wantValue float64 + wantErr error + }{ + { + name: "cumulative histogram with bucket label", + mtype: pmetric.MetricTypeHistogram, + labels: labels.FromStrings(model.BucketLabel, "0.256"), + wantValue: 0.256, + }, + { + name: "gauge histogram with bucket label", + mtype: pmetric.MetricTypeHistogram, + labels: labels.FromStrings(model.BucketLabel, "11.71"), + wantValue: 11.71, + }, + { + name: "summary with bucket label", + mtype: pmetric.MetricTypeSummary, + labels: labels.FromStrings(model.BucketLabel, "11.71"), + wantErr: errEmptyQuantileLabel, + }, + { + name: "summary with quantile label", + mtype: pmetric.MetricTypeSummary, + labels: labels.FromStrings(model.QuantileLabel, "92.88"), + wantValue: 92.88, + }, + { + name: "gauge histogram mismatched with bucket label", + mtype: pmetric.MetricTypeSummary, + labels: labels.FromStrings(model.BucketLabel, "11.71"), + wantErr: errEmptyQuantileLabel, + }, + { + name: "other data types without matches", + mtype: pmetric.MetricTypeGauge, + labels: labels.FromStrings(model.BucketLabel, "11.71"), + wantErr: errNoBoundaryLabel, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + value, err := getBoundary(tt.mtype, tt.labels) + if tt.wantErr != nil { + assert.ErrorIs(t, err, tt.wantErr) + return + } + + assert.NoError(t, err) + assert.Equal(t, tt.wantValue, value) + }) + } +} diff --git a/pkg/promotel/prometheusreceiver/metrics_receiver.go b/pkg/promotel/prometheusreceiver/metrics_receiver.go new file mode 100644 index 000000000..9ec0521bc --- /dev/null +++ b/pkg/promotel/prometheusreceiver/metrics_receiver.go @@ -0,0 +1,243 @@ +package prometheusreceiver + +import ( + "context" + "reflect" + "regexp" + "sync" + "time" + "unsafe" + + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + + "go.opentelemetry.io/collector/component" + 
"go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/receiver" + "go.uber.org/zap" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/internal" + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +const ( + defaultGCInterval = 2 * time.Minute + gcIntervalDelta = 1 * time.Minute +) + +// pReceiver is the type that provides Prometheus scraper/receiver functionality. +type pReceiver struct { + cfg *Config + consumer consumer.Metrics + cancelFunc context.CancelFunc + configLoaded chan struct{} + loadConfigOnce sync.Once + + settings receiver.Settings + scrapeManager *scrape.Manager + //discoveryManager *discovery.Manager + //targetAllocatorManager *targetallocator.Manager + registerer prometheus.Registerer + gatherer prometheus.Gatherer + unregisterMetrics func() + skipOffsetting bool // for testing only +} + +func NewPrometheusReceiver(set receiver.Settings, cfg *Config, next consumer.Metrics) *pReceiver { + return newPrometheusReceiver(set, cfg, next) +} + +// New creates a new prometheus.Receiver reference. +func newPrometheusReceiver(set receiver.Settings, cfg *Config, next consumer.Metrics) *pReceiver { + var ( + registerer prometheus.Registerer + gatherer prometheus.Gatherer + ) + if cfg.Registry != nil { + registerer = cfg.Registry + gatherer = cfg.Registry + } else { + registerer = prometheus.DefaultRegisterer + gatherer = prometheus.DefaultGatherer + } + + //baseCfg := promconfig.Config(*cfg.PrometheusConfig) + pr := &pReceiver{ + cfg: cfg, + consumer: next, + settings: set, + configLoaded: make(chan struct{}), + registerer: prometheus.WrapRegistererWith( + prometheus.Labels{"receiver": set.ID.String()}, + registerer), + // Added + gatherer: gatherer, + // targetAllocatorManager: targetallocator.NewManager( + // set, + // cfg.TargetAllocator, + // &baseCfg, + // enableNativeHistogramsGate.IsEnabled(), + // ), + } + return pr +} + +// Start is the method that starts Prometheus scraping. It +// is controlled by having previously defined a Configuration using perhaps New. +func (r *pReceiver) Start(ctx context.Context, host component.Host) error { + discoveryCtx, cancel := context.WithCancel(context.Background()) + r.cancelFunc = cancel + + logger := internal.NewZapToGokitLogAdapter(r.settings.Logger) + + err := r.initPrometheusComponents(discoveryCtx, logger, host) + if err != nil { + r.settings.Logger.Error("Failed to initPrometheusComponents Prometheus components", zap.Error(err)) + return err + } + + //err = r.targetAllocatorManager.Start(ctx, host, r.scrapeManager, r.discoveryManager) + // if err != nil { + // return err + // } + + r.loadConfigOnce.Do(func() { + close(r.configLoaded) + }) + + return nil +} + +func (r *pReceiver) initPrometheusComponents(ctx context.Context, logger log.Logger, host component.Host) error { + // Some SD mechanisms use the "refresh" package, which has its own metrics. + // refreshSdMetrics := discovery.NewRefreshMetrics(r.registerer) + + // Register the metrics specific for each SD mechanism, and the ones for the refresh package. + // sdMetrics, err := discovery.RegisterSDMetrics(r.registerer, refreshSdMetrics) + // if err != nil { + // return fmt.Errorf("failed to register service discovery metrics: %w", err) + // } + // r.discoveryManager = discovery.NewManager(ctx, logger, r.registerer, sdMetrics) + // if r.discoveryManager == nil { + // // NewManager can sometimes return nil if it encountered an error, but + // // the error message is logged separately. 
+ // return errors.New("failed to create discovery manager") + // } + + // go func() { + // r.settings.Logger.Info("Starting discovery manager") + // if err = r.discoveryManager.Run(); err != nil && !errors.Is(err, context.Canceled) { + // r.settings.Logger.Error("Discovery manager failed", zap.Error(err)) + // componentstatus.ReportStatus(host, componentstatus.NewFatalErrorEvent(err)) + // } + // }() + + var startTimeMetricRegex *regexp.Regexp + var err error + if r.cfg.StartTimeMetricRegex != "" { + startTimeMetricRegex, err = regexp.Compile(r.cfg.StartTimeMetricRegex) + if err != nil { + return err + } + } + + store, err := internal.NewAppendable( + r.consumer, + r.settings, + gcInterval(r.cfg.PrometheusConfig), + r.cfg.UseStartTimeMetric, + startTimeMetricRegex, + false, + false, + r.cfg.PrometheusConfig.GlobalConfig.ExternalLabels, + r.cfg.TrimMetricSuffixes, + ) + if err != nil { + return err + } + + opts := &scrape.Options{ + ExtraMetrics: r.cfg.ReportExtraScrapeMetrics, + // HTTPClientOptions: []commonconfig.HTTPClientOption{ + // commonconfig.WithUserAgent(r.settings.BuildInfo.Command + "/" + r.settings.BuildInfo.Version), + // }, + } + + // if enableNativeHistogramsGate.IsEnabled() { + // opts.EnableNativeHistogramsIngestion = true + // } + + // for testing only + if r.skipOffsetting { + optsValue := reflect.ValueOf(opts).Elem() + field := optsValue.FieldByName("skipOffsetting") + reflect.NewAt(field.Type(), unsafe.Pointer(field.UnsafeAddr())). + Elem(). + Set(reflect.ValueOf(true)) + } + + // scrapeManager, err := scrape.NewManager(opts, logger, store, r.registerer) + // if err != nil { + // return err + // } + // r.scrapeManager = scrapeManager + + // r.unregisterMetrics = func() { + // refreshSdMetrics.Unregister() + // for _, sdMetric := range sdMetrics { + // sdMetric.Unregister() + // } + // r.discoveryManager.UnregisterMetrics() + // r.scrapeManager.UnregisterMetrics() + // } + + loop, err := scrape.NewGathererLoop(ctx, nil, store, r.registerer, r.gatherer, 10*time.Millisecond) + if err != nil { + return err + } + + go func() { + // The scrape manager needs to wait for the configuration to be loaded before beginning + <-r.configLoaded + r.settings.Logger.Info("Starting gatherer loop") + // if err := r.scrapeManager.Run(r.discoveryManager.SyncCh()); err != nil { + // r.settings.Logger.Error("Scrape manager failed", zap.Error(err)) + // componentstatus.ReportStatus(host, componentstatus.NewFatalErrorEvent(err)) + // } + loop.Run(nil) + }() + return nil +} + +// gcInterval returns the longest scrape interval used by a scrape config, +// plus a delta to prevent race conditions. +// This ensures jobs are not garbage collected between scrapes. +func gcInterval(cfg *PromConfig) time.Duration { + gcInterval := defaultGCInterval + if time.Duration(cfg.GlobalConfig.ScrapeInterval)+gcIntervalDelta > gcInterval { + gcInterval = time.Duration(cfg.GlobalConfig.ScrapeInterval) + gcIntervalDelta + } + for _, scrapeConfig := range cfg.ScrapeConfigs { + if time.Duration(scrapeConfig.ScrapeInterval)+gcIntervalDelta > gcInterval { + gcInterval = time.Duration(scrapeConfig.ScrapeInterval) + gcIntervalDelta + } + } + return gcInterval +} + +// Shutdown stops and cancels the underlying Prometheus scrapers. 
+func (r *pReceiver) Shutdown(context.Context) error { + if r.cancelFunc != nil { + r.cancelFunc() + } + if r.scrapeManager != nil { + r.scrapeManager.Stop() + } + // if r.targetAllocatorManager != nil { + // r.targetAllocatorManager.Shutdown() + // } + if r.unregisterMetrics != nil { + r.unregisterMetrics() + } + return nil +} diff --git a/pkg/promotel/prometheusreceiver/metrics_receiver_test.go b/pkg/promotel/prometheusreceiver/metrics_receiver_test.go new file mode 100644 index 000000000..0d7897bf6 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/metrics_receiver_test.go @@ -0,0 +1,71 @@ +package prometheusreceiver_test + +import ( + "context" + "fmt" + "testing" + "time" + + promcfg "github.com/prometheus/prometheus/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/receiver/receivertest" + + promreceiver "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver" +) + +func TestReceiverEndToEnd(t *testing.T) { + //cfg, err := setupTestConfig("127.0.0.1:8888", "/metrics") + //assert.NoError(t, err) + ctx := context.Background() + config := &promreceiver.Config{ + PrometheusConfig: (*promreceiver.PromConfig)(&promcfg.Config{}), + StartTimeMetricRegex: "", + } + + cms := new(consumertest.MetricsSink) + receiver := promreceiver.NewPrometheusReceiver(receivertest.NewNopSettings(), config, cms) + + require.NoError(t, receiver.Start(ctx, componenttest.NewNopHost())) + // verify state after shutdown is called + t.Cleanup(func() { + // verify state after shutdown is called + require.NoError(t, receiver.Shutdown(context.Background())) + // assert.Empty(t, flattenTargets(receiver.scrapeManager.TargetsAll()), "expected scrape manager to have no targets") + }) + // Wait for some scrape results to be collected + assert.Eventually(t, func() bool { + // This is the receiver's pov as to what should have been collected from the server + metrics := cms.AllMetrics() + if len(metrics) > 0 { + // If we don't have enough scrapes yet lets return false and wait for another tick + return true + } + return false + }, 30*time.Second, 500*time.Millisecond) + + // This begins the processing of the scrapes collected by the receiver + metrics := cms.AllMetrics() + // split and store results by target name + pResults := splitMetricsByTarget(metrics) + for name, scrapes := range pResults { + // validate scrapes here + fmt.Printf("name %s, \nscrapes %+v", name, scrapes) + assert.NotEmpty(t, scrapes) + } +} + +func splitMetricsByTarget(metrics []pmetric.Metrics) map[string][]pmetric.ResourceMetrics { + pResults := make(map[string][]pmetric.ResourceMetrics) + for _, md := range metrics { + rms := md.ResourceMetrics() + for i := 0; i < rms.Len(); i++ { + name, _ := rms.At(i).Resource().Attributes().Get("service.name") + pResults[name.AsString()] = append(pResults[name.AsString()], rms.At(i)) + } + } + return pResults +} diff --git a/pkg/promotel/prometheusreceiver/scrape/clientprotobuf.go b/pkg/promotel/prometheusreceiver/scrape/clientprotobuf.go new file mode 100644 index 000000000..88c547e60 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/clientprotobuf.go @@ -0,0 +1,41 @@ +package scrape + +import ( + "bytes" + "encoding/binary" + + "github.com/gogo/protobuf/proto" + + // Intentionally using client model to simulate client in tests. 
+ dto "github.com/prometheus/client_model/go" +) + +// Write a MetricFamily into a protobuf. +// This function is intended for testing scraping by providing protobuf serialized input. +func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) { + buffer := &bytes.Buffer{} + err := AddMetricFamilyToProtobuf(buffer, metricFamily) + if err != nil { + return nil, err + } + return buffer.Bytes(), nil +} + +// Append a MetricFamily protobuf representation to a buffer. +// This function is intended for testing scraping by providing protobuf serialized input. +func AddMetricFamilyToProtobuf(buffer *bytes.Buffer, metricFamily *dto.MetricFamily) error { + protoBuf, err := proto.Marshal(metricFamily) + if err != nil { + return err + } + + varintBuf := make([]byte, binary.MaxVarintLen32) + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + + _, err = buffer.Write(varintBuf[:varintLength]) + if err != nil { + return err + } + _, err = buffer.Write(protoBuf) + return err +} diff --git a/pkg/promotel/prometheusreceiver/scrape/helpers_test.go b/pkg/promotel/prometheusreceiver/scrape/helpers_test.go new file mode 100644 index 000000000..22628cbe5 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/helpers_test.go @@ -0,0 +1,223 @@ +package scrape + +import ( + "bytes" + "context" + "encoding/binary" + "fmt" + "math" + "math/rand" + "strings" + "sync" + "testing" + + "github.com/gogo/protobuf/proto" + dto "github.com/prometheus/client_model/go" + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/storage" +) + +type nopAppendable struct{} + +func (a nopAppendable) Appender(_ context.Context) storage.Appender { + return nopAppender{} +} + +type nopAppender struct{} + +func (a nopAppender) Append(storage.SeriesRef, labels.Labels, int64, float64) (storage.SeriesRef, error) { + return 0, nil +} + +func (a nopAppender) AppendExemplar(storage.SeriesRef, labels.Labels, exemplar.Exemplar) (storage.SeriesRef, error) { + return 0, nil +} + +func (a nopAppender) AppendHistogram(storage.SeriesRef, labels.Labels, int64, *histogram.Histogram, *histogram.FloatHistogram) (storage.SeriesRef, error) { + return 0, nil +} + +func (a nopAppender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Metadata) (storage.SeriesRef, error) { + return 0, nil +} + +func (a nopAppender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { + return 0, nil +} + +func (a nopAppender) Commit() error { return nil } +func (a nopAppender) Rollback() error { return nil } + +type floatSample struct { + metric labels.Labels + t int64 + f float64 +} + +func equalFloatSamples(a, b floatSample) bool { + // Compare Float64bits so NaN values which are exactly the same will compare equal. + return labels.Equal(a.metric, b.metric) && a.t == b.t && math.Float64bits(a.f) == math.Float64bits(b.f) +} + +type histogramSample struct { + t int64 + h *histogram.Histogram + fh *histogram.FloatHistogram +} + +type collectResultAppendable struct { + *collectResultAppender +} + +func (a *collectResultAppendable) Appender(_ context.Context) storage.Appender { + return a +} + +// collectResultAppender records all samples that were added through the appender. 
+// It can be used as its zero value or be backed by another appender it writes samples through. +type collectResultAppender struct { + mtx sync.Mutex + + next storage.Appender + resultFloats []floatSample + pendingFloats []floatSample + rolledbackFloats []floatSample + resultHistograms []histogramSample + pendingHistograms []histogramSample + rolledbackHistograms []histogramSample + resultExemplars []exemplar.Exemplar + pendingExemplars []exemplar.Exemplar + resultMetadata []metadata.Metadata + pendingMetadata []metadata.Metadata +} + +func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingFloats = append(a.pendingFloats, floatSample{ + metric: lset, + t: t, + f: v, + }) + + if ref == 0 { + ref = storage.SeriesRef(rand.Uint64()) + } + if a.next == nil { + return ref, nil + } + + ref, err := a.next.Append(ref, lset, t, v) + if err != nil { + return 0, err + } + return ref, err +} + +func (a *collectResultAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingExemplars = append(a.pendingExemplars, e) + if a.next == nil { + return 0, nil + } + + return a.next.AppendExemplar(ref, l, e) +} + +func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t}) + if a.next == nil { + return 0, nil + } + + return a.next.AppendHistogram(ref, l, t, h, fh) +} + +func (a *collectResultAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingMetadata = append(a.pendingMetadata, m) + if ref == 0 { + ref = storage.SeriesRef(rand.Uint64()) + } + if a.next == nil { + return ref, nil + } + + return a.next.UpdateMetadata(ref, l, m) +} + +func (a *collectResultAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + return a.Append(ref, l, ct, 0.0) +} + +func (a *collectResultAppender) Commit() error { + a.mtx.Lock() + defer a.mtx.Unlock() + a.resultFloats = append(a.resultFloats, a.pendingFloats...) + a.resultExemplars = append(a.resultExemplars, a.pendingExemplars...) + a.resultHistograms = append(a.resultHistograms, a.pendingHistograms...) + a.resultMetadata = append(a.resultMetadata, a.pendingMetadata...) 
+ a.pendingFloats = nil + a.pendingExemplars = nil + a.pendingHistograms = nil + a.pendingMetadata = nil + if a.next == nil { + return nil + } + return a.next.Commit() +} + +func (a *collectResultAppender) Rollback() error { + a.mtx.Lock() + defer a.mtx.Unlock() + a.rolledbackFloats = a.pendingFloats + a.rolledbackHistograms = a.pendingHistograms + a.pendingFloats = nil + a.pendingHistograms = nil + if a.next == nil { + return nil + } + return a.next.Rollback() +} + +func (a *collectResultAppender) String() string { + var sb strings.Builder + for _, s := range a.resultFloats { + sb.WriteString(fmt.Sprintf("committed: %s %f %d\n", s.metric, s.f, s.t)) + } + for _, s := range a.pendingFloats { + sb.WriteString(fmt.Sprintf("pending: %s %f %d\n", s.metric, s.f, s.t)) + } + for _, s := range a.rolledbackFloats { + sb.WriteString(fmt.Sprintf("rolledback: %s %f %d\n", s.metric, s.f, s.t)) + } + return sb.String() +} + +// protoMarshalDelimited marshals a MetricFamily into a delimited +// Prometheus proto exposition format bytes (known as 'encoding=delimited`) +// +// See also https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers +func protoMarshalDelimited(t *testing.T, mf *dto.MetricFamily) []byte { + t.Helper() + + protoBuf, err := proto.Marshal(mf) + require.NoError(t, err) + + varintBuf := make([]byte, binary.MaxVarintLen32) + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + + buf := &bytes.Buffer{} + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + return buf.Bytes() +} diff --git a/pkg/promotel/prometheusreceiver/scrape/manager.go b/pkg/promotel/prometheusreceiver/scrape/manager.go new file mode 100644 index 000000000..cef463ddc --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/manager.go @@ -0,0 +1,314 @@ +package scrape + +import ( + "errors" + "fmt" + "hash/fnv" + "reflect" + "sync" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" + config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/osutil" + "github.com/prometheus/prometheus/util/pool" +) + +// NewManager is the Manager constructor. +func NewManager(o *Options, logger log.Logger, app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { + if o == nil { + o = &Options{} + } + if logger == nil { + logger = log.NewNopLogger() + } + + sm, err := newScrapeMetrics(registerer) + if err != nil { + return nil, fmt.Errorf("failed to create scrape manager due to error: %w", err) + } + + m := &Manager{ + append: app, + opts: o, + logger: logger, + scrapeConfigs: make(map[string]*config.ScrapeConfig), + scrapePools: make(map[string]*scrapePool), + graceShut: make(chan struct{}), + triggerReload: make(chan struct{}, 1), + metrics: sm, + buffers: pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }), + } + + m.metrics.setTargetMetadataCacheGatherer(m) + + return m, nil +} + +// Options are the configuration parameters to the scrape manager. +type Options struct { + ExtraMetrics bool + NoDefaultPort bool + // Option used by downstream scraper users like OpenTelemetry Collector + // to help lookup metric metadata. Should be false for Prometheus. 
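collectResultAppender implements the storage.Appender contract that the scrape tests rely on: samples are staged as pending and only become visible as results after Commit, while Rollback discards them. A minimal sketch of how a test drives it:

app := &collectResultAppender{}
_, _ = app.Append(0, labels.FromStrings("__name__", "example_metric"), 1000, 1.5)
// Nothing is in app.resultFloats yet; Append only stages the sample.
_ = app.Commit()
// After Commit the sample is promoted: app.resultFloats holds one
// floatSample with t=1000 and f=1.5.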
+ PassMetadataInContext bool + // Option to enable appending of scraped Metadata to the TSDB/other appenders. Individual appenders + // can decide what to do with metadata, but for practical purposes this flag exists so that metadata + // can be written to the WAL and thus read for remote write. + // TODO: implement some form of metadata storage + AppendMetadata bool + // Option to increase the interval used by scrape manager to throttle target groups updates. + DiscoveryReloadInterval model.Duration + // Option to enable the ingestion of the created timestamp as a synthetic zero sample. + // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md + EnableCreatedTimestampZeroIngestion bool + // Option to enable the ingestion of native histograms. + EnableNativeHistogramsIngestion bool + + // Optional HTTP client options to use when scraping. + HTTPClientOptions []config_util.HTTPClientOption + + // private option for testability. + skipOffsetting bool +} + +// Manager maintains a set of scrape pools and manages start/stop cycles +// when receiving new target groups from the discovery manager. +type Manager struct { + opts *Options + logger log.Logger + append storage.Appendable + graceShut chan struct{} + + offsetSeed uint64 // Global offsetSeed seed is used to spread scrape workload across HA setup. + mtxScrape sync.Mutex // Guards the fields below. + scrapeConfigs map[string]*config.ScrapeConfig + scrapePools map[string]*scrapePool + targetSets map[string][]*targetgroup.Group + buffers *pool.Pool + + triggerReload chan struct{} + + metrics *scrapeMetrics +} + +// Run receives and saves target set updates and triggers the scraping loops reloading. +// Reloading happens in the background so that it doesn't block receiving targets updates. +func (m *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) error { + go m.reloader() + for { + select { + case ts := <-tsets: + m.updateTsets(ts) + + select { + case m.triggerReload <- struct{}{}: + default: + } + + case <-m.graceShut: + return nil + } + } +} + +// UnregisterMetrics unregisters manager metrics. +func (m *Manager) UnregisterMetrics() { + m.metrics.Unregister() +} + +func (m *Manager) reloader() { + reloadIntervalDuration := m.opts.DiscoveryReloadInterval + if reloadIntervalDuration < model.Duration(5*time.Second) { + reloadIntervalDuration = model.Duration(5 * time.Second) + } + + ticker := time.NewTicker(time.Duration(reloadIntervalDuration)) + + defer ticker.Stop() + + for { + select { + case <-m.graceShut: + return + case <-ticker.C: + select { + case <-m.triggerReload: + m.reload() + case <-m.graceShut: + return + } + } + } +} + +func (m *Manager) reload() { + m.mtxScrape.Lock() + var wg sync.WaitGroup + for setName, groups := range m.targetSets { + if _, ok := m.scrapePools[setName]; !ok { + scrapeConfig, ok := m.scrapeConfigs[setName] + if !ok { + level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName) + continue + } + m.metrics.targetScrapePools.Inc() + sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.buffers, m.opts, m.metrics) + if err != nil { + m.metrics.targetScrapePoolsFailed.Inc() + level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName) + continue + } + m.scrapePools[setName] = sp + } + + wg.Add(1) + // Run the sync in parallel as these take a while and at high load can't catch up. 
+ go func(sp *scrapePool, groups []*targetgroup.Group) { + sp.Sync(groups) + wg.Done() + }(m.scrapePools[setName], groups) + } + m.mtxScrape.Unlock() + wg.Wait() +} + +// setOffsetSeed calculates a global offsetSeed per server relying on extra label set. +func (m *Manager) setOffsetSeed(labels labels.Labels) error { + h := fnv.New64a() + hostname, err := osutil.GetFQDN() + if err != nil { + return err + } + if _, err := fmt.Fprintf(h, "%s%s", hostname, labels.String()); err != nil { + return err + } + m.offsetSeed = h.Sum64() + return nil +} + +// Stop cancels all running scrape pools and blocks until all have exited. +func (m *Manager) Stop() { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + for _, sp := range m.scrapePools { + sp.stop() + } + close(m.graceShut) +} + +func (m *Manager) updateTsets(tsets map[string][]*targetgroup.Group) { + m.mtxScrape.Lock() + m.targetSets = tsets + m.mtxScrape.Unlock() +} + +// ApplyConfig resets the manager's target providers and job configurations as defined by the new cfg. +func (m *Manager) ApplyConfig(cfg *config.Config) error { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + scfgs, err := cfg.GetScrapeConfigs() + if err != nil { + return err + } + + c := make(map[string]*config.ScrapeConfig) + for _, scfg := range scfgs { + c[scfg.JobName] = scfg + } + m.scrapeConfigs = c + + if err := m.setOffsetSeed(cfg.GlobalConfig.ExternalLabels); err != nil { + return err + } + + // Cleanup and reload pool if the configuration has changed. + var failed bool + for name, sp := range m.scrapePools { + switch cfg, ok := m.scrapeConfigs[name]; { + case !ok: + sp.stop() + delete(m.scrapePools, name) + case !reflect.DeepEqual(sp.config, cfg): + err := sp.reload(cfg) + if err != nil { + level.Error(m.logger).Log("msg", "error reloading scrape pool", "err", err, "scrape_pool", name) + failed = true + } + } + } + + if failed { + return errors.New("failed to apply the new configuration") + } + return nil +} + +// TargetsAll returns active and dropped targets grouped by job_name. +func (m *Manager) TargetsAll() map[string][]*Target { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + targets := make(map[string][]*Target, len(m.scrapePools)) + for tset, sp := range m.scrapePools { + targets[tset] = append(sp.ActiveTargets(), sp.DroppedTargets()...) + } + return targets +} + +// ScrapePools returns the list of all scrape pool names. +func (m *Manager) ScrapePools() []string { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + names := make([]string, 0, len(m.scrapePools)) + for name := range m.scrapePools { + names = append(names, name) + } + return names +} + +// TargetsActive returns the active targets currently being scraped. +func (m *Manager) TargetsActive() map[string][]*Target { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + targets := make(map[string][]*Target, len(m.scrapePools)) + for tset, sp := range m.scrapePools { + targets[tset] = sp.ActiveTargets() + } + return targets +} + +// TargetsDropped returns the dropped targets during relabelling, subject to KeepDroppedTargets limit. 
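NewManager, ApplyConfig, Run and Stop together form the lifecycle the receiver builds on. A minimal wiring sketch; error handling is elided, and appendable and promCfg are placeholders for a storage.Appendable and a *config.Config supplied by the caller, not names from this patch:

mgr, err := NewManager(&Options{}, log.NewNopLogger(), appendable, prometheus.NewRegistry())
if err != nil {
	// handle the error
}
if err := mgr.ApplyConfig(promCfg); err != nil { // installs scrape configs and the offset seed
	// handle the error
}
tsets := make(chan map[string][]*targetgroup.Group)
go func() { _ = mgr.Run(tsets) }() // reloads scrape pools as target-group updates arrive
// ... later, on shutdown:
mgr.Stop() // stops every scrape pool and unblocks Run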
+func (m *Manager) TargetsDropped() map[string][]*Target { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + targets := make(map[string][]*Target, len(m.scrapePools)) + for tset, sp := range m.scrapePools { + targets[tset] = sp.DroppedTargets() + } + return targets +} + +func (m *Manager) TargetsDroppedCounts() map[string]int { + m.mtxScrape.Lock() + defer m.mtxScrape.Unlock() + + counts := make(map[string]int, len(m.scrapePools)) + for tset, sp := range m.scrapePools { + counts[tset] = sp.droppedTargetsCount + } + return counts +} diff --git a/pkg/promotel/prometheusreceiver/scrape/manager_test.go b/pkg/promotel/prometheusreceiver/scrape/manager_test.go new file mode 100644 index 000000000..c62cce8d6 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/manager_test.go @@ -0,0 +1,888 @@ +package scrape + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "os" + "strconv" + "sync" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/gogo/protobuf/proto" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/timestamppb" + "gopkg.in/yaml.v2" + + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery" + "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/util/runutil" + "github.com/prometheus/prometheus/util/testutil" +) + +func TestPopulateLabels(t *testing.T) { + cases := []struct { + in labels.Labels + cfg *config.ScrapeConfig + noDefaultPort bool + res labels.Labels + resOrig labels.Labels + err string + }{ + // Regular population of scrape config options. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + "custom": "value", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + model.InstanceLabel: "1.2.3.4:1000", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + "custom": "value", + }), + resOrig: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + "custom": "value", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + }, + // Pre-define/overwrite scrape config labels. + // Leave out port and expect it to be defaulted to scheme. 
+ { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4", + model.SchemeLabel: "http", + model.MetricsPathLabel: "/custom", + model.JobLabel: "custom-job", + model.ScrapeIntervalLabel: "2s", + model.ScrapeTimeoutLabel: "2s", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:80", + model.InstanceLabel: "1.2.3.4:80", + model.SchemeLabel: "http", + model.MetricsPathLabel: "/custom", + model.JobLabel: "custom-job", + model.ScrapeIntervalLabel: "2s", + model.ScrapeTimeoutLabel: "2s", + }), + resOrig: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4", + model.SchemeLabel: "http", + model.MetricsPathLabel: "/custom", + model.JobLabel: "custom-job", + model.ScrapeIntervalLabel: "2s", + model.ScrapeTimeoutLabel: "2s", + }), + }, + // Provide instance label. HTTPS port default for IPv6. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "[::1]", + model.InstanceLabel: "custom-instance", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "[::1]:443", + model.InstanceLabel: "custom-instance", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + resOrig: labels.FromMap(map[string]string{ + model.AddressLabel: "[::1]", + model.InstanceLabel: "custom-instance", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + }, + // Address label missing. + { + in: labels.FromStrings("custom", "value"), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.EmptyLabels(), + resOrig: labels.EmptyLabels(), + err: "no address", + }, + // Address label missing, but added in relabelling. + { + in: labels.FromStrings("custom", "host:1234"), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + RelabelConfigs: []*relabel.Config{ + { + Action: relabel.Replace, + Regex: relabel.MustNewRegexp("(.*)"), + SourceLabels: model.LabelNames{"custom"}, + Replacement: "${1}", + TargetLabel: string(model.AddressLabel), + }, + }, + }, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "host:1234", + model.InstanceLabel: "host:1234", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + "custom": "host:1234", + }), + resOrig: labels.FromMap(map[string]string{ + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + "custom": "host:1234", + }), + }, + // Address label missing, but added in relabelling. 
+ { + in: labels.FromStrings("custom", "host:1234"), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + RelabelConfigs: []*relabel.Config{ + { + Action: relabel.Replace, + Regex: relabel.MustNewRegexp("(.*)"), + SourceLabels: model.LabelNames{"custom"}, + Replacement: "${1}", + TargetLabel: string(model.AddressLabel), + }, + }, + }, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "host:1234", + model.InstanceLabel: "host:1234", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + "custom": "host:1234", + }), + resOrig: labels.FromMap(map[string]string{ + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + "custom": "host:1234", + }), + }, + // Invalid UTF-8 in label. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + "custom": "\xbd", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.EmptyLabels(), + resOrig: labels.EmptyLabels(), + err: "invalid label value for \"custom\": \"\\xbd\"", + }, + // Invalid duration in interval label. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + model.ScrapeIntervalLabel: "2notseconds", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.EmptyLabels(), + resOrig: labels.EmptyLabels(), + err: "error parsing scrape interval: unknown unit \"notseconds\" in duration \"2notseconds\"", + }, + // Invalid duration in timeout label. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + model.ScrapeTimeoutLabel: "2notseconds", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.EmptyLabels(), + resOrig: labels.EmptyLabels(), + err: "error parsing scrape timeout: unknown unit \"notseconds\" in duration \"2notseconds\"", + }, + // 0 interval in timeout label. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + model.ScrapeIntervalLabel: "0s", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.EmptyLabels(), + resOrig: labels.EmptyLabels(), + err: "scrape interval cannot be 0", + }, + // 0 duration in timeout label. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + model.ScrapeTimeoutLabel: "0s", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.EmptyLabels(), + resOrig: labels.EmptyLabels(), + err: "scrape timeout cannot be 0", + }, + // Timeout less than interval. 
+ { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:1000", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "2s", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + res: labels.EmptyLabels(), + resOrig: labels.EmptyLabels(), + err: "scrape timeout cannot be greater than scrape interval (\"2s\" > \"1s\")", + }, + // Don't attach default port. + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + noDefaultPort: true, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4", + model.InstanceLabel: "1.2.3.4", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + resOrig: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + }, + // Remove default port (http). + { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:80", + }), + cfg: &config.ScrapeConfig{ + Scheme: "http", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + noDefaultPort: true, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4", + model.InstanceLabel: "1.2.3.4:80", + model.SchemeLabel: "http", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + resOrig: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:80", + model.SchemeLabel: "http", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + }, + // Remove default port (https). 
+ { + in: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:443", + }), + cfg: &config.ScrapeConfig{ + Scheme: "https", + MetricsPath: "/metrics", + JobName: "job", + ScrapeInterval: model.Duration(time.Second), + ScrapeTimeout: model.Duration(time.Second), + }, + noDefaultPort: true, + res: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4", + model.InstanceLabel: "1.2.3.4:443", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + resOrig: labels.FromMap(map[string]string{ + model.AddressLabel: "1.2.3.4:443", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metrics", + model.JobLabel: "job", + model.ScrapeIntervalLabel: "1s", + model.ScrapeTimeoutLabel: "1s", + }), + }, + } + for _, c := range cases { + in := c.in.Copy() + + res, orig, err := PopulateLabels(labels.NewBuilder(c.in), c.cfg, c.noDefaultPort) + if c.err != "" { + require.EqualError(t, err, c.err) + } else { + require.NoError(t, err) + } + require.Equal(t, c.in, in) + testutil.RequireEqual(t, c.res, res) + testutil.RequireEqual(t, c.resOrig, orig) + } +} + +func loadConfiguration(t testing.TB, c string) *config.Config { + t.Helper() + + cfg := &config.Config{} + err := yaml.UnmarshalStrict([]byte(c), cfg) + require.NoError(t, err, "Unable to load YAML config.") + + return cfg +} + +func noopLoop() loop { + return &testLoop{ + startFunc: func(interval, timeout time.Duration, errc chan<- error) {}, + stopFunc: func() {}, + } +} + +func TestManagerApplyConfig(t *testing.T) { + // Valid initial configuration. + cfgText1 := ` +scrape_configs: + - job_name: job1 + static_configs: + - targets: ["foo:9090"] +` + // Invalid configuration. + cfgText2 := ` +scrape_configs: + - job_name: job1 + scheme: https + static_configs: + - targets: ["foo:9090"] + tls_config: + ca_file: /not/existing/ca/file +` + // Valid configuration. + cfgText3 := ` +scrape_configs: + - job_name: job1 + scheme: https + static_configs: + - targets: ["foo:9090"] +` + var ( + cfg1 = loadConfiguration(t, cfgText1) + cfg2 = loadConfiguration(t, cfgText2) + cfg3 = loadConfiguration(t, cfgText3) + + ch = make(chan struct{}, 1) + + testRegistry = prometheus.NewRegistry() + ) + + opts := Options{} + scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + require.NoError(t, err) + newLoop := func(scrapeLoopOptions) loop { + ch <- struct{}{} + return noopLoop() + } + sp := &scrapePool{ + appendable: &nopAppendable{}, + activeTargets: map[uint64]*Target{ + 1: {}, + }, + loops: map[uint64]loop{ + 1: noopLoop(), + }, + newLoop: newLoop, + logger: nil, + config: cfg1.ScrapeConfigs[0], + client: http.DefaultClient, + metrics: scrapeManager.metrics, + symbolTable: labels.NewSymbolTable(), + } + scrapeManager.scrapePools = map[string]*scrapePool{ + "job1": sp, + } + + // Apply the initial configuration. + err = scrapeManager.ApplyConfig(cfg1) + require.NoError(t, err, "Unable to apply configuration.") + select { + case <-ch: + require.FailNow(t, "Reload happened.") + default: + } + + // Apply a configuration for which the reload fails. + err = scrapeManager.ApplyConfig(cfg2) + require.Error(t, err, "Expecting error but got none.") + select { + case <-ch: + require.FailNow(t, "Reload happened.") + default: + } + + // Apply a configuration for which the reload succeeds. 
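The table above fixes the PopulateLabels contract; condensed into one call it looks like the sketch below. The values are taken from the cases, except that the :443 default for a bare IPv4 address with an https scheme is inferred from the IPv6 case, so treat that detail as an assumption:

res, origLabels, err := PopulateLabels(
	labels.NewBuilder(labels.FromStrings(model.AddressLabel, "1.2.3.4")),
	&config.ScrapeConfig{
		Scheme:         "https",
		MetricsPath:    "/metrics",
		JobName:        "job",
		ScrapeInterval: model.Duration(time.Second),
		ScrapeTimeout:  model.Duration(time.Second),
	},
	false, // noDefaultPort: the scheme's default port is appended to __address__ and instance
)
// res carries __address__ and instance as "1.2.3.4:443" plus the scheme,
// path, job, interval and timeout labels; origLabels keeps the pre-default,
// pre-relabel view; err is nil.
_, _, _ = res, origLabels, err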
+ err = scrapeManager.ApplyConfig(cfg3) + require.NoError(t, err, "Unable to apply configuration.") + select { + case <-ch: + default: + require.FailNow(t, "Reload didn't happen.") + } + + // Re-applying the same configuration shouldn't trigger a reload. + err = scrapeManager.ApplyConfig(cfg3) + require.NoError(t, err, "Unable to apply configuration.") + select { + case <-ch: + require.FailNow(t, "Reload happened.") + default: + } +} + +func TestManagerTargetsUpdates(t *testing.T) { + opts := Options{} + testRegistry := prometheus.NewRegistry() + m, err := NewManager(&opts, nil, nil, testRegistry) + require.NoError(t, err) + + ts := make(chan map[string][]*targetgroup.Group) + go m.Run(ts) + defer m.Stop() + + tgSent := make(map[string][]*targetgroup.Group) + for x := 0; x < 10; x++ { + tgSent[strconv.Itoa(x)] = []*targetgroup.Group{ + { + Source: strconv.Itoa(x), + }, + } + + select { + case ts <- tgSent: + case <-time.After(10 * time.Millisecond): + require.Fail(t, "Scrape manager's channel remained blocked after the set threshold.") + } + } + + m.mtxScrape.Lock() + tsetActual := m.targetSets + m.mtxScrape.Unlock() + + // Make sure all updates have been received. + require.Equal(t, tgSent, tsetActual) + + select { + case <-m.triggerReload: + default: + require.Fail(t, "No scrape loops reload was triggered after targets update.") + } +} + +func TestSetOffsetSeed(t *testing.T) { + getConfig := func(prometheus string) *config.Config { + cfgText := ` +global: + external_labels: + prometheus: '` + prometheus + `' +` + + cfg := &config.Config{} + err := yaml.UnmarshalStrict([]byte(cfgText), cfg) + require.NoError(t, err, "Unable to load YAML config cfgYaml.") + + return cfg + } + + opts := Options{} + testRegistry := prometheus.NewRegistry() + scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + require.NoError(t, err) + + // Load the first config. + cfg1 := getConfig("ha1") + err = scrapeManager.setOffsetSeed(cfg1.GlobalConfig.ExternalLabels) + require.NoError(t, err) + offsetSeed1 := scrapeManager.offsetSeed + + require.NotZero(t, offsetSeed1, "Offset seed has to be a hash of uint64.") + + // Load the first config. 
+ cfg2 := getConfig("ha2") + require.NoError(t, scrapeManager.setOffsetSeed(cfg2.GlobalConfig.ExternalLabels)) + offsetSeed2 := scrapeManager.offsetSeed + + require.NotEqual(t, offsetSeed1, offsetSeed2, "Offset seed should not be the same on different set of external labels.") +} + +func TestManagerScrapePools(t *testing.T) { + cfgText1 := ` +scrape_configs: +- job_name: job1 + static_configs: + - targets: ["foo:9090"] +- job_name: job2 + static_configs: + - targets: ["foo:9091", "foo:9092"] +` + cfgText2 := ` +scrape_configs: +- job_name: job1 + static_configs: + - targets: ["foo:9090", "foo:9094"] +- job_name: job3 + static_configs: + - targets: ["foo:9093"] +` + var ( + cfg1 = loadConfiguration(t, cfgText1) + cfg2 = loadConfiguration(t, cfgText2) + testRegistry = prometheus.NewRegistry() + ) + + reload := func(scrapeManager *Manager, cfg *config.Config) { + newLoop := func(scrapeLoopOptions) loop { + return noopLoop() + } + scrapeManager.scrapePools = map[string]*scrapePool{} + for _, sc := range cfg.ScrapeConfigs { + _, cancel := context.WithCancel(context.Background()) + defer cancel() + sp := &scrapePool{ + appendable: &nopAppendable{}, + activeTargets: map[uint64]*Target{}, + loops: map[uint64]loop{ + 1: noopLoop(), + }, + newLoop: newLoop, + logger: nil, + config: sc, + client: http.DefaultClient, + cancel: cancel, + } + for _, c := range sc.ServiceDiscoveryConfigs { + staticConfig := c.(discovery.StaticConfig) + for _, group := range staticConfig { + for i := range group.Targets { + sp.activeTargets[uint64(i)] = &Target{} + } + } + } + scrapeManager.scrapePools[sc.JobName] = sp + } + } + + opts := Options{} + scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + require.NoError(t, err) + + reload(scrapeManager, cfg1) + require.ElementsMatch(t, []string{"job1", "job2"}, scrapeManager.ScrapePools()) + + reload(scrapeManager, cfg2) + require.ElementsMatch(t, []string{"job1", "job3"}, scrapeManager.ScrapePools()) +} + +type testGatherer struct { + t *testing.T + metrics []*dto.MetricFamily +} + +var _ prometheus.Gatherer = &testGatherer{} + +func (g *testGatherer) Gather() ([]*dto.MetricFamily, error) { + g.t.Log("testGatherer.Gather is called") + return g.metrics, nil +} + +// TestManagerCTZeroIngestion tests scrape manager for CT cases. +func TestManagerCTZeroIngestion(t *testing.T) { + const mName = "expected_counter" + + for _, tc := range []struct { + name string + counterSample *dto.Counter + enableCTZeroIngestion bool + + expectedValues []float64 + }{ + { + name: "disabled with CT on counter", + counterSample: &dto.Counter{ + Value: proto.Float64(1.0), + // Timestamp does not matter as long as it exists in this test. + CreatedTimestamp: timestamppb.Now(), + }, + expectedValues: []float64{1.0}, + }, + { + name: "enabled with CT on counter", + counterSample: &dto.Counter{ + Value: proto.Float64(1.0), + // Timestamp does not matter as long as it exists in this test. 
+ CreatedTimestamp: timestamppb.Now(),
+ },
+ enableCTZeroIngestion: true,
+ expectedValues: []float64{0.0, 1.0},
+ },
+ {
+ name: "enabled without CT on counter",
+ counterSample: &dto.Counter{
+ Value: proto.Float64(1.0),
+ },
+ enableCTZeroIngestion: true,
+ expectedValues: []float64{1.0},
+ },
+ } {
+ for _, useHTTPTestServer := range []bool{true, false} {
+ t.Run(fmt.Sprintf("useHTTPTestServer:%t", useHTTPTestServer), func(t *testing.T) {
+ t.Run(tc.name, func(t *testing.T) {
+ app := &collectResultAppender{}
+ scrapeManager, err := NewManager(
+ &Options{
+ EnableCreatedTimestampZeroIngestion: tc.enableCTZeroIngestion,
+ skipOffsetting: true,
+ },
+ log.NewLogfmtLogger(os.Stderr),
+ &collectResultAppendable{app},
+ prometheus.NewRegistry(),
+ )
+ require.NoError(t, err)
+
+ require.NoError(t, scrapeManager.ApplyConfig(&config.Config{
+ GlobalConfig: config.GlobalConfig{
+ // Disable regular scrapes.
+ ScrapeInterval: model.Duration(9999 * time.Minute),
+ ScrapeTimeout: model.Duration(5 * time.Second),
+ // Ensure the proto format is chosen. We need proto as it's the only protocol
+ // with CT parsing support.
+ ScrapeProtocols: []config.ScrapeProtocol{config.PrometheusProto},
+ },
+ ScrapeConfigs: []*config.ScrapeConfig{{JobName: "test"}},
+ }))
+
+ once := sync.Once{}
+ // Start a fake HTTP target that allows only one scrape.
+ ctrType := dto.MetricType_COUNTER
+ mf := &dto.MetricFamily{
+ Name: proto.String(mName),
+ Type: &ctrType,
+ Metric: []*dto.Metric{{Counter: tc.counterSample}},
+ }
+ mfs := []*dto.MetricFamily{mf}
+
+ handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ fail := true
+ once.Do(func() {
+ fail = false
+ w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`)
+ t.Log("Received HTTP request to the test server from scraper")
+ // Don't write anything to the response body.
+ // w.Write(protoMarshalDelimited(t, mf))
+ })
+ if fail {
+ w.WriteHeader(http.StatusInternalServerError)
+ }
+ })
+ var serverURL *url.URL
+ if useHTTPTestServer {
+ server := httptest.NewServer(handler)
+ defer server.Close()
+ serverURL, err = url.Parse(server.URL)
+ require.NoError(t, err)
+ } else {
+ // This lets the scraper read metrics from the handler directly, without making an HTTP request.
+ SetDefaultGathererHandler(handler)
+ defer SetDefaultGathererHandler(nil)
+ serverURL, err = url.Parse("http://not-started:8080")
+ require.NoError(t, err)
+ }
+
+ testPromGatherer := prometheus.Gatherer(&testGatherer{t, mfs})
+ // This causes scrapeLoop to switch from ProtobufParser to GathererParser, which reads directly from testPromGatherer.
+ SetDefaultGatherer(testPromGatherer)
+
+ // Add fake target directly into tsets + reload. Normally users would use
+ // Manager.Run and wait for the minimum 5s refresh interval.
+ scrapeManager.updateTsets(map[string][]*targetgroup.Group{
+ "test": {{
+ Targets: []model.LabelSet{{
+ model.SchemeLabel: model.LabelValue(serverURL.Scheme),
+ model.AddressLabel: model.LabelValue(serverURL.Host),
+ }},
+ }},
+ })
+ scrapeManager.reload()
+
+ // Wait for one scrape.
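+ // The retry loop below polls the appender every 100ms (via runutil.Retry) until
+ // the expected number of float samples for mName has been committed, or the
+ // 1-minute context expires.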
+ ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error { + if countFloatSamples(app, mName) != len(tc.expectedValues) { + return fmt.Errorf("expected %v samples", tc.expectedValues) + } + return nil + }), "after 1 minute") + scrapeManager.Stop() + + require.Equal(t, tc.expectedValues, getResultFloats(app, mName)) + }) + }) + } + } +} + +func countFloatSamples(a *collectResultAppender, expectedMetricName string) (count int) { + a.mtx.Lock() + defer a.mtx.Unlock() + + for _, f := range a.resultFloats { + if f.metric.Get(model.MetricNameLabel) == expectedMetricName { + count++ + } + } + return count +} + +func getResultFloats(app *collectResultAppender, expectedMetricName string) (result []float64) { + app.mtx.Lock() + defer app.mtx.Unlock() + + for _, f := range app.resultFloats { + if f.metric.Get(model.MetricNameLabel) == expectedMetricName { + result = append(result, f.f) + } + } + return result +} + +func TestUnregisterMetrics(t *testing.T) { + reg := prometheus.NewRegistry() + // Check that all metrics can be unregistered, allowing a second manager to be created. + for i := 0; i < 2; i++ { + opts := Options{} + manager, err := NewManager(&opts, nil, nil, reg) + require.NotNil(t, manager) + require.NoError(t, err) + // Unregister all metrics. + manager.UnregisterMetrics() + } +} diff --git a/pkg/promotel/prometheusreceiver/scrape/metrics.go b/pkg/promotel/prometheusreceiver/scrape/metrics.go new file mode 100644 index 000000000..0255c05fb --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/metrics.go @@ -0,0 +1,331 @@ +package scrape + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" +) + +type scrapeMetrics struct { + reg prometheus.Registerer + // Used by Manager. + targetMetadataCache *MetadataMetricsCollector + targetScrapePools prometheus.Counter + targetScrapePoolsFailed prometheus.Counter + + // Used by scrapePool. + targetReloadIntervalLength *prometheus.SummaryVec + targetScrapePoolReloads prometheus.Counter + targetScrapePoolReloadsFailed prometheus.Counter + targetScrapePoolSyncsCounter *prometheus.CounterVec + targetScrapePoolExceededTargetLimit prometheus.Counter + targetScrapePoolTargetLimit *prometheus.GaugeVec + targetScrapePoolTargetsAdded *prometheus.GaugeVec + targetScrapePoolSymbolTableItems *prometheus.GaugeVec + targetSyncIntervalLength *prometheus.SummaryVec + targetSyncFailed *prometheus.CounterVec + + // Used by targetScraper. + targetScrapeExceededBodySizeLimit prometheus.Counter + + // Used by scrapeCache. + targetScrapeCacheFlushForced prometheus.Counter + + // Used by scrapeLoop. + targetIntervalLength *prometheus.SummaryVec + targetScrapeSampleLimit prometheus.Counter + targetScrapeSampleDuplicate prometheus.Counter + targetScrapeSampleOutOfOrder prometheus.Counter + targetScrapeSampleOutOfBounds prometheus.Counter + targetScrapeExemplarOutOfOrder prometheus.Counter + targetScrapePoolExceededLabelLimits prometheus.Counter + targetScrapeNativeHistogramBucketLimit prometheus.Counter +} + +func newScrapeMetrics(reg prometheus.Registerer) (*scrapeMetrics, error) { + sm := &scrapeMetrics{reg: reg} + + // Manager metrics. 
+ sm.targetMetadataCache = &MetadataMetricsCollector{ + CacheEntries: prometheus.NewDesc( + "prometheus_target_metadata_cache_entries", + "Total number of metric metadata entries in the cache", + []string{"scrape_job"}, + nil, + ), + CacheBytes: prometheus.NewDesc( + "prometheus_target_metadata_cache_bytes", + "The number of bytes that are currently used for storing metric metadata in the cache", + []string{"scrape_job"}, + nil, + ), + // TargetsGatherer should be set later, because it's a circular dependency. + // newScrapeMetrics() is called by NewManager(), while also TargetsGatherer is the new Manager. + } + + sm.targetScrapePools = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pools_total", + Help: "Total number of scrape pool creation attempts.", + }, + ) + sm.targetScrapePoolsFailed = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pools_failed_total", + Help: "Total number of scrape pool creations that failed.", + }, + ) + + // Used by scrapePool. + sm.targetReloadIntervalLength = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "prometheus_target_reload_length_seconds", + Help: "Actual interval to reload the scrape pool with a given configuration.", + Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001}, + }, + []string{"interval"}, + ) + sm.targetScrapePoolReloads = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pool_reloads_total", + Help: "Total number of scrape pool reloads.", + }, + ) + sm.targetScrapePoolReloadsFailed = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pool_reloads_failed_total", + Help: "Total number of failed scrape pool reloads.", + }, + ) + sm.targetScrapePoolExceededTargetLimit = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pool_exceeded_target_limit_total", + Help: "Total number of times scrape pools hit the target limit, during sync or config reload.", + }, + ) + sm.targetScrapePoolTargetLimit = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "prometheus_target_scrape_pool_target_limit", + Help: "Maximum number of targets allowed in this scrape pool.", + }, + []string{"scrape_job"}, + ) + sm.targetScrapePoolTargetsAdded = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "prometheus_target_scrape_pool_targets", + Help: "Current number of targets in this scrape pool.", + }, + []string{"scrape_job"}, + ) + sm.targetScrapePoolSymbolTableItems = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "prometheus_target_scrape_pool_symboltable_items", + Help: "Current number of symbols in table for this scrape pool.", + }, + []string{"scrape_job"}, + ) + sm.targetScrapePoolSyncsCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pool_sync_total", + Help: "Total number of syncs that were executed on a scrape pool.", + }, + []string{"scrape_job"}, + ) + sm.targetSyncIntervalLength = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "prometheus_target_sync_length_seconds", + Help: "Actual interval to sync the scrape pool.", + Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001}, + }, + []string{"scrape_job"}, + ) + sm.targetSyncFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "prometheus_target_sync_failed_total", + Help: "Total number of target sync failures.", + }, + []string{"scrape_job"}, + ) + + // 
Used by targetScraper. + sm.targetScrapeExceededBodySizeLimit = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_exceeded_body_size_limit_total", + Help: "Total number of scrapes that hit the body size limit", + }, + ) + + // Used by scrapeCache. + sm.targetScrapeCacheFlushForced = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_cache_flush_forced_total", + Help: "How many times a scrape cache was flushed due to getting big while scrapes are failing.", + }, + ) + + // Used by scrapeLoop. + sm.targetIntervalLength = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Name: "prometheus_target_interval_length_seconds", + Help: "Actual intervals between scrapes.", + Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001}, + }, + []string{"interval"}, + ) + sm.targetScrapeSampleLimit = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_exceeded_sample_limit_total", + Help: "Total number of scrapes that hit the sample limit and were rejected.", + }, + ) + sm.targetScrapeSampleDuplicate = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_sample_duplicate_timestamp_total", + Help: "Total number of samples rejected due to duplicate timestamps but different values.", + }, + ) + sm.targetScrapeSampleOutOfOrder = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_sample_out_of_order_total", + Help: "Total number of samples rejected due to not being out of the expected order.", + }, + ) + sm.targetScrapeSampleOutOfBounds = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_sample_out_of_bounds_total", + Help: "Total number of samples rejected due to timestamp falling outside of the time bounds.", + }, + ) + sm.targetScrapePoolExceededLabelLimits = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrape_pool_exceeded_label_limits_total", + Help: "Total number of times scrape pools hit the label limits, during sync or config reload.", + }, + ) + sm.targetScrapeNativeHistogramBucketLimit = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total", + Help: "Total number of scrapes that hit the native histogram bucket limit and were rejected.", + }, + ) + sm.targetScrapeExemplarOutOfOrder = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_target_scrapes_exemplar_out_of_order_total", + Help: "Total number of exemplar rejected due to not being out of the expected order.", + }, + ) + + for _, collector := range []prometheus.Collector{ + // Used by Manager. + sm.targetMetadataCache, + sm.targetScrapePools, + sm.targetScrapePoolsFailed, + // Used by scrapePool. + sm.targetReloadIntervalLength, + sm.targetScrapePoolReloads, + sm.targetScrapePoolReloadsFailed, + sm.targetSyncIntervalLength, + sm.targetScrapePoolSyncsCounter, + sm.targetScrapePoolExceededTargetLimit, + sm.targetScrapePoolTargetLimit, + sm.targetScrapePoolTargetsAdded, + sm.targetScrapePoolSymbolTableItems, + sm.targetSyncFailed, + // Used by targetScraper. + sm.targetScrapeExceededBodySizeLimit, + // Used by scrapeCache. + sm.targetScrapeCacheFlushForced, + // Used by scrapeLoop. 
+ sm.targetIntervalLength, + sm.targetScrapeSampleLimit, + sm.targetScrapeSampleDuplicate, + sm.targetScrapeSampleOutOfOrder, + sm.targetScrapeSampleOutOfBounds, + sm.targetScrapeExemplarOutOfOrder, + sm.targetScrapePoolExceededLabelLimits, + sm.targetScrapeNativeHistogramBucketLimit, + } { + err := reg.Register(collector) + if err != nil { + return nil, fmt.Errorf("failed to register scrape metrics: %w", err) + } + } + return sm, nil +} + +func (sm *scrapeMetrics) setTargetMetadataCacheGatherer(gatherer TargetsGatherer) { + sm.targetMetadataCache.TargetsGatherer = gatherer +} + +// Unregister unregisters all metrics. +func (sm *scrapeMetrics) Unregister() { + sm.reg.Unregister(sm.targetMetadataCache) + sm.reg.Unregister(sm.targetScrapePools) + sm.reg.Unregister(sm.targetScrapePoolsFailed) + sm.reg.Unregister(sm.targetReloadIntervalLength) + sm.reg.Unregister(sm.targetScrapePoolReloads) + sm.reg.Unregister(sm.targetScrapePoolReloadsFailed) + sm.reg.Unregister(sm.targetSyncIntervalLength) + sm.reg.Unregister(sm.targetScrapePoolSyncsCounter) + sm.reg.Unregister(sm.targetScrapePoolExceededTargetLimit) + sm.reg.Unregister(sm.targetScrapePoolTargetLimit) + sm.reg.Unregister(sm.targetScrapePoolTargetsAdded) + sm.reg.Unregister(sm.targetScrapePoolSymbolTableItems) + sm.reg.Unregister(sm.targetSyncFailed) + sm.reg.Unregister(sm.targetScrapeExceededBodySizeLimit) + sm.reg.Unregister(sm.targetScrapeCacheFlushForced) + sm.reg.Unregister(sm.targetIntervalLength) + sm.reg.Unregister(sm.targetScrapeSampleLimit) + sm.reg.Unregister(sm.targetScrapeSampleDuplicate) + sm.reg.Unregister(sm.targetScrapeSampleOutOfOrder) + sm.reg.Unregister(sm.targetScrapeSampleOutOfBounds) + sm.reg.Unregister(sm.targetScrapeExemplarOutOfOrder) + sm.reg.Unregister(sm.targetScrapePoolExceededLabelLimits) + sm.reg.Unregister(sm.targetScrapeNativeHistogramBucketLimit) +} + +type TargetsGatherer interface { + TargetsActive() map[string][]*Target +} + +// MetadataMetricsCollector is a Custom Collector for the metadata cache metrics. +type MetadataMetricsCollector struct { + CacheEntries *prometheus.Desc + CacheBytes *prometheus.Desc + TargetsGatherer TargetsGatherer +} + +// Describe sends the metrics descriptions to the channel. +func (mc *MetadataMetricsCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- mc.CacheEntries + ch <- mc.CacheBytes +} + +// Collect creates and sends the metrics for the metadata cache. 
+func (mc *MetadataMetricsCollector) Collect(ch chan<- prometheus.Metric) { + if mc.TargetsGatherer == nil { + return + } + + for tset, targets := range mc.TargetsGatherer.TargetsActive() { + var size, length int + for _, t := range targets { + size += t.SizeMetadata() + length += t.LengthMetadata() + } + + ch <- prometheus.MustNewConstMetric( + mc.CacheEntries, + prometheus.GaugeValue, + float64(length), + tset, + ) + + ch <- prometheus.MustNewConstMetric( + mc.CacheBytes, + prometheus.GaugeValue, + float64(size), + tset, + ) + } +} diff --git a/pkg/promotel/prometheusreceiver/scrape/promotel.go b/pkg/promotel/prometheusreceiver/scrape/promotel.go new file mode 100644 index 000000000..47be2cba3 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/promotel.go @@ -0,0 +1,140 @@ +package scrape + +import ( + "context" + "io" + "net/http" + "time" + + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/textparse" + "github.com/prometheus/prometheus/storage" +) + +type GathereLoop struct { + *scrapeLoop + g prometheus.Gatherer +} + +func (gl *GathereLoop) newParser() (textparse.Parser, error) { + mfs, err := gl.g.Gather() + if err != nil { + gl.l.Log("msg", "Error while gathering metrics", "err", err) + return nil, err + } + return textparse.NewProtobufParserShim(gl.scrapeClassicHistograms, gl.symbolTable, mfs), err + +} + +func (gl *GathereLoop) Run(errc chan<- error) { + gl.scrapeLoop.run(errc) +} + +func (gl *GathereLoop) Stop() { + gl.scrapeLoop.stop() +} + +func (gl *GathereLoop) ScrapeAndReport( + last, appendTime time.Time, errc chan<- error, +) time.Time { + return gl.scrapeAndReport(last, appendTime, errc) +} + +func noopScrapeFunc(context.Context, io.Writer) error { return nil } + +func newNoopTarget(lbls labels.Labels) *Target { + return &Target{labels: lbls} +} + +func NewGathererLoop(ctx context.Context, logger log.Logger, app storage.Appendable, reg prometheus.Registerer, g prometheus.Gatherer, interval time.Duration) (*GathereLoop, error) { + nopMutator := func(l labels.Labels) labels.Labels { return l } + metrics, err := newScrapeMetrics(reg) + if err != nil { + return nil, err + } + if logger == nil { + logger = log.NewNopLogger() + } + target := newNoopTarget([]labels.Label{ + {Name: model.JobLabel, Value: "promotel"}, // required label + {Name: model.InstanceLabel, Value: "promotel"}, // required label + {Name: model.ScrapeIntervalLabel, Value: interval.String()}, + {Name: model.MetricsPathLabel, Value: config.DefaultScrapeConfig.MetricsPath}, + {Name: model.SchemeLabel, Value: config.DefaultScrapeConfig.Scheme}, + }) + loop := &GathereLoop{ + newScrapeLoop( + ctx, + &scraperShim{scrapeFunc: noopScrapeFunc}, + logger, + nil, + nopMutator, + nopMutator, + func(ctx context.Context) storage.Appender { return app.Appender(ctx) }, + nil, + labels.NewSymbolTable(), + 0, + true, + false, + true, + 0, + 0, + histogram.ExponentialSchemaMax, + nil, + interval, + time.Hour, + false, + false, + false, + false, // todo: pass this from the opts + false, + target, + true, + metrics, + true, + ), + g, + } + // Override the newParser function to use the gatherer. + loop.scrapeLoop.newParserFunc = loop.newParser + return loop, nil +} + +// scraperShim implements the scraper interface and allows setting values +// returned by its methods. 
It also allows setting a custom scrape function. +type scraperShim struct { + offsetDur time.Duration + + lastStart time.Time + lastDuration time.Duration + lastError error + + scrapeErr error + scrapeFunc func(context.Context, io.Writer) error +} + +func (ts *scraperShim) offset(time.Duration, uint64) time.Duration { + return ts.offsetDur +} + +func (ts *scraperShim) Report(start time.Time, duration time.Duration, err error) { + ts.lastStart = start + ts.lastDuration = duration + ts.lastError = err +} + +func (ts *scraperShim) scrape(ctx context.Context) (*http.Response, error) { + return nil, ts.scrapeErr +} + +func (ts *scraperShim) readResponse(ctx context.Context, resp *http.Response, w io.Writer) (string, error) { + if ts.scrapeFunc != nil { + return "", ts.scrapeFunc(ctx, w) + } + return "", ts.scrapeErr +} diff --git a/pkg/promotel/prometheusreceiver/scrape/promotel_test.go b/pkg/promotel/prometheusreceiver/scrape/promotel_test.go new file mode 100644 index 000000000..ea2ac1774 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/promotel_test.go @@ -0,0 +1,250 @@ +package scrape_test + +import ( + "bytes" + "context" + "encoding/binary" + "fmt" + "math/rand" + "strings" + "sync" + "testing" + "time" + + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/storage" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +// TestScrapeLoopScrapeAndReport exercises scrapeAndReport with various scenarios +// (successful scrape, failed scrape, forced error, empty body leading to staleness, etc.). +func TestScrapeLoopScrapeAndReport(t *testing.T) { + appendable := &collectResultAppendable{&testAppender{}} + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + reg := prometheus.NewRegistry() + + sl, err := scrape.NewGathererLoop(ctx, nil, appendable, reg, reg, 10*time.Millisecond) + require.NoError(t, err) + + start := time.Now() + sl.ScrapeAndReport(time.Time{}, start, nil) + // The collectResultAppender holds all appended samples. Check the last appended + // for staleness or actual data, depending on if the scrape was declared OK. + allSamples := appendable.resultFloats + // We expect at least one normal sample plus the reported samples. 
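+ // (The "reported samples" are the synthetic series the scrape loop appends after
+ // every scrape, e.g. "up" and "scrape_duration_seconds".)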
+ require.NotEmpty(t, allSamples, "Expected to see appended samples.") + + // reset the appender + appendable.testAppender = &testAppender{} + // create counter metric + counter := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "metric_a", + Help: "metric_a help", + }, []string{"label_a"}) + reg.MustRegister(counter) + counter.WithLabelValues("value_a").Add(42) + + mfs, err := reg.Gather() + require.NoError(t, err) + // verify that metric_a is present in Gatherer results + var foundMetric bool + for _, mf := range mfs { + if mf.GetName() == "metric_a" { + // verify metrics value + require.Len(t, mf.GetMetric(), 1) + require.Equal(t, "value_a", mf.GetMetric()[0].GetLabel()[0].GetValue()) + require.Equal(t, 42.0, mf.GetMetric()[0].GetCounter().GetValue()) + foundMetric = true + break + } + } + require.True(t, foundMetric, "Expected to see the 'metric_a' counter metric.") + + sl.ScrapeAndReport(time.Time{}, start, nil) + // Get all appended samples + allSamples = appendable.resultFloats + // verify that the counter metric 'metric_a' was reported + var found bool + for _, s := range allSamples { + if s.metric.Get("__name__") == "metric_a" && s.metric.Get("label_a") == "value_a" { + found = true + require.Equal(t, 42.0, s.f) + } + } + require.True(t, found, "Expected to see the 'metric_a' counter metric.") +} + +type floatSample struct { + metric labels.Labels + t int64 + f float64 +} + +type histogramSample struct { + t int64 + h *histogram.Histogram + fh *histogram.FloatHistogram +} + +type collectResultAppendable struct { + *testAppender +} + +func (a *collectResultAppendable) Appender(_ context.Context) storage.Appender { + return a +} + +// testAppender records all samples that were added through the appender. +// It can be used as its zero value or be backed by another appender it writes samples through. 
+type testAppender struct { + mtx sync.Mutex + + next storage.Appender + resultFloats []floatSample + pendingFloats []floatSample + rolledbackFloats []floatSample + resultHistograms []histogramSample + pendingHistograms []histogramSample + rolledbackHistograms []histogramSample + resultExemplars []exemplar.Exemplar + pendingExemplars []exemplar.Exemplar + resultMetadata []metadata.Metadata + pendingMetadata []metadata.Metadata +} + +func (a *testAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingFloats = append(a.pendingFloats, floatSample{ + metric: lset, + t: t, + f: v, + }) + + if ref == 0 { + ref = storage.SeriesRef(rand.Uint64()) + } + if a.next == nil { + return ref, nil + } + + ref, err := a.next.Append(ref, lset, t, v) + if err != nil { + return 0, err + } + return ref, err +} + +func (a *testAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingExemplars = append(a.pendingExemplars, e) + if a.next == nil { + return 0, nil + } + + return a.next.AppendExemplar(ref, l, e) +} + +func (a *testAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t}) + if a.next == nil { + return 0, nil + } + + return a.next.AppendHistogram(ref, l, t, h, fh) +} + +func (a *testAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingMetadata = append(a.pendingMetadata, m) + if ref == 0 { + ref = storage.SeriesRef(rand.Uint64()) + } + if a.next == nil { + return ref, nil + } + + return a.next.UpdateMetadata(ref, l, m) +} + +func (a *testAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + return a.Append(ref, l, ct, 0.0) +} + +func (a *testAppender) Commit() error { + a.mtx.Lock() + defer a.mtx.Unlock() + a.resultFloats = append(a.resultFloats, a.pendingFloats...) + a.resultExemplars = append(a.resultExemplars, a.pendingExemplars...) + a.resultHistograms = append(a.resultHistograms, a.pendingHistograms...) + a.resultMetadata = append(a.resultMetadata, a.pendingMetadata...) 
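+ // Pending samples have now been promoted to results; clear the pending buffers.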
+ a.pendingFloats = nil + a.pendingExemplars = nil + a.pendingHistograms = nil + a.pendingMetadata = nil + if a.next == nil { + return nil + } + return a.next.Commit() +} + +func (a *testAppender) Rollback() error { + a.mtx.Lock() + defer a.mtx.Unlock() + a.rolledbackFloats = a.pendingFloats + a.rolledbackHistograms = a.pendingHistograms + a.pendingFloats = nil + a.pendingHistograms = nil + if a.next == nil { + return nil + } + return a.next.Rollback() +} + +func (a *testAppender) String() string { + var sb strings.Builder + for _, s := range a.resultFloats { + sb.WriteString(fmt.Sprintf("committed: %s %f %d\n", s.metric, s.f, s.t)) + } + for _, s := range a.pendingFloats { + sb.WriteString(fmt.Sprintf("pending: %s %f %d\n", s.metric, s.f, s.t)) + } + for _, s := range a.rolledbackFloats { + sb.WriteString(fmt.Sprintf("rolledback: %s %f %d\n", s.metric, s.f, s.t)) + } + return sb.String() +} + +// protoMarshalDelimited marshals a MetricFamily into a delimited +// Prometheus proto exposition format bytes (known as 'encoding=delimited`) +// +// See also https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers +func protoMarshalDelimited(t *testing.T, mf *dto.MetricFamily) []byte { + t.Helper() + + protoBuf, err := proto.Marshal(mf) + require.NoError(t, err) + + varintBuf := make([]byte, binary.MaxVarintLen32) + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + + buf := &bytes.Buffer{} + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + return buf.Bytes() +} diff --git a/pkg/promotel/prometheusreceiver/scrape/scrape.go b/pkg/promotel/prometheusreceiver/scrape/scrape.go new file mode 100644 index 000000000..de9ac7a61 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/scrape.go @@ -0,0 +1,2112 @@ +package scrape + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "io" + "math" + "net/http" + "reflect" + "slices" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/klauspost/compress/gzip" + config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + "github.com/prometheus/common/version" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/model/textparse" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/pool" +) + +// ScrapeTimestampTolerance is the tolerance for scrape appends timestamps +// alignment, to enable better compression at the TSDB level. +// See https://github.com/prometheus/prometheus/issues/7846 +var ScrapeTimestampTolerance = 2 * time.Millisecond + +// AlignScrapeTimestamps enables the tolerance for scrape appends timestamps described above. +var AlignScrapeTimestamps = true + +var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", labels.MetricName) + +// scrapePool manages scrapes for sets of targets. 
+type scrapePool struct { + appendable storage.Appendable + logger log.Logger + cancel context.CancelFunc + httpOpts []config_util.HTTPClientOption + + // mtx must not be taken after targetMtx. + mtx sync.Mutex + config *config.ScrapeConfig + client *http.Client + loops map[uint64]loop + + symbolTable *labels.SymbolTable + lastSymbolTableCheck time.Time + initialSymbolTableLen int + + targetMtx sync.Mutex + // activeTargets and loops must always be synchronized to have the same + // set of hashes. + activeTargets map[uint64]*Target + droppedTargets []*Target // Subject to KeepDroppedTargets limit. + droppedTargetsCount int // Count of all dropped targets. + + // Constructor for new scrape loops. This is settable for testing convenience. + newLoop func(scrapeLoopOptions) loop + + noDefaultPort bool + + metrics *scrapeMetrics +} + +type labelLimits struct { + labelLimit int + labelNameLengthLimit int + labelValueLengthLimit int +} + +type scrapeLoopOptions struct { + target *Target + scraper scraper + sampleLimit int + bucketLimit int + maxSchema int32 + labelLimits *labelLimits + honorLabels bool + honorTimestamps bool + trackTimestampsStaleness bool + interval time.Duration + timeout time.Duration + scrapeClassicHistograms bool + + mrc []*relabel.Config + cache *scrapeCache + enableCompression bool +} + +const maxAheadTime = 10 * time.Minute + +// returning an empty label set is interpreted as "drop". +type labelsMutator func(labels.Labels) labels.Labels + +func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, buffers *pool.Pool, options *Options, metrics *scrapeMetrics) (*scrapePool, error) { + if logger == nil { + logger = log.NewNopLogger() + } + + client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, options.HTTPClientOptions...) + if err != nil { + return nil, fmt.Errorf("error creating HTTP client: %w", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + sp := &scrapePool{ + cancel: cancel, + appendable: app, + config: cfg, + client: client, + activeTargets: map[uint64]*Target{}, + loops: map[uint64]loop{}, + symbolTable: labels.NewSymbolTable(), + lastSymbolTableCheck: time.Now(), + logger: logger, + metrics: metrics, + httpOpts: options.HTTPClientOptions, + noDefaultPort: options.NoDefaultPort, + } + sp.newLoop = func(opts scrapeLoopOptions) loop { + // Update the targets retrieval function for metadata to a new scrape cache. 
+ cache := opts.cache + if cache == nil { + cache = newScrapeCache(metrics) + } + opts.target.SetMetadataStore(cache) + + return newScrapeLoop( + ctx, + opts.scraper, + log.With(logger, "target", opts.target), + buffers, + func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, opts.target, opts.honorLabels, opts.mrc) + }, + func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, opts.target) }, + func(ctx context.Context) storage.Appender { return app.Appender(ctx) }, + cache, + sp.symbolTable, + offsetSeed, + opts.honorTimestamps, + opts.trackTimestampsStaleness, + opts.enableCompression, + opts.sampleLimit, + opts.bucketLimit, + opts.maxSchema, + opts.labelLimits, + opts.interval, + opts.timeout, + opts.scrapeClassicHistograms, + options.EnableNativeHistogramsIngestion, + options.EnableCreatedTimestampZeroIngestion, + options.ExtraMetrics, + options.AppendMetadata, + opts.target, + options.PassMetadataInContext, + metrics, + options.skipOffsetting, + ) + } + sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) + return sp, nil +} + +func (sp *scrapePool) ActiveTargets() []*Target { + sp.targetMtx.Lock() + defer sp.targetMtx.Unlock() + + var tActive []*Target + for _, t := range sp.activeTargets { + tActive = append(tActive, t) + } + return tActive +} + +// Return dropped targets, subject to KeepDroppedTargets limit. +func (sp *scrapePool) DroppedTargets() []*Target { + sp.targetMtx.Lock() + defer sp.targetMtx.Unlock() + return sp.droppedTargets +} + +func (sp *scrapePool) DroppedTargetsCount() int { + sp.targetMtx.Lock() + defer sp.targetMtx.Unlock() + return sp.droppedTargetsCount +} + +// stop terminates all scrape loops and returns after they all terminated. +func (sp *scrapePool) stop() { + sp.mtx.Lock() + defer sp.mtx.Unlock() + sp.cancel() + var wg sync.WaitGroup + + sp.targetMtx.Lock() + + for fp, l := range sp.loops { + wg.Add(1) + + go func(l loop) { + l.stop() + wg.Done() + }(l) + + delete(sp.loops, fp) + delete(sp.activeTargets, fp) + } + + sp.targetMtx.Unlock() + + wg.Wait() + sp.client.CloseIdleConnections() + + if sp.config != nil { + sp.metrics.targetScrapePoolSyncsCounter.DeleteLabelValues(sp.config.JobName) + sp.metrics.targetScrapePoolTargetLimit.DeleteLabelValues(sp.config.JobName) + sp.metrics.targetScrapePoolTargetsAdded.DeleteLabelValues(sp.config.JobName) + sp.metrics.targetScrapePoolSymbolTableItems.DeleteLabelValues(sp.config.JobName) + sp.metrics.targetSyncIntervalLength.DeleteLabelValues(sp.config.JobName) + sp.metrics.targetSyncFailed.DeleteLabelValues(sp.config.JobName) + } +} + +// reload the scrape pool with the given scrape configuration. The target state is preserved +// but all scrape loops are restarted with the new scrape configuration. +// This method returns after all scrape loops that were stopped have stopped scraping. +func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { + sp.mtx.Lock() + defer sp.mtx.Unlock() + sp.metrics.targetScrapePoolReloads.Inc() + start := time.Now() + + client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, sp.httpOpts...) 
+ if err != nil { + sp.metrics.targetScrapePoolReloadsFailed.Inc() + return fmt.Errorf("error creating HTTP client: %w", err) + } + + reuseCache := reusableCache(sp.config, cfg) + sp.config = cfg + oldClient := sp.client + sp.client = client + + sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) + + sp.restartLoops(reuseCache) + oldClient.CloseIdleConnections() + sp.metrics.targetReloadIntervalLength.WithLabelValues(time.Duration(sp.config.ScrapeInterval).String()).Observe( + time.Since(start).Seconds(), + ) + return nil +} + +func (sp *scrapePool) restartLoops(reuseCache bool) { + var ( + wg sync.WaitGroup + interval = time.Duration(sp.config.ScrapeInterval) + timeout = time.Duration(sp.config.ScrapeTimeout) + bodySizeLimit = int64(sp.config.BodySizeLimit) + sampleLimit = int(sp.config.SampleLimit) + bucketLimit = int(sp.config.NativeHistogramBucketLimit) + maxSchema = pickSchema(sp.config.NativeHistogramMinBucketFactor) + labelLimits = &labelLimits{ + labelLimit: int(sp.config.LabelLimit), + labelNameLengthLimit: int(sp.config.LabelNameLengthLimit), + labelValueLengthLimit: int(sp.config.LabelValueLengthLimit), + } + honorLabels = sp.config.HonorLabels + honorTimestamps = sp.config.HonorTimestamps + enableCompression = sp.config.EnableCompression + trackTimestampsStaleness = sp.config.TrackTimestampsStaleness + mrc = sp.config.MetricRelabelConfigs + ) + + sp.targetMtx.Lock() + + forcedErr := sp.refreshTargetLimitErr() + for fp, oldLoop := range sp.loops { + var cache *scrapeCache + if oc := oldLoop.getCache(); reuseCache && oc != nil { + oldLoop.disableEndOfRunStalenessMarkers() + cache = oc + } else { + cache = newScrapeCache(sp.metrics) + } + + t := sp.activeTargets[fp] + interval, timeout, err := t.intervalAndTimeout(interval, timeout) + var ( + s = newScraper(&targetScraper{ + Target: t, + client: sp.client, + timeout: timeout, + bodySizeLimit: bodySizeLimit, + acceptHeader: acceptHeader(sp.config.ScrapeProtocols), + acceptEncodingHeader: acceptEncodingHeader(enableCompression), + }) + newLoop = sp.newLoop(scrapeLoopOptions{ + target: t, + scraper: s, + sampleLimit: sampleLimit, + bucketLimit: bucketLimit, + maxSchema: maxSchema, + labelLimits: labelLimits, + honorLabels: honorLabels, + honorTimestamps: honorTimestamps, + enableCompression: enableCompression, + trackTimestampsStaleness: trackTimestampsStaleness, + mrc: mrc, + cache: cache, + interval: interval, + timeout: timeout, + }) + ) + if err != nil { + newLoop.setForcedError(err) + } + wg.Add(1) + + go func(oldLoop, newLoop loop) { + oldLoop.stop() + wg.Done() + + newLoop.setForcedError(forcedErr) + newLoop.run(nil) + }(oldLoop, newLoop) + + sp.loops[fp] = newLoop + } + + sp.targetMtx.Unlock() + + wg.Wait() +} + +// Must be called with sp.mtx held. +func (sp *scrapePool) checkSymbolTable() { + // Here we take steps to clear out the symbol table if it has grown a lot. + // After waiting some time for things to settle, we take the size of the symbol-table. + // If, after some more time, the table has grown to twice that size, we start a new one. + const minTimeToCleanSymbolTable = 5 * time.Minute + if time.Since(sp.lastSymbolTableCheck) > minTimeToCleanSymbolTable { + if sp.initialSymbolTableLen == 0 { + sp.initialSymbolTableLen = sp.symbolTable.Len() + } else if sp.symbolTable.Len() > 2*sp.initialSymbolTableLen { + sp.symbolTable = labels.NewSymbolTable() + sp.initialSymbolTableLen = 0 + sp.restartLoops(false) // To drop all caches. 
+ } + sp.lastSymbolTableCheck = time.Now() + } +} + +// Sync converts target groups into actual scrape targets and synchronizes +// the currently running scraper with the resulting set and returns all scraped and dropped targets. +func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { + sp.mtx.Lock() + defer sp.mtx.Unlock() + start := time.Now() + + sp.targetMtx.Lock() + var all []*Target + var targets []*Target + lb := labels.NewBuilderWithSymbolTable(sp.symbolTable) + sp.droppedTargets = []*Target{} + sp.droppedTargetsCount = 0 + for _, tg := range tgs { + targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb) + for _, err := range failures { + level.Error(sp.logger).Log("msg", "Creating target failed", "err", err) + } + sp.metrics.targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures))) + for _, t := range targets { + // Replicate .Labels().IsEmpty() with a loop here to avoid generating garbage. + nonEmpty := false + t.LabelsRange(func(l labels.Label) { nonEmpty = true }) + switch { + case nonEmpty: + all = append(all, t) + case !t.discoveredLabels.IsEmpty(): + if sp.config.KeepDroppedTargets == 0 || uint(len(sp.droppedTargets)) < sp.config.KeepDroppedTargets { + sp.droppedTargets = append(sp.droppedTargets, t) + } + sp.droppedTargetsCount++ + } + } + } + sp.metrics.targetScrapePoolSymbolTableItems.WithLabelValues(sp.config.JobName).Set(float64(sp.symbolTable.Len())) + sp.targetMtx.Unlock() + sp.sync(all) + sp.checkSymbolTable() + + sp.metrics.targetSyncIntervalLength.WithLabelValues(sp.config.JobName).Observe( + time.Since(start).Seconds(), + ) + sp.metrics.targetScrapePoolSyncsCounter.WithLabelValues(sp.config.JobName).Inc() +} + +// sync takes a list of potentially duplicated targets, deduplicates them, starts +// scrape loops for new targets, and stops scrape loops for disappeared targets. +// It returns after all stopped scrape loops terminated. +func (sp *scrapePool) sync(targets []*Target) { + var ( + uniqueLoops = make(map[uint64]loop) + interval = time.Duration(sp.config.ScrapeInterval) + timeout = time.Duration(sp.config.ScrapeTimeout) + bodySizeLimit = int64(sp.config.BodySizeLimit) + sampleLimit = int(sp.config.SampleLimit) + bucketLimit = int(sp.config.NativeHistogramBucketLimit) + maxSchema = pickSchema(sp.config.NativeHistogramMinBucketFactor) + labelLimits = &labelLimits{ + labelLimit: int(sp.config.LabelLimit), + labelNameLengthLimit: int(sp.config.LabelNameLengthLimit), + labelValueLengthLimit: int(sp.config.LabelValueLengthLimit), + } + honorLabels = sp.config.HonorLabels + honorTimestamps = sp.config.HonorTimestamps + enableCompression = sp.config.EnableCompression + trackTimestampsStaleness = sp.config.TrackTimestampsStaleness + mrc = sp.config.MetricRelabelConfigs + scrapeClassicHistograms = sp.config.ScrapeClassicHistograms + ) + + sp.targetMtx.Lock() + for _, t := range targets { + hash := t.hash() + + if _, ok := sp.activeTargets[hash]; !ok { + // The scrape interval and timeout labels are set to the config's values initially, + // so whether changed via relabeling or not, they'll exist and hold the correct values + // for every target. 
+ var err error + interval, timeout, err = t.intervalAndTimeout(interval, timeout) + s := newScraper(&targetScraper{ + Target: t, + client: sp.client, + timeout: timeout, + bodySizeLimit: bodySizeLimit, + acceptHeader: acceptHeader(sp.config.ScrapeProtocols), + acceptEncodingHeader: acceptEncodingHeader(enableCompression), + metrics: sp.metrics, + }) + l := sp.newLoop(scrapeLoopOptions{ + target: t, + scraper: s, + sampleLimit: sampleLimit, + bucketLimit: bucketLimit, + maxSchema: maxSchema, + labelLimits: labelLimits, + honorLabels: honorLabels, + honorTimestamps: honorTimestamps, + enableCompression: enableCompression, + trackTimestampsStaleness: trackTimestampsStaleness, + mrc: mrc, + interval: interval, + timeout: timeout, + scrapeClassicHistograms: scrapeClassicHistograms, + }) + if err != nil { + l.setForcedError(err) + } + + sp.activeTargets[hash] = t + sp.loops[hash] = l + + uniqueLoops[hash] = l + } else { + // This might be a duplicated target. + if _, ok := uniqueLoops[hash]; !ok { + uniqueLoops[hash] = nil + } + // Need to keep the most updated labels information + // for displaying it in the Service Discovery web page. + sp.activeTargets[hash].SetDiscoveredLabels(t.DiscoveredLabels()) + } + } + + var wg sync.WaitGroup + + // Stop and remove old targets and scraper loops. + for hash := range sp.activeTargets { + if _, ok := uniqueLoops[hash]; !ok { + wg.Add(1) + go func(l loop) { + l.stop() + wg.Done() + }(sp.loops[hash]) + + delete(sp.loops, hash) + delete(sp.activeTargets, hash) + } + } + + sp.targetMtx.Unlock() + + sp.metrics.targetScrapePoolTargetsAdded.WithLabelValues(sp.config.JobName).Set(float64(len(uniqueLoops))) + forcedErr := sp.refreshTargetLimitErr() + for _, l := range sp.loops { + l.setForcedError(forcedErr) + } + for _, l := range uniqueLoops { + if l != nil { + go l.run(nil) + } + } + // Wait for all potentially stopped scrapers to terminate. + // This covers the case of flapping targets. If the server is under high load, a new scraper + // may be active and tries to insert. The old scraper that didn't terminate yet could still + // be inserting a previous sample set. + wg.Wait() +} + +// refreshTargetLimitErr returns an error that can be passed to the scrape loops +// if the number of targets exceeds the configured limit. 
+func (sp *scrapePool) refreshTargetLimitErr() error { + if sp.config == nil || sp.config.TargetLimit == 0 { + return nil + } + if l := len(sp.activeTargets); l > int(sp.config.TargetLimit) { + sp.metrics.targetScrapePoolExceededTargetLimit.Inc() + return fmt.Errorf("target_limit exceeded (number of targets: %d, limit: %d)", l, sp.config.TargetLimit) + } + return nil +} + +func verifyLabelLimits(lset labels.Labels, limits *labelLimits) error { + if limits == nil { + return nil + } + + met := lset.Get(labels.MetricName) + if limits.labelLimit > 0 { + nbLabels := lset.Len() + if nbLabels > limits.labelLimit { + return fmt.Errorf("label_limit exceeded (metric: %.50s, number of labels: %d, limit: %d)", met, nbLabels, limits.labelLimit) + } + } + + if limits.labelNameLengthLimit == 0 && limits.labelValueLengthLimit == 0 { + return nil + } + + return lset.Validate(func(l labels.Label) error { + if limits.labelNameLengthLimit > 0 { + nameLength := len(l.Name) + if nameLength > limits.labelNameLengthLimit { + return fmt.Errorf("label_name_length_limit exceeded (metric: %.50s, label name: %.50s, length: %d, limit: %d)", met, l.Name, nameLength, limits.labelNameLengthLimit) + } + } + + if limits.labelValueLengthLimit > 0 { + valueLength := len(l.Value) + if valueLength > limits.labelValueLengthLimit { + return fmt.Errorf("label_value_length_limit exceeded (metric: %.50s, label name: %.50s, value: %.50q, length: %d, limit: %d)", met, l.Name, l.Value, valueLength, limits.labelValueLengthLimit) + } + } + return nil + }) +} + +func mutateSampleLabels(lset labels.Labels, target *Target, honor bool, rc []*relabel.Config) labels.Labels { + lb := labels.NewBuilder(lset) + + if honor { + target.LabelsRange(func(l labels.Label) { + if !lset.Has(l.Name) { + lb.Set(l.Name, l.Value) + } + }) + } else { + var conflictingExposedLabels []labels.Label + target.LabelsRange(func(l labels.Label) { + existingValue := lset.Get(l.Name) + if existingValue != "" { + conflictingExposedLabels = append(conflictingExposedLabels, labels.Label{Name: l.Name, Value: existingValue}) + } + // It is now safe to set the target label. + lb.Set(l.Name, l.Value) + }) + + if len(conflictingExposedLabels) > 0 { + resolveConflictingExposedLabels(lb, conflictingExposedLabels) + } + } + + res := lb.Labels() + + if len(rc) > 0 { + res, _ = relabel.Process(res, rc...) + } + + return res +} + +func resolveConflictingExposedLabels(lb *labels.Builder, conflictingExposedLabels []labels.Label) { + slices.SortStableFunc(conflictingExposedLabels, func(a, b labels.Label) int { + return len(a.Name) - len(b.Name) + }) + + for _, l := range conflictingExposedLabels { + newName := l.Name + for { + newName = model.ExportedLabelPrefix + newName + if lb.Get(newName) == "" { + lb.Set(newName, l.Value) + break + } + } + } +} + +func mutateReportSampleLabels(lset labels.Labels, target *Target) labels.Labels { + lb := labels.NewBuilder(lset) + + target.LabelsRange(func(l labels.Label) { + lb.Set(model.ExportedLabelPrefix+l.Name, lset.Get(l.Name)) + lb.Set(l.Name, l.Value) + }) + + return lb.Labels() +} + +// appender returns an appender for ingested samples from the target. +func appender(app storage.Appender, sampleLimit, bucketLimit int, maxSchema int32) storage.Appender { + app = &timeLimitAppender{ + Appender: app, + maxTime: timestamp.FromTime(time.Now().Add(maxAheadTime)), + } + + // The sampleLimit is applied after metrics are potentially dropped via relabeling. 
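+ // Resulting wrapper chain, outermost first: maxSchemaAppender -> bucketLimitAppender ->
+ // limitAppender -> timeLimitAppender -> the underlying appender, where the first three
+ // are only added when the corresponding limit is configured.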
+ if sampleLimit > 0 { + app = &limitAppender{ + Appender: app, + limit: sampleLimit, + } + } + + if bucketLimit > 0 { + app = &bucketLimitAppender{ + Appender: app, + limit: bucketLimit, + } + } + + if maxSchema < histogram.ExponentialSchemaMax { + app = &maxSchemaAppender{ + Appender: app, + maxSchema: maxSchema, + } + } + + return app +} + +// A scraper retrieves samples and accepts a status report at the end. +type scraper interface { + scrape(ctx context.Context) (*http.Response, error) + readResponse(ctx context.Context, resp *http.Response, w io.Writer) (string, error) + Report(start time.Time, dur time.Duration, err error) + offset(interval time.Duration, offsetSeed uint64) time.Duration +} + +// targetScraper implements the scraper interface for a target. +type targetScraper struct { + *Target + + client *http.Client + req *http.Request + timeout time.Duration + + gzipr *gzip.Reader + buf *bufio.Reader + + bodySizeLimit int64 + acceptHeader string + acceptEncodingHeader string + + metrics *scrapeMetrics +} + +func newScraper(ts *targetScraper) scraper { + if handler := GetDefaultGathererHandler(); handler != nil { + return &gathererScraper{ts, handler} + } + return ts +} + +var errBodySizeLimit = errors.New("body size limit exceeded") + +// acceptHeader transforms preference from the options into specific header values as +// https://www.rfc-editor.org/rfc/rfc9110.html#name-accept defines. +// No validation is here, we expect scrape protocols to be validated already. +func acceptHeader(sps []config.ScrapeProtocol) string { + var vals []string + weight := len(config.ScrapeProtocolsHeaders) + 1 + for _, sp := range sps { + vals = append(vals, fmt.Sprintf("%s;q=0.%d", config.ScrapeProtocolsHeaders[sp], weight)) + weight-- + } + // Default match anything. 
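+ // Illustrative final value (assuming the upstream ScrapeProtocolsHeaders strings), e.g.
+ // for [PrometheusProto, PrometheusText0.0.4]:
+ // "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.5,text/plain;version=0.0.4;q=0.4,*/*;q=0.3"
+ // (exact q-values depend on how many scrape protocols are defined).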
+ vals = append(vals, fmt.Sprintf("*/*;q=0.%d", weight)) + return strings.Join(vals, ",") +} + +func acceptEncodingHeader(enableCompression bool) string { + if enableCompression { + return "gzip" + } + return "identity" +} + +var UserAgent = fmt.Sprintf("Prometheus/%s", version.Version) + +func (s *targetScraper) scrapeRequest() (*http.Request, error) { + if s.req == nil { + req, err := http.NewRequest(http.MethodGet, s.URL().String(), nil) + if err != nil { + return nil, err + } + req.Header.Add("Accept", s.acceptHeader) + req.Header.Add("Accept-Encoding", s.acceptEncodingHeader) + req.Header.Set("User-Agent", UserAgent) + req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", strconv.FormatFloat(s.timeout.Seconds(), 'f', -1, 64)) + + s.req = req + } + return s.req, nil +} + +func (s *targetScraper) scrape(ctx context.Context) (*http.Response, error) { + req, err := s.scrapeRequest() + if err != nil { + return nil, err + } + return s.client.Do(req.WithContext(ctx)) +} + +func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w io.Writer) (string, error) { + defer func() { + io.Copy(io.Discard, resp.Body) + resp.Body.Close() + }() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("server returned HTTP status %s", resp.Status) + } + + if s.bodySizeLimit <= 0 { + s.bodySizeLimit = math.MaxInt64 + } + if resp.Header.Get("Content-Encoding") != "gzip" { + n, err := io.Copy(w, io.LimitReader(resp.Body, s.bodySizeLimit)) + if err != nil { + return "", err + } + if n >= s.bodySizeLimit { + s.metrics.targetScrapeExceededBodySizeLimit.Inc() + return "", errBodySizeLimit + } + return resp.Header.Get("Content-Type"), nil + } + + if s.gzipr == nil { + s.buf = bufio.NewReader(resp.Body) + var err error + s.gzipr, err = gzip.NewReader(s.buf) + if err != nil { + return "", err + } + } else { + s.buf.Reset(resp.Body) + if err := s.gzipr.Reset(s.buf); err != nil { + return "", err + } + } + + n, err := io.Copy(w, io.LimitReader(s.gzipr, s.bodySizeLimit)) + s.gzipr.Close() + if err != nil { + return "", err + } + if n >= s.bodySizeLimit { + s.metrics.targetScrapeExceededBodySizeLimit.Inc() + return "", errBodySizeLimit + } + return resp.Header.Get("Content-Type"), nil +} + +// A loop can run and be stopped again. It must not be reused after it was stopped. +type loop interface { + run(errc chan<- error) + setForcedError(err error) + stop() + getCache() *scrapeCache + disableEndOfRunStalenessMarkers() +} + +type cacheEntry struct { + ref storage.SeriesRef + lastIter uint64 + hash uint64 + lset labels.Labels +} + +type scrapeLoop struct { + scraper scraper + l log.Logger + cache *scrapeCache + lastScrapeSize int + buffers *pool.Pool + offsetSeed uint64 + honorTimestamps bool + trackTimestampsStaleness bool + enableCompression bool + forcedErr error + forcedErrMtx sync.Mutex + sampleLimit int + bucketLimit int + maxSchema int32 + labelLimits *labelLimits + interval time.Duration + timeout time.Duration + scrapeClassicHistograms bool + + // Feature flagged options. + enableNativeHistogramIngestion bool + enableCTZeroIngestion bool + + appender func(ctx context.Context) storage.Appender + symbolTable *labels.SymbolTable + sampleMutator labelsMutator + reportSampleMutator labelsMutator + + parentCtx context.Context + appenderCtx context.Context + ctx context.Context + cancel func() + stopped chan struct{} + + disabledEndOfRunStalenessMarkers bool + + reportExtraMetrics bool + appendMetadataToWAL bool + + metrics *scrapeMetrics + + skipOffsetting bool // For testability. 
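+
+ // promotel addition: newParserFunc (below), when non-nil, overrides how the parser is
+ // constructed; GathererLoop in promotel.go sets it to parse gathered MetricFamily data
+ // directly instead of a scraped HTTP payload.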
+ + newParserFunc func() (textparse.Parser, error) +} + +// scrapeCache tracks mappings of exposed metric strings to label sets and +// storage references. Additionally, it tracks staleness of series between +// scrapes. +type scrapeCache struct { + iter uint64 // Current scrape iteration. + + // How many series and metadata entries there were at the last success. + successfulCount int + + // Parsed string to an entry with information about the actual label set + // and its storage reference. + series map[string]*cacheEntry + + // Cache of dropped metric strings and their iteration. The iteration must + // be a pointer so we can update it. + droppedSeries map[string]*uint64 + + // seriesCur and seriesPrev store the labels of series that were seen + // in the current and previous scrape. + // We hold two maps and swap them out to save allocations. + seriesCur map[uint64]labels.Labels + seriesPrev map[uint64]labels.Labels + + metaMtx sync.Mutex + metadata map[string]*metaEntry + + metrics *scrapeMetrics +} + +// metaEntry holds meta information about a metric. +type metaEntry struct { + metadata.Metadata + + lastIter uint64 // Last scrape iteration the entry was observed at. + lastIterChange uint64 // Last scrape iteration the entry was changed at. +} + +func (m *metaEntry) size() int { + // The attribute lastIter although part of the struct it is not metadata. + return len(m.Help) + len(m.Unit) + len(m.Type) +} + +func newScrapeCache(metrics *scrapeMetrics) *scrapeCache { + return &scrapeCache{ + series: map[string]*cacheEntry{}, + droppedSeries: map[string]*uint64{}, + seriesCur: map[uint64]labels.Labels{}, + seriesPrev: map[uint64]labels.Labels{}, + metadata: map[string]*metaEntry{}, + metrics: metrics, + } +} + +func (c *scrapeCache) iterDone(flushCache bool) { + c.metaMtx.Lock() + count := len(c.series) + len(c.droppedSeries) + len(c.metadata) + c.metaMtx.Unlock() + + switch { + case flushCache: + c.successfulCount = count + case count > c.successfulCount*2+1000: + // If a target had varying labels in scrapes that ultimately failed, + // the caches would grow indefinitely. Force a flush when this happens. + // We use the heuristic that this is a doubling of the cache size + // since the last scrape, and allow an additional 1000 in case + // initial scrapes all fail. + flushCache = true + c.metrics.targetScrapeCacheFlushForced.Inc() + } + + if flushCache { + // All caches may grow over time through series churn + // or multiple string representations of the same metric. Clean up entries + // that haven't appeared in the last scrape. + for s, e := range c.series { + if c.iter != e.lastIter { + delete(c.series, s) + } + } + for s, iter := range c.droppedSeries { + if c.iter != *iter { + delete(c.droppedSeries, s) + } + } + c.metaMtx.Lock() + for m, e := range c.metadata { + // Keep metadata around for 10 scrapes after its metric disappeared. + if c.iter-e.lastIter > 10 { + delete(c.metadata, m) + } + } + c.metaMtx.Unlock() + + c.iter++ + } + + // Swap current and previous series. + c.seriesPrev, c.seriesCur = c.seriesCur, c.seriesPrev + + // We have to delete every single key in the map. 
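+ // (A delete-in-range loop lets the Go runtime clear the map without reallocating it.)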
+ for k := range c.seriesCur { + delete(c.seriesCur, k) + } +} + +func (c *scrapeCache) get(met []byte) (*cacheEntry, bool, bool) { + e, ok := c.series[string(met)] + if !ok { + return nil, false, false + } + alreadyScraped := e.lastIter == c.iter + e.lastIter = c.iter + return e, true, alreadyScraped +} + +func (c *scrapeCache) addRef(met []byte, ref storage.SeriesRef, lset labels.Labels, hash uint64) { + if ref == 0 { + return + } + c.series[string(met)] = &cacheEntry{ref: ref, lastIter: c.iter, lset: lset, hash: hash} +} + +func (c *scrapeCache) addDropped(met []byte) { + iter := c.iter + c.droppedSeries[string(met)] = &iter +} + +func (c *scrapeCache) getDropped(met []byte) bool { + iterp, ok := c.droppedSeries[string(met)] + if ok { + *iterp = c.iter + } + return ok +} + +func (c *scrapeCache) trackStaleness(hash uint64, lset labels.Labels) { + c.seriesCur[hash] = lset +} + +func (c *scrapeCache) forEachStale(f func(labels.Labels) bool) { + for h, lset := range c.seriesPrev { + if _, ok := c.seriesCur[h]; !ok { + if !f(lset) { + break + } + } + } +} + +func (c *scrapeCache) setType(metric []byte, t model.MetricType) { + c.metaMtx.Lock() + + e, ok := c.metadata[string(metric)] + if !ok { + e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} + c.metadata[string(metric)] = e + } + if e.Type != t { + e.Type = t + e.lastIterChange = c.iter + } + e.lastIter = c.iter + + c.metaMtx.Unlock() +} + +func (c *scrapeCache) setHelp(metric, help []byte) { + c.metaMtx.Lock() + + e, ok := c.metadata[string(metric)] + if !ok { + e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} + c.metadata[string(metric)] = e + } + if e.Help != string(help) { + e.Help = string(help) + e.lastIterChange = c.iter + } + e.lastIter = c.iter + + c.metaMtx.Unlock() +} + +func (c *scrapeCache) setUnit(metric, unit []byte) { + c.metaMtx.Lock() + + e, ok := c.metadata[string(metric)] + if !ok { + e = &metaEntry{Metadata: metadata.Metadata{Type: model.MetricTypeUnknown}} + c.metadata[string(metric)] = e + } + if e.Unit != string(unit) { + e.Unit = string(unit) + e.lastIterChange = c.iter + } + e.lastIter = c.iter + + c.metaMtx.Unlock() +} + +func (c *scrapeCache) GetMetadata(metric string) (MetricMetadata, bool) { + c.metaMtx.Lock() + defer c.metaMtx.Unlock() + + m, ok := c.metadata[metric] + if !ok { + return MetricMetadata{}, false + } + return MetricMetadata{ + Metric: metric, + Type: m.Type, + Help: m.Help, + Unit: m.Unit, + }, true +} + +func (c *scrapeCache) ListMetadata() []MetricMetadata { + c.metaMtx.Lock() + defer c.metaMtx.Unlock() + + res := make([]MetricMetadata, 0, len(c.metadata)) + + for m, e := range c.metadata { + res = append(res, MetricMetadata{ + Metric: m, + Type: e.Type, + Help: e.Help, + Unit: e.Unit, + }) + } + return res +} + +// MetadataSize returns the size of the metadata cache. +func (c *scrapeCache) SizeMetadata() (s int) { + c.metaMtx.Lock() + defer c.metaMtx.Unlock() + for _, e := range c.metadata { + s += e.size() + } + + return s +} + +// MetadataLen returns the number of metadata entries in the cache. 
+func (c *scrapeCache) LengthMetadata() int { + c.metaMtx.Lock() + defer c.metaMtx.Unlock() + + return len(c.metadata) +} + +func newScrapeLoop(ctx context.Context, + sc scraper, + l log.Logger, + buffers *pool.Pool, + sampleMutator labelsMutator, + reportSampleMutator labelsMutator, + appender func(ctx context.Context) storage.Appender, + cache *scrapeCache, + symbolTable *labels.SymbolTable, + offsetSeed uint64, + honorTimestamps bool, + trackTimestampsStaleness bool, + enableCompression bool, + sampleLimit int, + bucketLimit int, + maxSchema int32, + labelLimits *labelLimits, + interval time.Duration, + timeout time.Duration, + scrapeClassicHistograms bool, + enableNativeHistogramIngestion bool, + enableCTZeroIngestion bool, + reportExtraMetrics bool, + appendMetadataToWAL bool, + target *Target, + passMetadataInContext bool, + metrics *scrapeMetrics, + skipOffsetting bool, +) *scrapeLoop { + if l == nil { + l = log.NewNopLogger() + } + if buffers == nil { + buffers = pool.New(1e3, 1e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }) + } + if cache == nil { + cache = newScrapeCache(metrics) + } + + appenderCtx := ctx + + if passMetadataInContext { + // Store the cache and target in the context. This is then used by downstream OTel Collector + // to lookup the metadata required to process the samples. Not used by Prometheus itself. + // TODO(gouthamve) We're using a dedicated context because using the parentCtx caused a memory + // leak. We should ideally fix the main leak. See: https://github.com/prometheus/prometheus/pull/10590 + appenderCtx = ContextWithMetricMetadataStore(appenderCtx, cache) + appenderCtx = ContextWithTarget(appenderCtx, target) + } + + sl := &scrapeLoop{ + scraper: sc, + buffers: buffers, + cache: cache, + appender: appender, + symbolTable: symbolTable, + sampleMutator: sampleMutator, + reportSampleMutator: reportSampleMutator, + stopped: make(chan struct{}), + offsetSeed: offsetSeed, + l: l, + parentCtx: ctx, + appenderCtx: appenderCtx, + honorTimestamps: honorTimestamps, + trackTimestampsStaleness: trackTimestampsStaleness, + enableCompression: enableCompression, + sampleLimit: sampleLimit, + bucketLimit: bucketLimit, + maxSchema: maxSchema, + labelLimits: labelLimits, + interval: interval, + timeout: timeout, + scrapeClassicHistograms: scrapeClassicHistograms, + enableNativeHistogramIngestion: enableNativeHistogramIngestion, + enableCTZeroIngestion: enableCTZeroIngestion, + reportExtraMetrics: reportExtraMetrics, + appendMetadataToWAL: appendMetadataToWAL, + metrics: metrics, + skipOffsetting: skipOffsetting, + } + sl.ctx, sl.cancel = context.WithCancel(ctx) + + return sl +} + +func (sl *scrapeLoop) run(errc chan<- error) { + if !sl.skipOffsetting { + select { + case <-time.After(sl.scraper.offset(sl.interval, sl.offsetSeed)): + // Continue after a scraping offset. + case <-sl.ctx.Done(): + close(sl.stopped) + return + } + } + + var last time.Time + + alignedScrapeTime := time.Now().Round(0) + ticker := time.NewTicker(sl.interval) + defer ticker.Stop() + +mainLoop: + for { + select { + case <-sl.parentCtx.Done(): + close(sl.stopped) + return + case <-sl.ctx.Done(): + break mainLoop + default: + } + + // Temporary workaround for a jitter in go timers that causes disk space + // increase in TSDB. + // See https://github.com/prometheus/prometheus/issues/7846 + // Calling Round ensures the time used is the wall clock, as otherwise .Sub + // and .Add on time.Time behave differently (see time package docs). 
+ scrapeTime := time.Now().Round(0) + if AlignScrapeTimestamps { + // Tolerance is clamped to maximum 1% of the scrape interval. + tolerance := min(sl.interval/100, ScrapeTimestampTolerance) + // For some reason, a tick might have been skipped, in which case we + // would call alignedScrapeTime.Add(interval) multiple times. + for scrapeTime.Sub(alignedScrapeTime) >= sl.interval { + alignedScrapeTime = alignedScrapeTime.Add(sl.interval) + } + // Align the scrape time if we are in the tolerance boundaries. + if scrapeTime.Sub(alignedScrapeTime) <= tolerance { + scrapeTime = alignedScrapeTime + } + } + + last = sl.scrapeAndReport(last, scrapeTime, errc) + + select { + case <-sl.parentCtx.Done(): + close(sl.stopped) + return + case <-sl.ctx.Done(): + break mainLoop + case <-ticker.C: + } + } + + close(sl.stopped) + + if !sl.disabledEndOfRunStalenessMarkers { + sl.endOfRunStaleness(last, ticker, sl.interval) + } +} + +// scrapeAndReport performs a scrape and then appends the result to the storage +// together with reporting metrics, by using as few appenders as possible. +// In the happy scenario, a single appender is used. +// This function uses sl.appenderCtx instead of sl.ctx on purpose. A scrape should +// only be cancelled on shutdown, not on reloads. +func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- error) time.Time { + start := time.Now() + + // Only record after the first scrape. + if !last.IsZero() { + sl.metrics.targetIntervalLength.WithLabelValues(sl.interval.String()).Observe( + time.Since(last).Seconds(), + ) + } + + var total, added, seriesAdded, bytesRead int + var err, appErr, scrapeErr error + + app := sl.appender(sl.appenderCtx) + defer func() { + if err != nil { + app.Rollback() + return + } + err = app.Commit() + if err != nil { + level.Error(sl.l).Log("msg", "Scrape commit failed", "err", err) + } + }() + + defer func() { + if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, bytesRead, scrapeErr); err != nil { + level.Warn(sl.l).Log("msg", "Appending scrape report failed", "err", err) + } + }() + + if forcedErr := sl.getForcedError(); forcedErr != nil { + scrapeErr = forcedErr + // Add stale markers. + if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { + app.Rollback() + app = sl.appender(sl.appenderCtx) + level.Warn(sl.l).Log("msg", "Append failed", "err", err) + } + if errc != nil { + errc <- forcedErr + } + + return start + } + + var contentType string + var resp *http.Response + var b []byte + var buf *bytes.Buffer + scrapeCtx, cancel := context.WithTimeout(sl.parentCtx, sl.timeout) + resp, scrapeErr = sl.scraper.scrape(scrapeCtx) + if scrapeErr == nil { + b = sl.buffers.Get(sl.lastScrapeSize).([]byte) + defer sl.buffers.Put(b) + buf = bytes.NewBuffer(b) + contentType, scrapeErr = sl.scraper.readResponse(scrapeCtx, resp, buf) + } + cancel() + + if scrapeErr == nil { + b = buf.Bytes() + // NOTE: There were issues with misbehaving clients in the past + // that occasionally returned empty results. We don't want those + // to falsely reset our buffer size. + if len(b) > 0 { + sl.lastScrapeSize = len(b) + } + bytesRead = len(b) + } else { + level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr) + if errc != nil { + errc <- scrapeErr + } + if errors.Is(scrapeErr, errBodySizeLimit) { + bytesRead = -1 + } + } + + // A failed scrape is the same as an empty scrape, + // we still call sl.append to trigger stale markers. 
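+	// When the scrape failed, b is empty: the parser hits EOF immediately and the
+	// only effect is emitting staleness markers for series seen in the previous scrape.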
+ total, added, seriesAdded, appErr = sl.append(app, b, contentType, appendTime) + if appErr != nil { + app.Rollback() + app = sl.appender(sl.appenderCtx) + level.Debug(sl.l).Log("msg", "Append failed", "err", appErr) + // The append failed, probably due to a parse error or sample limit. + // Call sl.append again with an empty scrape to trigger stale markers. + if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { + app.Rollback() + app = sl.appender(sl.appenderCtx) + level.Warn(sl.l).Log("msg", "Append failed", "err", err) + } + } + + if scrapeErr == nil { + scrapeErr = appErr + } + + return start +} + +func (sl *scrapeLoop) setForcedError(err error) { + sl.forcedErrMtx.Lock() + defer sl.forcedErrMtx.Unlock() + sl.forcedErr = err +} + +func (sl *scrapeLoop) getForcedError() error { + sl.forcedErrMtx.Lock() + defer sl.forcedErrMtx.Unlock() + return sl.forcedErr +} + +func (sl *scrapeLoop) endOfRunStaleness(last time.Time, ticker *time.Ticker, interval time.Duration) { + // Scraping has stopped. We want to write stale markers but + // the target may be recreated, so we wait just over 2 scrape intervals + // before creating them. + // If the context is canceled, we presume the server is shutting down + // and will restart where is was. We do not attempt to write stale markers + // in this case. + + if last.IsZero() { + // There never was a scrape, so there will be no stale markers. + return + } + + // Wait for when the next scrape would have been, record its timestamp. + var staleTime time.Time + select { + case <-sl.parentCtx.Done(): + return + case <-ticker.C: + staleTime = time.Now() + } + + // Wait for when the next scrape would have been, if the target was recreated + // samples should have been ingested by now. + select { + case <-sl.parentCtx.Done(): + return + case <-ticker.C: + } + + // Wait for an extra 10% of the interval, just to be safe. + select { + case <-sl.parentCtx.Done(): + return + case <-time.After(interval / 10): + } + + // Call sl.append again with an empty scrape to trigger stale markers. + // If the target has since been recreated and scraped, the + // stale markers will be out of order and ignored. + // sl.context would have been cancelled, hence using sl.appenderCtx. + app := sl.appender(sl.appenderCtx) + var err error + defer func() { + if err != nil { + app.Rollback() + return + } + err = app.Commit() + if err != nil { + level.Warn(sl.l).Log("msg", "Stale commit failed", "err", err) + } + }() + if _, _, _, err = sl.append(app, []byte{}, "", staleTime); err != nil { + app.Rollback() + app = sl.appender(sl.appenderCtx) + level.Warn(sl.l).Log("msg", "Stale append failed", "err", err) + } + if err = sl.reportStale(app, staleTime); err != nil { + level.Warn(sl.l).Log("msg", "Stale report failed", "err", err) + } +} + +// Stop the scraping. May still write data and stale markers after it has +// returned. Cancel the context to stop all writes. 
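+// It blocks until the run loop has observed the cancellation and closed its
+// stopped channel.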
+func (sl *scrapeLoop) stop() { + sl.cancel() + <-sl.stopped +} + +func (sl *scrapeLoop) disableEndOfRunStalenessMarkers() { + sl.disabledEndOfRunStalenessMarkers = true +} + +func (sl *scrapeLoop) getCache() *scrapeCache { + return sl.cache +} + +type appendErrors struct { + numOutOfOrder int + numDuplicates int + numOutOfBounds int + numExemplarOutOfOrder int +} + +func (sl *scrapeLoop) newParser(b []byte, contentType string) (textparse.Parser, error) { + if sl.newParserFunc != nil { + return sl.newParserFunc() + } + return textparse.New(b, contentType, sl.scrapeClassicHistograms, sl.symbolTable) +} + +func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { + p, err := sl.newParser(b, contentType) + + if err != nil { + level.Debug(sl.l).Log( + "msg", "Invalid content type on scrape, using prometheus parser as fallback.", + "content_type", contentType, + "err", err, + ) + } + + var ( + defTime = timestamp.FromTime(ts) + appErrs = appendErrors{} + sampleLimitErr error + bucketLimitErr error + lset labels.Labels // escapes to heap so hoisted out of loop + e exemplar.Exemplar // escapes to heap so hoisted out of loop + meta metadata.Metadata + metadataChanged bool + ) + + exemplars := make([]exemplar.Exemplar, 1) + + // updateMetadata updates the current iteration's metadata object and the + // metadataChanged value if we have metadata in the scrape cache AND the + // labelset is for a new series or the metadata for this series has just + // changed. It returns a boolean based on whether the metadata was updated. + updateMetadata := func(lset labels.Labels, isNewSeries bool) bool { + if !sl.appendMetadataToWAL { + return false + } + + sl.cache.metaMtx.Lock() + defer sl.cache.metaMtx.Unlock() + metaEntry, metaOk := sl.cache.metadata[lset.Get(labels.MetricName)] + if metaOk && (isNewSeries || metaEntry.lastIterChange == sl.cache.iter) { + metadataChanged = true + meta.Type = metaEntry.Type + meta.Unit = metaEntry.Unit + meta.Help = metaEntry.Help + return true + } + return false + } + + // Take an appender with limits. + app = appender(app, sl.sampleLimit, sl.bucketLimit, sl.maxSchema) + + defer func() { + if err != nil { + return + } + // Only perform cache cleaning if the scrape was not empty. + // An empty scrape (usually) is used to indicate a failed scrape. + sl.cache.iterDone(len(b) > 0) + }() + +loop: + for { + var ( + et textparse.Entry + sampleAdded, isHistogram bool + met []byte + parsedTimestamp *int64 + val float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + ) + if et, err = p.Next(); err != nil { + if errors.Is(err, io.EOF) { + err = nil + } + break + } + switch et { + case textparse.EntryType: + sl.cache.setType(p.Type()) + continue + case textparse.EntryHelp: + sl.cache.setHelp(p.Help()) + continue + case textparse.EntryUnit: + sl.cache.setUnit(p.Unit()) + continue + case textparse.EntryComment: + continue + case textparse.EntryHistogram: + isHistogram = true + default: + } + total++ + + t := defTime + if isHistogram { + met, parsedTimestamp, h, fh = p.Histogram() + } else { + met, parsedTimestamp, val = p.Series() + } + if !sl.honorTimestamps { + parsedTimestamp = nil + } + if parsedTimestamp != nil { + t = *parsedTimestamp + } + + // Zero metadata out for current iteration until it's resolved. 
+ meta = metadata.Metadata{} + metadataChanged = false + + if sl.cache.getDropped(met) { + continue + } + ce, ok, seriesAlreadyScraped := sl.cache.get(met) + var ( + ref storage.SeriesRef + hash uint64 + ) + + if ok { + ref = ce.ref + lset = ce.lset + hash = ce.hash + + // Update metadata only if it changed in the current iteration. + updateMetadata(lset, false) + } else { + p.Metric(&lset) + hash = lset.Hash() + + // Hash label set as it is seen local to the target. Then add target labels + // and relabeling and store the final label set. + lset = sl.sampleMutator(lset) + + // The label set may be set to empty to indicate dropping. + if lset.IsEmpty() { + sl.cache.addDropped(met) + continue + } + + if !lset.Has(labels.MetricName) { + err = errNameLabelMandatory + break loop + } + if !lset.IsValid() { + err = fmt.Errorf("invalid metric name or label names: %s", lset.String()) + break loop + } + + // If any label limits is exceeded the scrape should fail. + if err = verifyLabelLimits(lset, sl.labelLimits); err != nil { + sl.metrics.targetScrapePoolExceededLabelLimits.Inc() + break loop + } + + // Append metadata for new series if they were present. + updateMetadata(lset, true) + } + + if seriesAlreadyScraped && parsedTimestamp == nil { + err = storage.ErrDuplicateSampleForTimestamp + } else { + if ctMs := p.CreatedTimestamp(); sl.enableCTZeroIngestion && ctMs != nil { + ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) { // OOO is a common case, ignoring completely for now. + // CT is an experimental feature. For now, we don't need to fail the + // scrape on errors updating the created timestamp, log debug. + level.Debug(sl.l).Log("msg", "Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) + } + } + + if isHistogram && sl.enableNativeHistogramIngestion { + if h != nil { + ref, err = app.AppendHistogram(ref, lset, t, h, nil) + } else { + ref, err = app.AppendHistogram(ref, lset, t, nil, fh) + } + } else { + ref, err = app.Append(ref, lset, t, val) + } + } + + if err == nil { + if (parsedTimestamp == nil || sl.trackTimestampsStaleness) && ce != nil { + sl.cache.trackStaleness(ce.hash, ce.lset) + } + } + + sampleAdded, err = sl.checkAddError(met, err, &sampleLimitErr, &bucketLimitErr, &appErrs) + if err != nil { + if !errors.Is(err, storage.ErrNotFound) { + level.Debug(sl.l).Log("msg", "Unexpected error", "series", string(met), "err", err) + } + break loop + } + + if !ok { + if parsedTimestamp == nil || sl.trackTimestampsStaleness { + // Bypass staleness logic if there is an explicit timestamp. + sl.cache.trackStaleness(hash, lset) + } + sl.cache.addRef(met, ref, lset, hash) + if sampleAdded && sampleLimitErr == nil && bucketLimitErr == nil { + seriesAdded++ + } + } + + // Increment added even if there's an error so we correctly report the + // number of samples remaining after relabeling. + // We still report duplicated samples here since this number should be the exact number + // of time series exposed on a scrape after relabelling. + added++ + exemplars = exemplars[:0] // Reset and reuse the exemplar slice. + for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { + if !e.HasTs { + if isHistogram { + // We drop exemplars for native histograms if they don't have a timestamp. 
+ // Missing timestamps are deliberately not supported as we want to start + // enforcing timestamps for exemplars as otherwise proper deduplication + // is inefficient and purely based on heuristics: we cannot distinguish + // between repeated exemplars and new instances with the same values. + // This is done silently without logs as it is not an error but out of spec. + // This does not affect classic histograms so that behaviour is unchanged. + e = exemplar.Exemplar{} // Reset for next time round loop. + continue + } + e.Ts = t + } + exemplars = append(exemplars, e) + e = exemplar.Exemplar{} // Reset for next time round loop. + } + // Sort so that checking for duplicates / out of order is more efficient during validation. + slices.SortFunc(exemplars, exemplar.Compare) + outOfOrderExemplars := 0 + for _, e := range exemplars { + _, exemplarErr := app.AppendExemplar(ref, lset, e) + switch { + case exemplarErr == nil: + // Do nothing. + case errors.Is(exemplarErr, storage.ErrOutOfOrderExemplar): + outOfOrderExemplars++ + default: + // Since exemplar storage is still experimental, we don't fail the scrape on ingestion errors. + level.Debug(sl.l).Log("msg", "Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) + } + } + if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { + // Only report out of order exemplars if all are out of order, otherwise this was a partial update + // to some existing set of exemplars. + appErrs.numExemplarOutOfOrder += outOfOrderExemplars + level.Debug(sl.l).Log("msg", "Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) + sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) + } + + if sl.appendMetadataToWAL && metadataChanged { + if _, merr := app.UpdateMetadata(ref, lset, meta); merr != nil { + // No need to fail the scrape on errors appending metadata. + level.Debug(sl.l).Log("msg", "Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", meta), "err", merr) + } + } + } + if sampleLimitErr != nil { + if err == nil { + err = sampleLimitErr + } + // We only want to increment this once per scrape, so this is Inc'd outside the loop. + sl.metrics.targetScrapeSampleLimit.Inc() + } + if bucketLimitErr != nil { + if err == nil { + err = bucketLimitErr // If sample limit is hit, that error takes precedence. + } + // We only want to increment this once per scrape, so this is Inc'd outside the loop. + sl.metrics.targetScrapeNativeHistogramBucketLimit.Inc() + } + if appErrs.numOutOfOrder > 0 { + level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) + } + if appErrs.numDuplicates > 0 { + level.Warn(sl.l).Log("msg", "Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) + } + if appErrs.numOutOfBounds > 0 { + level.Warn(sl.l).Log("msg", "Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) + } + if appErrs.numExemplarOutOfOrder > 0 { + level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) + } + if err == nil { + sl.cache.forEachStale(func(lset labels.Labels) bool { + // Series no longer exposed, mark it stale. 
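+			// value.StaleNaN is a special NaN bit pattern that downstream consumers
+			// interpret as an explicit staleness marker rather than a sample value.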
+ _, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN)) + switch { + case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): + // Do not count these in logging, as this is expected if a target + // goes away and comes back again with a new scrape loop. + err = nil + } + return err == nil + }) + } + return +} + +// Adds samples to the appender, checking the error, and then returns the # of samples added, +// whether the caller should continue to process more samples, and any sample or bucket limit errors. +func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucketLimitErr *error, appErrs *appendErrors) (bool, error) { + switch { + case err == nil: + return true, nil + case errors.Is(err, storage.ErrNotFound): + return false, storage.ErrNotFound + case errors.Is(err, storage.ErrOutOfOrderSample): + appErrs.numOutOfOrder++ + level.Debug(sl.l).Log("msg", "Out of order sample", "series", string(met)) + sl.metrics.targetScrapeSampleOutOfOrder.Inc() + return false, nil + case errors.Is(err, storage.ErrDuplicateSampleForTimestamp): + appErrs.numDuplicates++ + level.Debug(sl.l).Log("msg", "Duplicate sample for timestamp", "series", string(met)) + sl.metrics.targetScrapeSampleDuplicate.Inc() + return false, nil + case errors.Is(err, storage.ErrOutOfBounds): + appErrs.numOutOfBounds++ + level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met)) + sl.metrics.targetScrapeSampleOutOfBounds.Inc() + return false, nil + case errors.Is(err, errSampleLimit): + // Keep on parsing output if we hit the limit, so we report the correct + // total number of samples scraped. + *sampleLimitErr = err + return false, nil + case errors.Is(err, errBucketLimit): + // Keep on parsing output if we hit the limit, so we report the correct + // total number of samples scraped. + *bucketLimitErr = err + return false, nil + default: + return false, err + } +} + +// The constants are suffixed with the invalid \xff unicode rune to avoid collisions +// with scraped metrics in the cache. 
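+// A 0xff byte never occurs in a metric name produced by the exposition formats,
+// so these keys cannot clash with cache entries created for scraped series.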
+var ( + scrapeHealthMetricName = []byte("up" + "\xff") + scrapeDurationMetricName = []byte("scrape_duration_seconds" + "\xff") + scrapeSamplesMetricName = []byte("scrape_samples_scraped" + "\xff") + samplesPostRelabelMetricName = []byte("scrape_samples_post_metric_relabeling" + "\xff") + scrapeSeriesAddedMetricName = []byte("scrape_series_added" + "\xff") + scrapeTimeoutMetricName = []byte("scrape_timeout_seconds" + "\xff") + scrapeSampleLimitMetricName = []byte("scrape_sample_limit" + "\xff") + scrapeBodySizeBytesMetricName = []byte("scrape_body_size_bytes" + "\xff") +) + +func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration time.Duration, scraped, added, seriesAdded, bytes int, scrapeErr error) (err error) { + sl.scraper.Report(start, duration, scrapeErr) + + ts := timestamp.FromTime(start) + + var health float64 + if scrapeErr == nil { + health = 1 + } + b := labels.NewBuilderWithSymbolTable(sl.symbolTable) + + if err = sl.addReportSample(app, scrapeHealthMetricName, ts, health, b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeDurationMetricName, ts, duration.Seconds(), b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeSamplesMetricName, ts, float64(scraped), b); err != nil { + return + } + if err = sl.addReportSample(app, samplesPostRelabelMetricName, ts, float64(added), b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, float64(seriesAdded), b); err != nil { + return + } + if sl.reportExtraMetrics { + if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, sl.timeout.Seconds(), b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, float64(sl.sampleLimit), b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, float64(bytes), b); err != nil { + return + } + } + return +} + +func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) { + ts := timestamp.FromTime(start) + + stale := math.Float64frombits(value.StaleNaN) + b := labels.NewBuilder(labels.EmptyLabels()) + + if err = sl.addReportSample(app, scrapeHealthMetricName, ts, stale, b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeDurationMetricName, ts, stale, b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeSamplesMetricName, ts, stale, b); err != nil { + return + } + if err = sl.addReportSample(app, samplesPostRelabelMetricName, ts, stale, b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeSeriesAddedMetricName, ts, stale, b); err != nil { + return + } + if sl.reportExtraMetrics { + if err = sl.addReportSample(app, scrapeTimeoutMetricName, ts, stale, b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeSampleLimitMetricName, ts, stale, b); err != nil { + return + } + if err = sl.addReportSample(app, scrapeBodySizeBytesMetricName, ts, stale, b); err != nil { + return + } + } + return +} + +func (sl *scrapeLoop) addReportSample(app storage.Appender, s []byte, t int64, v float64, b *labels.Builder) error { + ce, ok, _ := sl.cache.get(s) + var ref storage.SeriesRef + var lset labels.Labels + if ok { + ref = ce.ref + lset = ce.lset + } else { + // The constants are suffixed with the invalid \xff unicode rune to avoid collisions + // with scraped metrics in the cache. + // We have to drop it when building the actual metric. 
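+		// Slicing off the last byte below removes that 0xff suffix so the real
+		// metric name is what ends up in the label set.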
+ b.Reset(labels.EmptyLabels()) + b.Set(labels.MetricName, string(s[:len(s)-1])) + lset = sl.reportSampleMutator(b.Labels()) + } + + ref, err := app.Append(ref, lset, t, v) + switch { + case err == nil: + if !ok { + sl.cache.addRef(s, ref, lset, lset.Hash()) + } + return nil + case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): + // Do not log here, as this is expected if a target goes away and comes back + // again with a new scrape loop. + return nil + default: + return err + } +} + +// zeroConfig returns a new scrape config that only contains configuration items +// that alter metrics. +func zeroConfig(c *config.ScrapeConfig) *config.ScrapeConfig { + z := *c + // We zero out the fields that for sure don't affect scrape. + z.ScrapeInterval = 0 + z.ScrapeTimeout = 0 + z.SampleLimit = 0 + z.HTTPClientConfig = config_util.HTTPClientConfig{} + return &z +} + +// reusableCache compares two scrape config and tells whether the cache is still +// valid. +func reusableCache(r, l *config.ScrapeConfig) bool { + if r == nil || l == nil { + return false + } + return reflect.DeepEqual(zeroConfig(r), zeroConfig(l)) +} + +// CtxKey is a dedicated type for keys of context-embedded values propagated +// with the scrape context. +type ctxKey int + +// Valid CtxKey values. +const ( + ctxKeyMetadata ctxKey = iota + 1 + ctxKeyTarget +) + +func ContextWithMetricMetadataStore(ctx context.Context, s MetricMetadataStore) context.Context { + return context.WithValue(ctx, ctxKeyMetadata, s) +} + +func MetricMetadataStoreFromContext(ctx context.Context) (MetricMetadataStore, bool) { + s, ok := ctx.Value(ctxKeyMetadata).(MetricMetadataStore) + return s, ok +} + +func ContextWithTarget(ctx context.Context, t *Target) context.Context { + return context.WithValue(ctx, ctxKeyTarget, t) +} + +func TargetFromContext(ctx context.Context) (*Target, bool) { + t, ok := ctx.Value(ctxKeyTarget).(*Target) + return t, ok +} + +func pickSchema(bucketFactor float64) int32 { + if bucketFactor <= 1 { + bucketFactor = 1.00271 + } + floor := math.Floor(-math.Log2(math.Log2(bucketFactor))) + switch { + case floor >= float64(histogram.ExponentialSchemaMax): + return histogram.ExponentialSchemaMax + case floor <= float64(histogram.ExponentialSchemaMin): + return histogram.ExponentialSchemaMin + default: + return int32(floor) + } +} + +// Scraper implementation that fetches metrics data from Gatherer http.Handler. 
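+// It builds the usual scrape request, serves it in-process through the
+// configured handler (typically promhttp wrapping a prometheus.Gatherer) and
+// returns the synthesized *http.Response, so local registry metrics can be
+// scraped without a network round trip.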
+type gathererScraper struct { + *targetScraper + h http.Handler +} + +type scrapeResult struct { + resp *http.Response + err error +} + +func (gs *gathererScraper) scrape(ctx context.Context) (*http.Response, error) { + resCh := make(chan scrapeResult, 1) + go func() { + defer close(resCh) + req, err := gs.scrapeRequest() + if err != nil { + resCh <- scrapeResult{nil, err} + return + } + w := newResponseWriter(req) + if gs.h != nil { + gs.h.ServeHTTP(w, req) + } + fmt.Println("[gathererScraper] scraping metrics") + resCh <- scrapeResult{w.response, nil} + }() + select { + case <-ctx.Done(): + return nil, ctx.Err() + case r := <-resCh: + return r.resp, r.err + } +} + +type responseWriter struct { + http.ResponseWriter + response *http.Response + // Writes to response body + w io.Writer +} + +func newResponseWriter(req *http.Request) *responseWriter { + buf := new(bytes.Buffer) + + return &responseWriter{ + w: io.Writer(buf), + response: &http.Response{ + Status: http.StatusText(http.StatusOK), + StatusCode: http.StatusOK, + Header: make(http.Header), + Body: io.NopCloser(buf), + Request: req, + }, + } +} + +func (rw *responseWriter) Header() http.Header { + return rw.response.Header +} + +func (rw *responseWriter) Write(data []byte) (int, error) { + return rw.w.Write(data) +} + +func (rw *responseWriter) WriteHeader(statusCode int) { + rw.response.StatusCode = statusCode + rw.response.Status = fmt.Sprintf("%d %s", statusCode, http.StatusText(statusCode)) +} + +var ( + defaultGathererHandler atomic.Pointer[http.Handler] + + defaultGatherer atomic.Pointer[prometheus.Gatherer] +) + +// This enables scraper to read metrics from the handler directly without making HTTP request +func SetDefaultGathererHandler(h http.Handler) { + defaultGathererHandler.Store(&h) +} + +func SetDefaultGatherer(g prometheus.Gatherer) { + defaultGatherer.Store(&g) + SetDefaultGathererHandler(promhttp.HandlerFor(g, promhttp.HandlerOpts{})) +} + +func GetDefaultGathererHandler() http.Handler { + if h := defaultGathererHandler.Load(); h != nil { + return *h + } + return nil +} + +func GetDefaultGatherer() prometheus.Gatherer { + if g := defaultGatherer.Load(); g != nil { + return *g + } + return nil +} diff --git a/pkg/promotel/prometheusreceiver/scrape/scrape_test.go b/pkg/promotel/prometheusreceiver/scrape/scrape_test.go new file mode 100644 index 000000000..ff95f5d6a --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/scrape_test.go @@ -0,0 +1,3764 @@ +package scrape + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "net/http" + "net/http/httptest" + "net/url" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/gogo/protobuf/proto" + "github.com/google/go-cmp/cmp" + "github.com/prometheus/client_golang/prometheus" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + dto "github.com/prometheus/client_model/go" + config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery" + "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/model/textparse" + 
"github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/util/pool" + "github.com/prometheus/prometheus/util/teststorage" + "github.com/prometheus/prometheus/util/testutil" +) + +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeak(m) +} + +func newTestScrapeMetrics(t testing.TB) *scrapeMetrics { + reg := prometheus.NewRegistry() + metrics, err := newScrapeMetrics(reg) + require.NoError(t, err) + return metrics +} + +func TestNewScrapePool(t *testing.T) { + var ( + app = &nopAppendable{} + cfg = &config.ScrapeConfig{} + sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + ) + + a, ok := sp.appendable.(*nopAppendable) + require.True(t, ok, "Failure to append.") + require.Equal(t, app, a, "Wrong sample appender.") + require.Equal(t, cfg, sp.config, "Wrong scrape config.") + require.NotNil(t, sp.newLoop, "newLoop function not initialized.") +} + +func TestDroppedTargetsList(t *testing.T) { + var ( + app = &nopAppendable{} + cfg = &config.ScrapeConfig{ + JobName: "dropMe", + ScrapeInterval: model.Duration(1), + RelabelConfigs: []*relabel.Config{ + { + Action: relabel.Drop, + Regex: relabel.MustNewRegexp("dropMe"), + SourceLabels: model.LabelNames{"job"}, + }, + }, + } + tgs = []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + {model.AddressLabel: "127.0.0.1:9090"}, + {model.AddressLabel: "127.0.0.1:9091"}, + }, + }, + } + sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + expectedLabelSetString = "{__address__=\"127.0.0.1:9090\", __scrape_interval__=\"0s\", __scrape_timeout__=\"0s\", job=\"dropMe\"}" + expectedLength = 2 + ) + sp.Sync(tgs) + sp.Sync(tgs) + require.Len(t, sp.droppedTargets, expectedLength) + require.Equal(t, expectedLength, sp.droppedTargetsCount) + require.Equal(t, expectedLabelSetString, sp.droppedTargets[0].DiscoveredLabels().String()) + + // Check that count is still correct when we don't retain all dropped targets. + sp.config.KeepDroppedTargets = 1 + sp.Sync(tgs) + require.Len(t, sp.droppedTargets, 1) + require.Equal(t, expectedLength, sp.droppedTargetsCount) +} + +// TestDiscoveredLabelsUpdate checks that DiscoveredLabels are updated +// even when new labels don't affect the target `hash`. +func TestDiscoveredLabelsUpdate(t *testing.T) { + sp := &scrapePool{ + metrics: newTestScrapeMetrics(t), + } + + // These are used when syncing so need this to avoid a panic. 
+ sp.config = &config.ScrapeConfig{ + ScrapeInterval: model.Duration(1), + ScrapeTimeout: model.Duration(1), + } + sp.activeTargets = make(map[uint64]*Target) + t1 := &Target{ + discoveredLabels: labels.FromStrings("label", "name"), + } + sp.activeTargets[t1.hash()] = t1 + + t2 := &Target{ + discoveredLabels: labels.FromStrings("labelNew", "nameNew"), + } + sp.sync([]*Target{t2}) + + require.Equal(t, t2.DiscoveredLabels(), sp.activeTargets[t1.hash()].DiscoveredLabels()) +} + +type testLoop struct { + startFunc func(interval, timeout time.Duration, errc chan<- error) + stopFunc func() + forcedErr error + forcedErrMtx sync.Mutex + runOnce bool + interval time.Duration + timeout time.Duration +} + +func (l *testLoop) run(errc chan<- error) { + if l.runOnce { + panic("loop must be started only once") + } + l.runOnce = true + l.startFunc(l.interval, l.timeout, errc) +} + +func (l *testLoop) disableEndOfRunStalenessMarkers() { +} + +func (l *testLoop) setForcedError(err error) { + l.forcedErrMtx.Lock() + defer l.forcedErrMtx.Unlock() + l.forcedErr = err +} + +func (l *testLoop) getForcedError() error { + l.forcedErrMtx.Lock() + defer l.forcedErrMtx.Unlock() + return l.forcedErr +} + +func (l *testLoop) stop() { + l.stopFunc() +} + +func (l *testLoop) getCache() *scrapeCache { + return nil +} + +func TestScrapePoolStop(t *testing.T) { + sp := &scrapePool{ + activeTargets: map[uint64]*Target{}, + loops: map[uint64]loop{}, + cancel: func() {}, + client: http.DefaultClient, + metrics: newTestScrapeMetrics(t), + } + var mtx sync.Mutex + stopped := map[uint64]bool{} + numTargets := 20 + + // Stopping the scrape pool must call stop() on all scrape loops, + // clean them and the respective targets up. It must wait until each loop's + // stop function returned before returning itself. + + for i := 0; i < numTargets; i++ { + t := &Target{ + labels: labels.FromStrings(model.AddressLabel, fmt.Sprintf("example.com:%d", i)), + } + l := &testLoop{} + d := time.Duration((i+1)*20) * time.Millisecond + l.stopFunc = func() { + time.Sleep(d) + + mtx.Lock() + stopped[t.hash()] = true + mtx.Unlock() + } + + sp.activeTargets[t.hash()] = t + sp.loops[t.hash()] = l + } + + done := make(chan struct{}) + stopTime := time.Now() + + go func() { + sp.stop() + close(done) + }() + + select { + case <-time.After(5 * time.Second): + require.Fail(t, "scrapeLoop.stop() did not return as expected") + case <-done: + // This should have taken at least as long as the last target slept. + require.GreaterOrEqual(t, time.Since(stopTime), time.Duration(numTargets*20)*time.Millisecond, "scrapeLoop.stop() exited before all targets stopped") + } + + mtx.Lock() + require.Len(t, stopped, numTargets, "Unexpected number of stopped loops") + mtx.Unlock() + + require.Empty(t, sp.activeTargets, "Targets were not cleared on stopping: %d left", len(sp.activeTargets)) + require.Empty(t, sp.loops, "Loops were not cleared on stopping: %d left", len(sp.loops)) +} + +func TestScrapePoolReload(t *testing.T) { + var mtx sync.Mutex + numTargets := 20 + + stopped := map[uint64]bool{} + + reloadCfg := &config.ScrapeConfig{ + ScrapeInterval: model.Duration(3 * time.Second), + ScrapeTimeout: model.Duration(2 * time.Second), + } + // On starting to run, new loops created on reload check whether their preceding + // equivalents have been stopped. 
+ newLoop := func(opts scrapeLoopOptions) loop { + l := &testLoop{interval: time.Duration(reloadCfg.ScrapeInterval), timeout: time.Duration(reloadCfg.ScrapeTimeout)} + l.startFunc = func(interval, timeout time.Duration, errc chan<- error) { + require.Equal(t, 3*time.Second, interval, "Unexpected scrape interval") + require.Equal(t, 2*time.Second, timeout, "Unexpected scrape timeout") + + mtx.Lock() + targetScraper := opts.scraper.(*targetScraper) + require.True(t, stopped[targetScraper.hash()], "Scrape loop for %v not stopped yet", targetScraper) + mtx.Unlock() + } + return l + } + + sp := &scrapePool{ + appendable: &nopAppendable{}, + activeTargets: map[uint64]*Target{}, + loops: map[uint64]loop{}, + newLoop: newLoop, + logger: nil, + client: http.DefaultClient, + metrics: newTestScrapeMetrics(t), + symbolTable: labels.NewSymbolTable(), + } + + // Reloading a scrape pool with a new scrape configuration must stop all scrape + // loops and start new ones. A new loop must not be started before the preceding + // one terminated. + + for i := 0; i < numTargets; i++ { + labels := labels.FromStrings(model.AddressLabel, fmt.Sprintf("example.com:%d", i)) + t := &Target{ + labels: labels, + discoveredLabels: labels, + } + l := &testLoop{} + d := time.Duration((i+1)*20) * time.Millisecond + l.stopFunc = func() { + time.Sleep(d) + + mtx.Lock() + stopped[t.hash()] = true + mtx.Unlock() + } + + sp.activeTargets[t.hash()] = t + sp.loops[t.hash()] = l + } + done := make(chan struct{}) + + beforeTargets := map[uint64]*Target{} + for h, t := range sp.activeTargets { + beforeTargets[h] = t + } + + reloadTime := time.Now() + + go func() { + sp.reload(reloadCfg) + close(done) + }() + + select { + case <-time.After(5 * time.Second): + require.FailNow(t, "scrapeLoop.reload() did not return as expected") + case <-done: + // This should have taken at least as long as the last target slept. 
+ require.GreaterOrEqual(t, time.Since(reloadTime), time.Duration(numTargets*20)*time.Millisecond, "scrapeLoop.stop() exited before all targets stopped") + } + + mtx.Lock() + require.Len(t, stopped, numTargets, "Unexpected number of stopped loops") + mtx.Unlock() + + require.Equal(t, sp.activeTargets, beforeTargets, "Reloading affected target states unexpectedly") + require.Len(t, sp.loops, numTargets, "Unexpected number of stopped loops after reload") +} + +func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) { + reloadCfg := &config.ScrapeConfig{ + ScrapeInterval: model.Duration(3 * time.Second), + ScrapeTimeout: model.Duration(2 * time.Second), + } + newLoop := func(opts scrapeLoopOptions) loop { + l := &testLoop{interval: opts.interval, timeout: opts.timeout} + l.startFunc = func(interval, timeout time.Duration, errc chan<- error) { + require.Equal(t, 5*time.Second, interval, "Unexpected scrape interval") + require.Equal(t, 3*time.Second, timeout, "Unexpected scrape timeout") + } + return l + } + sp := &scrapePool{ + appendable: &nopAppendable{}, + activeTargets: map[uint64]*Target{ + 1: { + labels: labels.FromStrings(model.ScrapeIntervalLabel, "5s", model.ScrapeTimeoutLabel, "3s"), + }, + }, + loops: map[uint64]loop{ + 1: noopLoop(), + }, + newLoop: newLoop, + logger: nil, + client: http.DefaultClient, + metrics: newTestScrapeMetrics(t), + symbolTable: labels.NewSymbolTable(), + } + + err := sp.reload(reloadCfg) + if err != nil { + t.Fatalf("unable to reload configuration: %s", err) + } +} + +func TestScrapePoolTargetLimit(t *testing.T) { + var wg sync.WaitGroup + // On starting to run, new loops created on reload check whether their preceding + // equivalents have been stopped. + newLoop := func(opts scrapeLoopOptions) loop { + wg.Add(1) + l := &testLoop{ + startFunc: func(interval, timeout time.Duration, errc chan<- error) { + wg.Done() + }, + stopFunc: func() {}, + } + return l + } + sp := &scrapePool{ + appendable: &nopAppendable{}, + activeTargets: map[uint64]*Target{}, + loops: map[uint64]loop{}, + newLoop: newLoop, + logger: log.NewNopLogger(), + client: http.DefaultClient, + metrics: newTestScrapeMetrics(t), + symbolTable: labels.NewSymbolTable(), + } + + tgs := []*targetgroup.Group{} + for i := 0; i < 50; i++ { + tgs = append(tgs, + &targetgroup.Group{ + Targets: []model.LabelSet{ + {model.AddressLabel: model.LabelValue(fmt.Sprintf("127.0.0.1:%d", 9090+i))}, + }, + }, + ) + } + + var limit uint + reloadWithLimit := func(l uint) { + limit = l + require.NoError(t, sp.reload(&config.ScrapeConfig{ + ScrapeInterval: model.Duration(3 * time.Second), + ScrapeTimeout: model.Duration(2 * time.Second), + TargetLimit: l, + })) + } + + var targets int + loadTargets := func(n int) { + targets = n + sp.Sync(tgs[:n]) + } + + validateIsRunning := func() { + wg.Wait() + for _, l := range sp.loops { + require.True(t, l.(*testLoop).runOnce, "loop should be running") + } + } + + validateErrorMessage := func(shouldErr bool) { + for _, l := range sp.loops { + lerr := l.(*testLoop).getForcedError() + if shouldErr { + require.Error(t, lerr, "error was expected for %d targets with a limit of %d", targets, limit) + require.Equal(t, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit), lerr.Error()) + } else { + require.NoError(t, lerr) + } + } + } + + reloadWithLimit(0) + loadTargets(50) + validateIsRunning() + + // Simulate an initial config with a limit. 
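+	// Mutating the config directly skips reload(); the stricter limit only takes
+	// effect on the next Sync, which is what validateErrorMessage observes via the
+	// loops' forced errors.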
+ sp.config.TargetLimit = 30 + limit = 30 + loadTargets(50) + validateIsRunning() + validateErrorMessage(true) + + reloadWithLimit(50) + validateIsRunning() + validateErrorMessage(false) + + reloadWithLimit(40) + validateIsRunning() + validateErrorMessage(true) + + loadTargets(30) + validateIsRunning() + validateErrorMessage(false) + + loadTargets(40) + validateIsRunning() + validateErrorMessage(false) + + loadTargets(41) + validateIsRunning() + validateErrorMessage(true) + + reloadWithLimit(0) + validateIsRunning() + validateErrorMessage(false) + + reloadWithLimit(51) + validateIsRunning() + validateErrorMessage(false) + + tgs = append(tgs, + &targetgroup.Group{ + Targets: []model.LabelSet{ + {model.AddressLabel: model.LabelValue("127.0.0.1:1090")}, + }, + }, + &targetgroup.Group{ + Targets: []model.LabelSet{ + {model.AddressLabel: model.LabelValue("127.0.0.1:1090")}, + }, + }, + ) + + sp.Sync(tgs) + validateIsRunning() + validateErrorMessage(false) +} + +func TestScrapePoolAppender(t *testing.T) { + cfg := &config.ScrapeConfig{} + app := &nopAppendable{} + sp, _ := newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + + loop := sp.newLoop(scrapeLoopOptions{ + target: &Target{}, + }) + appl, ok := loop.(*scrapeLoop) + require.True(t, ok, "Expected scrapeLoop but got %T", loop) + + wrapped := appender(appl.appender(context.Background()), 0, 0, histogram.ExponentialSchemaMax) + + tl, ok := wrapped.(*timeLimitAppender) + require.True(t, ok, "Expected timeLimitAppender but got %T", wrapped) + + _, ok = tl.Appender.(nopAppender) + require.True(t, ok, "Expected base appender but got %T", tl.Appender) + + sampleLimit := 100 + loop = sp.newLoop(scrapeLoopOptions{ + target: &Target{}, + sampleLimit: sampleLimit, + }) + appl, ok = loop.(*scrapeLoop) + require.True(t, ok, "Expected scrapeLoop but got %T", loop) + + wrapped = appender(appl.appender(context.Background()), sampleLimit, 0, histogram.ExponentialSchemaMax) + + sl, ok := wrapped.(*limitAppender) + require.True(t, ok, "Expected limitAppender but got %T", wrapped) + + tl, ok = sl.Appender.(*timeLimitAppender) + require.True(t, ok, "Expected timeLimitAppender but got %T", sl.Appender) + + _, ok = tl.Appender.(nopAppender) + require.True(t, ok, "Expected base appender but got %T", tl.Appender) + + wrapped = appender(appl.appender(context.Background()), sampleLimit, 100, histogram.ExponentialSchemaMax) + + bl, ok := wrapped.(*bucketLimitAppender) + require.True(t, ok, "Expected bucketLimitAppender but got %T", wrapped) + + sl, ok = bl.Appender.(*limitAppender) + require.True(t, ok, "Expected limitAppender but got %T", bl) + + tl, ok = sl.Appender.(*timeLimitAppender) + require.True(t, ok, "Expected timeLimitAppender but got %T", sl.Appender) + + _, ok = tl.Appender.(nopAppender) + require.True(t, ok, "Expected base appender but got %T", tl.Appender) + + wrapped = appender(appl.appender(context.Background()), sampleLimit, 100, 0) + + ml, ok := wrapped.(*maxSchemaAppender) + require.True(t, ok, "Expected maxSchemaAppender but got %T", wrapped) + + bl, ok = ml.Appender.(*bucketLimitAppender) + require.True(t, ok, "Expected bucketLimitAppender but got %T", wrapped) + + sl, ok = bl.Appender.(*limitAppender) + require.True(t, ok, "Expected limitAppender but got %T", bl) + + tl, ok = sl.Appender.(*timeLimitAppender) + require.True(t, ok, "Expected timeLimitAppender but got %T", sl.Appender) + + _, ok = tl.Appender.(nopAppender) + require.True(t, ok, "Expected base appender but got %T", tl.Appender) +} + +func 
TestScrapePoolRaces(t *testing.T) { + interval, _ := model.ParseDuration("1s") + timeout, _ := model.ParseDuration("500ms") + newConfig := func() *config.ScrapeConfig { + return &config.ScrapeConfig{ScrapeInterval: interval, ScrapeTimeout: timeout} + } + sp, _ := newScrapePool(newConfig(), &nopAppendable{}, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + tgts := []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + {model.AddressLabel: "127.0.0.1:9090"}, + {model.AddressLabel: "127.0.0.2:9090"}, + {model.AddressLabel: "127.0.0.3:9090"}, + {model.AddressLabel: "127.0.0.4:9090"}, + {model.AddressLabel: "127.0.0.5:9090"}, + {model.AddressLabel: "127.0.0.6:9090"}, + {model.AddressLabel: "127.0.0.7:9090"}, + {model.AddressLabel: "127.0.0.8:9090"}, + }, + }, + } + + sp.Sync(tgts) + active := sp.ActiveTargets() + dropped := sp.DroppedTargets() + expectedActive, expectedDropped := len(tgts[0].Targets), 0 + + require.Len(t, active, expectedActive, "Invalid number of active targets") + require.Len(t, dropped, expectedDropped, "Invalid number of dropped targets") + + for i := 0; i < 20; i++ { + time.Sleep(10 * time.Millisecond) + sp.reload(newConfig()) + } + sp.stop() +} + +func TestScrapePoolScrapeLoopsStarted(t *testing.T) { + var wg sync.WaitGroup + newLoop := func(opts scrapeLoopOptions) loop { + wg.Add(1) + l := &testLoop{ + startFunc: func(interval, timeout time.Duration, errc chan<- error) { + wg.Done() + }, + stopFunc: func() {}, + } + return l + } + sp := &scrapePool{ + appendable: &nopAppendable{}, + activeTargets: map[uint64]*Target{}, + loops: map[uint64]loop{}, + newLoop: newLoop, + logger: nil, + client: http.DefaultClient, + metrics: newTestScrapeMetrics(t), + symbolTable: labels.NewSymbolTable(), + } + + tgs := []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + {model.AddressLabel: model.LabelValue("127.0.0.1:9090")}, + }, + }, + { + Targets: []model.LabelSet{ + {model.AddressLabel: model.LabelValue("127.0.0.1:9090")}, + }, + }, + } + + require.NoError(t, sp.reload(&config.ScrapeConfig{ + ScrapeInterval: model.Duration(3 * time.Second), + ScrapeTimeout: model.Duration(2 * time.Second), + })) + sp.Sync(tgs) + + require.Len(t, sp.loops, 1) + + wg.Wait() + for _, l := range sp.loops { + require.True(t, l.(*testLoop).runOnce, "loop should be running") + } +} + +func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app func(ctx context.Context) storage.Appender, interval time.Duration) *scrapeLoop { + return newScrapeLoop(ctx, + scraper, + nil, nil, + nopMutator, + nopMutator, + app, + nil, + labels.NewSymbolTable(), + 0, + true, + false, + true, + 0, 0, histogram.ExponentialSchemaMax, + nil, + interval, + time.Hour, + false, + false, + false, + false, + false, + nil, + false, + newTestScrapeMetrics(t), + false, + ) +} + +func TestScrapeLoopStopBeforeRun(t *testing.T) { + scraper := &scraperShim{} + sl := newBasicScrapeLoop(t, context.Background(), scraper, nil, 1) + + // The scrape pool synchronizes on stopping scrape loops. However, new scrape + // loops are started asynchronously. Thus it's possible, that a loop is stopped + // again before having started properly. + // Stopping not-yet-started loops must block until the run method was called and exited. + // The run method must exit immediately. 
+ + stopDone := make(chan struct{}) + go func() { + sl.stop() + close(stopDone) + }() + + select { + case <-stopDone: + require.FailNow(t, "Stopping terminated before run exited successfully.") + case <-time.After(500 * time.Millisecond): + } + + // Running the scrape loop must exit before calling the scraper even once. + scraper.scrapeFunc = func(context.Context, io.Writer) error { + require.FailNow(t, "Scraper was called for terminated scrape loop.") + return nil + } + + runDone := make(chan struct{}) + go func() { + sl.run(nil) + close(runDone) + }() + + select { + case <-runDone: + case <-time.After(1 * time.Second): + require.FailNow(t, "Running terminated scrape loop did not exit.") + } + + select { + case <-stopDone: + case <-time.After(1 * time.Second): + require.FailNow(t, "Stopping did not terminate after running exited.") + } +} + +func nopMutator(l labels.Labels) labels.Labels { return l } + +func TestScrapeLoopStop(t *testing.T) { + var ( + signal = make(chan struct{}, 1) + appender = &collectResultAppender{} + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { return appender } + ) + + sl := newBasicScrapeLoop(t, context.Background(), scraper, app, 10*time.Millisecond) + + // Terminate loop after 2 scrapes. + numScrapes := 0 + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes == 2 { + go sl.stop() + <-sl.ctx.Done() + } + w.Write([]byte("metric_a 42\n")) + return ctx.Err() + } + + go func() { + sl.run(nil) + signal <- struct{}{} + }() + + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape wasn't stopped.") + } + + // We expected 1 actual sample for each scrape plus 5 for report samples. + // At least 2 scrapes were made, plus the final stale markers. + require.GreaterOrEqual(t, len(appender.resultFloats), 6*3, "Expected at least 3 scrapes with 6 samples each.") + require.Zero(t, len(appender.resultFloats)%6, "There is a scrape with missing samples.") + // All samples in a scrape must have the same timestamp. + var ts int64 + for i, s := range appender.resultFloats { + switch { + case i%6 == 0: + ts = s.t + case s.t != ts: + t.Fatalf("Unexpected multiple timestamps within single scrape") + } + } + // All samples from the last scrape must be stale markers. + for _, s := range appender.resultFloats[len(appender.resultFloats)-5:] { + require.True(t, value.IsStaleNaN(s.f), "Appended last sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(s.f)) + } +} + +func TestScrapeLoopRun(t *testing.T) { + var ( + signal = make(chan struct{}, 1) + errc = make(chan error) + + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { return &nopAppender{} } + scrapeMetrics = newTestScrapeMetrics(t) + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newScrapeLoop(ctx, + scraper, + nil, nil, + nopMutator, + nopMutator, + app, + nil, + nil, + 0, + true, + false, + true, + 0, 0, histogram.ExponentialSchemaMax, + nil, + time.Second, + time.Hour, + false, + false, + false, + false, + false, + nil, + false, + scrapeMetrics, + false, + ) + + // The loop must terminate during the initial offset if the context + // is canceled. + scraper.offsetDur = time.Hour + + go func() { + sl.run(errc) + signal <- struct{}{} + }() + + // Wait to make sure we are actually waiting on the offset. 
+ time.Sleep(1 * time.Second) + + cancel() + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Cancellation during initial offset failed.") + case err := <-errc: + require.FailNow(t, "Unexpected error: %s", err) + } + + // The provided timeout must cause cancellation of the context passed down to the + // scraper. The scraper has to respect the context. + scraper.offsetDur = 0 + + block := make(chan struct{}) + scraper.scrapeFunc = func(ctx context.Context, _ io.Writer) error { + select { + case <-block: + case <-ctx.Done(): + return ctx.Err() + } + return nil + } + + ctx, cancel = context.WithCancel(context.Background()) + sl = newBasicScrapeLoop(t, ctx, scraper, app, time.Second) + sl.timeout = 100 * time.Millisecond + + go func() { + sl.run(errc) + signal <- struct{}{} + }() + + select { + case err := <-errc: + require.ErrorIs(t, err, context.DeadlineExceeded) + case <-time.After(3 * time.Second): + require.FailNow(t, "Expected timeout error but got none.") + } + + // We already caught the timeout error and are certainly in the loop. + // Let the scrapes returns immediately to cause no further timeout errors + // and check whether canceling the parent context terminates the loop. + close(block) + cancel() + + select { + case <-signal: + // Loop terminated as expected. + case err := <-errc: + require.FailNow(t, "Unexpected error: %s", err) + case <-time.After(3 * time.Second): + require.FailNow(t, "Loop did not terminate on context cancellation") + } +} + +func TestScrapeLoopForcedErr(t *testing.T) { + var ( + signal = make(chan struct{}, 1) + errc = make(chan error) + + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { return &nopAppender{} } + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, app, time.Second) + + forcedErr := fmt.Errorf("forced err") + sl.setForcedError(forcedErr) + + scraper.scrapeFunc = func(context.Context, io.Writer) error { + require.FailNow(t, "Should not be scraped.") + return nil + } + + go func() { + sl.run(errc) + signal <- struct{}{} + }() + + select { + case err := <-errc: + require.ErrorIs(t, err, forcedErr) + case <-time.After(3 * time.Second): + require.FailNow(t, "Expected forced error but got none.") + } + cancel() + + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape not stopped.") + } +} + +func TestScrapeLoopMetadata(t *testing.T) { + var ( + signal = make(chan struct{}) + scraper = &scraperShim{} + scrapeMetrics = newTestScrapeMetrics(t) + cache = newScrapeCache(scrapeMetrics) + ) + defer close(signal) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newScrapeLoop(ctx, + scraper, + nil, nil, + nopMutator, + nopMutator, + func(ctx context.Context) storage.Appender { return nopAppender{} }, + cache, + labels.NewSymbolTable(), + 0, + true, + false, + true, + 0, 0, histogram.ExponentialSchemaMax, + nil, + 0, + 0, + false, + false, + false, + false, + false, + nil, + false, + scrapeMetrics, + false, + ) + defer cancel() + + slApp := sl.appender(ctx) + total, _, _, err := sl.append(slApp, []byte(`# TYPE test_metric counter +# HELP test_metric some help text +# UNIT test_metric metric +test_metric 1 +# TYPE test_metric_no_help gauge +# HELP test_metric_no_type other help text +# EOF`), "application/openmetrics-text", time.Now()) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + require.Equal(t, 1, total) + + md, ok := cache.GetMetadata("test_metric") + 
require.True(t, ok, "expected metadata to be present") + require.Equal(t, model.MetricTypeCounter, md.Type, "unexpected metric type") + require.Equal(t, "some help text", md.Help) + require.Equal(t, "metric", md.Unit) + + md, ok = cache.GetMetadata("test_metric_no_help") + require.True(t, ok, "expected metadata to be present") + require.Equal(t, model.MetricTypeGauge, md.Type, "unexpected metric type") + require.Equal(t, "", md.Help) + require.Equal(t, "", md.Unit) + + md, ok = cache.GetMetadata("test_metric_no_type") + require.True(t, ok, "expected metadata to be present") + require.Equal(t, model.MetricTypeUnknown, md.Type, "unexpected metric type") + require.Equal(t, "other help text", md.Help) + require.Equal(t, "", md.Unit) +} + +func simpleTestScrapeLoop(t testing.TB) (context.Context, *scrapeLoop) { + // Need a full storage for correct Add/AddFast semantics. + s := teststorage.New(t) + t.Cleanup(func() { s.Close() }) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, &scraperShim{}, s.Appender, 0) + t.Cleanup(func() { cancel() }) + + return ctx, sl +} + +func TestScrapeLoopSeriesAdded(t *testing.T) { + ctx, sl := simpleTestScrapeLoop(t) + + slApp := sl.appender(ctx) + total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\n"), "", time.Time{}) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + require.Equal(t, 1, total) + require.Equal(t, 1, added) + require.Equal(t, 1, seriesAdded) + + slApp = sl.appender(ctx) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\n"), "", time.Time{}) + require.NoError(t, slApp.Commit()) + require.NoError(t, err) + require.Equal(t, 1, total) + require.Equal(t, 1, added) + require.Equal(t, 0, seriesAdded) +} + +func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + target := &Target{ + labels: labels.FromStrings("pod_label_invalid_012", "test"), + } + relabelConfig := []*relabel.Config{{ + Action: relabel.LabelMap, + Regex: relabel.MustNewRegexp("pod_label_invalid_(.+)"), + Separator: ";", + Replacement: "$1", + }} + sl := newBasicScrapeLoop(t, ctx, &scraperShim{}, s.Appender, 0) + sl.sampleMutator = func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, target, true, relabelConfig) + } + + slApp := sl.appender(ctx) + total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\n"), "", time.Time{}) + require.ErrorContains(t, err, "invalid metric name or label names") + require.NoError(t, slApp.Rollback()) + require.Equal(t, 1, total) + require.Equal(t, 0, added) + require.Equal(t, 0, seriesAdded) +} + +func makeTestMetrics(n int) []byte { + // Construct a metrics string to parse + sb := bytes.Buffer{} + for i := 0; i < n; i++ { + fmt.Fprintf(&sb, "# TYPE metric_a gauge\n") + fmt.Fprintf(&sb, "# HELP metric_a help text\n") + fmt.Fprintf(&sb, "metric_a{foo=\"%d\",bar=\"%d\"} 1\n", i, i*100) + } + fmt.Fprintf(&sb, "# EOF\n") + return sb.Bytes() +} + +func BenchmarkScrapeLoopAppend(b *testing.B) { + ctx, sl := simpleTestScrapeLoop(b) + + slApp := sl.appender(ctx) + metrics := makeTestMetrics(100) + ts := time.Time{} + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + ts = ts.Add(time.Second) + _, _, _, _ = sl.append(slApp, metrics, "", ts) + } +} + +func BenchmarkScrapeLoopAppendOM(b *testing.B) { + ctx, sl := simpleTestScrapeLoop(b) + + slApp := sl.appender(ctx) + metrics := makeTestMetrics(100) + 
ts := time.Time{} + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + ts = ts.Add(time.Second) + _, _, _, _ = sl.append(slApp, metrics, "application/openmetrics-text", ts) + } +} + +func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { + appender := &collectResultAppender{} + var ( + signal = make(chan struct{}, 1) + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { return appender } + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + // Succeed once, several failures, then stop. + numScrapes := 0 + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + + switch numScrapes { + case 1: + w.Write([]byte("metric_a 42\n")) + return nil + case 5: + cancel() + } + return errors.New("scrape failed") + } + + go func() { + sl.run(nil) + signal <- struct{}{} + }() + + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape wasn't stopped.") + } + + // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for + // each scrape successful or not. + require.Len(t, appender.resultFloats, 27, "Appended samples not as expected:\n%s", appender) + require.Equal(t, 42.0, appender.resultFloats[0].f, "Appended first sample not as expected") + require.True(t, value.IsStaleNaN(appender.resultFloats[6].f), + "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(appender.resultFloats[6].f)) +} + +func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { + appender := &collectResultAppender{} + var ( + signal = make(chan struct{}, 1) + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { return appender } + numScrapes = 0 + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + + // Succeed once, several failures, then stop. + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + switch numScrapes { + case 1: + w.Write([]byte("metric_a 42\n")) + return nil + case 2: + w.Write([]byte("7&-\n")) + return nil + case 3: + cancel() + } + return errors.New("scrape failed") + } + + go func() { + sl.run(nil) + signal <- struct{}{} + }() + + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape wasn't stopped.") + } + + // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for + // each scrape successful or not. + require.Len(t, appender.resultFloats, 17, "Appended samples not as expected:\n%s", appender) + require.Equal(t, 42.0, appender.resultFloats[0].f, "Appended first sample not as expected") + require.True(t, value.IsStaleNaN(appender.resultFloats[6].f), + "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(appender.resultFloats[6].f)) +} + +func TestScrapeLoopCache(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + appender := &collectResultAppender{} + var ( + signal = make(chan struct{}, 1) + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { appender.next = s.Appender(ctx); return appender } + ) + + ctx, cancel := context.WithCancel(context.Background()) + // Decreasing the scrape interval could make the test fail, as multiple scrapes might be initiated at identical millisecond timestamps. + // See https://github.com/prometheus/prometheus/issues/12727. 
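+ // The scrapeFunc below inspects sl.cache.series between scrapes: both series must still be cached after the failed second scrape, and the metric_b entry must be evicted once the third scrape no longer exposes it.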
+ sl := newBasicScrapeLoop(t, ctx, scraper, app, 100*time.Millisecond) + + numScrapes := 0 + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + switch numScrapes { + case 1, 2: + _, ok := sl.cache.series["metric_a"] + require.True(t, ok, "metric_a missing from cache after scrape %d", numScrapes) + _, ok = sl.cache.series["metric_b"] + require.True(t, ok, "metric_b missing from cache after scrape %d", numScrapes) + case 3: + _, ok := sl.cache.series["metric_a"] + require.True(t, ok, "metric_a missing from cache after scrape %d", numScrapes) + _, ok = sl.cache.series["metric_b"] + require.False(t, ok, "metric_b present in cache after scrape %d", numScrapes) + } + + numScrapes++ + switch numScrapes { + case 1: + w.Write([]byte("metric_a 42\nmetric_b 43\n")) + return nil + case 3: + w.Write([]byte("metric_a 44\n")) + return nil + case 4: + cancel() + } + return fmt.Errorf("scrape failed") + } + + go func() { + sl.run(nil) + signal <- struct{}{} + }() + + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape wasn't stopped.") + } + + // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for + // each scrape successful or not. + require.Len(t, appender.resultFloats, 26, "Appended samples not as expected:\n%s", appender) +} + +func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + sapp := s.Appender(context.Background()) + + appender := &collectResultAppender{next: sapp} + var ( + signal = make(chan struct{}, 1) + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { return appender } + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + + numScrapes := 0 + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes < 5 { + s := "" + for i := 0; i < 500; i++ { + s = fmt.Sprintf("%smetric_%d_%d 42\n", s, i, numScrapes) + } + w.Write([]byte(s + "&")) + } else { + cancel() + } + return nil + } + + go func() { + sl.run(nil) + signal <- struct{}{} + }() + + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape wasn't stopped.") + } + + require.LessOrEqual(t, len(sl.cache.series), 2000, "More than 2000 series cached.") +} + +func TestScrapeLoopAppend(t *testing.T) { + tests := []struct { + title string + honorLabels bool + scrapeLabels string + discoveryLabels []string + expLset labels.Labels + expValue float64 + }{ + { + // When "honor_labels" is not set + // label name collision is handler by adding a prefix. + title: "Label name collision", + honorLabels: false, + scrapeLabels: `metric{n="1"} 0`, + discoveryLabels: []string{"n", "2"}, + expLset: labels.FromStrings("__name__", "metric", "exported_n", "1", "n", "2"), + expValue: 0, + }, { + // When "honor_labels" is not set + // exported label from discovery don't get overwritten + title: "Label name collision", + honorLabels: false, + scrapeLabels: `metric 0`, + discoveryLabels: []string{"n", "2", "exported_n", "2"}, + expLset: labels.FromStrings("__name__", "metric", "n", "2", "exported_n", "2"), + expValue: 0, + }, { + // Labels with no value need to be removed as these should not be ingested. 
+ title: "Delete Empty labels", + honorLabels: false, + scrapeLabels: `metric{n=""} 0`, + discoveryLabels: nil, + expLset: labels.FromStrings("__name__", "metric"), + expValue: 0, + }, { + // Honor Labels should ignore labels with the same name. + title: "Honor Labels", + honorLabels: true, + scrapeLabels: `metric{n1="1", n2="2"} 0`, + discoveryLabels: []string{"n1", "0"}, + expLset: labels.FromStrings("__name__", "metric", "n1", "1", "n2", "2"), + expValue: 0, + }, { + title: "Stale - NaN", + honorLabels: false, + scrapeLabels: `metric NaN`, + discoveryLabels: nil, + expLset: labels.FromStrings("__name__", "metric"), + expValue: math.Float64frombits(value.NormalNaN), + }, + } + + for _, test := range tests { + app := &collectResultAppender{} + + discoveryLabels := &Target{ + labels: labels.FromStrings(test.discoveryLabels...), + } + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.sampleMutator = func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, discoveryLabels, test.honorLabels, nil) + } + sl.reportSampleMutator = func(l labels.Labels) labels.Labels { + return mutateReportSampleLabels(l, discoveryLabels) + } + + now := time.Now() + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(test.scrapeLabels), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + expected := []floatSample{ + { + metric: test.expLset, + t: timestamp.FromTime(now), + f: test.expValue, + }, + } + + t.Logf("Test:%s", test.title) + requireEqual(t, expected, app.resultFloats) + } +} + +func requireEqual(t *testing.T, expected, actual interface{}, msgAndArgs ...interface{}) { + testutil.RequireEqualWithOptions(t, expected, actual, + []cmp.Option{cmp.Comparer(equalFloatSamples), cmp.AllowUnexported(histogramSample{})}, + msgAndArgs...) 
+} + +func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { + testcases := map[string]struct { + targetLabels []string + exposedLabels string + expected []string + }{ + "One target label collides with existing label": { + targetLabels: []string{"foo", "2"}, + exposedLabels: `metric{foo="1"} 0`, + expected: []string{"__name__", "metric", "exported_foo", "1", "foo", "2"}, + }, + + "One target label collides with existing label, plus target label already with prefix 'exported'": { + targetLabels: []string{"foo", "2", "exported_foo", "3"}, + exposedLabels: `metric{foo="1"} 0`, + expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "3", "foo", "2"}, + }, + "One target label collides with existing label, plus existing label already with prefix 'exported": { + targetLabels: []string{"foo", "3"}, + exposedLabels: `metric{foo="1", exported_foo="2"} 0`, + expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "2", "foo", "3"}, + }, + "One target label collides with existing label, both already with prefix 'exported'": { + targetLabels: []string{"exported_foo", "2"}, + exposedLabels: `metric{exported_foo="1"} 0`, + expected: []string{"__name__", "metric", "exported_exported_foo", "1", "exported_foo", "2"}, + }, + "Two target labels collide with existing labels, both with and without prefix 'exported'": { + targetLabels: []string{"foo", "3", "exported_foo", "4"}, + exposedLabels: `metric{foo="1", exported_foo="2"} 0`, + expected: []string{ + "__name__", "metric", "exported_exported_foo", "1", "exported_exported_exported_foo", + "2", "exported_foo", "4", "foo", "3", + }, + }, + "Extreme example": { + targetLabels: []string{"foo", "0", "exported_exported_foo", "1", "exported_exported_exported_foo", "2"}, + exposedLabels: `metric{foo="3", exported_foo="4", exported_exported_exported_foo="5"} 0`, + expected: []string{ + "__name__", "metric", + "exported_exported_exported_exported_exported_foo", "5", + "exported_exported_exported_exported_foo", "3", + "exported_exported_exported_foo", "2", + "exported_exported_foo", "1", + "exported_foo", "4", + "foo", "0", + }, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + app := &collectResultAppender{} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.sampleMutator = func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, &Target{labels: labels.FromStrings(tc.targetLabels...)}, false, nil) + } + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(tc.exposedLabels), "", time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)) + require.NoError(t, err) + + require.NoError(t, slApp.Commit()) + + requireEqual(t, []floatSample{ + { + metric: labels.FromStrings(tc.expected...), + t: timestamp.FromTime(time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)), + f: 0, + }, + }, app.resultFloats) + }) + } +} + +func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { + // collectResultAppender's AddFast always returns ErrNotFound if we don't give it a next. 
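+ // The cache is seeded with fakeRef below so the append hits the cached-ref (AddFast) path and gets ErrNotFound; the sample must still be recorded with the cached labels.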
+ app := &collectResultAppender{} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + + fakeRef := storage.SeriesRef(1) + expValue := float64(1) + metric := []byte(`metric{n="1"} 1`) + p, warning := textparse.New(metric, "", false, labels.NewSymbolTable()) + require.NoError(t, warning) + + var lset labels.Labels + p.Next() + p.Metric(&lset) + hash := lset.Hash() + + // Create a fake entry in the cache + sl.cache.addRef(metric, fakeRef, lset, hash) + now := time.Now() + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, metric, "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + expected := []floatSample{ + { + metric: lset, + t: timestamp.FromTime(now), + f: expValue, + }, + } + + require.Equal(t, expected, app.resultFloats) +} + +func TestScrapeLoopAppendSampleLimit(t *testing.T) { + resApp := &collectResultAppender{} + app := &limitAppender{Appender: resApp, limit: 1} + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.sampleMutator = func(l labels.Labels) labels.Labels { + if l.Has("deleteme") { + return labels.EmptyLabels() + } + return l + } + sl.sampleLimit = app.limit + + // Get the value of the Counter before performing the append. + beforeMetric := dto.Metric{} + err := sl.metrics.targetScrapeSampleLimit.Write(&beforeMetric) + require.NoError(t, err) + + beforeMetricValue := beforeMetric.GetCounter().GetValue() + + now := time.Now() + slApp := sl.appender(context.Background()) + total, added, seriesAdded, err := sl.append(app, []byte("metric_a 1\nmetric_b 1\nmetric_c 1\n"), "", now) + require.ErrorIs(t, err, errSampleLimit) + require.NoError(t, slApp.Rollback()) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 1, seriesAdded) + + // Check that the Counter has been incremented a single time for the scrape, + // not multiple times for each sample. + metric := dto.Metric{} + err = sl.metrics.targetScrapeSampleLimit.Write(&metric) + require.NoError(t, err) + + value := metric.GetCounter().GetValue() + change := value - beforeMetricValue + require.Equal(t, 1.0, change, "Unexpected change of sample limit metric: %f", change) + + // And verify that we got the samples that fit under the limit. 
+ want := []floatSample{ + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: timestamp.FromTime(now), + f: 1, + }, + } + requireEqual(t, want, resApp.rolledbackFloats, "Appended samples not as expected:\n%s", appender) + + now = time.Now() + slApp = sl.appender(context.Background()) + total, added, seriesAdded, err = sl.append(slApp, []byte("metric_a 1\nmetric_b 1\nmetric_c{deleteme=\"yes\"} 1\nmetric_d 1\nmetric_e 1\nmetric_f 1\nmetric_g 1\nmetric_h{deleteme=\"yes\"} 1\nmetric_i{deleteme=\"yes\"} 1\n"), "", now) + require.ErrorIs(t, err, errSampleLimit) + require.NoError(t, slApp.Rollback()) + require.Equal(t, 9, total) + require.Equal(t, 6, added) + require.Equal(t, 0, seriesAdded) +} + +func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { + resApp := &collectResultAppender{} + app := &bucketLimitAppender{Appender: resApp, limit: 2} + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.enableNativeHistogramIngestion = true + sl.sampleMutator = func(l labels.Labels) labels.Labels { + if l.Has("deleteme") { + return labels.EmptyLabels() + } + return l + } + sl.sampleLimit = app.limit + + metric := dto.Metric{} + err := sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric) + require.NoError(t, err) + beforeMetricValue := metric.GetCounter().GetValue() + + nativeHistogram := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "testing", + Name: "example_native_histogram", + Help: "This is used for testing", + ConstLabels: map[string]string{"some": "value"}, + NativeHistogramBucketFactor: 1.1, // 10% increase from bucket to bucket + NativeHistogramMaxBucketNumber: 100, // intentionally higher than the limit we'll use in the scraper + }, + []string{"size"}, + ) + registry := prometheus.NewRegistry() + registry.Register(nativeHistogram) + nativeHistogram.WithLabelValues("S").Observe(1.0) + nativeHistogram.WithLabelValues("M").Observe(1.0) + nativeHistogram.WithLabelValues("L").Observe(1.0) + nativeHistogram.WithLabelValues("M").Observe(10.0) + nativeHistogram.WithLabelValues("L").Observe(10.0) // in different bucket since > 1*1.1 + + gathered, err := registry.Gather() + require.NoError(t, err) + require.NotEmpty(t, gathered) + + histogramMetricFamily := gathered[0] + msg, err := MetricFamilyToProtobuf(histogramMetricFamily) + require.NoError(t, err) + + now := time.Now() + total, added, seriesAdded, err := sl.append(app, msg, "application/vnd.google.protobuf", now) + require.NoError(t, err) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 3, seriesAdded) + + err = sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric) + require.NoError(t, err) + metricValue := metric.GetCounter().GetValue() + require.Equal(t, beforeMetricValue, metricValue) + beforeMetricValue = metricValue + + nativeHistogram.WithLabelValues("L").Observe(100.0) // in different bucket since > 10*1.1 + + gathered, err = registry.Gather() + require.NoError(t, err) + require.NotEmpty(t, gathered) + + histogramMetricFamily = gathered[0] + msg, err = MetricFamilyToProtobuf(histogramMetricFamily) + require.NoError(t, err) + + now = time.Now() + total, added, seriesAdded, err = sl.append(app, msg, "application/vnd.google.protobuf", now) + require.NoError(t, err) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 3, seriesAdded) + + err = sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric) + require.NoError(t, err) + metricValue = 
metric.GetCounter().GetValue() + require.Equal(t, beforeMetricValue, metricValue) + beforeMetricValue = metricValue + + nativeHistogram.WithLabelValues("L").Observe(100000.0) // in different bucket since > 10*1.1 + + gathered, err = registry.Gather() + require.NoError(t, err) + require.NotEmpty(t, gathered) + + histogramMetricFamily = gathered[0] + msg, err = MetricFamilyToProtobuf(histogramMetricFamily) + require.NoError(t, err) + + now = time.Now() + total, added, seriesAdded, err = sl.append(app, msg, "application/vnd.google.protobuf", now) + if !errors.Is(err, errBucketLimit) { + t.Fatalf("Did not see expected histogram bucket limit error: %s", err) + } + require.NoError(t, app.Rollback()) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 0, seriesAdded) + + err = sl.metrics.targetScrapeNativeHistogramBucketLimit.Write(&metric) + require.NoError(t, err) + metricValue = metric.GetCounter().GetValue() + require.Equal(t, beforeMetricValue+1, metricValue) +} + +func TestScrapeLoop_ChangingMetricString(t *testing.T) { + // This is a regression test for the scrape loop cache not properly maintaining + // IDs when the string representation of a metric changes across a scrape. Thus + // we use a real storage appender here. + s := teststorage.New(t) + defer s.Close() + + capp := &collectResultAppender{} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + slApp = sl.appender(context.Background()) + _, _, _, err = sl.append(slApp, []byte(`metric_a{b="1",a="1"} 2`), "", now.Add(time.Minute)) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(now), + f: 1, + }, + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(now.Add(time.Minute)), + f: 2, + }, + } + require.Equal(t, want, capp.resultFloats, "Appended samples not as expected:\n%s", appender) +} + +func TestScrapeLoopAppendStaleness(t *testing.T) { + app := &collectResultAppender{} + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte("metric_a 1\n"), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + slApp = sl.appender(context.Background()) + _, _, _, err = sl.append(slApp, []byte(""), "", now.Add(time.Second)) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + want := []floatSample{ + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: timestamp.FromTime(now), + f: 1, + }, + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: timestamp.FromTime(now.Add(time.Second)), + f: math.Float64frombits(value.StaleNaN), + }, + } + requireEqual(t, want, app.resultFloats, "Appended samples not as expected:\n%s", appender) +} + +func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { + app := &collectResultAppender{} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, 
err := sl.append(slApp, []byte("metric_a 1 1000\n"), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + slApp = sl.appender(context.Background()) + _, _, _, err = sl.append(slApp, []byte(""), "", now.Add(time.Second)) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + want := []floatSample{ + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: 1000, + f: 1, + }, + } + require.Equal(t, want, app.resultFloats, "Appended samples not as expected:\n%s", appender) +} + +func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { + app := &collectResultAppender{} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.trackTimestampsStaleness = true + + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte("metric_a 1 1000\n"), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + slApp = sl.appender(context.Background()) + _, _, _, err = sl.append(slApp, []byte(""), "", now.Add(time.Second)) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + want := []floatSample{ + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: 1000, + f: 1, + }, + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: timestamp.FromTime(now.Add(time.Second)), + f: math.Float64frombits(value.StaleNaN), + }, + } + requireEqual(t, want, app.resultFloats, "Appended samples not as expected:\n%s", appender) +} + +func TestScrapeLoopAppendExemplar(t *testing.T) { + tests := []struct { + title string + scrapeClassicHistograms bool + enableNativeHistogramsIngestion bool + scrapeText string + contentType string + discoveryLabels []string + floats []floatSample + histograms []histogramSample + exemplars []exemplar.Exemplar + }{ + { + title: "Metric without exemplars", + scrapeText: "metric_total{n=\"1\"} 0\n# EOF", + contentType: "application/openmetrics-text", + discoveryLabels: []string{"n", "2"}, + floats: []floatSample{{ + metric: labels.FromStrings("__name__", "metric_total", "exported_n", "1", "n", "2"), + f: 0, + }}, + }, + { + title: "Metric with exemplars", + scrapeText: "metric_total{n=\"1\"} 0 # {a=\"abc\"} 1.0\n# EOF", + contentType: "application/openmetrics-text", + discoveryLabels: []string{"n", "2"}, + floats: []floatSample{{ + metric: labels.FromStrings("__name__", "metric_total", "exported_n", "1", "n", "2"), + f: 0, + }}, + exemplars: []exemplar.Exemplar{ + {Labels: labels.FromStrings("a", "abc"), Value: 1}, + }, + }, + { + title: "Metric with exemplars and TS", + scrapeText: "metric_total{n=\"1\"} 0 # {a=\"abc\"} 1.0 10000\n# EOF", + contentType: "application/openmetrics-text", + discoveryLabels: []string{"n", "2"}, + floats: []floatSample{{ + metric: labels.FromStrings("__name__", "metric_total", "exported_n", "1", "n", "2"), + f: 0, + }}, + exemplars: []exemplar.Exemplar{ + {Labels: labels.FromStrings("a", "abc"), Value: 1, Ts: 10000000, HasTs: true}, + }, + }, + { + title: "Two metrics and exemplars", + scrapeText: `metric_total{n="1"} 1 # {t="1"} 1.0 10000 +metric_total{n="2"} 2 # {t="2"} 2.0 20000 +# EOF`, + contentType: "application/openmetrics-text", + floats: []floatSample{{ + metric: labels.FromStrings("__name__", "metric_total", "n", "1"), + f: 1, + }, { + metric: labels.FromStrings("__name__", "metric_total", "n", "2"), + f: 2, + }}, + exemplars: []exemplar.Exemplar{ + {Labels: labels.FromStrings("t", "1"), Value: 1, Ts: 10000000, HasTs: 
true}, + {Labels: labels.FromStrings("t", "2"), Value: 2, Ts: 20000000, HasTs: true}, + }, + }, + { + title: "Native histogram with three exemplars", + + enableNativeHistogramsIngestion: true, + scrapeText: `name: "test_histogram" +help: "Test histogram with many buckets removed to keep it manageable in size." +type: HISTOGRAM +metric: < + histogram: < + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "59727" + > + value: -0.00039 + timestamp: < + seconds: 1625851155 + nanos: 146848499 + > + > + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "5617" + > + value: -0.00029 + > + > + bucket: < + cumulative_count: 32 + upper_bound: -0.0001899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "58215" + > + value: -0.00019 + timestamp: < + seconds: 1625851055 + nanos: 146848599 + > + > + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +> + +`, + contentType: "application/vnd.google.protobuf", + histograms: []histogramSample{{ + t: 1234568, + h: &histogram.Histogram{ + Count: 175, + ZeroCount: 2, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + Schema: 3, + PositiveSpans: []histogram.Span{ + {Offset: -161, Length: 1}, + {Offset: 8, Length: 3}, + }, + NegativeSpans: []histogram.Span{ + {Offset: -162, Length: 1}, + {Offset: 23, Length: 4}, + }, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + }}, + exemplars: []exemplar.Exemplar{ + // Native histogram exemplars are arranged by timestamp, and those with missing timestamps are dropped. + {Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}, + {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, + }, + }, + { + title: "Native histogram with three exemplars scraped as classic histogram", + + enableNativeHistogramsIngestion: true, + scrapeText: `name: "test_histogram" +help: "Test histogram with many buckets removed to keep it manageable in size." 
+type: HISTOGRAM +metric: < + histogram: < + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "59727" + > + value: -0.00039 + timestamp: < + seconds: 1625851155 + nanos: 146848499 + > + > + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "5617" + > + value: -0.00029 + > + > + bucket: < + cumulative_count: 32 + upper_bound: -0.0001899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "58215" + > + value: -0.00019 + timestamp: < + seconds: 1625851055 + nanos: 146848599 + > + > + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +> + +`, + scrapeClassicHistograms: true, + contentType: "application/vnd.google.protobuf", + floats: []floatSample{ + {metric: labels.FromStrings("__name__", "test_histogram_count"), t: 1234568, f: 175}, + {metric: labels.FromStrings("__name__", "test_histogram_sum"), t: 1234568, f: 0.0008280461746287094}, + {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), t: 1234568, f: 2}, + {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), t: 1234568, f: 4}, + {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), t: 1234568, f: 16}, + {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), t: 1234568, f: 32}, + {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), t: 1234568, f: 175}, + }, + histograms: []histogramSample{{ + t: 1234568, + h: &histogram.Histogram{ + Count: 175, + ZeroCount: 2, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + Schema: 3, + PositiveSpans: []histogram.Span{ + {Offset: -161, Length: 1}, + {Offset: 8, Length: 3}, + }, + NegativeSpans: []histogram.Span{ + {Offset: -162, Length: 1}, + {Offset: 23, Length: 4}, + }, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + }}, + exemplars: []exemplar.Exemplar{ + // Native histogram one is arranged by timestamp. + // Exemplars with missing timestamps are dropped for native histograms. + {Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}, + {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, + // Classic histogram one is in order of appearance. + // Exemplars with missing timestamps are supported for classic histograms. 
+ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, + {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}, + {Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}, + }, + }, + } + + for _, test := range tests { + t.Run(test.title, func(t *testing.T) { + app := &collectResultAppender{} + + discoveryLabels := &Target{ + labels: labels.FromStrings(test.discoveryLabels...), + } + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.enableNativeHistogramIngestion = test.enableNativeHistogramsIngestion + sl.sampleMutator = func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, discoveryLabels, false, nil) + } + sl.reportSampleMutator = func(l labels.Labels) labels.Labels { + return mutateReportSampleLabels(l, discoveryLabels) + } + sl.scrapeClassicHistograms = test.scrapeClassicHistograms + + now := time.Now() + + for i := range test.floats { + if test.floats[i].t != 0 { + continue + } + test.floats[i].t = timestamp.FromTime(now) + } + + // We need to set the timestamp for expected exemplars that does not have a timestamp. + for i := range test.exemplars { + if test.exemplars[i].Ts == 0 { + test.exemplars[i].Ts = timestamp.FromTime(now) + } + } + + buf := &bytes.Buffer{} + if test.contentType == "application/vnd.google.protobuf" { + // In case of protobuf, we have to create the binary representation. + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(test.scrapeText, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. 
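+ // The protobuf exposition format is a stream of length-delimited MetricFamily messages: each marshalled message is preceded by its size encoded as an unsigned varint, which is what the parser expects for "application/vnd.google.protobuf".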
+ varintBuf := binary.AppendUvarint(nil, uint64(len(protoBuf))) + buf.Write(varintBuf) + buf.Write(protoBuf) + } else { + buf.WriteString(test.scrapeText) + } + + _, _, _, err := sl.append(app, buf.Bytes(), test.contentType, now) + require.NoError(t, err) + require.NoError(t, app.Commit()) + requireEqual(t, test.floats, app.resultFloats) + requireEqual(t, test.histograms, app.resultHistograms) + requireEqual(t, test.exemplars, app.resultExemplars) + }) + } +} + +func TestScrapeLoopAppendExemplarSeries(t *testing.T) { + scrapeText := []string{`metric_total{n="1"} 1 # {t="1"} 1.0 10000 +# EOF`, `metric_total{n="1"} 2 # {t="2"} 2.0 20000 +# EOF`} + samples := []floatSample{{ + metric: labels.FromStrings("__name__", "metric_total", "n", "1"), + f: 1, + }, { + metric: labels.FromStrings("__name__", "metric_total", "n", "1"), + f: 2, + }} + exemplars := []exemplar.Exemplar{ + {Labels: labels.FromStrings("t", "1"), Value: 1, Ts: 10000000, HasTs: true}, + {Labels: labels.FromStrings("t", "2"), Value: 2, Ts: 20000000, HasTs: true}, + } + discoveryLabels := &Target{ + labels: labels.FromStrings(), + } + + app := &collectResultAppender{} + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.sampleMutator = func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, discoveryLabels, false, nil) + } + sl.reportSampleMutator = func(l labels.Labels) labels.Labels { + return mutateReportSampleLabels(l, discoveryLabels) + } + + now := time.Now() + + for i := range samples { + ts := now.Add(time.Second * time.Duration(i)) + samples[i].t = timestamp.FromTime(ts) + } + + // We need to set the timestamp for expected exemplars that does not have a timestamp. + for i := range exemplars { + if exemplars[i].Ts == 0 { + ts := now.Add(time.Second * time.Duration(i)) + exemplars[i].Ts = timestamp.FromTime(ts) + } + } + + for i, st := range scrapeText { + _, _, _, err := sl.append(app, []byte(st), "application/openmetrics-text", timestamp.Time(samples[i].t)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + } + + requireEqual(t, samples, app.resultFloats) + requireEqual(t, exemplars, app.resultExemplars) +} + +func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { + var ( + scraper = &scraperShim{} + appender = &collectResultAppender{} + app = func(ctx context.Context) storage.Appender { return appender } + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + cancel() + return errors.New("scrape failed") + } + + sl.run(nil) + require.Equal(t, 0.0, appender.resultFloats[0].f, "bad 'up' value") +} + +func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { + var ( + scraper = &scraperShim{} + appender = &collectResultAppender{} + app = func(ctx context.Context) storage.Appender { return appender } + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + cancel() + w.Write([]byte("a{l=\"\xff\"} 1\n")) + return nil + } + + sl.run(nil) + require.Equal(t, 0.0, appender.resultFloats[0].f, "bad 'up' value") +} + +type errorAppender struct { + collectResultAppender +} + +func (app *errorAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { + switch 
lset.Get(model.MetricNameLabel) { + case "out_of_order": + return 0, storage.ErrOutOfOrderSample + case "amend": + return 0, storage.ErrDuplicateSampleForTimestamp + case "out_of_bounds": + return 0, storage.ErrOutOfBounds + default: + return app.collectResultAppender.Append(ref, lset, t, v) + } +} + +func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T) { + app := &errorAppender{} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + + now := time.Unix(1, 0) + slApp := sl.appender(context.Background()) + total, added, seriesAdded, err := sl.append(slApp, []byte("out_of_order 1\namend 1\nnormal 1\nout_of_bounds 1\n"), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + want := []floatSample{ + { + metric: labels.FromStrings(model.MetricNameLabel, "normal"), + t: timestamp.FromTime(now), + f: 1, + }, + } + requireEqual(t, want, app.resultFloats, "Appended samples not as expected:\n%s", appender) + require.Equal(t, 4, total) + require.Equal(t, 4, added) + require.Equal(t, 1, seriesAdded) +} + +func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { + app := &collectResultAppender{} + sl := newBasicScrapeLoop(t, context.Background(), nil, + func(ctx context.Context) storage.Appender { + return &timeLimitAppender{ + Appender: app, + maxTime: timestamp.FromTime(time.Now().Add(10 * time.Minute)), + } + }, + 0, + ) + + now := time.Now().Add(20 * time.Minute) + slApp := sl.appender(context.Background()) + total, added, seriesAdded, err := sl.append(slApp, []byte("normal 1\n"), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + require.Equal(t, 1, total) + require.Equal(t, 1, added) + require.Equal(t, 0, seriesAdded) +} + +const useGathererHandler = true + +func newHTTPTestServer(handler http.Handler) *httptest.Server { + if useGathererHandler { + server := httptest.NewUnstartedServer(handler) + server.URL = "http://not-started:8080" + SetDefaultGathererHandler(handler) + return server + } + server := httptest.NewServer(handler) + SetDefaultGathererHandler(nil) + return server +} + +func TestTargetScraperScrapeOK(t *testing.T) { + const ( + configTimeout = 1500 * time.Millisecond + expectedTimeout = "1.5" + ) + + var protobufParsing bool + + server := newHTTPTestServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if protobufParsing { + accept := r.Header.Get("Accept") + require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"), + "Expected Accept header to prefer application/vnd.google.protobuf.") + } + + timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds") + require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.") + + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + w.Write([]byte("metric_a 1\nmetric_b 2\n")) + }), + ) + defer server.Close() + defer SetDefaultGathererHandler(nil) + + serverURL, err := url.Parse(server.URL) + if err != nil { + panic(err) + } + + runTest := func(acceptHeader string) { + ts := newScraper(&targetScraper{ + Target: &Target{ + labels: labels.FromStrings( + model.SchemeLabel, serverURL.Scheme, + model.AddressLabel, serverURL.Host, + ), + }, + client: http.DefaultClient, + timeout: configTimeout, + acceptHeader: acceptHeader, + }) + var buf bytes.Buffer + + resp, err := ts.scrape(context.Background()) + require.NoError(t, err) + contentType, err := ts.readResponse(context.Background(), resp, &buf) + require.NoError(t, err) + require.Equal(t, 
"text/plain; version=0.0.4", contentType) + require.Equal(t, "metric_a 1\nmetric_b 2\n", buf.String()) + } + + runTest(acceptHeader(config.DefaultScrapeProtocols)) + protobufParsing = true + runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols)) +} + +func TestTargetScrapeScrapeCancel(t *testing.T) { + block := make(chan struct{}) + + server := newHTTPTestServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + <-block + }), + ) + defer server.Close() + defer SetDefaultGathererHandler(nil) + + serverURL, err := url.Parse(server.URL) + if err != nil { + panic(err) + } + + ts := newScraper(&targetScraper{ + Target: &Target{ + labels: labels.FromStrings( + model.SchemeLabel, serverURL.Scheme, + model.AddressLabel, serverURL.Host, + ), + }, + client: http.DefaultClient, + acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols), + }) + ctx, cancel := context.WithCancel(context.Background()) + + errc := make(chan error, 1) + + go func() { + time.Sleep(1 * time.Second) + cancel() + }() + + go func() { + _, err := ts.scrape(ctx) + switch { + case err == nil: + errc <- errors.New("Expected error but got nil") + case !errors.Is(ctx.Err(), context.Canceled): + errc <- fmt.Errorf("Expected context cancellation error but got: %w", ctx.Err()) + default: + close(errc) + } + }() + + select { + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape function did not return unexpectedly.") + case err := <-errc: + require.NoError(t, err) + } + // If this is closed in a defer above the function the test server + // doesn't terminate and the test doesn't complete. + close(block) +} + +func TestTargetScrapeScrapeNotFound(t *testing.T) { + server := newHTTPTestServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + }), + ) + defer server.Close() + defer SetDefaultGathererHandler(nil) + + serverURL, err := url.Parse(server.URL) + if err != nil { + panic(err) + } + + ts := newScraper(&targetScraper{ + Target: &Target{ + labels: labels.FromStrings( + model.SchemeLabel, serverURL.Scheme, + model.AddressLabel, serverURL.Host, + ), + }, + client: http.DefaultClient, + acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols), + }) + + resp, err := ts.scrape(context.Background()) + require.NoError(t, err) + _, err = ts.readResponse(context.Background(), resp, io.Discard) + require.Error(t, err) + require.Contains(t, err.Error(), "404", "Expected \"404 NotFound\" error but got: %s", err) +} + +func TestTargetScraperBodySizeLimit(t *testing.T) { + const ( + bodySizeLimit = 15 + responseBody = "metric_a 1\nmetric_b 2\n" + ) + var gzipResponse bool + server := newHTTPTestServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + if gzipResponse { + w.Header().Set("Content-Encoding", "gzip") + gw := gzip.NewWriter(w) + defer gw.Close() + gw.Write([]byte(responseBody)) + return + } + w.Write([]byte(responseBody)) + }), + ) + defer server.Close() + defer SetDefaultGathererHandler(nil) + + serverURL, err := url.Parse(server.URL) + if err != nil { + panic(err) + } + + ts := &targetScraper{ + Target: &Target{ + labels: labels.FromStrings( + model.SchemeLabel, serverURL.Scheme, + model.AddressLabel, serverURL.Host, + ), + }, + client: http.DefaultClient, + bodySizeLimit: bodySizeLimit, + acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols), + metrics: newTestScrapeMetrics(t), + } + s := newScraper(ts) + var buf 
bytes.Buffer + + // Target response uncompressed body, scrape with body size limit. + resp, err := s.scrape(context.Background()) + require.NoError(t, err) + _, err = s.readResponse(context.Background(), resp, &buf) + require.ErrorIs(t, err, errBodySizeLimit) + require.Equal(t, bodySizeLimit, buf.Len()) + // Target response gzip compressed body, scrape with body size limit. + gzipResponse = true + buf.Reset() + resp, err = s.scrape(context.Background()) + require.NoError(t, err) + _, err = s.readResponse(context.Background(), resp, &buf) + require.ErrorIs(t, err, errBodySizeLimit) + require.Equal(t, bodySizeLimit, buf.Len()) + // Target response uncompressed body, scrape without body size limit. + gzipResponse = false + buf.Reset() + ts.bodySizeLimit = 0 + resp, err = s.scrape(context.Background()) + require.NoError(t, err) + _, err = s.readResponse(context.Background(), resp, &buf) + require.NoError(t, err) + require.Len(t, responseBody, buf.Len()) + // Target response gzip compressed body, scrape without body size limit. + gzipResponse = true + buf.Reset() + resp, err = s.scrape(context.Background()) + require.NoError(t, err) + _, err = s.readResponse(context.Background(), resp, &buf) + require.NoError(t, err) + require.Len(t, responseBody, buf.Len()) +} + +// scraperShim implements the scraper interface for these tests and allows setting the values +// returned by its methods. It also allows setting a custom scrape function. + +func TestScrapeLoop_RespectTimestamps(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + app := s.Appender(context.Background()) + capp := &collectResultAppender{next: app} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1 0`), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: 0, + f: 1, + }, + } + require.Equal(t, want, capp.resultFloats, "Appended samples not as expected:\n%s", appender) +} + +func TestScrapeLoop_DiscardTimestamps(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + app := s.Appender(context.Background()) + + capp := &collectResultAppender{next: app} + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + sl.honorTimestamps = false + + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1 0`), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(now), + f: 1, + }, + } + require.Equal(t, want, capp.resultFloats, "Appended samples not as expected:\n%s", appender) +} + +func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, &scraperShim{}, s.Appender, 0) + defer cancel() + + // We add a good and a bad metric to check that both are discarded.
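+ // The "bad" series below carries the "le" label twice; the whole scrape must be rejected and nothing may end up in the TSDB.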
+ slApp := sl.appender(ctx) + _, _, _, err := sl.append(slApp, []byte("test_metric{le=\"500\"} 1\ntest_metric{le=\"600\",le=\"700\"} 1\n"), "", time.Time{}) + require.Error(t, err) + require.NoError(t, slApp.Rollback()) + // We need to cycle staleness cache maps after a manual rollback. Otherwise they will have old entries in them, + // which would cause ErrDuplicateSampleForTimestamp errors on the next append. + sl.cache.iterDone(true) + + q, err := s.Querier(time.Time{}.UnixNano(), 0) + require.NoError(t, err) + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".*")) + require.False(t, series.Next(), "series found in tsdb") + require.NoError(t, series.Err()) + + // We add a good metric to check that it is recorded. + slApp = sl.appender(ctx) + _, _, _, err = sl.append(slApp, []byte("test_metric{le=\"500\"} 1\n"), "", time.Time{}) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + q, err = s.Querier(time.Time{}.UnixNano(), 0) + require.NoError(t, err) + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchEqual, "le", "500")) + require.True(t, series.Next(), "series not found in tsdb") + require.NoError(t, series.Err()) + require.False(t, series.Next(), "more than one series found in tsdb") +} + +func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + app := s.Appender(context.Background()) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, context.Background(), &scraperShim{}, func(ctx context.Context) storage.Appender { return app }, 0) + sl.sampleMutator = func(l labels.Labels) labels.Labels { + if l.Has("drop") { + return labels.FromStrings("no", "name") // This label set will trigger an error. 
+ } + return l + } + defer cancel() + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte("nok 1\nnok2{drop=\"drop\"} 1\n"), "", time.Time{}) + require.Error(t, err) + require.NoError(t, slApp.Rollback()) + require.Equal(t, errNameLabelMandatory, err) + + q, err := s.Querier(time.Time{}.UnixNano(), 0) + require.NoError(t, err) + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".*")) + require.False(t, series.Next(), "series found in tsdb") + require.NoError(t, series.Err()) +} + +func TestReusableConfig(t *testing.T) { + variants := []*config.ScrapeConfig{ + { + JobName: "prometheus", + ScrapeTimeout: model.Duration(15 * time.Second), + }, + { + JobName: "httpd", + ScrapeTimeout: model.Duration(15 * time.Second), + }, + { + JobName: "prometheus", + ScrapeTimeout: model.Duration(5 * time.Second), + }, + { + JobName: "prometheus", + MetricsPath: "/metrics", + }, + { + JobName: "prometheus", + MetricsPath: "/metrics2", + }, + { + JobName: "prometheus", + ScrapeTimeout: model.Duration(5 * time.Second), + MetricsPath: "/metrics2", + }, + { + JobName: "prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + MetricsPath: "/metrics2", + }, + { + JobName: "prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + SampleLimit: 1000, + MetricsPath: "/metrics2", + }, + } + + match := [][]int{ + {0, 2}, + {4, 5}, + {4, 6}, + {4, 7}, + {5, 6}, + {5, 7}, + {6, 7}, + } + noMatch := [][]int{ + {1, 2}, + {0, 4}, + {3, 4}, + } + + for i, m := range match { + require.True(t, reusableCache(variants[m[0]], variants[m[1]]), "match test %d", i) + require.True(t, reusableCache(variants[m[1]], variants[m[0]]), "match test %d", i) + require.True(t, reusableCache(variants[m[1]], variants[m[1]]), "match test %d", i) + require.True(t, reusableCache(variants[m[0]], variants[m[0]]), "match test %d", i) + } + for i, m := range noMatch { + require.False(t, reusableCache(variants[m[0]], variants[m[1]]), "not match test %d", i) + require.False(t, reusableCache(variants[m[1]], variants[m[0]]), "not match test %d", i) + } +} + +func TestReuseScrapeCache(t *testing.T) { + var ( + app = &nopAppendable{} + cfg = &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeTimeout: model.Duration(5 * time.Second), + ScrapeInterval: model.Duration(5 * time.Second), + MetricsPath: "/metrics", + } + sp, _ = newScrapePool(cfg, app, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + t1 = &Target{ + discoveredLabels: labels.FromStrings("labelNew", "nameNew", "labelNew1", "nameNew1", "labelNew2", "nameNew2"), + } + proxyURL, _ = url.Parse("http://localhost:2128") + ) + defer sp.stop() + sp.sync([]*Target{t1}) + + steps := []struct { + keep bool + newConfig *config.ScrapeConfig + }{ + { + keep: true, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(5 * time.Second), + MetricsPath: "/metrics", + }, + }, + { + keep: false, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics2", + }, + }, + { + keep: true, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + SampleLimit: 400, + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics2", + }, + }, + { + keep: false, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + HonorTimestamps: true, + 
SampleLimit: 400, + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics2", + }, + }, + { + keep: true, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + HonorTimestamps: true, + SampleLimit: 400, + HTTPClientConfig: config_util.HTTPClientConfig{ + ProxyConfig: config_util.ProxyConfig{ProxyURL: config_util.URL{URL: proxyURL}}, + }, + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics2", + }, + }, + { + keep: false, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + HonorTimestamps: true, + HonorLabels: true, + SampleLimit: 400, + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics2", + }, + }, + { + keep: false, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics", + LabelLimit: 1, + }, + }, + { + keep: false, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics", + LabelLimit: 15, + }, + }, + { + keep: false, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics", + LabelLimit: 15, + LabelNameLengthLimit: 5, + }, + }, + { + keep: false, + newConfig: &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeInterval: model.Duration(5 * time.Second), + ScrapeTimeout: model.Duration(15 * time.Second), + MetricsPath: "/metrics", + LabelLimit: 15, + LabelNameLengthLimit: 5, + LabelValueLengthLimit: 7, + }, + }, + } + + cacheAddr := func(sp *scrapePool) map[uint64]string { + r := make(map[uint64]string) + for fp, l := range sp.loops { + r[fp] = fmt.Sprintf("%p", l.getCache()) + } + return r + } + + for i, s := range steps { + initCacheAddr := cacheAddr(sp) + sp.reload(s.newConfig) + for fp, newCacheAddr := range cacheAddr(sp) { + if s.keep { + require.Equal(t, initCacheAddr[fp], newCacheAddr, "step %d: old cache and new cache are not the same", i) + } else { + require.NotEqual(t, initCacheAddr[fp], newCacheAddr, "step %d: old cache and new cache are the same", i) + } + } + initCacheAddr = cacheAddr(sp) + sp.reload(s.newConfig) + for fp, newCacheAddr := range cacheAddr(sp) { + require.Equal(t, initCacheAddr[fp], newCacheAddr, "step %d: reloading the exact config invalidates the cache", i) + } + } +} + +func TestScrapeAddFast(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, &scraperShim{}, s.Appender, 0) + defer cancel() + + slApp := sl.appender(ctx) + _, _, _, err := sl.append(slApp, []byte("up 1\n"), "", time.Time{}) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + // Poison the cache. There is just one entry, and one series in the + // storage. Changing the ref will create a 'not found' error. 
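+ // The follow-up append below must still succeed even though every cached ref is now stale.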
+ for _, v := range sl.getCache().series { + v.ref++ + } + + slApp = sl.appender(ctx) + _, _, _, err = sl.append(slApp, []byte("up 1\n"), "", time.Time{}.Add(time.Second)) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) +} + +func TestReuseCacheRace(t *testing.T) { + var ( + app = &nopAppendable{} + cfg = &config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeTimeout: model.Duration(5 * time.Second), + ScrapeInterval: model.Duration(5 * time.Second), + MetricsPath: "/metrics", + } + buffers = pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }) + sp, _ = newScrapePool(cfg, app, 0, nil, buffers, &Options{}, newTestScrapeMetrics(t)) + t1 = &Target{ + discoveredLabels: labels.FromStrings("labelNew", "nameNew"), + } + ) + defer sp.stop() + sp.sync([]*Target{t1}) + + start := time.Now() + for i := uint(1); i > 0; i++ { + if time.Since(start) > 5*time.Second { + break + } + sp.reload(&config.ScrapeConfig{ + JobName: "Prometheus", + ScrapeTimeout: model.Duration(1 * time.Millisecond), + ScrapeInterval: model.Duration(1 * time.Millisecond), + MetricsPath: "/metrics", + SampleLimit: i, + }) + } +} + +func TestCheckAddError(t *testing.T) { + var appErrs appendErrors + sl := scrapeLoop{l: log.NewNopLogger(), metrics: newTestScrapeMetrics(t)} + sl.checkAddError(nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) + require.Equal(t, 1, appErrs.numOutOfOrder) +} + +func TestScrapeReportSingleAppender(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + var ( + signal = make(chan struct{}, 1) + scraper = &scraperShim{} + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, s.Appender, 10*time.Millisecond) + + numScrapes := 0 + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes%4 == 0 { + return fmt.Errorf("scrape failed") + } + w.Write([]byte("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n")) + return nil + } + + go func() { + sl.run(nil) + signal <- struct{}{} + }() + + start := time.Now() + for time.Since(start) < 3*time.Second { + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".+")) + + c := 0 + for series.Next() { + i := series.At().Iterator(nil) + for i.Next() != chunkenc.ValNone { + c++ + } + } + + require.Equal(t, 0, c%9, "Appended samples not as expected: %d", c) + q.Close() + } + cancel() + + select { + case <-signal: + case <-time.After(5 * time.Second): + require.FailNow(t, "Scrape wasn't stopped.") + } +} + +func TestScrapeReportLimit(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + cfg := &config.ScrapeConfig{ + JobName: "test", + SampleLimit: 5, + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + } + + var ( + scrapes int + scrapedTwice = make(chan bool) + ) + + ts := newHTTPTestServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, "metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") + scrapes++ + if scrapes == 2 { + close(scrapedTwice) + } + })) + defer ts.Close() + defer SetDefaultGathererHandler(nil) + + sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: 
[]model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped twice") + case <-scrapedTwice: + // If the target has been scraped twice, report samples from the first + // scrape have been inserted in the database. + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "up")) + + var found bool + for series.Next() { + i := series.At().Iterator(nil) + for i.Next() == chunkenc.ValFloat { + _, v := i.At() + require.Equal(t, 1.0, v) + found = true + } + } + + require.True(t, found) +} + +func TestScrapeLoopLabelLimit(t *testing.T) { + tests := []struct { + title string + scrapeLabels string + discoveryLabels []string + labelLimits labelLimits + expectErr bool + }{ + { + title: "Valid number of labels", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelLimit: 5}, + expectErr: false, + }, { + title: "Too many labels", + scrapeLabels: `metric{l1="1", l2="2", l3="3", l4="4", l5="5", l6="6"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelLimit: 5}, + expectErr: true, + }, { + title: "Too many labels including discovery labels", + scrapeLabels: `metric{l1="1", l2="2", l3="3", l4="4"} 0`, + discoveryLabels: []string{"l5", "5", "l6", "6"}, + labelLimits: labelLimits{labelLimit: 5}, + expectErr: true, + }, { + title: "Valid labels name length", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelNameLengthLimit: 10}, + expectErr: false, + }, { + title: "Label name too long", + scrapeLabels: `metric{label_name_too_long="0"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelNameLengthLimit: 10}, + expectErr: true, + }, { + title: "Discovery label name too long", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: []string{"label_name_too_long", "0"}, + labelLimits: labelLimits{labelNameLengthLimit: 10}, + expectErr: true, + }, { + title: "Valid labels value length", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelValueLengthLimit: 10}, + expectErr: false, + }, { + title: "Label value too long", + scrapeLabels: `metric{l1="label_value_too_long"} 0`, + discoveryLabels: nil, + labelLimits: labelLimits{labelValueLengthLimit: 10}, + expectErr: true, + }, { + title: "Discovery label value too long", + scrapeLabels: `metric{l1="1", l2="2"} 0`, + discoveryLabels: []string{"l1", "label_value_too_long"}, + labelLimits: labelLimits{labelValueLengthLimit: 10}, + expectErr: true, + }, + } + + for _, test := range tests { + app := &collectResultAppender{} + + discoveryLabels := &Target{ + labels: labels.FromStrings(test.discoveryLabels...), + } + + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.sampleMutator = func(l labels.Labels) labels.Labels { + return mutateSampleLabels(l, discoveryLabels, false, nil) + } + sl.reportSampleMutator = func(l labels.Labels) labels.Labels { + return mutateReportSampleLabels(l, discoveryLabels) + } + sl.labelLimits = &test.labelLimits + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(test.scrapeLabels), "", time.Now()) + + t.Logf("Test:%s", test.title) + if 
test.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + } + } +} + +func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { + interval, _ := model.ParseDuration("2s") + timeout, _ := model.ParseDuration("500ms") + config := &config.ScrapeConfig{ + ScrapeInterval: interval, + ScrapeTimeout: timeout, + RelabelConfigs: []*relabel.Config{ + { + SourceLabels: model.LabelNames{model.ScrapeIntervalLabel}, + Regex: relabel.MustNewRegexp("2s"), + Replacement: "3s", + TargetLabel: model.ScrapeIntervalLabel, + Action: relabel.Replace, + }, + { + SourceLabels: model.LabelNames{model.ScrapeTimeoutLabel}, + Regex: relabel.MustNewRegexp("500ms"), + Replacement: "750ms", + TargetLabel: model.ScrapeTimeoutLabel, + Action: relabel.Replace, + }, + }, + } + sp, _ := newScrapePool(config, &nopAppendable{}, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + tgts := []*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: "127.0.0.1:9090"}}, + }, + } + + sp.Sync(tgts) + defer sp.stop() + + require.Equal(t, "3s", sp.ActiveTargets()[0].labels.Get(model.ScrapeIntervalLabel)) + require.Equal(t, "750ms", sp.ActiveTargets()[0].labels.Get(model.ScrapeTimeoutLabel)) +} + +// Testing whether we can remove trailing .0 from histogram 'le' and summary 'quantile' labels. +func TestLeQuantileReLabel(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + MetricRelabelConfigs: []*relabel.Config{ + { + SourceLabels: model.LabelNames{"le", "__name__"}, + Regex: relabel.MustNewRegexp("(\\d+)\\.0+;.*_bucket"), + Replacement: relabel.DefaultRelabelConfig.Replacement, + Separator: relabel.DefaultRelabelConfig.Separator, + TargetLabel: "le", + Action: relabel.Replace, + }, + { + SourceLabels: model.LabelNames{"quantile"}, + Regex: relabel.MustNewRegexp("(\\d+)\\.0+"), + Replacement: relabel.DefaultRelabelConfig.Replacement, + Separator: relabel.DefaultRelabelConfig.Separator, + TargetLabel: "quantile", + Action: relabel.Replace, + }, + }, + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + } + + metricsText := ` +# HELP test_histogram This is a histogram with default buckets +# TYPE test_histogram histogram +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.005"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.01"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.025"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.05"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.1"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.25"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.5"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="1.0"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="2.5"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="5.0"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="10.0"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="+Inf"} 0 +test_histogram_sum{address="0.0.0.0",port="5001"} 0 +test_histogram_count{address="0.0.0.0",port="5001"} 0 +# HELP test_summary Number of inflight requests sampled at a regular interval. Quantile buckets keep track of inflight requests over the last 60s. 
+# TYPE test_summary summary +test_summary{quantile="0.5"} 0 +test_summary{quantile="0.9"} 0 +test_summary{quantile="0.95"} 0 +test_summary{quantile="0.99"} 0 +test_summary{quantile="1.0"} 1 +test_summary_sum 1 +test_summary_count 199 +` + + // The expected "le" values do not have the trailing ".0". + expectedLeValues := []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1", "2.5", "5", "10", "+Inf"} + + // The expected "quantile" values do not have the trailing ".0". + expectedQuantileValues := []string{"0.5", "0.9", "0.95", "0.99", "1"} + + scrapeCount := 0 + scraped := make(chan bool) + + ts := newHTTPTestServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, metricsText) + scrapeCount++ + if scrapeCount > 2 { + close(scraped) + } + })) + defer ts.Close() + defer SetDefaultGathererHandler(nil) + + sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Len(t, sp.ActiveTargets(), 1) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := simpleStorage.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + checkValues := func(labelName string, expectedValues []string, series storage.SeriesSet) { + foundLeValues := map[string]bool{} + + for series.Next() { + s := series.At() + v := s.Labels().Get(labelName) + require.NotContains(t, foundLeValues, v, "duplicate label value found") + foundLeValues[v] = true + } + + require.Equal(t, len(expectedValues), len(foundLeValues), "number of label values not as expected") + for _, v := range expectedValues { + require.Contains(t, foundLeValues, v, "label value not found") + } + } + + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "test_histogram_bucket")) + checkValues("le", expectedLeValues, series) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "test_summary")) + checkValues("quantile", expectedQuantileValues, series) +} + +func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { + appender := &collectResultAppender{} + var ( + signal = make(chan struct{}, 1) + scraper = &scraperShim{} + app = func(ctx context.Context) storage.Appender { return appender } + ) + + ctx, cancel := context.WithCancel(context.Background()) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + sl.trackTimestampsStaleness = true + // Succeed once, several failures, then stop. + numScrapes := 0 + + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + + switch numScrapes { + case 1: + w.Write([]byte(fmt.Sprintf("metric_a 42 %d\n", time.Now().UnixNano()/int64(time.Millisecond)))) + return nil + case 5: + cancel() + } + return errors.New("scrape failed") + } + + go func() { + sl.run(nil) + signal <- struct{}{} + }() + + select { + case <-signal: + case <-time.After(5 * time.Second): + t.Fatalf("Scrape wasn't stopped.") + } + + // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for + // each scrape successful or not. 
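+	// That is 1 scraped sample + 1 stale marker + 5 report series x 5 scrapes = 27 floats.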
+ require.Len(t, appender.resultFloats, 27, "Appended samples not as expected:\n%s", appender) + require.Equal(t, 42.0, appender.resultFloats[0].f, "Appended first sample not as expected") + require.True(t, value.IsStaleNaN(appender.resultFloats[6].f), + "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(appender.resultFloats[6].f)) +} + +func TestScrapeLoopCompression(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + metricsText := makeTestMetrics(10) + + for _, tc := range []struct { + enableCompression bool + acceptEncoding string + }{ + { + enableCompression: true, + acceptEncoding: "gzip", + }, + { + enableCompression: false, + acceptEncoding: "identity", + }, + } { + t.Run(fmt.Sprintf("compression=%v,acceptEncoding=%s", tc.enableCompression, tc.acceptEncoding), func(t *testing.T) { + scraped := make(chan bool) + + ts := newHTTPTestServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, tc.acceptEncoding, r.Header.Get("Accept-Encoding"), "invalid value of the Accept-Encoding header") + fmt.Fprint(w, metricsText) + close(scraped) + })) + defer ts.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + EnableCompression: tc.enableCompression, + } + + sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Len(t, sp.ActiveTargets(), 1) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + }) + } +} + +func TestPickSchema(t *testing.T) { + tcs := []struct { + factor float64 + schema int32 + }{ + { + factor: 65536, + schema: -4, + }, + { + factor: 256, + schema: -3, + }, + { + factor: 16, + schema: -2, + }, + { + factor: 4, + schema: -1, + }, + { + factor: 2, + schema: 0, + }, + { + factor: 1.4, + schema: 1, + }, + { + factor: 1.1, + schema: 2, + }, + { + factor: 1.09, + schema: 3, + }, + { + factor: 1.04, + schema: 4, + }, + { + factor: 1.02, + schema: 5, + }, + { + factor: 1.01, + schema: 6, + }, + { + factor: 1.005, + schema: 7, + }, + { + factor: 1.002, + schema: 8, + }, + // The default value of native_histogram_min_bucket_factor + { + factor: 0, + schema: 8, + }, + } + + for _, tc := range tcs { + schema := pickSchema(tc.factor) + require.Equal(t, tc.schema, schema) + } +} + +func BenchmarkTargetScraperGzip(b *testing.B) { + scenarios := []struct { + metricsCount int + body []byte + }{ + {metricsCount: 1}, + {metricsCount: 100}, + {metricsCount: 1000}, + {metricsCount: 10000}, + {metricsCount: 100000}, + } + + for i := 0; i < len(scenarios); i++ { + var buf bytes.Buffer + var name string + gw := gzip.NewWriter(&buf) + for j := 0; j < scenarios[i].metricsCount; j++ { + name = fmt.Sprintf("go_memstats_alloc_bytes_total_%d", j) + fmt.Fprintf(gw, "# HELP %s Total number of bytes allocated, even if freed.\n", name) + fmt.Fprintf(gw, "# TYPE %s counter\n", name) + fmt.Fprintf(gw, "%s %d\n", name, i*j) + } + gw.Close() + scenarios[i].body = buf.Bytes() + } + + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", `text/plain; 
version=0.0.4`) + w.Header().Set("Content-Encoding", "gzip") + for _, scenario := range scenarios { + if strconv.Itoa(scenario.metricsCount) == r.URL.Query()["count"][0] { + w.Write(scenario.body) + return + } + } + w.WriteHeader(http.StatusBadRequest) + }) + + server := httptest.NewServer(handler) + defer server.Close() + + serverURL, err := url.Parse(server.URL) + if err != nil { + panic(err) + } + + client, err := config_util.NewClientFromConfig(config_util.DefaultHTTPClientConfig, "test_job") + if err != nil { + panic(err) + } + + for _, scenario := range scenarios { + b.Run(fmt.Sprintf("metrics=%d", scenario.metricsCount), func(b *testing.B) { + ts := newScraper(&targetScraper{ + Target: &Target{ + labels: labels.FromStrings( + model.SchemeLabel, serverURL.Scheme, + model.AddressLabel, serverURL.Host, + ), + params: url.Values{"count": []string{strconv.Itoa(scenario.metricsCount)}}, + }, + client: client, + timeout: time.Second, + }) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err = ts.scrape(context.Background()) + require.NoError(b, err) + } + }) + } +} + +// When a scrape contains multiple instances for the same time series we should increment +// prometheus_target_scrapes_sample_duplicate_timestamp_total metric. +func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { + ctx, sl := simpleTestScrapeLoop(t) + + slApp := sl.appender(ctx) + total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\ntest_metric 2\ntest_metric 3\n"), "", time.Time{}) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 1, seriesAdded) + require.Equal(t, 2.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) + + slApp = sl.appender(ctx) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\ntest_metric 1\ntest_metric 1\n"), "", time.Time{}) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 0, seriesAdded) + require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) + + // When different timestamps are supplied, multiple samples are accepted. + slApp = sl.appender(ctx) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1 1001\ntest_metric 1 1002\ntest_metric 1 1003\n"), "", time.Time{}) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 0, seriesAdded) + // Metric is not higher than last time. + require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) +} + +// This tests running a full scrape loop and checking that the scrape option +// `native_histogram_min_bucket_factor` is used correctly. +func TestNativeHistogramMaxSchemaSet(t *testing.T) { + testcases := map[string]struct { + minBucketFactor string + expectedSchema int32 + }{ + "min factor not specified": { + minBucketFactor: "", + expectedSchema: 3, // Factor 1.09. + }, + "min factor 1": { + minBucketFactor: "native_histogram_min_bucket_factor: 1", + expectedSchema: 3, // Factor 1.09. + }, + "min factor 2": { + minBucketFactor: "native_histogram_min_bucket_factor: 2", + expectedSchema: 0, // Factor 2.00. 
+ }, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + testNativeHistogramMaxSchemaSet(t, tc.minBucketFactor, tc.expectedSchema) + }) + } +} + +func testNativeHistogramMaxSchemaSet(t *testing.T, minBucketFactor string, expectedSchema int32) { + // Create a ProtoBuf message to serve as a Prometheus metric. + nativeHistogram := prometheus.NewHistogram( + prometheus.HistogramOpts{ + Namespace: "testing", + Name: "example_native_histogram", + Help: "This is used for testing", + NativeHistogramBucketFactor: 1.1, + NativeHistogramMaxBucketNumber: 100, + }, + ) + registry := prometheus.NewRegistry() + registry.Register(nativeHistogram) + nativeHistogram.Observe(1.0) + nativeHistogram.Observe(1.0) + nativeHistogram.Observe(1.0) + nativeHistogram.Observe(10.0) // in different bucket since > 1*1.1. + nativeHistogram.Observe(10.0) + + gathered, err := registry.Gather() + require.NoError(t, err) + require.NotEmpty(t, gathered) + + histogramMetricFamily := gathered[0] + buffer := protoMarshalDelimited(t, histogramMetricFamily) + + // Create a HTTP server to serve /metrics via ProtoBuf + metricsServer := newHTTPTestServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) + w.Write(buffer) + })) + defer metricsServer.Close() + + // Create a scrape loop with the HTTP server as the target. + configStr := fmt.Sprintf(` +global: + scrape_interval: 1s + scrape_timeout: 1s +scrape_configs: + - job_name: test + %s + static_configs: + - targets: [%s] +`, minBucketFactor, strings.ReplaceAll(metricsServer.URL, "http://", "")) + + s := teststorage.New(t) + defer s.Close() + s.DB.EnableNativeHistograms() + reg := prometheus.NewRegistry() + + mng, err := NewManager(&Options{EnableNativeHistogramsIngestion: true}, nil, s, reg) + require.NoError(t, err) + cfg, err := config.Load(configStr, false, log.NewNopLogger()) + require.NoError(t, err) + mng.ApplyConfig(cfg) + tsets := make(chan map[string][]*targetgroup.Group) + go func() { + err = mng.Run(tsets) + require.NoError(t, err) + }() + defer mng.Stop() + + // Get the static targets and apply them to the scrape manager. + require.Len(t, cfg.ScrapeConfigs, 1) + scrapeCfg := cfg.ScrapeConfigs[0] + require.Len(t, scrapeCfg.ServiceDiscoveryConfigs, 1) + staticDiscovery, ok := scrapeCfg.ServiceDiscoveryConfigs[0].(discovery.StaticConfig) + require.True(t, ok) + require.Len(t, staticDiscovery, 1) + tsets <- map[string][]*targetgroup.Group{"test": staticDiscovery} + + // Wait for the scrape loop to scrape the target. + require.Eventually(t, func() bool { + q, err := s.Querier(0, math.MaxInt64) + require.NoError(t, err) + seriesS := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "__name__", "testing_example_native_histogram")) + countSeries := 0 + for seriesS.Next() { + countSeries++ + } + return countSeries > 0 + }, 15*time.Second, 100*time.Millisecond) + + // Check that native histogram schema is as expected. 
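+	// The relative growth factor of native histogram buckets is 2^(2^-schema), so a
+	// min bucket factor of 2 forces the schema down to 0, while a factor of 1 (or no
+	// factor at all) leaves the exposed schema-3 histogram (factor ~1.09) untouched.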
+ q, err := s.Querier(0, math.MaxInt64) + require.NoError(t, err) + seriesS := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "__name__", "testing_example_native_histogram")) + histogramSamples := []*histogram.Histogram{} + for seriesS.Next() { + series := seriesS.At() + it := series.Iterator(nil) + for vt := it.Next(); vt != chunkenc.ValNone; vt = it.Next() { + if vt != chunkenc.ValHistogram { + // don't care about other samples + continue + } + _, h := it.AtHistogram(nil) + histogramSamples = append(histogramSamples, h) + } + } + require.NoError(t, seriesS.Err()) + require.NotEmpty(t, histogramSamples) + for _, h := range histogramSamples { + require.Equal(t, expectedSchema, h.Schema) + } +} diff --git a/pkg/promotel/prometheusreceiver/scrape/target.go b/pkg/promotel/prometheusreceiver/scrape/target.go new file mode 100644 index 000000000..a2622a1a6 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/target.go @@ -0,0 +1,585 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package scrape + +import ( + "errors" + "fmt" + "hash/fnv" + "net" + "net/url" + "strings" + "sync" + "time" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/storage" +) + +// TargetHealth describes the health state of a target. +type TargetHealth string + +// The possible health states of a target based on the last performed scrape. +const ( + HealthUnknown TargetHealth = "unknown" + HealthGood TargetHealth = "up" + HealthBad TargetHealth = "down" +) + +// Target refers to a singular HTTP or HTTPS endpoint. +type Target struct { + // Labels before any processing. + discoveredLabels labels.Labels + // Any labels that are added to this target and its metrics. + labels labels.Labels + // Additional URL parameters that are part of the target URL. + params url.Values + + mtx sync.RWMutex + lastError error + lastScrape time.Time + lastScrapeDuration time.Duration + health TargetHealth + metadata MetricMetadataStore +} + +// NewTarget creates a reasonably configured target for querying. +func NewTarget(labels, discoveredLabels labels.Labels, params url.Values) *Target { + return &Target{ + labels: labels, + discoveredLabels: discoveredLabels, + params: params, + health: HealthUnknown, + } +} + +func (t *Target) String() string { + return t.URL().String() +} + +// MetricMetadataStore represents a storage for metadata. +type MetricMetadataStore interface { + ListMetadata() []MetricMetadata + GetMetadata(metric string) (MetricMetadata, bool) + SizeMetadata() int + LengthMetadata() int +} + +// MetricMetadata is a piece of metadata for a metric. 
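+// (The backing MetricMetadataStore is typically the scrape loop's cache, installed
+// on the target via SetMetadataStore while scraping.)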
+type MetricMetadata struct { + Metric string + Type model.MetricType + Help string + Unit string +} + +func (t *Target) ListMetadata() []MetricMetadata { + t.mtx.RLock() + defer t.mtx.RUnlock() + + if t.metadata == nil { + return nil + } + return t.metadata.ListMetadata() +} + +func (t *Target) SizeMetadata() int { + t.mtx.RLock() + defer t.mtx.RUnlock() + + if t.metadata == nil { + return 0 + } + + return t.metadata.SizeMetadata() +} + +func (t *Target) LengthMetadata() int { + t.mtx.RLock() + defer t.mtx.RUnlock() + + if t.metadata == nil { + return 0 + } + + return t.metadata.LengthMetadata() +} + +// GetMetadata returns type and help metadata for the given metric. +func (t *Target) GetMetadata(metric string) (MetricMetadata, bool) { + t.mtx.RLock() + defer t.mtx.RUnlock() + + if t.metadata == nil { + return MetricMetadata{}, false + } + return t.metadata.GetMetadata(metric) +} + +func (t *Target) SetMetadataStore(s MetricMetadataStore) { + t.mtx.Lock() + defer t.mtx.Unlock() + t.metadata = s +} + +// hash returns an identifying hash for the target. +func (t *Target) hash() uint64 { + h := fnv.New64a() + + h.Write([]byte(fmt.Sprintf("%016d", t.labels.Hash()))) + h.Write([]byte(t.URL().String())) + + return h.Sum64() +} + +// offset returns the time until the next scrape cycle for the target. +// It includes the global server offsetSeed for scrapes from multiple Prometheus to try to be at different times. +func (t *Target) offset(interval time.Duration, offsetSeed uint64) time.Duration { + now := time.Now().UnixNano() + + // Base is a pinned to absolute time, no matter how often offset is called. + var ( + base = int64(interval) - now%int64(interval) + offset = (t.hash() ^ offsetSeed) % uint64(interval) + next = base + int64(offset) + ) + + if next > int64(interval) { + next -= int64(interval) + } + return time.Duration(next) +} + +// Labels returns a copy of the set of all public labels of the target. +func (t *Target) Labels(b *labels.ScratchBuilder) labels.Labels { + b.Reset() + t.labels.Range(func(l labels.Label) { + if !strings.HasPrefix(l.Name, model.ReservedLabelPrefix) { + b.Add(l.Name, l.Value) + } + }) + return b.Labels() +} + +// LabelsRange calls f on each public label of the target. +func (t *Target) LabelsRange(f func(l labels.Label)) { + t.labels.Range(func(l labels.Label) { + if !strings.HasPrefix(l.Name, model.ReservedLabelPrefix) { + f(l) + } + }) +} + +// DiscoveredLabels returns a copy of the target's labels before any processing. +func (t *Target) DiscoveredLabels() labels.Labels { + t.mtx.Lock() + defer t.mtx.Unlock() + return t.discoveredLabels.Copy() +} + +// SetDiscoveredLabels sets new DiscoveredLabels. +func (t *Target) SetDiscoveredLabels(l labels.Labels) { + t.mtx.Lock() + defer t.mtx.Unlock() + t.discoveredLabels = l +} + +// URL returns a copy of the target's URL. +func (t *Target) URL() *url.URL { + params := url.Values{} + + for k, v := range t.params { + params[k] = make([]string, len(v)) + copy(params[k], v) + } + t.labels.Range(func(l labels.Label) { + if !strings.HasPrefix(l.Name, model.ParamLabelPrefix) { + return + } + ks := l.Name[len(model.ParamLabelPrefix):] + + if len(params[ks]) > 0 { + params[ks][0] = l.Value + } else { + params[ks] = []string{l.Value} + } + }) + + return &url.URL{ + Scheme: t.labels.Get(model.SchemeLabel), + Host: t.labels.Get(model.AddressLabel), + Path: t.labels.Get(model.MetricsPathLabel), + RawQuery: params.Encode(), + } +} + +// Report sets target data about the last scrape. 
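+// The scrape loop is expected to call this after every scrape attempt; it records
+// the health, last error and timing reported for the target.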
+func (t *Target) Report(start time.Time, dur time.Duration, err error) { + t.mtx.Lock() + defer t.mtx.Unlock() + + if err == nil { + t.health = HealthGood + } else { + t.health = HealthBad + } + + t.lastError = err + t.lastScrape = start + t.lastScrapeDuration = dur +} + +// LastError returns the error encountered during the last scrape. +func (t *Target) LastError() error { + t.mtx.RLock() + defer t.mtx.RUnlock() + + return t.lastError +} + +// LastScrape returns the time of the last scrape. +func (t *Target) LastScrape() time.Time { + t.mtx.RLock() + defer t.mtx.RUnlock() + + return t.lastScrape +} + +// LastScrapeDuration returns how long the last scrape of the target took. +func (t *Target) LastScrapeDuration() time.Duration { + t.mtx.RLock() + defer t.mtx.RUnlock() + + return t.lastScrapeDuration +} + +// Health returns the last known health state of the target. +func (t *Target) Health() TargetHealth { + t.mtx.RLock() + defer t.mtx.RUnlock() + + return t.health +} + +// intervalAndTimeout returns the interval and timeout derived from +// the targets labels. +func (t *Target) intervalAndTimeout(defaultInterval, defaultDuration time.Duration) (time.Duration, time.Duration, error) { + t.mtx.RLock() + defer t.mtx.RUnlock() + + intervalLabel := t.labels.Get(model.ScrapeIntervalLabel) + interval, err := model.ParseDuration(intervalLabel) + if err != nil { + return defaultInterval, defaultDuration, fmt.Errorf("Error parsing interval label %q: %w", intervalLabel, err) + } + timeoutLabel := t.labels.Get(model.ScrapeTimeoutLabel) + timeout, err := model.ParseDuration(timeoutLabel) + if err != nil { + return defaultInterval, defaultDuration, fmt.Errorf("Error parsing timeout label %q: %w", timeoutLabel, err) + } + + return time.Duration(interval), time.Duration(timeout), nil +} + +// GetValue gets a label value from the entire label set. +func (t *Target) GetValue(name string) string { + return t.labels.Get(name) +} + +// Targets is a sortable list of targets. +type Targets []*Target + +func (ts Targets) Len() int { return len(ts) } +func (ts Targets) Less(i, j int) bool { return ts[i].URL().String() < ts[j].URL().String() } +func (ts Targets) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] } + +var ( + errSampleLimit = errors.New("sample limit exceeded") + errBucketLimit = errors.New("histogram bucket limit exceeded") +) + +// limitAppender limits the number of total appended samples in a batch. +type limitAppender struct { + storage.Appender + + limit int + i int +} + +func (app *limitAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { + if !value.IsStaleNaN(v) { + app.i++ + if app.i > app.limit { + return 0, errSampleLimit + } + } + ref, err := app.Appender.Append(ref, lset, t, v) + if err != nil { + return 0, err + } + return ref, nil +} + +type timeLimitAppender struct { + storage.Appender + + maxTime int64 +} + +func (app *timeLimitAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { + if t > app.maxTime { + return 0, storage.ErrOutOfBounds + } + + ref, err := app.Appender.Append(ref, lset, t, v) + if err != nil { + return 0, err + } + return ref, nil +} + +// bucketLimitAppender limits the number of total appended samples in a batch. 
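+// (Despite the wording above, the limit enforced here is on histogram buckets per
+// sample: AppendHistogram reduces the histogram's resolution while the exponential
+// schema allows it and only returns errBucketLimit when it cannot shrink further.)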
+type bucketLimitAppender struct { + storage.Appender + + limit int +} + +func (app *bucketLimitAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + // Return with an early error if the histogram has too many buckets and the + // schema is not exponential, in which case we can't reduce the resolution. + if len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit && !histogram.IsExponentialSchema(h.Schema) { + return 0, errBucketLimit + } + for len(h.PositiveBuckets)+len(h.NegativeBuckets) > app.limit { + if h.Schema <= histogram.ExponentialSchemaMin { + return 0, errBucketLimit + } + h = h.ReduceResolution(h.Schema - 1) + } + } + if fh != nil { + // Return with an early error if the histogram has too many buckets and the + // schema is not exponential, in which case we can't reduce the resolution. + if len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit && !histogram.IsExponentialSchema(fh.Schema) { + return 0, errBucketLimit + } + for len(fh.PositiveBuckets)+len(fh.NegativeBuckets) > app.limit { + if fh.Schema <= histogram.ExponentialSchemaMin { + return 0, errBucketLimit + } + fh = fh.ReduceResolution(fh.Schema - 1) + } + } + ref, err := app.Appender.AppendHistogram(ref, lset, t, h, fh) + if err != nil { + return 0, err + } + return ref, nil +} + +type maxSchemaAppender struct { + storage.Appender + + maxSchema int32 +} + +func (app *maxSchemaAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + if histogram.IsExponentialSchema(h.Schema) && h.Schema > app.maxSchema { + h = h.ReduceResolution(app.maxSchema) + } + } + if fh != nil { + if histogram.IsExponentialSchema(fh.Schema) && fh.Schema > app.maxSchema { + fh = fh.ReduceResolution(app.maxSchema) + } + } + ref, err := app.Appender.AppendHistogram(ref, lset, t, h, fh) + if err != nil { + return 0, err + } + return ref, nil +} + +// PopulateLabels builds a label set from the given label set and scrape configuration. +// It returns a label set before relabeling was applied as the second return value. +// Returns the original discovered label set found before relabelling was applied if the target is dropped during relabeling. +func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort bool) (res, orig labels.Labels, err error) { + // Copy labels into the labelset for the target if they are not set already. + scrapeLabels := []labels.Label{ + {Name: model.JobLabel, Value: cfg.JobName}, + {Name: model.ScrapeIntervalLabel, Value: cfg.ScrapeInterval.String()}, + {Name: model.ScrapeTimeoutLabel, Value: cfg.ScrapeTimeout.String()}, + {Name: model.MetricsPathLabel, Value: cfg.MetricsPath}, + {Name: model.SchemeLabel, Value: cfg.Scheme}, + } + + for _, l := range scrapeLabels { + if lb.Get(l.Name) == "" { + lb.Set(l.Name, l.Value) + } + } + // Encode scrape query parameters as labels. + for k, v := range cfg.Params { + if len(v) > 0 { + lb.Set(model.ParamLabelPrefix+k, v[0]) + } + } + + preRelabelLabels := lb.Labels() + keep := relabel.ProcessBuilder(lb, cfg.RelabelConfigs...) + + // Check if the target was dropped. 
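+	// A dropped target returns empty final labels together with the pre-relabel set,
+	// so callers can still surface it as a dropped target.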
+ if !keep { + return labels.EmptyLabels(), preRelabelLabels, nil + } + if v := lb.Get(model.AddressLabel); v == "" { + return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("no address") + } + + // addPort checks whether we should add a default port to the address. + // If the address is not valid, we don't append a port either. + addPort := func(s string) (string, string, bool) { + // If we can split, a port exists and we don't have to add one. + if host, port, err := net.SplitHostPort(s); err == nil { + return host, port, false + } + // If adding a port makes it valid, the previous error + // was not due to an invalid address and we can append a port. + _, _, err := net.SplitHostPort(s + ":1234") + return "", "", err == nil + } + + addr := lb.Get(model.AddressLabel) + scheme := lb.Get(model.SchemeLabel) + host, port, add := addPort(addr) + // If it's an address with no trailing port, infer it based on the used scheme + // unless the no-default-scrape-port feature flag is present. + if !noDefaultPort && add { + // Addresses reaching this point are already wrapped in [] if necessary. + switch scheme { + case "http", "": + addr += ":80" + case "https": + addr += ":443" + default: + return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("invalid scheme: %q", cfg.Scheme) + } + lb.Set(model.AddressLabel, addr) + } + + if noDefaultPort { + // If it's an address with a trailing default port and the + // no-default-scrape-port flag is present, remove the port. + switch port { + case "80": + if scheme == "http" { + lb.Set(model.AddressLabel, host) + } + case "443": + if scheme == "https" { + lb.Set(model.AddressLabel, host) + } + } + } + + if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil { + return labels.EmptyLabels(), labels.EmptyLabels(), err + } + + interval := lb.Get(model.ScrapeIntervalLabel) + intervalDuration, err := model.ParseDuration(interval) + if err != nil { + return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("error parsing scrape interval: %w", err) + } + if time.Duration(intervalDuration) == 0 { + return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("scrape interval cannot be 0") + } + + timeout := lb.Get(model.ScrapeTimeoutLabel) + timeoutDuration, err := model.ParseDuration(timeout) + if err != nil { + return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("error parsing scrape timeout: %w", err) + } + if time.Duration(timeoutDuration) == 0 { + return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("scrape timeout cannot be 0") + } + + if timeoutDuration > intervalDuration { + return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("scrape timeout cannot be greater than scrape interval (%q > %q)", timeout, interval) + } + + // Meta labels are deleted after relabelling. Other internal labels propagate to + // the target which decides whether they will be part of their label set. + lb.Range(func(l labels.Label) { + if strings.HasPrefix(l.Name, model.MetaLabelPrefix) { + lb.Del(l.Name) + } + }) + + // Default the instance label to the target address. + if v := lb.Get(model.InstanceLabel); v == "" { + lb.Set(model.InstanceLabel, addr) + } + + res = lb.Labels() + err = res.Validate(func(l labels.Label) error { + // Check label values are valid, drop the target if not. 
+ if !model.LabelValue(l.Value).IsValid() { + return fmt.Errorf("invalid label value for %q: %q", l.Name, l.Value) + } + return nil + }) + if err != nil { + return labels.EmptyLabels(), labels.EmptyLabels(), err + } + return res, preRelabelLabels, nil +} + +// TargetsFromGroup builds targets based on the given TargetGroup and config. +func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefaultPort bool, targets []*Target, lb *labels.Builder) ([]*Target, []error) { + targets = targets[:0] + failures := []error{} + + for i, tlset := range tg.Targets { + lb.Reset(labels.EmptyLabels()) + + for ln, lv := range tlset { + lb.Set(string(ln), string(lv)) + } + for ln, lv := range tg.Labels { + if _, ok := tlset[ln]; !ok { + lb.Set(string(ln), string(lv)) + } + } + + lset, origLabels, err := PopulateLabels(lb, cfg, noDefaultPort) + if err != nil { + failures = append(failures, fmt.Errorf("instance %d in group %s: %w", i, tg, err)) + } + if !lset.IsEmpty() || !origLabels.IsEmpty() { + targets = append(targets, NewTarget(lset, origLabels, cfg.Params)) + } + } + return targets, failures +} \ No newline at end of file diff --git a/pkg/promotel/prometheusreceiver/scrape/target_test.go b/pkg/promotel/prometheusreceiver/scrape/target_test.go new file mode 100644 index 000000000..75b62360f --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/target_test.go @@ -0,0 +1,632 @@ +package scrape + +import ( + "crypto/tls" + "crypto/x509" + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "os" + "strconv" + "strings" + "testing" + "time" + + config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" +) + +const ( + caCertPath = "testdata/ca.cer" +) + +func TestTargetLabels(t *testing.T) { + target := newTestTarget("example.com:80", 0, labels.FromStrings("job", "some_job", "foo", "bar")) + want := labels.FromStrings(model.JobLabel, "some_job", "foo", "bar") + b := labels.NewScratchBuilder(0) + got := target.Labels(&b) + require.Equal(t, want, got) + i := 0 + target.LabelsRange(func(l labels.Label) { + switch i { + case 0: + require.Equal(t, labels.Label{Name: "foo", Value: "bar"}, l) + case 1: + require.Equal(t, labels.Label{Name: model.JobLabel, Value: "some_job"}, l) + } + i++ + }) + require.Equal(t, 2, i) +} + +func TestTargetOffset(t *testing.T) { + interval := 10 * time.Second + offsetSeed := uint64(0) + + offsets := make([]time.Duration, 10000) + + // Calculate offsets for 10000 different targets. + for i := range offsets { + target := newTestTarget("example.com:80", 0, labels.FromStrings( + "label", strconv.Itoa(i), + )) + offsets[i] = target.offset(interval, offsetSeed) + } + + // Put the offsets into buckets and validate that they are all + // within bounds. + bucketSize := 1 * time.Second + buckets := make([]int, interval/bucketSize) + + for _, offset := range offsets { + require.InDelta(t, time.Duration(0), offset, float64(interval), "Offset %v out of bounds.", offset) + + bucket := offset / bucketSize + buckets[bucket]++ + } + + t.Log(buckets) + + // Calculate whether the number of targets per bucket + // does not differ more than a given tolerance. 
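+	// With 10000 offsets spread over ten 1s buckets the expected count per bucket is
+	// 1000, and each bucket must stay within the 15% tolerance of that average.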
+ avg := len(offsets) / len(buckets) + tolerance := 0.15 + + for _, bucket := range buckets { + diff := bucket - avg + if diff < 0 { + diff = -diff + } + + require.LessOrEqual(t, float64(diff)/float64(avg), tolerance, "Bucket out of tolerance bounds.") + } +} + +func TestTargetURL(t *testing.T) { + params := url.Values{ + "abc": []string{"foo", "bar", "baz"}, + "xyz": []string{"hoo"}, + } + labels := labels.FromMap(map[string]string{ + model.AddressLabel: "example.com:1234", + model.SchemeLabel: "https", + model.MetricsPathLabel: "/metricz", + "__param_abc": "overwrite", + "__param_cde": "huu", + }) + target := NewTarget(labels, labels, params) + + // The reserved labels are concatenated into a full URL. The first value for each + // URL query parameter can be set/modified via labels as well. + expectedParams := url.Values{ + "abc": []string{"overwrite", "bar", "baz"}, + "cde": []string{"huu"}, + "xyz": []string{"hoo"}, + } + expectedURL := &url.URL{ + Scheme: "https", + Host: "example.com:1234", + Path: "/metricz", + RawQuery: expectedParams.Encode(), + } + + require.Equal(t, expectedURL, target.URL()) +} + +func newTestTarget(targetURL string, _ time.Duration, lbls labels.Labels) *Target { + lb := labels.NewBuilder(lbls) + lb.Set(model.SchemeLabel, "http") + lb.Set(model.AddressLabel, strings.TrimPrefix(targetURL, "http://")) + lb.Set(model.MetricsPathLabel, "/metrics") + + return &Target{labels: lb.Labels()} +} + +func TestNewHTTPBearerToken(t *testing.T) { + server := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + expected := "Bearer 1234" + received := r.Header.Get("Authorization") + require.Equal(t, expected, received, "Authorization header was not set correctly.") + }, + ), + ) + defer server.Close() + + cfg := config_util.HTTPClientConfig{ + BearerToken: "1234", + } + c, err := config_util.NewClientFromConfig(cfg, "test") + require.NoError(t, err) + _, err = c.Get(server.URL) + require.NoError(t, err) +} + +func TestNewHTTPBearerTokenFile(t *testing.T) { + server := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + expected := "Bearer 12345" + received := r.Header.Get("Authorization") + require.Equal(t, expected, received, "Authorization header was not set correctly.") + }, + ), + ) + defer server.Close() + + cfg := config_util.HTTPClientConfig{ + BearerTokenFile: "testdata/bearertoken.txt", + } + c, err := config_util.NewClientFromConfig(cfg, "test") + require.NoError(t, err) + _, err = c.Get(server.URL) + require.NoError(t, err) +} + +func TestNewHTTPBasicAuth(t *testing.T) { + server := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + username, password, ok := r.BasicAuth() + require.True(t, ok, "Basic authorization header was not set correctly.") + require.Equal(t, "user", username) + require.Equal(t, "password123", password) + }, + ), + ) + defer server.Close() + + cfg := config_util.HTTPClientConfig{ + BasicAuth: &config_util.BasicAuth{ + Username: "user", + Password: "password123", + }, + } + c, err := config_util.NewClientFromConfig(cfg, "test") + require.NoError(t, err) + _, err = c.Get(server.URL) + require.NoError(t, err) +} + +func TestNewHTTPCACert(t *testing.T) { + server := httptest.NewUnstartedServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + w.Write([]byte{}) + }, + ), + ) + server.TLS = newTLSConfig("server", t) + server.StartTLS() + defer 
server.Close() + + cfg := config_util.HTTPClientConfig{ + TLSConfig: config_util.TLSConfig{ + CAFile: caCertPath, + }, + } + c, err := config_util.NewClientFromConfig(cfg, "test") + require.NoError(t, err) + _, err = c.Get(server.URL) + require.NoError(t, err) +} + +func TestNewHTTPClientCert(t *testing.T) { + server := httptest.NewUnstartedServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + w.Write([]byte{}) + }, + ), + ) + tlsConfig := newTLSConfig("server", t) + tlsConfig.ClientAuth = tls.RequireAndVerifyClientCert + tlsConfig.ClientCAs = tlsConfig.RootCAs + server.TLS = tlsConfig + server.StartTLS() + defer server.Close() + + cfg := config_util.HTTPClientConfig{ + TLSConfig: config_util.TLSConfig{ + CAFile: caCertPath, + CertFile: "testdata/client.cer", + KeyFile: "testdata/client.key", + }, + } + c, err := config_util.NewClientFromConfig(cfg, "test") + require.NoError(t, err) + _, err = c.Get(server.URL) + require.NoError(t, err) +} + +func TestNewHTTPWithServerName(t *testing.T) { + server := httptest.NewUnstartedServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + w.Write([]byte{}) + }, + ), + ) + server.TLS = newTLSConfig("servername", t) + server.StartTLS() + defer server.Close() + + cfg := config_util.HTTPClientConfig{ + TLSConfig: config_util.TLSConfig{ + CAFile: caCertPath, + ServerName: "prometheus.rocks", + }, + } + c, err := config_util.NewClientFromConfig(cfg, "test") + require.NoError(t, err) + _, err = c.Get(server.URL) + require.NoError(t, err) +} + +func TestNewHTTPWithBadServerName(t *testing.T) { + server := httptest.NewUnstartedServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + w.Write([]byte{}) + }, + ), + ) + server.TLS = newTLSConfig("servername", t) + server.StartTLS() + defer server.Close() + + cfg := config_util.HTTPClientConfig{ + TLSConfig: config_util.TLSConfig{ + CAFile: caCertPath, + ServerName: "badname", + }, + } + c, err := config_util.NewClientFromConfig(cfg, "test") + require.NoError(t, err) + _, err = c.Get(server.URL) + require.Error(t, err) +} + +func newTLSConfig(certName string, t *testing.T) *tls.Config { + tlsConfig := &tls.Config{} + caCertPool := x509.NewCertPool() + caCert, err := os.ReadFile(caCertPath) + require.NoError(t, err, "Couldn't read CA cert.") + caCertPool.AppendCertsFromPEM(caCert) + tlsConfig.RootCAs = caCertPool + tlsConfig.ServerName = "127.0.0.1" + certPath := fmt.Sprintf("testdata/%s.cer", certName) + keyPath := fmt.Sprintf("testdata/%s.key", certName) + cert, err := tls.LoadX509KeyPair(certPath, keyPath) + require.NoError(t, err, "Unable to use specified server cert (%s) & key (%v).", certPath, keyPath) + tlsConfig.Certificates = []tls.Certificate{cert} + return tlsConfig +} + +func TestNewClientWithBadTLSConfig(t *testing.T) { + cfg := config_util.HTTPClientConfig{ + TLSConfig: config_util.TLSConfig{ + CAFile: "testdata/nonexistent_ca.cer", + CertFile: "testdata/nonexistent_client.cer", + KeyFile: "testdata/nonexistent_client.key", + }, + } + _, err := config_util.NewClientFromConfig(cfg, "test") + require.Error(t, err) +} + +func TestTargetsFromGroup(t *testing.T) { + expectedError := "instance 0 in group : no address" + + cfg := config.ScrapeConfig{ + ScrapeTimeout: model.Duration(10 * time.Second), + ScrapeInterval: model.Duration(1 * time.Minute), + } + lb 
:= labels.NewBuilder(labels.EmptyLabels()) + targets, failures := TargetsFromGroup(&targetgroup.Group{Targets: []model.LabelSet{{}, {model.AddressLabel: "localhost:9090"}}}, &cfg, false, nil, lb) + require.Len(t, targets, 1) + require.Len(t, failures, 1) + require.EqualError(t, failures[0], expectedError) +} + +func BenchmarkTargetsFromGroup(b *testing.B) { + // Simulate Kubernetes service-discovery and use subset of rules from typical Prometheus config. + cfgText := ` +scrape_configs: + - job_name: job1 + scrape_interval: 15s + scrape_timeout: 10s + relabel_configs: + - source_labels: [__meta_kubernetes_pod_container_port_name] + separator: ; + regex: .*-metrics + replacement: $1 + action: keep + - source_labels: [__meta_kubernetes_pod_phase] + separator: ; + regex: Succeeded|Failed + replacement: $1 + action: drop + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_name] + separator: / + regex: (.*) + target_label: job + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: namespace + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_name] + separator: ; + regex: (.*) + target_label: pod + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_container_name] + separator: ; + regex: (.*) + target_label: container + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_container_name, + __meta_kubernetes_pod_container_port_name] + separator: ':' + regex: (.*) + target_label: instance + replacement: $1 + action: replace + - separator: ; + regex: (.*) + target_label: cluster + replacement: dev-us-central-0 + action: replace +` + config := loadConfiguration(b, cfgText) + for _, nTargets := range []int{1, 10, 100} { + b.Run(fmt.Sprintf("%d_targets", nTargets), func(b *testing.B) { + targets := []model.LabelSet{} + for i := 0; i < nTargets; i++ { + labels := model.LabelSet{ + model.AddressLabel: model.LabelValue(fmt.Sprintf("localhost:%d", i)), + "__meta_kubernetes_namespace": "some_namespace", + "__meta_kubernetes_pod_container_name": "some_container", + "__meta_kubernetes_pod_container_port_name": "http-metrics", + "__meta_kubernetes_pod_container_port_number": "80", + "__meta_kubernetes_pod_label_name": "some_name", + "__meta_kubernetes_pod_name": "some_pod", + "__meta_kubernetes_pod_phase": "Running", + } + // Add some more labels, because Kubernetes SD generates a lot + for i := 0; i < 10; i++ { + labels[model.LabelName(fmt.Sprintf("__meta_kubernetes_pod_label_extra%d", i))] = "a_label_abcdefgh" + labels[model.LabelName(fmt.Sprintf("__meta_kubernetes_pod_labelpresent_extra%d", i))] = "true" + } + targets = append(targets, labels) + } + var tgets []*Target + lb := labels.NewBuilder(labels.EmptyLabels()) + group := &targetgroup.Group{Targets: targets} + for i := 0; i < b.N; i++ { + tgets, _ = TargetsFromGroup(group, config.ScrapeConfigs[0], false, tgets, lb) + if len(targets) != nTargets { + b.Fatalf("Expected %d targets, got %d", nTargets, len(targets)) + } + } + }) + } +} + +func TestBucketLimitAppender(t *testing.T) { + example := histogram.Histogram{ + Schema: 0, + Count: 21, + Sum: 33, + ZeroThreshold: 0.001, + ZeroCount: 3, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + PositiveBuckets: []int64{3, 0, 0}, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + NegativeBuckets: []int64{3, 0, 0}, + } + + bigGap := histogram.Histogram{ + Schema: 0, + Count: 21, + 
Sum: 33, + ZeroThreshold: 0.001, + ZeroCount: 3, + PositiveSpans: []histogram.Span{ + {Offset: 1, Length: 1}, // in (1, 2] + {Offset: 2, Length: 1}, // in (8, 16] + }, + PositiveBuckets: []int64{1, 0}, // 1, 1 + } + + customBuckets := histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 33, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + PositiveBuckets: []int64{3, 0, 0}, + CustomValues: []float64{1, 2, 3}, + } + + cases := []struct { + h histogram.Histogram + limit int + expectError bool + expectBucketCount int + expectSchema int32 + }{ + { + h: example, + limit: 3, + expectError: true, + }, + { + h: example, + limit: 4, + expectError: false, + expectBucketCount: 4, + expectSchema: -1, + }, + { + h: example, + limit: 10, + expectError: false, + expectBucketCount: 6, + expectSchema: 0, + }, + { + h: bigGap, + limit: 1, + expectError: false, + expectBucketCount: 1, + expectSchema: -2, + }, + { + h: customBuckets, + limit: 2, + expectError: true, + }, + { + h: customBuckets, + limit: 3, + expectError: false, + expectBucketCount: 3, + expectSchema: histogram.CustomBucketsSchema, + }, + } + + resApp := &collectResultAppender{} + + for _, c := range cases { + for _, floatHisto := range []bool{true, false} { + t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { + app := &bucketLimitAppender{Appender: resApp, limit: c.limit} + ts := int64(10 * time.Minute / time.Millisecond) + lbls := labels.FromStrings("__name__", "sparse_histogram_series") + var err error + if floatHisto { + fh := c.h.Copy().ToFloat(nil) + _, err = app.AppendHistogram(0, lbls, ts, nil, fh) + if c.expectError { + require.Error(t, err) + } else { + require.Equal(t, c.expectSchema, fh.Schema) + require.Equal(t, c.expectBucketCount, len(fh.NegativeBuckets)+len(fh.PositiveBuckets)) + require.NoError(t, err) + } + } else { + h := c.h.Copy() + _, err = app.AppendHistogram(0, lbls, ts, h, nil) + if c.expectError { + require.Error(t, err) + } else { + require.Equal(t, c.expectSchema, h.Schema) + require.Equal(t, c.expectBucketCount, len(h.NegativeBuckets)+len(h.PositiveBuckets)) + require.NoError(t, err) + } + } + require.NoError(t, app.Commit()) + }) + } + } +} + +func TestMaxSchemaAppender(t *testing.T) { + example := histogram.Histogram{ + Schema: 0, + Count: 21, + Sum: 33, + ZeroThreshold: 0.001, + ZeroCount: 3, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + PositiveBuckets: []int64{3, 0, 0}, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + NegativeBuckets: []int64{3, 0, 0}, + } + + customBuckets := histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 33, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 3}, + }, + PositiveBuckets: []int64{3, 0, 0}, + CustomValues: []float64{1, 2, 3}, + } + + cases := []struct { + h histogram.Histogram + maxSchema int32 + expectSchema int32 + }{ + { + h: example, + maxSchema: -1, + expectSchema: -1, + }, + { + h: example, + maxSchema: 0, + expectSchema: 0, + }, + { + h: customBuckets, + maxSchema: -1, + expectSchema: histogram.CustomBucketsSchema, + }, + } + + resApp := &collectResultAppender{} + + for _, c := range cases { + for _, floatHisto := range []bool{true, false} { + t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { + app := &maxSchemaAppender{Appender: resApp, maxSchema: c.maxSchema} + ts := int64(10 * time.Minute / time.Millisecond) + lbls := labels.FromStrings("__name__", "sparse_histogram_series") + var err error + if 
floatHisto { + fh := c.h.Copy().ToFloat(nil) + _, err = app.AppendHistogram(0, lbls, ts, nil, fh) + require.Equal(t, c.expectSchema, fh.Schema) + require.NoError(t, err) + } else { + h := c.h.Copy() + _, err = app.AppendHistogram(0, lbls, ts, h, nil) + require.Equal(t, c.expectSchema, h.Schema) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + }) + } + } +} diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/bearertoken.txt b/pkg/promotel/prometheusreceiver/scrape/testdata/bearertoken.txt new file mode 100644 index 000000000..e56e15bb7 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/bearertoken.txt @@ -0,0 +1 @@ +12345 diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/ca.cer b/pkg/promotel/prometheusreceiver/scrape/testdata/ca.cer new file mode 100644 index 000000000..86f627a90 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/ca.cer @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV +BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg +Q29tcGFueSBMdGQxGzAZBgNVBAMMElByb21ldGhldXMgVGVzdCBDQTAeFw0xNTA4 +MDQxNDA5MjFaFw0yNTA4MDExNDA5MjFaMF8xCzAJBgNVBAYTAlhYMRUwEwYDVQQH +DAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQgQ29tcGFueSBMdGQxGzAZ +BgNVBAMMElByb21ldGhldXMgVGVzdCBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEP +ADCCAQoCggEBAOlSBU3yWpUELbhzizznR0hnAL7dbEHzfEtEc6N3PoSvMNcqrUVq +t4kjBRWzqkZ5uJVkzBPERKEBoOI9pWcrqtMTBkMzHJY2Ep7GHTab10e9KC2IFQT6 +FKP/jCYixaIVx3azEfajRJooD8r79FGoagWUfHdHyCFWJb/iLt8z8+S91kelSRMS +yB9M1ypWomzBz1UFXZp1oiNO5o7/dgXW4MgLUfC2obJ9j5xqpc6GkhWMW4ZFwEr/ +VLjuzxG9B8tLfQuhnXKGn1W8+WzZVWCWMD/sLfZfmjKaWlwcXzL51g8E+IEIBJqV +w51aMI6lDkcvAM7gLq1auLZMVXyKWSKw7XMCAwEAAaNQME4wHQYDVR0OBBYEFMz1 +BZnlqxJp2HiJSjHK8IsLrWYbMB8GA1UdIwQYMBaAFMz1BZnlqxJp2HiJSjHK8IsL +rWYbMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAI2iA3w3TK5J15Pu +e4fPFB4jxQqsbUwuyXbCCv/jKLeFNCD4BjM181WZEYjPMumeTBVzU3aF45LWQIG1 +0DJcrCL4mjMz9qgAoGqA7aDDXiJGbukMgYYsn7vrnVmrZH8T3E8ySlltr7+W578k +pJ5FxnbCroQwn0zLyVB3sFbS8E3vpBr3L8oy8PwPHhIScexcNVc3V6/m4vTZsXTH +U+vUm1XhDgpDcFMTg2QQiJbfpOYUkwIgnRDAT7t282t2KQWtnlqc3zwPQ1F/6Cpx +j19JeNsaF1DArkD7YlyKj/GhZLtHwFHG5cxznH0mLDJTW7bQvqqh2iQTeXmBk1lU +mM5lH/s= +-----END CERTIFICATE----- diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/ca.key b/pkg/promotel/prometheusreceiver/scrape/testdata/ca.key new file mode 100644 index 000000000..1db260037 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/ca.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpgIBAAKCAQEA6VIFTfJalQQtuHOLPOdHSGcAvt1sQfN8S0Rzo3c+hK8w1yqt +RWq3iSMFFbOqRnm4lWTME8REoQGg4j2lZyuq0xMGQzMcljYSnsYdNpvXR70oLYgV +BPoUo/+MJiLFohXHdrMR9qNEmigPyvv0UahqBZR8d0fIIVYlv+Iu3zPz5L3WR6VJ +ExLIH0zXKlaibMHPVQVdmnWiI07mjv92BdbgyAtR8Lahsn2PnGqlzoaSFYxbhkXA +Sv9UuO7PEb0Hy0t9C6GdcoafVbz5bNlVYJYwP+wt9l+aMppaXBxfMvnWDwT4gQgE +mpXDnVowjqUORy8AzuAurVq4tkxVfIpZIrDtcwIDAQABAoIBAQCcVDd3pYWpyLX1 +m31UnkX1rgYi3Gs3uTOznra4dSIvds6LrG2SUFGPEibLBql1NQNHHdVa/StakaPB +UrqraOe5K0sL5Ygm4S4Ssf1K5JoW2Be+gipLPmBsDcJSnwO6eUs/LfZAQd6qR2Nl +hvGJcQUwne/TYAYox/bdHWh4Zu/odz4NrZKZLbnXkdLLDEhZbjA0HpwJZ7NpMcB7 +Z6NayOm5dAZncfqBjY+3GNL0VjvDjwwYbESM8GkAbojMgcpODGk0h9arRWCP2RqT +SVgmiFI2mVT7sW1XLdVXmyCL2jzak7sktpbLbVgngwOrBmLO/m4NBftzcZrgvxj3 +YakCPH/hAoGBAP1v85pIxqWr5dFdRlOW0MG35ifL+YXpavcs233jGDHYNZefrR5q +Mw8eA20zwj41OdryqGh58nLYm3zYM0vPFrRJrzWYQfcWDmQELAylr9z9vsMj8gRq +IZQD6wzFmLi1PN2QDmovF+2y/CLAq03XK6FQlNsVQxubfjh4hcX5+nXDAoGBAOut +/pQaIBbIhaI8y3KjpizU24jxIkV8R/q1yE5V01YCl2OC5hEd4iZP14YLDRXLSHKT 
+e/dyJ/OEyTIzUeDg0ZF3ao9ugbWuASgrnrrdPEooi7C9n9PeaLFTK5oVZoVP2A7E +BwhSFW3VdEzQkdJczVE2jOY6JdBKMndjoDQnhT6RAoGBAL4WMO1gdnYeZ0JQJoZd +kPgrOZpR2DaDa3I3F+3k3enM0+2EmzE70E4fYcyPTLqh62H4LS4ngRx4sK7D7j2G +9u2EcsDNEXUE+wgzROK7hxtGysTMeiKrg8Hj6nFq53Bqp1s7SESGS/lCDPD398Rr +hdL5gJyN5waW6uXqJ9Pk+eFHAoGBAKV/YGcV1XTKSPT9ZgxRmM6ghq0qT1umA1Gt +t0QzBp2+Yhqx/+cDKhynMnxhZEXqoyw6HvJLSny5wSMsYJHeratNxRmFizZOQ2e3 +AdbMppqY0EdDUWnRI4lqExM3de+let4bj6irI3smSm3qhIvJOTCPcu/04zrZ74hh +AE2/dtTRAoGBAO6bENEqLgxZOvX5NnbytTuuoEnbceUPiIvc6S/nWJPEoGXVN2EJ +a3OaIOQmknE6bjXIWrHTaXJhwejvPUz9DVa4GxU5aJhs4gpocVGf+owQFvk4nJO8 +JL+QVVdXp3XdrXIGyvXJfy0fXXgJg5czrnDHjSTE8/2POtyuZ6VyBtQc +-----END RSA PRIVATE KEY----- diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/client.cer b/pkg/promotel/prometheusreceiver/scrape/testdata/client.cer new file mode 100644 index 000000000..aeeca617f --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/client.cer @@ -0,0 +1,28 @@ +-----BEGIN CERTIFICATE----- +MIIErjCCA5agAwIBAgIUZSocqIiJtB3sMwam1lxD89SYebUwDQYJKoZIhvcNAQEL +BQAwXzELMAkGA1UEBhMCWFgxFTATBgNVBAcMDERlZmF1bHQgQ2l0eTEcMBoGA1UE +CgwTRGVmYXVsdCBDb21wYW55IEx0ZDEbMBkGA1UEAwwSUHJvbWV0aGV1cyBUZXN0 +IENBMB4XDTIyMDMxNzA3MDAzNloXDTQ5MDMxNzA3MDAzNlowajELMAkGA1UEBhMC +WFgxEzARBgNVBAgMClNvbWUtU3RhdGUxFTATBgNVBAcMDERlZmF1bHQgQ2l0eTEc +MBoGA1UECgwTRGVmYXVsdCBDb21wYW55IEx0ZDERMA8GA1UEAwwIdGVzdHVzZXIw +ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDigQV8d+V/fgSxqmP3Atvp +UGGc59d2fGRA61Cqb+Z/dUANAf2GRN4A0p5Nsq8vHcZqdTSbV5vFq6yEEgZ7Exji +sMiMEdYHIIFGDw/Xwssu0+8u2G8wRvHaxHMYiv2CFsoqNH6UQn0/W9vnzhHmPtPr +YBVfvl4211c10QgTJrGdtnDpuLdrexhygWYWKFKD4Cow5/uLf669K9aHeslTqHGA +qOka7kB7Fjx4+kJVPFRrrZA+z6by0vsNpf+iapmQx5WOqOqQ4nQvGpKfIv7d6TQZ +XsZOrLf/GGeUvY+i46gum1J3q1GQeNG+i/Dl18Fkt0ucurdOwNUCJz79g2Shm5/F +Lo+0b0uTgGXQ59T/jOWlZUS6GG3UxySC6y4Pm8cgyKJKjjWy4I+/ABU+mtNd3iQG +SQZHqGjew423/3eRY0Q2nR9m19FLq9wD4eHG7/tBaNdwjz0nJAPRFKONqgZii1sr +POfA/L0Sskv5qvyHkS9ACMCrmhCCbT7YsAk55Oj7xX0LzvNM/xMqvrr0QaGW61h2 +/txkfKzc7vBnNVG0wvo5xmLW0Iccdf+52nudlxkaaZZ8DzvXG/qcdmX/NNofpT90 +jGP10vp8kwKf43Mv4zgMK2SmOK4M/uWZigiOtTWdIqL2iuyaqPakzIko8pGQmphY +ZuKYPIrRAg+RYZ1HrSemsQIDAQABo1cwVTATBgNVHSUEDDAKBggrBgEFBQcDAjAd +BgNVHQ4EFgQUQkOl8D7Cn1gpwoxg1LNKBM+mGnQwHwYDVR0jBBgwFoAUzPUFmeWr +EmnYeIlKMcrwiwutZhswDQYJKoZIhvcNAQELBQADggEBALQ7dkH4sSnnAU+LRdxf +SfzyEiG5YHArP0hT3CGOaVDRqftimLyYxTQXU/jI3LG3ai+SuCQiDyrhHvlDCzZA +gA9FaEI9d06nK8gu27i5PCHNegB8acPBPZdk+FxLU/XGKmbQiYG8Hqssc7C9gG0r +hiJX4KrqVgdtbxXTaP9p3dIc9N1EXJh8CX9E+QuNS7/E3cx+asPu2PiL+zt4G5tM +1kLxibnAF6zoXBUN0ap60BjJ+v9mxQYimqY4XEuSUo4RxVh0z19UAxuWEhbuWAvq +7Zk2AHG0i65w3XNuBPbICp/C9zxzcCd/3AlB6fJCkHYeTTeUUn5jqLNV89XdwjCh +nOI= +-----END CERTIFICATE----- diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/client.key b/pkg/promotel/prometheusreceiver/scrape/testdata/client.key new file mode 100644 index 000000000..e584b7ead --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/client.key @@ -0,0 +1,51 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIJKAIBAAKCAgEA4oEFfHflf34Esapj9wLb6VBhnOfXdnxkQOtQqm/mf3VADQH9 +hkTeANKeTbKvLx3GanU0m1ebxaushBIGexMY4rDIjBHWByCBRg8P18LLLtPvLthv +MEbx2sRzGIr9ghbKKjR+lEJ9P1vb584R5j7T62AVX75eNtdXNdEIEyaxnbZw6bi3 +a3sYcoFmFihSg+AqMOf7i3+uvSvWh3rJU6hxgKjpGu5AexY8ePpCVTxUa62QPs+m +8tL7DaX/omqZkMeVjqjqkOJ0LxqSnyL+3ek0GV7GTqy3/xhnlL2PouOoLptSd6tR +kHjRvovw5dfBZLdLnLq3TsDVAic+/YNkoZufxS6PtG9Lk4Bl0OfU/4zlpWVEuhht +1MckgusuD5vHIMiiSo41suCPvwAVPprTXd4kBkkGR6ho3sONt/93kWNENp0fZtfR +S6vcA+Hhxu/7QWjXcI89JyQD0RSjjaoGYotbKzznwPy9ErJL+ar8h5EvQAjAq5oQ 
+gm0+2LAJOeTo+8V9C87zTP8TKr669EGhlutYdv7cZHys3O7wZzVRtML6OcZi1tCH +HHX/udp7nZcZGmmWfA871xv6nHZl/zTaH6U/dIxj9dL6fJMCn+NzL+M4DCtkpjiu +DP7lmYoIjrU1nSKi9orsmqj2pMyJKPKRkJqYWGbimDyK0QIPkWGdR60nprECAwEA +AQKCAgEA18az1ERf9Fm33Q0GmE039IdnxlMy9qQ/2XyS5xsdCXVIZFvuClhW6Y+7 +0ScVLpx95fLr/8SxF9mYymRlmh+ySFrDYnSnYTi9DmHQ5OmkKGMr64OyQNqFErSt +NMdMA/7z7sr9fv3sVUyMLMMqWB6oQgXRttki5bm1UgZlW+EzuZwQ6wbWbWTiAEt3 +VkppeUo2x0poXxdu/rXhdEUrwC+qmTfQgaBQ+zFOwK0gPhTwE3hP/xZQ4+jL08+8 +vRwyWTNZLYOLmiSxLCJzZXiwNfUwda7M2iw+SJ0WKCOBz1pzYJsFMA2b8Ta4EX89 +Kailiu328UMK19Jp2dhLcLUYS8B2rVVAK5b/O6iKV8UpKTriXDiCKSpcugpsQ1ML +zq/6vR0SQXD+/W0MesGaNa33votBXJSsf9kZnYJw43n+W4Z/XFUE5pyNM/+TGAqw +yuF4FX2sJL1uP5VMOh2HdthTr+/ewx/Trn9/re0p54z83plVlp4qbcORLiQ2uDf6 +ZZ0/gHzNTp4Fzz81ZvHLm9smpe8cLvojrKLvCl0hv5zAf3QtsajpTN9uM7AsshV1 +QVZSuAxb5n9bcij5F2za1/dd7WLlvsSzgNJ4Td/gEDI8qepB0+7PGlJ17sMg0nWP +nFxUfGIsCF1KOoPwLyaNHHrRGjJigFUufqkbmSWkOzgC6pZVUXECggEBAP81To16 +O5BlGDahcQkjKkqUwUtkhjE9/KQBh3zHqxsitI8f0U7eL3Ge1qhbgEgvHwHOjWSV +pcG9atE55b7qlqqGQboiO1jfyLfIVLfamj0fHLinO/pV/wcBNy6Hz4rP7DNJDCMz +0agz/Ys3VXrZIk5sO0sUBYMBxho1x0n65Z06iK1SwD/x4Xg3/Psyx+ujEEkSsv5I +Gg7aOTHLRSIPUx/OK+4M3sp58PeMGfEYNYxNiEoMiUQgu/srKRjs+pUKXCkEraNW +8s/ODYJ7iso6Z1z4NxfBH+hh+UrxTffh7t0Sz5gdUwUnBNb2I4EdeCcCTOnWYkut +/GKW8oHD7f9VDS0CggEBAOM06rrp9rSsl6UhTu8LS5bjBeyUxab4HLZKP5YBitQO +ltcPS05MxQ3UQ1BAMDRjXE2nrKlWMOAybrffEXBi4U1jYt7CiuCwwsPyaYNWT5qO +Iwdjebkeq3+Mh8c48swhOwRLWSGg6mtRoR/c5cthYU62+s2zdxc/yhVTQ0WNFabT +23PYtjjW41WuR6K7Dhrdcw0MwIs1arZHTsDdU6Hln9raTSNwlHMBWVz/tzuwLieQ +WEUXvsQvPtgPyohmDd0ueXiuS2FiYaXKFIMFj5/JyyJc1OCr1vIQN8mMcUjNbk2I +VaeeSPawgKIiYARhbjJtjwjY6D59gOZrNGYASQOTGhUCggEAJPOB8SgekbShgd90 +L1+BExVgu1rNtzmDZ/e0t1Ntqdsni4WO172B3xChgfTlqQ3xjmBqxoKIYnnbinm4 +kyECOaSAxcOJFkAonruJ0Kj9JhZoITBNldx3tXruk3UkjrO2PmK4OCybkaAdeNfF +L6lat0Iif6dheOt71HWu6j5CmrZL7dSKc3fBLpfksDZVDgApLntfoUOtSjM8jsIg +u2K+pV9Dqw7//w8S3bTSWL8pmavsLNSN12hp7177b1l4mrXKTEIaJglD1OS/vgHH +QaqdJq/lwjG7PflZkAlKQbbbz/SWTC8Kwzc4EyvGTj6HFBbYLg9VYiHJ5jh22mUV +A6A77QKCAQAM6DWpdp8QNnnK5LCCPecGZFEy1mTADno7FM6169KCJ24EO5cwlIXh +Ojy0s2DJqRdWRf82A3J1WggWI/Luqn9YERxNwUl4aDI4RW4fCuksw4RT6B/DF23w +qgAQnjiUxhJ/NPSUR3rpq9J2Z+sZ+ac4fIaU5uwOAw6s1XUN32zqdECUPSxk4Dg7 +5tGk+fFcL1ZY2G+buOYeAsEDjc8xdET3fs1BBSU5v0rfUJuNJX4Ju1Z4Xlf09yYf +yg3cX8fL19cItwYLOzaG34r4wnkdP65tfk6NkNV+HNO+fF73Hsx0VRlgk0pb0T0N +eNxxg0NqU/T7MK9I1YJcFJz+ame7b0DdAoIBAFw3Sf9LbVVNh8ef4OqjBZR8RCYq +4HeG0FPYvMLzUtFi7j4uBfiL4+pNpSFvecSuLRKE8Pr5dPRJNPNgJud5gvuykBZX +Q9ktQJTAPZK8Q5neLeXfAdoF3szJuEZbDdGSps4JFokVIX+h3c+uFRD9QMSh+bz1 +nEXCdYvmTs+bsTL+l7cbXq2iIKk1QnEcL+cRYr3VjP5xxZ/hGnuYqe9wmyo2MVkS +NVUmCifIvE34TO072HH49gVPrhj9qIZsfBh4LBpl75eKwXTXx+HFqHhP8OfzuK6U +v/JQn9JUGGzkmoMazQ9o5D5h/o0t/OGOPnQeqWL4BIPXdHv/dua6jLnAoU8= +-----END RSA PRIVATE KEY----- diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/server.cer b/pkg/promotel/prometheusreceiver/scrape/testdata/server.cer new file mode 100644 index 000000000..c7608c381 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/server.cer @@ -0,0 +1,22 @@ +-----BEGIN CERTIFICATE----- +MIIDljCCAn6gAwIBAgIUIcqjmIGMLSIlEGcSinhaw9+fYWowDQYJKoZIhvcNAQEL +BQAwXzELMAkGA1UEBhMCWFgxFTATBgNVBAcMDERlZmF1bHQgQ2l0eTEcMBoGA1UE +CgwTRGVmYXVsdCBDb21wYW55IEx0ZDEbMBkGA1UEAwwSUHJvbWV0aGV1cyBUZXN0 +IENBMB4XDTIyMDMxNzA2MzQ1MloXDTQ5MDMxNzA2MzQ1MlowVjELMAkGA1UEBhMC +WFgxFTATBgNVBAcMDERlZmF1bHQgQ2l0eTEcMBoGA1UECgwTRGVmYXVsdCBDb21w +YW55IEx0ZDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEAxCEfTBqVmUD4bLlkHdjPPFm1ruaFNfc/7IF5DobrKt6LgfuC 
+/4xMImPmpQFz4FJy1ryWgopRBaOimmPKHi3PxrCRMPmwTJ50Tv8bcNLuYuCbHQ+y +XCquRcUFCGb8LZ9pl9I7r1Nny67V2AKaYZ70rZeo+dHp+716C0ukrxFPSz8LHThs +Wdo9LR5zKP7ersDHMOwfzhnUo+jgseCo1aUeoWxA+of7tb1qOEXqWc7gCsQMncq4 +sOWCsQ3MKwNpuWzRTZexxwEnM/4uz6JDAx8qnHYhJAMeqeKilIgLD6w+5+IC/44a +ecbqsYZZCNSy2p/DgOdTgNm+StwRagZrp+rbqQIDAQABo1MwUTAPBgNVHREECDAG +hwR/AAABMB0GA1UdDgQWBBQCsrwnq5c2CYe44tc2i1RyvWX0FDAfBgNVHSMEGDAW +gBTM9QWZ5asSadh4iUoxyvCLC61mGzANBgkqhkiG9w0BAQsFAAOCAQEA4/7pIiWV +5Xc6XBPZIlc8+cmcDP56EIqq5VIBkB6NMdXP8nBbnga0c8o+owhk3za9A8IS7KQ/ +9+rMRPahPHrQFK44/6NiHqARTT9Im32vsH5Dgnl1+Ec2Ni3j+WRB0z3bV/T46Rsj +yVQhJI8FLiKR7hf9VFao46RBKFhi7kT7y2MeelZCfLB6lJiMmINKn9c4ElFXrIHC +RVgdWEZnpalRhADRAIItWj6ynCtD9z0Ohe1JGU5C0RlqMruj6ghtcf5SBujnRhRW +OUbP59kYS/SC1Kos0sfZAqtyRKIrbc6AEbUM9dVIywm3GIo+oMMfxlxeWOnDgrCE +HM7RCqDVnA7SNw== +-----END CERTIFICATE----- diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/server.key b/pkg/promotel/prometheusreceiver/scrape/testdata/server.key new file mode 100644 index 000000000..2266b0150 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/server.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEAxCEfTBqVmUD4bLlkHdjPPFm1ruaFNfc/7IF5DobrKt6LgfuC +/4xMImPmpQFz4FJy1ryWgopRBaOimmPKHi3PxrCRMPmwTJ50Tv8bcNLuYuCbHQ+y +XCquRcUFCGb8LZ9pl9I7r1Nny67V2AKaYZ70rZeo+dHp+716C0ukrxFPSz8LHThs +Wdo9LR5zKP7ersDHMOwfzhnUo+jgseCo1aUeoWxA+of7tb1qOEXqWc7gCsQMncq4 +sOWCsQ3MKwNpuWzRTZexxwEnM/4uz6JDAx8qnHYhJAMeqeKilIgLD6w+5+IC/44a +ecbqsYZZCNSy2p/DgOdTgNm+StwRagZrp+rbqQIDAQABAoIBACeOjqNo0TdhtTko +gxrJ+bIwXcZy0/c4cPogeuwFJjU1QWnr8lXcVBazk3dAPcDGoEbTLoARqZm7kTYW +XlOL5dYrEn2QPpCVfNvZ9AzjXhUvO9m2qsCQEyobPJKfQslo14E5c7Q+3DZmgtbY +X47E4pCIgBoyzkBpzM2uaf6tPRLtv8QcLklcf7lP5rd0Zypc325RR6+J5nxfCoFp +fD3sj7t/lJLS8Xb6m4/YFjsVJ2qEAelZ086v8unMBEj324Vv/VqrkPFtFNJKI+Az +Pd9xFDBdsKijBn1Yam9/dj7CiyZYKaVZ9p/w7Oqkpbrt8J8S8OtNHZ4fz9FJgRu9 +uu+VTikCgYEA5ZkDmozDseA/c9JTUGAiPfAt5OrnqlKQNzp2m19GKh+Mlwg4k6O5 +uE+0vaQEfc0cX3o8qntWNsb63XC9h6oHewrdyVFMZNS4nzzmKEvGWt9ON6qfQDUs +1cgZ0Y/uKydDX/3hk/hnJbeRW429rk0/GTuSHHilBzhE0uXJ11xPG48CgYEA2q7a +yqTdqPmZFIAYT9ny099PhnGYE6cJljTUMX9Xhk4POqcigcq9kvNNsly2O1t0Eq0H +2tYo91xTCZc3Cb0N+Vx3meLIljnzhEtwzU9w6W5VGJHWiqovjGwtCdm/W28OlMzY +zM+0gVCJzZLhL0vOwBLwGUJvjgfpvgIb/W+C2UcCgYB5TJ3ayQOath7P0g6yKBfv +ITUd+/zovzXx97Ex5OPs3T4pjO5XEejMt0+F4WF+FR8oUiw65W5nAjkHRMjdI7dQ +Ci2ibpEttDTV7Bass1vYJqHsRvhbs7w8NbtuO9xYcCXoUPkcc+AKzTC+beQIckcj +zZUj9Zk6dz/lLAG3Bc3FgQKBgQC+MmZI6auAU9Y4ZlC+4qi4bfkUzaefMCC+a6RC +iKbvQOUt9j+k81h+fu6MuuYkKh6CP8wdITbwLXRrWwGbjrqgrzO2u/AJ+M07uwGZ +EAb8f+GzROR8JhjE4TEq6B/uvmDIOoI1YFF2Rz4TdjQ0lpJzrAT3czjjJy68+8is +XFhJ8QKBgQCMPpB7taMLQzuilEGabL6Xas9UxryiGoBHk4Umb107GVWgwXxWT6fk +YSlvbMQHCgVeaJe374Bghyw33Z3WilWM1fCWya/CxXlw9wakjQHiqFCIOCxdgosX +Sr35bRFWJMnHXD+jD0Vr8WrtbGzFSZb3ZrjT6WhWRIGCHcaMANN9ew== +-----END RSA PRIVATE KEY----- diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/servername.cer b/pkg/promotel/prometheusreceiver/scrape/testdata/servername.cer new file mode 100644 index 000000000..d1aea98b1 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/servername.cer @@ -0,0 +1,72 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: 1 (0x1) + Signature Algorithm: sha256WithRSAEncryption + Issuer: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA + Validity + Not Before: Aug 12 19:58:32 2020 GMT + Not After : Nov 27 19:58:32 2041 GMT + Subject: C = XX, ST = State, L = City, O = Prom Test Inc., CN = Test + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + RSA Public-Key: 
(2048 bit) + Modulus: + 00:ca:58:23:ba:98:7f:ce:08:b5:03:62:92:d0:8c: + 3f:ef:7c:dd:a2:6e:38:f4:6c:3e:0c:04:c0:a4:bd: + 6c:29:85:13:01:50:64:d2:e0:e9:11:92:30:7e:e6: + 04:03:3e:91:6f:30:60:cd:d6:30:d7:47:26:0c:20: + c3:6f:49:4d:4c:11:78:57:4f:1b:18:05:dc:61:e2: + 64:2a:72:07:45:9a:6b:4c:fb:3b:3d:60:1a:8b:58: + c4:d0:7e:a9:87:e4:e1:b8:78:21:3e:4b:bb:38:07: + b2:4e:f6:32:b7:98:b4:8e:45:36:be:02:d2:3d:af: + 70:2d:c8:82:7e:fc:2d:77:a4:b5:82:cb:61:7c:49: + 7c:62:c2:87:88:45:44:fb:98:78:9c:b8:f3:a7:c3: + 08:80:60:4d:3e:1c:d0:bf:05:bc:c7:a6:e9:d3:08: + 6e:57:33:85:44:74:75:54:17:8a:e6:bc:46:d0:b6: + c2:55:4d:d0:e6:83:d5:48:bb:91:4e:48:d0:ea:f3: + 6d:9b:fa:99:3d:3c:36:98:02:e3:a5:15:2b:37:16: + c0:e3:9d:da:13:7d:df:a0:91:db:de:01:7c:e5:b4: + ad:eb:b1:7b:82:51:70:c3:9b:2a:1a:dc:53:8a:3b: + bc:b7:c8:0d:cb:b5:15:d1:13:a5:92:80:bb:a3:5a: + 4c:b3 + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Subject Alternative Name: + DNS:prometheus.rocks + Signature Algorithm: sha256WithRSAEncryption + 00:12:d7:ac:b7:ef:64:83:22:bb:34:3b:48:1e:c1:2c:e7:57: + fd:cd:0e:9a:fa:91:7c:f2:a7:72:45:ac:9c:8d:1b:ef:eb:5b: + 62:5a:c3:a3:f2:53:75:23:25:6b:6c:ec:6e:c0:de:92:f6:11: + 17:93:6d:b6:0d:e4:13:a3:eb:c5:ee:33:2e:f9:97:73:65:d2: + b2:23:c0:87:63:67:02:78:a4:28:4b:df:f2:c5:47:dc:49:27: + 06:53:12:d7:6d:53:95:eb:01:e3:58:6e:0c:44:43:30:bc:b0: + d7:0b:14:c7:81:8e:0d:a3:44:ee:2c:89:32:ab:8d:21:54:ed: + 8d:9a:fb:2f:f5:53:1d:ec:43:d4:ef:76:c7:5b:dd:3c:54:df: + 29:f7:64:34:e9:7e:98:49:1b:26:3c:52:77:43:70:f8:2c:8c: + 2f:af:24:cc:81:58:be:8d:b9:6e:2e:e8:75:9a:27:6e:24:05: + eb:dc:df:c6:23:93:5b:f1:03:c6:0a:4d:ad:d7:0e:cd:bc:e9: + 6e:38:c7:7f:ba:af:1b:91:1c:37:77:b6:0c:4d:81:da:04:3a: + eb:4a:c4:8a:41:29:9f:45:a0:0f:1b:d1:32:28:48:77:5d:f4: + 3a:71:7d:87:b0:98:c4:c3:94:e1:cf:65:cf:12:e2:90:02:48: + a8:b7:a1:5d +-----BEGIN CERTIFICATE----- +MIIDTTCCAjWgAwIBAgIBATANBgkqhkiG9w0BAQsFADBfMQswCQYDVQQGEwJYWDEV +MBMGA1UEBwwMRGVmYXVsdCBDaXR5MRwwGgYDVQQKDBNEZWZhdWx0IENvbXBhbnkg +THRkMRswGQYDVQQDDBJQcm9tZXRoZXVzIFRlc3QgQ0EwHhcNMjAwODEyMTk1ODMy +WhcNNDExMTI3MTk1ODMyWjBUMQswCQYDVQQGEwJYWDEOMAwGA1UECAwFU3RhdGUx +DTALBgNVBAcMBENpdHkxFzAVBgNVBAoMDlByb20gVGVzdCBJbmMuMQ0wCwYDVQQD +DARUZXN0MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAylgjuph/zgi1 +A2KS0Iw/73zdom449Gw+DATApL1sKYUTAVBk0uDpEZIwfuYEAz6RbzBgzdYw10cm +DCDDb0lNTBF4V08bGAXcYeJkKnIHRZprTPs7PWAai1jE0H6ph+ThuHghPku7OAey +TvYyt5i0jkU2vgLSPa9wLciCfvwtd6S1gsthfEl8YsKHiEVE+5h4nLjzp8MIgGBN +PhzQvwW8x6bp0whuVzOFRHR1VBeK5rxG0LbCVU3Q5oPVSLuRTkjQ6vNtm/qZPTw2 +mALjpRUrNxbA453aE33foJHb3gF85bSt67F7glFww5sqGtxTiju8t8gNy7UV0ROl +koC7o1pMswIDAQABox8wHTAbBgNVHREEFDASghBwcm9tZXRoZXVzLnJvY2tzMA0G +CSqGSIb3DQEBCwUAA4IBAQAAEtest+9kgyK7NDtIHsEs51f9zQ6a+pF88qdyRayc +jRvv61tiWsOj8lN1IyVrbOxuwN6S9hEXk222DeQTo+vF7jMu+ZdzZdKyI8CHY2cC +eKQoS9/yxUfcSScGUxLXbVOV6wHjWG4MREMwvLDXCxTHgY4No0TuLIkyq40hVO2N +mvsv9VMd7EPU73bHW908VN8p92Q06X6YSRsmPFJ3Q3D4LIwvryTMgVi+jbluLuh1 +miduJAXr3N/GI5Nb8QPGCk2t1w7NvOluOMd/uq8bkRw3d7YMTYHaBDrrSsSKQSmf +RaAPG9EyKEh3XfQ6cX2HsJjEw5Thz2XPEuKQAkiot6Fd +-----END CERTIFICATE----- diff --git a/pkg/promotel/prometheusreceiver/scrape/testdata/servername.key b/pkg/promotel/prometheusreceiver/scrape/testdata/servername.key new file mode 100644 index 000000000..95d6aca52 --- /dev/null +++ b/pkg/promotel/prometheusreceiver/scrape/testdata/servername.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEAylgjuph/zgi1A2KS0Iw/73zdom449Gw+DATApL1sKYUTAVBk +0uDpEZIwfuYEAz6RbzBgzdYw10cmDCDDb0lNTBF4V08bGAXcYeJkKnIHRZprTPs7 
+PWAai1jE0H6ph+ThuHghPku7OAeyTvYyt5i0jkU2vgLSPa9wLciCfvwtd6S1gsth +fEl8YsKHiEVE+5h4nLjzp8MIgGBNPhzQvwW8x6bp0whuVzOFRHR1VBeK5rxG0LbC +VU3Q5oPVSLuRTkjQ6vNtm/qZPTw2mALjpRUrNxbA453aE33foJHb3gF85bSt67F7 +glFww5sqGtxTiju8t8gNy7UV0ROlkoC7o1pMswIDAQABAoIBADZ5vETEQcRKe9FJ +fJVA7QWg7FqKqjLD4YCC1wqDJNeYyCEWb86GVrkwTnYbnwDwm17/+0/vVn7e3NNv +Dq6rYXAVU/zNg1HYYhjIRodW47ZNeI3lJXHEqeDSKUqojyPS7yIm1WxcHy9agxrX +FZhwOEwFPlOxlsCcturcjKV7ZxJKftiWoyPodQLjlEmNoD/MQ6Obuge1dQZRLwCk +/R+EcTWHN4A+rpnZLoKFEaw5p7DTjdKSGOu+EFB+lrEg5kTOCN/kR0PYGnDH1Ygd +6/DmP0xiPpT2pKudTtI7f+QoPtff+GJ47Xy1oYks/cXUJiJbtCT9wyKQtR5mZRUc +ruNWBCECgYEA9e87HbUaMA4tAqaur684RTFAqpDjDBB8tDAxbnuQrv6947odgQHu +YcBAneL2HIvUMuusI0X52nGRwt+qOSXiS1WQwA1P44qR28VYxLIkgK1xMEpezClU +xIavMzwZtmjCZ84Q6H/qvVuqa5MuE4pe6O9vnb4cUWF280ngmf+zViUCgYEA0qAx +qzh6cUBSF6PAV+7QKXB4YLfvLloX3qwC+qkdaGjacREb7URxTKs1lHLhpmHwoPN+ +aXccxNs443Z67AK68N2RAOVw3z1IPTmSUzL7HCKqzZtRXsj+Lm8bj9sRzvWuE7RU +X2QW+9ppAvjwwrhG0vXCs3yua2usMyHjr6ekw/cCgYBSut0qCyf6Dmq5v5R36PuG +2yCjwAWAo3Mvsh6OyeZL18nM92jBYwLrwx55fkXIKImDb6ACZaG9CAM+iLrcapAL +Q4dj85ZyNsUGJwbLdBmvZ6jx07K7/xNS4PPCym7j2625+anabF1swY88jNAtJpjy +xsjHSZKBFcZL5Qg3BbswOQKBgHigD/IMRWtot9scCAMUHRkudXKGxK9aH4OCJa6i +fdoW+st4TfMjmHOdNfFPndWpD6NN8B68fbhsCHeUmi9iHOfnLK1DudHQCfguaZPG +hbOGUyWvhvluyMuVDEbl4pwRbeGRDCUZcGRKoIt4QIJ0APO+lgQvKsEQiC08gmZN +73nfAoGAKXVVV7dN59gohMTRWsOSGP+YLEj8+rGZZYNKCLVTol0VQ7T30tA0P4Cf +Dw9oLKGnDdgTtJA6Fsms858B6ANC+6Hxd9LG0ecOevKMBFHuWPm56Z0ofDzoPVBW +eDuHeR5xF0xq5PIFl/mIJJ1NK0p1Do9gwqEEIftdNyrcGefGdXk= +-----END RSA PRIVATE KEY----- diff --git a/pkg/promotel/promotel_test.go b/pkg/promotel/promotel_test.go new file mode 100644 index 000000000..037a51a4e --- /dev/null +++ b/pkg/promotel/promotel_test.go @@ -0,0 +1,249 @@ +package promotel_test + +import ( + "bytes" + "context" + "encoding/binary" + "fmt" + "math/rand" + "strings" + "sync" + "testing" + "time" + + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/metadata" + "github.com/prometheus/prometheus/storage" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" +) + +// TestScrapeLoopScrapeAndReport exercises scrapeAndReport with various scenarios +// (successful scrape, failed scrape, forced error, empty body leading to staleness, etc.). +func TestScrapeLoopScrapeAndReport(t *testing.T) { + appendable := &collectResultAppendable{&testAppender{}} + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + reg := prometheus.NewRegistry() + sl, err := scrape.NewGathererLoop(ctx, nil, appendable, reg, reg, 10*time.Millisecond) + require.NoError(t, err) + + start := time.Now() + sl.ScrapeAndReport(time.Time{}, start, nil) + // The collectResultAppender holds all appended samples. Check the last appended + // for staleness or actual data, depending on if the scrape was declared OK. + allSamples := appendable.resultFloats + // We expect at least one normal sample plus the reported samples. 
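+	// ("Reported samples" here means the synthetic series the scrape loop appends after each scrape; in upstream Prometheus these include "up" and "scrape_duration_seconds", and the same is assumed for GathererLoop.)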
+ require.NotEmpty(t, allSamples, "Expected to see appended samples.") + + // reset the appender + appendable.testAppender = &testAppender{} + // create counter metric + counter := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "metric_a", + Help: "metric_a help", + }, []string{"label_a"}) + reg.MustRegister(counter) + counter.WithLabelValues("value_a").Add(42) + + mfs, err := reg.Gather() + require.NoError(t, err) + // verify that metric_a is present in Gatherer results + var foundMetric bool + for _, mf := range mfs { + if mf.GetName() == "metric_a" { + // verify metrics value + require.Len(t, mf.GetMetric(), 1) + require.Equal(t, "value_a", mf.GetMetric()[0].GetLabel()[0].GetValue()) + require.Equal(t, 42.0, mf.GetMetric()[0].GetCounter().GetValue()) + foundMetric = true + break + } + } + require.True(t, foundMetric, "Expected to see the 'metric_a' counter metric.") + + sl.ScrapeAndReport(time.Time{}, start, nil) + // Get all appended samples + allSamples = appendable.resultFloats + // verify that the counter metric 'metric_a' was reported + var found bool + for _, s := range allSamples { + if s.metric.Get("__name__") == "metric_a" && s.metric.Get("label_a") == "value_a" { + found = true + require.Equal(t, 42.0, s.f) + } + } + require.True(t, found, "Expected to see the 'metric_a' counter metric.") +} + +type floatSample struct { + metric labels.Labels + t int64 + f float64 +} + +type histogramSample struct { + t int64 + h *histogram.Histogram + fh *histogram.FloatHistogram +} + +type collectResultAppendable struct { + *testAppender +} + +func (a *collectResultAppendable) Appender(_ context.Context) storage.Appender { + return a +} + +// testAppender records all samples that were added through the appender. +// It can be used as its zero value or be backed by another appender it writes samples through. 
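+// Append, AppendExemplar, AppendHistogram and UpdateMetadata stage values in the pending* slices; Commit moves them to the result* slices, while Rollback moves pending floats and histograms to the rolledback* slices.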
+type testAppender struct { + mtx sync.Mutex + + next storage.Appender + resultFloats []floatSample + pendingFloats []floatSample + rolledbackFloats []floatSample + resultHistograms []histogramSample + pendingHistograms []histogramSample + rolledbackHistograms []histogramSample + resultExemplars []exemplar.Exemplar + pendingExemplars []exemplar.Exemplar + resultMetadata []metadata.Metadata + pendingMetadata []metadata.Metadata +} + +func (a *testAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingFloats = append(a.pendingFloats, floatSample{ + metric: lset, + t: t, + f: v, + }) + + if ref == 0 { + ref = storage.SeriesRef(rand.Uint64()) + } + if a.next == nil { + return ref, nil + } + + ref, err := a.next.Append(ref, lset, t, v) + if err != nil { + return 0, err + } + return ref, err +} + +func (a *testAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingExemplars = append(a.pendingExemplars, e) + if a.next == nil { + return 0, nil + } + + return a.next.AppendExemplar(ref, l, e) +} + +func (a *testAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t}) + if a.next == nil { + return 0, nil + } + + return a.next.AppendHistogram(ref, l, t, h, fh) +} + +func (a *testAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { + a.mtx.Lock() + defer a.mtx.Unlock() + a.pendingMetadata = append(a.pendingMetadata, m) + if ref == 0 { + ref = storage.SeriesRef(rand.Uint64()) + } + if a.next == nil { + return ref, nil + } + + return a.next.UpdateMetadata(ref, l, m) +} + +func (a *testAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + return a.Append(ref, l, ct, 0.0) +} + +func (a *testAppender) Commit() error { + a.mtx.Lock() + defer a.mtx.Unlock() + a.resultFloats = append(a.resultFloats, a.pendingFloats...) + a.resultExemplars = append(a.resultExemplars, a.pendingExemplars...) + a.resultHistograms = append(a.resultHistograms, a.pendingHistograms...) + a.resultMetadata = append(a.resultMetadata, a.pendingMetadata...) 
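+	// Reset the pending buffers now that their contents have been moved to the result slices.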
+ a.pendingFloats = nil + a.pendingExemplars = nil + a.pendingHistograms = nil + a.pendingMetadata = nil + if a.next == nil { + return nil + } + return a.next.Commit() +} + +func (a *testAppender) Rollback() error { + a.mtx.Lock() + defer a.mtx.Unlock() + a.rolledbackFloats = a.pendingFloats + a.rolledbackHistograms = a.pendingHistograms + a.pendingFloats = nil + a.pendingHistograms = nil + if a.next == nil { + return nil + } + return a.next.Rollback() +} + +func (a *testAppender) String() string { + var sb strings.Builder + for _, s := range a.resultFloats { + sb.WriteString(fmt.Sprintf("committed: %s %f %d\n", s.metric, s.f, s.t)) + } + for _, s := range a.pendingFloats { + sb.WriteString(fmt.Sprintf("pending: %s %f %d\n", s.metric, s.f, s.t)) + } + for _, s := range a.rolledbackFloats { + sb.WriteString(fmt.Sprintf("rolledback: %s %f %d\n", s.metric, s.f, s.t)) + } + return sb.String() +} + +// protoMarshalDelimited marshals a MetricFamily into a delimited +// Prometheus proto exposition format bytes (known as 'encoding=delimited`) +// +// See also https://eli.thegreenplace.net/2011/08/02/length-prefix-framing-for-protocol-buffers +func protoMarshalDelimited(t *testing.T, mf *dto.MetricFamily) []byte { + t.Helper() + + protoBuf, err := proto.Marshal(mf) + require.NoError(t, err) + + varintBuf := make([]byte, binary.MaxVarintLen32) + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + + buf := &bytes.Buffer{} + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + return buf.Bytes() +} diff --git a/pkg/promotel/receiver.go b/pkg/promotel/receiver.go index 99a87eee9..68f34bbff 100644 --- a/pkg/promotel/receiver.go +++ b/pkg/promotel/receiver.go @@ -3,9 +3,7 @@ package promotel import ( "context" - "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/prometheus/scrape" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/pdata/pmetric" @@ -13,6 +11,8 @@ import ( "go.uber.org/zap" "github.com/smartcontractkit/chainlink-common/pkg/promotel/internal" + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver" + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver/scrape" ) type Runnable interface { diff --git a/pkg/promotel/receiver_test.go b/pkg/promotel/receiver_test.go index f489d29ec..04f8724e4 100644 --- a/pkg/promotel/receiver_test.go +++ b/pkg/promotel/receiver_test.go @@ -5,7 +5,6 @@ import ( "path/filepath" "testing" - "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver" "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component/componenttest" @@ -14,6 +13,8 @@ import ( "go.opentelemetry.io/collector/receiver/receivertest" "github.com/smartcontractkit/chainlink-common/pkg/promotel" + + "github.com/smartcontractkit/chainlink-common/pkg/promotel/prometheusreceiver" ) // TestPrometheusReceiver verifies the initialization, startup, and shutdown