diff --git a/.github/renovate.json5 b/.github/renovate.json5 index 218dc45856db6..362b8537f8f23 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -8,18 +8,16 @@ ], "prHourlyLimit": 4, "baseBranches": [ - "main" + "main", + "release-3.3.x", // Update when a new release is out, 2 minors, 1 major. + "release-3.2.x", // Also ensure to update the 'packageRules' section to match + "release-2.9.x" ], "packageRules": [ { - "matchBaseBranches": [ - "release-2.9.x", - "release-2.8.x" - ], - "enabled": false, - "matchPackageNames": [ - "*" - ] + // Disable updates for all branches - we only want security updates + "matchBaseBranches": ["release-3.3.x", "release-3.2.x", "release-2.9.x"], + "enabled": false }, { // Disable Go version updates @@ -60,12 +58,20 @@ "automerge": false }, { - // Enable all other updates + // Enable all other updates, and auto-merge minor and patch updates "matchFileNames": ["!operator/go.mod", "!operator/api/loki/go.mod"], "groupName": "{{packageName}}", "enabled": true, - "matchUpdateTypes": ["major", "minor", "patch"], - // After we have tested the above configuration, we can enable the following + "matchUpdateTypes": ["minor", "patch"], + "automerge": true, + "autoApprove": true + }, + { + // Enable all other updates, don't auto-merge major updates + "matchFileNames": ["!operator/go.mod", "!operator/api/loki/go.mod"], + "groupName": "{{packageName}}", + "enabled": true, + "matchUpdateTypes": ["major"], "automerge": false, "autoApprove": false } @@ -77,7 +83,9 @@ "enabled": true, "addLabels": [ "area/security" - ] + ], + "automerge": true, + "autoApprove": true }, "osvVulnerabilityAlerts": true, "prConcurrentLimit": 10, diff --git a/.github/workflows/helm-diff-ci.yml b/.github/workflows/helm-diff-ci.yml index 64e966140cbe1..2bacfd2d25dd6 100644 --- a/.github/workflows/helm-diff-ci.yml +++ b/.github/workflows/helm-diff-ci.yml @@ -3,8 +3,9 @@ name: Helm Loki Diff CI on: pull_request: paths: - - 'production/helm/loki/**' + - "production/helm/loki/**" +# These permissions are needed to assume roles from Github's OIDC. 
permissions: contents: read pull-requests: write @@ -273,6 +274,7 @@ jobs: summary-diff-outputs: name: Summary Diffs runs-on: ubuntu-latest + if: github.event.pull_request.head.repo.fork == false needs: - single-binary-diff - default-values-diff @@ -283,6 +285,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/download-artifact@v4 with: diff --git a/clients/cmd/logstash/Dockerfile b/clients/cmd/logstash/Dockerfile index 3fe7db6fe2b47..2c36610631345 100644 --- a/clients/cmd/logstash/Dockerfile +++ b/clients/cmd/logstash/Dockerfile @@ -1,4 +1,4 @@ -FROM logstash:8.17.0 +FROM logstash:8.17.1 USER logstash ENV PATH /usr/share/logstash/vendor/jruby/bin:/usr/share/logstash/vendor/bundle/jruby/2.5.0/bin:/usr/share/logstash/jdk/bin:$PATH diff --git a/cmd/logql-analyzer/main.go b/cmd/logql-analyzer/main.go index beed1226709d4..6b4ceb8a53ca6 100644 --- a/cmd/logql-analyzer/main.go +++ b/cmd/logql-analyzer/main.go @@ -11,7 +11,6 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/grafana/loki/v3/pkg/logqlanalyzer" - "github.com/grafana/loki/v3/pkg/sizing" util_log "github.com/grafana/loki/v3/pkg/util/log" ) @@ -48,12 +47,6 @@ func createServer(cfg server.Config, logger log.Logger) (*server.Server, error) s.HTTP.Use(logqlanalyzer.CorsMiddleware()) s.HTTP.Handle("/api/logql-analyze", &logqlanalyzer.LogQLAnalyzeHandler{}).Methods(http.MethodPost, http.MethodOptions) - sizingHandler := sizing.NewHandler(log.With(logger, "component", "sizing")) - - s.HTTP.Handle("/api/sizing/helm", http.HandlerFunc(sizingHandler.GenerateHelmValues)).Methods(http.MethodGet, http.MethodOptions) - s.HTTP.Handle("/api/sizing/nodes", http.HandlerFunc(sizingHandler.Nodes)).Methods(http.MethodGet, http.MethodOptions) - s.HTTP.Handle("/api/sizing/cluster", http.HandlerFunc(sizingHandler.Cluster)).Methods(http.MethodGet, http.MethodOptions) - s.HTTP.HandleFunc("/ready", func(w http.ResponseWriter, _ *http.Request) { http.Error(w, "ready", http.StatusOK) }).Methods(http.MethodGet) diff --git a/cmd/querytee/main.go b/cmd/querytee/main.go index 5acebfed85179..92f4187bb1383 100644 --- a/cmd/querytee/main.go +++ b/cmd/querytee/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "os" + "time" "github.com/go-kit/log/level" "github.com/grafana/dskit/log" @@ -62,6 +63,7 @@ func lokiReadRoutes(cfg Config) []querytee.Route { Tolerance: cfg.ProxyConfig.ValueComparisonTolerance, UseRelativeError: cfg.ProxyConfig.UseRelativeError, SkipRecentSamples: cfg.ProxyConfig.SkipRecentSamples, + SkipSamplesBefore: time.Time(cfg.ProxyConfig.SkipSamplesBefore), }) return []querytee.Route{ diff --git a/docs/sources/operations/storage/wal.md b/docs/sources/operations/storage/wal.md index b9824ad425ff6..81573018c4c1d 100644 --- a/docs/sources/operations/storage/wal.md +++ b/docs/sources/operations/storage/wal.md @@ -17,18 +17,17 @@ This section will use Kubernetes as a reference deployment paradigm in the examp The Write Ahead Log in Loki takes a few particular tradeoffs compared to other WALs you may be familiar with. The WAL aims to add additional durability guarantees, but _not at the expense of availability_. Particularly, there are two scenarios where the WAL sacrifices these guarantees. -1) Corruption/Deletion of the WAL prior to replaying it +1. Corruption/Deletion of the WAL prior to replaying it -In the event the WAL is corrupted/partially deleted, Loki will not be able to recover all of its data. 
In this case, Loki will attempt to recover any data it can, but will not prevent Loki from starting. + In the event the WAL is corrupted/partially deleted, Loki will not be able to recover all of its data. In this case, Loki will attempt to recover any data it can, but will not prevent Loki from starting. -You can use the Prometheus metric `loki_ingester_wal_corruptions_total` to track and alert when this happens. + You can use the Prometheus metric `loki_ingester_wal_corruptions_total` to track and alert when this happens. -1) No space left on disk +1. No space left on disk -In the event the underlying WAL disk is full, Loki will not fail incoming writes, but neither will it log them to the WAL. In this case, the persistence guarantees across process restarts will not hold. - -You can use the Prometheus metric `loki_ingester_wal_disk_full_failures_total` to track and alert when this happens. + In the event the underlying WAL disk is full, Loki will not fail incoming writes, but neither will it log them to the WAL. In this case, the persistence guarantees across process restarts will not hold. + You can use the Prometheus metric `loki_ingester_wal_disk_full_failures_total` to track and alert when this happens. ### Backpressure @@ -47,7 +46,7 @@ The following metrics are available for monitoring the WAL: 1. Since ingesters need to have the same persistent volume across restarts/rollout, all the ingesters should be run on [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) with fixed volumes. -2. Following flags needs to be set +1. Following flags needs to be set * `--ingester.wal-enabled` to `true` which enables writing to WAL during ingestion. * `--ingester.wal-dir` to the directory where the WAL data should be stored and/or recovered from. Note that this should be on the mounted volume. * `--ingester.checkpoint-duration` to the interval at which checkpoints should be created. @@ -55,10 +54,8 @@ The following metrics are available for monitoring the WAL: ## Changes in lifecycle when WAL is enabled - Flushing of data to chunk store during rollouts or scale down is disabled. This is because during a rollout of statefulset there are no ingesters that are simultaneously leaving and joining, rather the same ingester is shut down and brought back again with updated config. Hence flushing is skipped and the data is recovered from the WAL. If you need to ensure that data is always flushed to the chunk store when your pod shuts down, you can set the `--ingester.flush-on-shutdown` flag to `true`. - ## Disk space requirements Based on tests in real world: @@ -67,7 +64,7 @@ Based on tests in real world: * Checkpoint period was 5mins. * disk utilization on a WAL-only disk was steady at ~10-15GB. -You should not target 100% disk utilisation. +You should not target 100% disk utilization. ## Migrating from stateless deployments @@ -76,17 +73,17 @@ The ingester _Deployment without WAL_ and _StatefulSet with WAL_ should be scale Let's take an example of 4 ingesters. The migration would look something like this: 1. Bring up one stateful ingester `ingester-0` and wait until it's ready (accepting read and write requests). -2. Scale down the old ingester deployment to 3 and wait until the leaving ingester flushes all the data to chunk store. -3. Once that ingester has disappeared from `kc get pods ...`, add another stateful ingester and wait until it's ready. Now you have `ingester-0` and `ingester-1`. -4. Repeat step 2 to reduce remove another ingester from old deployment. -5. 
Repeat step 3 to add another stateful ingester. Now you have `ingester-0 ingester-1 ingester-2`. -6. Repeat step 4 and 5, and now you will finally have `ingester-0 ingester-1 ingester-2 ingester-3`. +1. Scale down the old ingester deployment to 3 and wait until the leaving ingester flushes all the data to chunk store. +1. Once that ingester has disappeared from `kc get pods ...`, add another stateful ingester and wait until it's ready. Now you have `ingester-0` and `ingester-1`. +1. Repeat step 2 to reduce remove another ingester from old deployment. +1. Repeat step 3 to add another stateful ingester. Now you have `ingester-0 ingester-1 ingester-2`. +1. Repeat step 4 and 5, and now you will finally have `ingester-0 ingester-1 ingester-2 ingester-3`. ## How to scale up/down ### Scale up -Scaling up is same as what you would do without WAL or statefulsets. Nothing to change here. +Scaling up is same as what you would do without WAL or StatefulSets. Nothing to change here. ### Scale down @@ -100,12 +97,11 @@ After hitting the endpoint for `ingester-2 ingester-3`, scale down the ingesters Also you can set the `--ingester.flush-on-shutdown` flag to `true`. This enables chunks to be flushed to long-term storage when the ingester is shut down. - ## Additional notes ### Kubernetes hacking -Statefulsets are significantly more cumbersome to work with, upgrade, and so on. Much of this stems from immutable fields on the specification. For example, if one wants to start using the WAL with single store Loki and wants separate volume mounts for the WAL and the boltdb-shipper, you may see immutability errors when attempting updates the Kubernetes statefulsets. +StatefulSets are significantly more cumbersome to work with, upgrade, and so on. Much of this stems from immutable fields on the specification. For example, if one wants to start using the WAL with single store Loki and wants separate volume mounts for the WAL and the boltdb-shipper, you may see immutability errors when attempting updates the Kubernetes StatefulSets. In this case, try `kubectl -n delete sts ingester --cascade=false`. This will leave the Pods alive but delete the StatefulSet. @@ -115,11 +111,11 @@ Then you may recreate the (updated) StatefulSet and one-by-one start deleting th 1. **StatefulSets for Ordered Scaling Down**: The Loki ingesters should be scaled down one by one, which is efficiently handled by Kubernetes StatefulSets. This ensures an ordered and reliable scaling process, as described in the [Deployment and Scaling Guarantees](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#deployment-and-scaling-guarantees) documentation. -2. **Using PreStop Lifecycle Hook**: During the Pod scaling down process, the PreStop [lifecycle hook](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/) triggers the `/flush_shutdown` endpoint on the ingester. This action flushes the chunks and removes the ingester from the ring, allowing it to register as unready and become eligible for deletion. +1. **Using PreStop Lifecycle Hook**: During the Pod scaling down process, the PreStop [lifecycle hook](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/) triggers the `/flush_shutdown` endpoint on the ingester. This action flushes the chunks and removes the ingester from the ring, allowing it to register as unready and become eligible for deletion. -3. 
**Using terminationGracePeriodSeconds**: Provides time for the ingester to flush its data before being deleted, if flushing data takes more than 30 minutes, you may need to increase it. +1. **Using terminationGracePeriodSeconds**: Provides time for the ingester to flush its data before being deleted, if flushing data takes more than 30 minutes, you may need to increase it. -4. **Cleaning Persistent Volumes**: Persistent volumes are automatically cleaned up by leveraging the [enableStatefulSetAutoDeletePVC](https://kubernetes.io/blog/2021/12/16/kubernetes-1-23-statefulset-pvc-auto-deletion/) feature in Kubernetes. +1. **Cleaning Persistent Volumes**: Persistent volumes are automatically cleaned up by leveraging the [enableStatefulSetAutoDeletePVC](https://kubernetes.io/blog/2021/12/16/kubernetes-1-23-statefulset-pvc-auto-deletion/) feature in Kubernetes. By following the above steps, you can ensure a smooth scaling down process for the Loki ingesters while maintaining data integrity and minimizing potential disruptions. @@ -127,4 +123,4 @@ By following the above steps, you can ensure a smooth scaling down process for t * When the ingester restarts for any reason (upgrade, crash, etc), it should be able to attach to the same volume in order to recover back the WAL and tokens. * 2 ingesters should not be working with the same volume/directory for the WAL. -* A rollout should bring down an ingester completely and then start the new ingester, not the other way around. \ No newline at end of file +* A rollout should bring down an ingester completely and then start the new ingester, not the other way around. diff --git a/docs/sources/send-data/alloy/_index.md b/docs/sources/send-data/alloy/_index.md index daad0d6906db7..21ad0714320ec 100644 --- a/docs/sources/send-data/alloy/_index.md +++ b/docs/sources/send-data/alloy/_index.md @@ -2,7 +2,7 @@ title: Ingesting logs to Loki using Alloy menuTitle: Grafana Alloy description: Configuring Grafana Alloy to send logs to Loki. -weight: 250 +weight: 100 --- diff --git a/docs/sources/send-data/otel/_index.md b/docs/sources/send-data/otel/_index.md index 88d099a4c3165..ba082c30f0699 100644 --- a/docs/sources/send-data/otel/_index.md +++ b/docs/sources/send-data/otel/_index.md @@ -1,10 +1,10 @@ --- title: Ingesting logs to Loki using OpenTelemetry Collector -menuTitle: OTel Collector +menuTitle: OpenTelemetry description: Configuring the OpenTelemetry Collector to send logs to Loki. aliases: - ../clients/k6/ -weight: 250 +weight: 200 --- # Ingesting logs to Loki using OpenTelemetry Collector diff --git a/docs/sources/send-data/promtail/_index.md b/docs/sources/send-data/promtail/_index.md index bfd4a54b733cb..05eec7dc43d38 100644 --- a/docs/sources/send-data/promtail/_index.md +++ b/docs/sources/send-data/promtail/_index.md @@ -4,7 +4,7 @@ menuTitle: Promtail description: How to use the Promtail agent to ship logs to Loki aliases: - ../clients/promtail/ # /docs/loki/latest/clients/promtail/ -weight: 200 +weight: 300 --- # Promtail agent diff --git a/docs/sources/send-data/promtail/cloud/eks/values.yaml b/docs/sources/send-data/promtail/cloud/eks/values.yaml index c2f26f4de1ec8..a2c060c4b0171 100644 --- a/docs/sources/send-data/promtail/cloud/eks/values.yaml +++ b/docs/sources/send-data/promtail/cloud/eks/values.yaml @@ -17,7 +17,7 @@ initContainer: image: repository: grafana/promtail - tag: 3.3.1 + tag: 3.3.2 pullPolicy: IfNotPresent ## Optionally specify an array of imagePullSecrets. ## Secrets must be manually created in the namespace. 
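For reference, the WAL scale-down mechanics described in the doc changes above (a PreStop hook that calls the `/flush_shutdown` endpoint, a generous `terminationGracePeriodSeconds`, and automatic PVC cleanup) can be wired together on an ingester StatefulSet roughly as sketched below. This is a minimal illustration, not taken from the Loki Helm charts: the object names, image, HTTP port, and grace period are assumptions.

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: ingester                              # assumed name
spec:
  replicas: 4
  serviceName: ingester
  selector:
    matchLabels:
      app: ingester
  persistentVolumeClaimRetentionPolicy:
    whenScaled: Delete                        # the enableStatefulSetAutoDeletePVC behavior referenced above
    whenDeleted: Retain
  template:
    metadata:
      labels:
        app: ingester
    spec:
      terminationGracePeriodSeconds: 1800     # allow up to 30 minutes for flushing; raise if flushes take longer
      containers:
        - name: ingester
          image: grafana/loki:latest          # placeholder image
          lifecycle:
            preStop:
              httpGet:
                path: /flush_shutdown         # endpoint referenced in the WAL docs; flushes chunks and leaves the ring
                port: 3100                    # assumed HTTP listen port
```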
diff --git a/docs/sources/setup/size/_index.md b/docs/sources/setup/size/_index.md index 162748eb9e3b8..eab4648e5ae7a 100644 --- a/docs/sources/setup/size/_index.md +++ b/docs/sources/setup/size/_index.md @@ -1,7 +1,4 @@ --- -_build: - list: false -noindex: true title: Size the cluster menuTitle: Size the cluster description: Provides a tool that generates a Helm Chart values.yaml file based on expected ingestion, retention rate, and node type, to help size your Grafana deployment. @@ -17,162 +14,58 @@ weight: 100 -This tool helps to generate a Helm Charts `values.yaml` file based on specified - expected ingestion, retention rate and node type. It will always configure a - [scalable]({{< relref "../../get-started/deployment-modes#simple-scalable" >}}) deployment. The storage needs to be configured after generation. - -
-<!-- Interactive cluster sizing calculator (HTML form and results table) -->
-<!-- Form inputs: ingest volume (GB/day), retention (days), node type, and query performance (Basic is sized for a max query throughput of around 3GB/s; Super aims for 25% more throughput) -->
-<!-- Node type help text: "Is a vendor or type missing? If so, add it to pkg/sizing/node.go." -->
-<!-- Results table: Read Replicas | Write Replicas | Nodes | Cores | Memory (GB), plus a "Generate and download values file" link -->
+This section is a guide to sizing the base resource needs of a Loki cluster.
+
+Based on the expected ingestion volume, Loki clusters can be categorized into three tiers. The recommendations below are based on p90 resource utilization of the relevant components. Each tab represents a different tier.
+Use this document as a rough guide to specify CPU and memory requests in your deployment. This is only documented for [microservices/distributed](https://grafana.com/docs/loki//get-started/deployment-modes/#microservices-mode) mode at this time.
+
+Query resource needs can vary greatly with usage patterns and configuration. General notes on query performance:
+- The rule of thumb is to run queriers that are as small and as numerous as possible. Unoptimized queries can easily require 10x the suggested querier resources below in all tiers. Horizontal autoscaling is the most cost-effective way to meet demand (see the sketch after this list).
+- Use this [blog post](https://grafana.com/blog/2023/12/28/the-concise-guide-to-loki-how-to-get-the-most-out-of-your-query-performance/) to adopt best practices for optimized query performance.
+- The parallel querier and related components can initially be sized the same as the queriers, depending on how heavily Loki rules are used.
+- Large Loki clusters benefit from a disk-based caching solution, memcached-extstore. See the detailed [blog post](https://grafana.com/blog/2023/08/23/how-we-scaled-grafana-cloud-logs-memcached-cluster-to-50tb-and-improved-reliability/) and read more about [memcached/nvm-caching](https://memcached.org/blog/nvm-caching/).
+- If you're running a cluster that handles less than 30TB/day (~1PB/month) of ingestion, we do not recommend configuring memcached-extstore. The additional operational complexity does not justify the savings.
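Because the notes above favor scaling queriers horizontally rather than making them larger, a CPU-based HorizontalPodAutoscaler is one straightforward way to do that. This is a minimal sketch; the Deployment name, namespace, replica bounds, and utilization target are assumptions, not values shipped with the Loki charts.

```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: loki-querier                # assumed name of the querier Deployment
  namespace: loki                   # assumed namespace
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: loki-querier
  minReplicas: 10                   # base replica count of the smallest tier below
  maxReplicas: 100
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 75    # scale out before queriers saturate their CPU request
```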
+ + +{{< tabs >}} +{{< tab-content name="Less than 100TB/month (3TB/day)" >}} +| Component | CPU Request | Memory Request (Gi)| Base Replicas | Total CPU Req |Total Mem Req (Gi)| +|------------------|-------------|-------------------|----------------|----------------|-----------------| +| Ingester | 2 | 4 | 6 | 12 | 36 | +| Distributor | 2 | 0.5 | 4 | 8 | 2 | +| Index gateway | 0.5 | 2 | 4 | 2 | 8 | +| Querier | 1 | 1 | 10 | 10 | 10 | +| Query-frontend | 1 | 2 | 2 | 2 | 4 | +| Query-scheduler | 1 | 0.5 | 2 | 2 | 1 | +| Compactor | 2 | 10 | 1 (Singleton) | 2 | 10 | +{{< /tab-content >}} +{{< tab-content name="100TB to 1PB /month (3-30TB/day)" >}} +| Component | CPU Request | Memory Request (Gi)| Base Replicas | Total CPU Req |Total Mem Req (Gi)| +|------------------|-------------|-------------------|----------------|----------------|-----------------| +| Ingester | 2 | 6 | 90 | 180 | 540 | +| Distributor | 2 | 1 | 40 | 80 | 40 | +| Index gateway | 0.5 | 4 | 10 | 5 | 40 | +| Querier | 1.5 | 2 | 100 | 150 | 200 | +| Query-frontend | 1 | 2 | 8 | 8 | 16 | +| Query-scheduler | 1 | 0.5 | 2 | 2 | 1 | +| Compactor | 6 | 20 | 1 (Singleton) | 6 | 20 | +{{< /tab-content >}} +{{< tab-content name="~1PB/month (30TB/day)" >}} +| Component | CPU Request | Memory Request (Gi)| Base Replicas | Total CPU Req |Total Mem Req (Gi)| +|------------------|-------------|-------------------|----------------|----------------|-----------------| +| Ingester | 4 | 8 | 150 | 600 | 1200 | +| Distributor | 2 | 1 | 100 | 200 | 100 | +| Index gateway | 1 | 4 | 20 | 20 | 80 | +| Querier | 1.5 | 3 | 250 | 375 | 750 | +| Query-frontend | 1 | 4 | 16 | 16 | 64 | +| Query-scheduler | 2 | 0.5 | 2 | 4 | 1 | +| Compactor | 6 | 40 | 1 (Singleton) | 6 | 40 | +{{< /tab-content >}} +{{< /tabs >}} + +
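As an example of putting the tables above to use, the per-component figures translate directly into Kubernetes resource requests and replica counts. Below is a minimal sketch of a Helm `values.yaml` excerpt for the smallest tier, assuming a chart with per-component `replicas` and `resources` keys; the key layout is an assumption and may differ in the chart you deploy.

```yaml
# Illustrative excerpt for the "less than 100TB/month" tier; key names are assumed.
ingester:
  replicas: 6
  resources:
    requests:
      cpu: "2"
      memory: 4Gi
distributor:
  replicas: 4
  resources:
    requests:
      cpu: "2"
      memory: 512Mi   # 0.5 Gi
indexGateway:
  replicas: 4
  resources:
    requests:
      cpu: 500m
      memory: 2Gi
querier:
  replicas: 10
  resources:
    requests:
      cpu: "1"
      memory: 1Gi
compactor:
  replicas: 1          # singleton
  resources:
    requests:
      cpu: "2"
      memory: 10Gi
```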

+## Instance Types

+ +These are the node types we suggest from various cloud providers. Please see the relevant specifications in the provider's documentation. +- For AWS any General Purpose machine available in your region that belongs to `M6` instance family and above for Intel chips and `T2` machine family and above for ARM chips. +- For GCP any General Purpose machine available in your region that belongs to to `E2` instance family and above. +- For memcached-extstore nodes we suggest storage optimised instances that can has NVMe storage so that the additional disk space is utilized. diff --git a/go.mod b/go.mod index a9efc1e1da6d9..e93c98c4931f1 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf github.com/cristalhq/hedgedhttp v0.9.1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc - github.com/docker/docker v27.5.0+incompatible + github.com/docker/docker v27.5.1+incompatible github.com/docker/go-plugins-helpers v0.0.0-20240701071450-45e2431495c8 github.com/drone/envsubst v1.0.3 github.com/dustin/go-humanize v1.0.1 @@ -68,7 +68,7 @@ require ( github.com/klauspost/pgzip v1.2.6 github.com/leodido/go-syslog/v4 v4.2.0 github.com/mattn/go-ieproxy v0.0.12 - github.com/minio/minio-go/v7 v7.0.83 + github.com/minio/minio-go/v7 v7.0.84 github.com/mitchellh/go-wordwrap v1.0.1 github.com/mitchellh/mapstructure v1.5.1-0.20220423185008-bf980b35cac4 github.com/modern-go/reflect2 v1.0.2 @@ -136,14 +136,14 @@ require ( github.com/schollz/progressbar/v3 v3.18.0 github.com/shirou/gopsutil/v4 v4.24.12 github.com/thanos-io/objstore v0.0.0-20250115091151-a54d0f04b42a - github.com/twmb/franz-go v1.18.0 - github.com/twmb/franz-go/pkg/kadm v1.14.0 + github.com/twmb/franz-go v1.18.1 + github.com/twmb/franz-go/pkg/kadm v1.15.0 github.com/twmb/franz-go/pkg/kfake v0.0.0-20241015013301-cea7aa5d8037 github.com/twmb/franz-go/pkg/kmsg v1.9.0 github.com/twmb/franz-go/plugin/kotel v1.5.0 github.com/twmb/franz-go/plugin/kprom v1.1.0 github.com/willf/bloom v2.0.3+incompatible - go.opentelemetry.io/collector/pdata v1.23.0 + go.opentelemetry.io/collector/pdata v1.24.0 go4.org/netipx v0.0.0-20230125063823-8449b0a6169f golang.org/x/oauth2 v0.25.0 golang.org/x/text v0.21.0 @@ -363,9 +363,9 @@ require ( go.opentelemetry.io/collector/semconv v0.108.1 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect - go.opentelemetry.io/otel v1.33.0 - go.opentelemetry.io/otel/metric v1.33.0 // indirect - go.opentelemetry.io/otel/trace v1.33.0 + go.opentelemetry.io/otel v1.34.0 + go.opentelemetry.io/otel/metric v1.34.0 // indirect + go.opentelemetry.io/otel/trace v1.34.0 go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/mod v0.22.0 // indirect diff --git a/go.sum b/go.sum index 75981663cd131..995a5191d9218 100644 --- a/go.sum +++ b/go.sum @@ -328,8 +328,8 @@ github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5Qvfr github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/docker/docker v27.5.0+incompatible h1:um++2NcQtGRTz5eEgO6aJimo6/JxrTXC941hd05JO6U= -github.com/docker/docker v27.5.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= 
+github.com/docker/docker v27.5.1+incompatible h1:4PYU5dnBYqRQi0294d1FBECqT9ECWeQAIfE8q4YnPY8= +github.com/docker/docker v27.5.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8= @@ -848,8 +848,8 @@ github.com/miekg/dns v1.1.62 h1:cN8OuEF1/x5Rq6Np+h1epln8OiyPWV+lROx9LxcGgIQ= github.com/miekg/dns v1.1.62/go.mod h1:mvDlcItzm+br7MToIKqkglaGhlFMHJ9DTNNWONWXbNQ= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= -github.com/minio/minio-go/v7 v7.0.83 h1:W4Kokksvlz3OKf3OqIlzDNKd4MERlC2oN8YptwJ0+GA= -github.com/minio/minio-go/v7 v7.0.83/go.mod h1:57YXpvc5l3rjPdhqNrDsvVlY0qPI6UTk1bflAe+9doY= +github.com/minio/minio-go/v7 v7.0.84 h1:D1HVmAF8JF8Bpi6IU4V9vIEj+8pc+xU88EWMs2yed0E= +github.com/minio/minio-go/v7 v7.0.84/go.mod h1:57YXpvc5l3rjPdhqNrDsvVlY0qPI6UTk1bflAe+9doY= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= @@ -1134,10 +1134,10 @@ github.com/tklauser/numcpus v0.7.0/go.mod h1:bb6dMVcj8A42tSE7i32fsIUCbQNllK5iDgu github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/ttacon/chalk v0.0.0-20160626202418-22c06c80ed31/go.mod h1:onvgF043R+lC5RZ8IT9rBXDaEDnpnw/Cl+HFiw+v/7Q= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= -github.com/twmb/franz-go v1.18.0 h1:25FjMZfdozBywVX+5xrWC2W+W76i0xykKjTdEeD2ejw= -github.com/twmb/franz-go v1.18.0/go.mod h1:zXCGy74M0p5FbXsLeASdyvfLFsBvTubVqctIaa5wQ+I= -github.com/twmb/franz-go/pkg/kadm v1.14.0 h1:nAn1co1lXzJQocpzyIyOFOjUBf4WHWs5/fTprXy2IZs= -github.com/twmb/franz-go/pkg/kadm v1.14.0/go.mod h1:XjOPz6ZaXXjrW2jVCfLuucP8H1w2TvD6y3PT2M+aAM4= +github.com/twmb/franz-go v1.18.1 h1:D75xxCDyvTqBSiImFx2lkPduE39jz1vaD7+FNc+vMkc= +github.com/twmb/franz-go v1.18.1/go.mod h1:Uzo77TarcLTUZeLuGq+9lNpSkfZI+JErv7YJhlDjs9M= +github.com/twmb/franz-go/pkg/kadm v1.15.0 h1:Yo3NAPfcsx3Gg9/hdhq4vmwO77TqRRkvpUcGWzjworc= +github.com/twmb/franz-go/pkg/kadm v1.15.0/go.mod h1:MUdcUtnf9ph4SFBLLA/XxE29rvLhWYLM9Ygb8dfSCvw= github.com/twmb/franz-go/pkg/kfake v0.0.0-20241015013301-cea7aa5d8037 h1:M4Zj79q1OdZusy/Q8TOTttvx/oHkDVY7sc0xDyRnwWs= github.com/twmb/franz-go/pkg/kfake v0.0.0-20241015013301-cea7aa5d8037/go.mod h1:nkBI/wGFp7t1NJnnCeJdS4sX5atPAqwCPpDXKuI7SC8= github.com/twmb/franz-go/pkg/kmsg v1.9.0 h1:JojYUph2TKAau6SBtErXpXGC7E3gg4vGZMv9xFU/B6M= @@ -1207,8 +1207,8 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/collector/pdata v1.23.0 h1:tEk0dkfB8RdSukoOMfEa8duB938gfZowdfRkrJxGDrw= -go.opentelemetry.io/collector/pdata v1.23.0/go.mod h1:I2jggpBMiO8A+7TXhzNpcJZkJtvi1cU0iVNIi+6bc+o= +go.opentelemetry.io/collector/pdata 
v1.24.0 h1:D6j92eAzmAbQgivNBUnt8r9juOl8ugb+ihYynoFZIEg= +go.opentelemetry.io/collector/pdata v1.24.0/go.mod h1:cf3/W9E/uIvPS4MR26SnMFJhraUCattzzM6qusuONuc= go.opentelemetry.io/collector/semconv v0.108.1 h1:Txk9tauUnamZaxS5vlf1O0uZ4VD6nioRBR0nX8L/fU4= go.opentelemetry.io/collector/semconv v0.108.1/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= go.opentelemetry.io/contrib/detectors/gcp v1.33.0 h1:FVPoXEoILwgbZUu4X7YSgsESsAmGRgoYcnXkzgQPhP4= @@ -1217,22 +1217,22 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.5 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= -go.opentelemetry.io/otel v1.33.0 h1:/FerN9bax5LoK51X/sI0SVYrjSE0/yUL7DpxW4K3FWw= -go.opentelemetry.io/otel v1.33.0/go.mod h1:SUUkR6csvUQl+yjReHu5uM3EtVV7MBm5FHKRlNx4I8I= +go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= +go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0 h1:JAv0Jwtl01UFiyWZEMiJZBiTlv5A50zNs8lsthXqIio= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0/go.mod h1:QNKLmUEAq2QUbPQUfvw4fmv0bgbK7UlOSFCnXyfvSNc= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0 h1:WDdP9acbMYjbKIyJUhTvtzj601sVJOqgWdUxSdR/Ysc= go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0/go.mod h1:BLbf7zbNIONBLPwvFnwNHGj4zge8uTCM/UPIVW1Mq2I= -go.opentelemetry.io/otel/metric v1.33.0 h1:r+JOocAyeRVXD8lZpjdQjzMadVZp2M4WmQ+5WtEnklQ= -go.opentelemetry.io/otel/metric v1.33.0/go.mod h1:L9+Fyctbp6HFTddIxClbQkjtubW6O9QS3Ann/M82u6M= +go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= +go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= go.opentelemetry.io/otel/sdk v1.33.0 h1:iax7M131HuAm9QkZotNHEfstof92xM+N8sr3uHXc2IM= go.opentelemetry.io/otel/sdk v1.33.0/go.mod h1:A1Q5oi7/9XaMlIWzPSxLRWOI8nG3FnzHJNbiENQuihM= go.opentelemetry.io/otel/sdk/metric v1.33.0 h1:Gs5VK9/WUJhNXZgn8MR6ITatvAmKeIuCtNbsP3JkNqU= go.opentelemetry.io/otel/sdk/metric v1.33.0/go.mod h1:dL5ykHZmm1B1nVRk9dDjChwDmt81MjVp3gLkQRwKf/Q= -go.opentelemetry.io/otel/trace v1.33.0 h1:cCJuF7LRjUFso9LPnEAHJDB2pqzp+hbO8eu1qqW2d/s= -go.opentelemetry.io/otel/trace v1.33.0/go.mod h1:uIcdVUZMpTAmz0tI1z04GoVSezK37CbGV4fr1f2nBck= +go.opentelemetry.io/otel/trace v1.34.0 h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= +go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= diff --git a/pkg/compactor/deletion/delete_request.go b/pkg/compactor/deletion/delete_request.go index 9ce7f381fb105..592ae810b2a98 100644 --- a/pkg/compactor/deletion/delete_request.go +++ 
b/pkg/compactor/deletion/delete_request.go @@ -4,6 +4,7 @@ import ( "time" "github.com/go-kit/log/level" + "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" @@ -160,6 +161,33 @@ func (d *DeleteRequest) IsDeleted(entry retention.ChunkEntry) (bool, filter.Func return true, ff } +func (d *DeleteRequest) IsDuplicate(o *DeleteRequest) (bool, error) { + // we would never have duplicates from same request + if d.RequestID == o.RequestID { + return false, nil + } + if d.UserID != o.UserID || d.StartTime != o.StartTime || d.EndTime != o.EndTime { + return false, nil + } + + if d.logSelectorExpr == nil { + if err := d.SetQuery(d.Query); err != nil { + return false, errors.Wrapf(err, "failed to init log selector expr for request_id=%s, user_id=%s", d.RequestID, d.UserID) + } + } + if o.logSelectorExpr == nil { + if err := o.SetQuery(o.Query); err != nil { + return false, errors.Wrapf(err, "failed to init log selector expr for request_id=%s, user_id=%s", o.RequestID, o.UserID) + } + } + + if d.logSelectorExpr.String() != o.logSelectorExpr.String() { + return false, nil + } + + return true, nil +} + func intervalsOverlap(interval1, interval2 model.Interval) bool { if interval1.Start > interval2.End || interval2.Start > interval1.End { return false diff --git a/pkg/compactor/deletion/delete_request_test.go b/pkg/compactor/deletion/delete_request_test.go index 899e83f802e37..d8b64f2031f2d 100644 --- a/pkg/compactor/deletion/delete_request_test.go +++ b/pkg/compactor/deletion/delete_request_test.go @@ -432,3 +432,144 @@ func TestDeleteRequest_FilterFunction(t *testing.T) { require.Panics(t, func() { testutil.ToFloat64(dr.Metrics.deletedLinesTotal) }) }) } + +func TestDeleteRequest_IsDuplicate(t *testing.T) { + query1 := `{foo="bar", fizz="buzz"} |= "foo"` + query2 := `{foo="bar", fizz="buzz2"} |= "foo"` + + for _, tc := range []struct { + name string + req1, req2 DeleteRequest + expIsDuplicate bool + }{ + { + name: "not duplicate - different user id", + req1: DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + req2: DeleteRequest{ + RequestID: "1", + UserID: user2, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + expIsDuplicate: false, + }, + { + name: "not duplicate - same request id", + req1: DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + req2: DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + expIsDuplicate: false, + }, + { + name: "not duplicate - different start time", + req1: DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + req2: DeleteRequest{ + RequestID: "2", + UserID: user1, + StartTime: now.Add(-13 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + }, + { + name: "not duplicate - different end time", + req1: DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + req2: DeleteRequest{ + RequestID: "2", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-11 * time.Hour), + Query: query1, + }, + }, + { + name: "not duplicate - different labels", + req1: 
DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + req2: DeleteRequest{ + RequestID: "2", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query2, + }, + }, + { + name: "duplicate - same request", + req1: DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + req2: DeleteRequest{ + RequestID: "2", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + expIsDuplicate: true, + }, + { + name: "duplicate - same request with irregularities in query", + req1: DeleteRequest{ + RequestID: "1", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: query1, + }, + req2: DeleteRequest{ + RequestID: "2", + UserID: user1, + StartTime: now.Add(-12 * time.Hour), + EndTime: now.Add(-10 * time.Hour), + Query: "{foo=\"bar\", fizz=`buzz`} |= `foo`", + }, + expIsDuplicate: true, + }, + } { + t.Run(tc.name, func(t *testing.T) { + isDuplicate, err := tc.req1.IsDuplicate(&tc.req2) + require.NoError(t, err) + require.Equal(t, tc.expIsDuplicate, isDuplicate) + }) + } +} diff --git a/pkg/compactor/deletion/delete_requests_manager.go b/pkg/compactor/deletion/delete_requests_manager.go index ba99625b2dd96..97b8c73c9f7f6 100644 --- a/pkg/compactor/deletion/delete_requests_manager.go +++ b/pkg/compactor/deletion/delete_requests_manager.go @@ -35,6 +35,7 @@ type DeleteRequestsManager struct { deleteRequestsToProcess map[string]*userDeleteRequests deleteRequestsToProcessMtx sync.Mutex + duplicateRequests []DeleteRequest metrics *deleteRequestsManagerMetrics wg sync.WaitGroup done chan struct{} @@ -92,6 +93,16 @@ func (d *DeleteRequestsManager) updateMetrics() error { oldestPendingRequestCreatedAt := model.Time(0) for _, deleteRequest := range deleteRequests { + // do not consider requests from users whose delete requests should not be processed as per their config + processRequest, err := d.shouldProcessRequest(deleteRequest) + if err != nil { + return err + } + + if !processRequest { + continue + } + // adding an extra minute here to avoid a race between cancellation of request and picking up the request for processing if deleteRequest.Status != StatusReceived || deleteRequest.CreatedAt.Add(d.deleteRequestCancelPeriod).Add(time.Minute).After(model.Now()) { continue @@ -143,6 +154,23 @@ func (d *DeleteRequestsManager) loadDeleteRequestsToProcess() error { continue } } + if ur, ok := d.deleteRequestsToProcess[deleteRequest.UserID]; ok { + for _, requestLoadedForProcessing := range ur.requests { + isDuplicate, err := requestLoadedForProcessing.IsDuplicate(&deleteRequest) + if err != nil { + return err + } + if isDuplicate { + level.Info(util_log.Logger).Log( + "msg", "found duplicate request of one of the requests loaded for processing", + "loaded_request_id", requestLoadedForProcessing.RequestID, + "duplicate_request_id", deleteRequest.RequestID, + "user", deleteRequest.UserID, + ) + d.duplicateRequests = append(d.duplicateRequests, deleteRequest) + } + } + } if reqCount >= d.batchSize { logBatchTruncation(reqCount, len(deleteRequests)) break @@ -356,6 +384,15 @@ func (d *DeleteRequestsManager) MarkPhaseFinished() { d.markRequestAsProcessed(*deleteRequest) } } + + for _, req := range d.duplicateRequests { + level.Info(util_log.Logger).Log("msg", "marking duplicate delete 
request as processed", + "delete_request_id", req.RequestID, + "sequence_num", req.SequenceNum, + "user", req.UserID, + ) + d.markRequestAsProcessed(req) + } } func (d *DeleteRequestsManager) IntervalMayHaveExpiredChunks(_ model.Interval, userID string) bool { diff --git a/pkg/compactor/deletion/delete_requests_manager_test.go b/pkg/compactor/deletion/delete_requests_manager_test.go index 6eabf2de38799..baa7b60b312db 100644 --- a/pkg/compactor/deletion/delete_requests_manager_test.go +++ b/pkg/compactor/deletion/delete_requests_manager_test.go @@ -48,6 +48,7 @@ func TestDeleteRequestsManager_Expired(t *testing.T) { expectedResp resp expectedDeletionRangeByUser map[string]model.Interval expectedRequestsMarkedAsProcessed []int + expectedDuplicateRequestsCount int }{ { name: "no delete requests", @@ -895,6 +896,43 @@ func TestDeleteRequestsManager_Expired(t *testing.T) { }, expectedRequestsMarkedAsProcessed: []int{0, 1}, }, + { + name: "duplicate delete request marked as processed with loaded request", + deletionMode: deletionmode.FilterAndDelete, + batchSize: 1, + deleteRequestsFromStore: []DeleteRequest{ + { + RequestID: "1", + UserID: testUserID, + Query: streamSelectorWithLineFilters, + StartTime: now.Add(-24 * time.Hour), + EndTime: now, + Status: StatusReceived, + }, + { + RequestID: "2", + UserID: testUserID, + Query: streamSelectorWithLineFilters, + StartTime: now.Add(-24 * time.Hour), + EndTime: now, + Status: StatusReceived, + }, + }, + expectedResp: resp{ + isExpired: true, + expectedFilter: func(_ time.Time, s string, _ ...labels.Label) bool { + return strings.Contains(s, "fizz") + }, + }, + expectedDeletionRangeByUser: map[string]model.Interval{ + testUserID: { + Start: now.Add(-24 * time.Hour), + End: now, + }, + }, + expectedRequestsMarkedAsProcessed: []int{0, 1}, + expectedDuplicateRequestsCount: 1, + }, } { t.Run(tc.name, func(t *testing.T) { mockDeleteRequestsStore := &mockDeleteRequestsStore{deleteRequests: tc.deleteRequestsFromStore} @@ -947,6 +985,7 @@ func TestDeleteRequestsManager_Expired(t *testing.T) { for i, reqIdx := range tc.expectedRequestsMarkedAsProcessed { require.True(t, requestsAreEqual(tc.deleteRequestsFromStore[reqIdx], processedRequests[i])) } + require.Len(t, mgr.duplicateRequests, tc.expectedDuplicateRequestsCount) }) } } diff --git a/pkg/dataobj/dataobj.go b/pkg/dataobj/dataobj.go index 9964fde324639..1d3307842384d 100644 --- a/pkg/dataobj/dataobj.go +++ b/pkg/dataobj/dataobj.go @@ -47,16 +47,29 @@ type BuilderConfig struct { // TargetObjectSize configures a target size for data objects. TargetObjectSize flagext.Bytes `yaml:"target_object_size"` + + // TargetSectionSize configures the maximum size of data in a section. Sections + // which support this parameter will place overflow data into new sections of + // the same type. + TargetSectionSize flagext.Bytes `yaml:"target_section_size"` + + // BufferSize configures the size of the buffer used to accumulate + // uncompressed logs in memory prior to sorting. + BufferSize flagext.Bytes `yaml:"buffer_size"` } // RegisterFlagsWithPrefix registers flags with the given prefix. 
func (cfg *BuilderConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { _ = cfg.TargetPageSize.Set("2MB") _ = cfg.TargetObjectSize.Set("1GB") + _ = cfg.BufferSize.Set("16MB") // Page Size * 8 + _ = cfg.TargetSectionSize.Set("128MB") // Target Object Size / 8 f.IntVar(&cfg.SHAPrefixSize, prefix+"sha-prefix-size", 2, "The size of the SHA prefix to use for the data object builder.") f.Var(&cfg.TargetPageSize, prefix+"target-page-size", "The size of the target page to use for the data object builder.") f.Var(&cfg.TargetObjectSize, prefix+"target-object-size", "The size of the target object to use for the data object builder.") + f.Var(&cfg.TargetSectionSize, prefix+"target-section-size", "Configures a maximum size for sections, for sections that support it.") + f.Var(&cfg.BufferSize, prefix+"buffer-size", "The size of the buffer to use for sorting logs.") } // Validate validates the BuilderConfig. @@ -77,6 +90,14 @@ func (cfg *BuilderConfig) Validate() error { errs = append(errs, errors.New("TargetObjectSize must be greater than 0")) } + if cfg.BufferSize <= 0 { + errs = append(errs, errors.New("BufferSize must be greater than 0")) + } + + if cfg.TargetSectionSize <= 0 || cfg.TargetSectionSize > cfg.TargetObjectSize { + errs = append(errs, errors.New("SectionSize must be greater than 0 and less than or equal to TargetObjectSize")) + } + return errors.Join(errs...) } @@ -148,7 +169,11 @@ func NewBuilder(cfg BuilderConfig, bucket objstore.Bucket, tenantID string) (*Bu labelCache: labelCache, streams: streams.New(metrics.streams, int(cfg.TargetPageSize)), - logs: logs.New(metrics.logs, int(cfg.TargetPageSize)), + logs: logs.New(metrics.logs, logs.Options{ + PageSizeHint: int(cfg.TargetPageSize), + BufferSize: int(cfg.BufferSize), + SectionSize: int(cfg.TargetSectionSize), + }), flushBuffer: flushBuffer, encoder: encoder, @@ -303,6 +328,8 @@ func (b *Builder) buildObject() error { return fmt.Errorf("encoding object: %w", err) } + b.metrics.builtSize.Observe(float64(b.flushBuffer.Len())) + // We pass context.Background() below to avoid allowing building an object to // time out; timing out on build would discard anything we built and would // cause data loss. diff --git a/pkg/dataobj/dataobj_test.go b/pkg/dataobj/dataobj_test.go index e367eca08188f..6c75d722ae5c8 100644 --- a/pkg/dataobj/dataobj_test.go +++ b/pkg/dataobj/dataobj_test.go @@ -19,6 +19,16 @@ import ( "github.com/grafana/loki/v3/pkg/logql/syntax" ) +var testBuilderConfig = BuilderConfig{ + SHAPrefixSize: 2, + + TargetPageSize: 2048, + TargetObjectSize: 4096, + TargetSectionSize: 4096, + + BufferSize: 2048 * 8, +} + func Test(t *testing.T) { bucket := objstore.NewInMemBucket() @@ -67,16 +77,7 @@ func Test(t *testing.T) { } t.Run("Build", func(t *testing.T) { - // Create a tiny builder which flushes a lot of objects and pages to properly - // test the builder. - builderConfig := BuilderConfig{ - SHAPrefixSize: 2, - - TargetPageSize: 1_500_000, - TargetObjectSize: 10_000_000, - } - - builder, err := NewBuilder(builderConfig, bucket, "fake") + builder, err := NewBuilder(testBuilderConfig, bucket, "fake") require.NoError(t, err) for _, entry := range streams { @@ -94,10 +95,7 @@ func Test(t *testing.T) { actual, err := result.Collect(reader.Streams(context.Background(), objects[0])) require.NoError(t, err) - - // TODO(rfratto): reenable once sorting is reintroduced. 
- _ = actual - // require.Equal(t, sortStreams(t, streams), actual) + require.Equal(t, sortStreams(t, streams), actual) }) } @@ -109,16 +107,7 @@ func Test_Builder_Append(t *testing.T) { bucket := objstore.NewInMemBucket() - // Create a tiny builder which flushes a lot of objects and pages to properly - // test the builder. - builderConfig := BuilderConfig{ - SHAPrefixSize: 2, - - TargetPageSize: 2048, - TargetObjectSize: 4096, - } - - builder, err := NewBuilder(builderConfig, bucket, "fake") + builder, err := NewBuilder(testBuilderConfig, bucket, "fake") require.NoError(t, err) for { diff --git a/pkg/dataobj/internal/dataset/column_builder.go b/pkg/dataobj/internal/dataset/column_builder.go index ca67fbdf150f8..0b6833e0abc86 100644 --- a/pkg/dataobj/internal/dataset/column_builder.go +++ b/pkg/dataobj/internal/dataset/column_builder.go @@ -3,6 +3,8 @@ package dataset import ( "fmt" + "github.com/klauspost/compress/zstd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" ) @@ -21,6 +23,16 @@ type BuilderOptions struct { // Compression is the compression algorithm to use for values. Compression datasetmd.CompressionType + + // CompressionOptions holds optional configuration for compression. + CompressionOptions CompressionOptions +} + +// CompressionOptions customizes the compressor used when building pages. +type CompressionOptions struct { + // Zstd holds encoding options for Zstd compression. Only used for + // [datasetmd.COMPRESSION_TYPE_ZSTD]. + Zstd []zstd.EOption } // A ColumnBuilder builds a sequence of [Value] entries of a common type into a diff --git a/pkg/dataobj/internal/dataset/dataset_iter.go b/pkg/dataobj/internal/dataset/dataset_iter.go index d223b87d1abef..34fda49a23b09 100644 --- a/pkg/dataobj/internal/dataset/dataset_iter.go +++ b/pkg/dataobj/internal/dataset/dataset_iter.go @@ -33,7 +33,6 @@ func Iter(ctx context.Context, columns []Column) result.Seq[Row] { type pullColumnIter struct { Next func() (result.Result[Value], bool) - Stop func() } return result.Iter(func(yield func(Row) bool) error { @@ -47,7 +46,9 @@ func Iter(ctx context.Context, columns []Column) result.Seq[Row] { } next, stop := result.Pull(lazyColumnIter(ctx, col.ColumnInfo(), pages)) - pullColumns = append(pullColumns, pullColumnIter{Next: next, Stop: stop}) + defer stop() + + pullColumns = append(pullColumns, pullColumnIter{Next: next}) } // Start emitting rows; each row is composed of the next value from all of diff --git a/pkg/dataobj/internal/dataset/page.go b/pkg/dataobj/internal/dataset/page.go index 9e8846d88eeed..c0b2653bbc65e 100644 --- a/pkg/dataobj/internal/dataset/page.go +++ b/pkg/dataobj/internal/dataset/page.go @@ -7,11 +7,13 @@ import ( "fmt" "hash/crc32" "io" + "sync" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/util/bufpool" ) // Helper types. 
@@ -88,39 +90,99 @@ func (p *MemPage) reader(compression datasetmd.CompressionType) (presence io.Rea } var ( - bitmapReader = bytes.NewReader(p.Data[n : n+int(bitmapSize)]) - compressedDataReader = bytes.NewReader(p.Data[n+int(bitmapSize):]) + bitmapData = p.Data[n : n+int(bitmapSize)] + compressedValuesData = p.Data[n+int(bitmapSize):] + + bitmapReader = bytes.NewReader(bitmapData) + compressedValuesReader = bytes.NewReader(compressedValuesData) ) switch compression { case datasetmd.COMPRESSION_TYPE_UNSPECIFIED, datasetmd.COMPRESSION_TYPE_NONE: - return bitmapReader, io.NopCloser(compressedDataReader), nil + return bitmapReader, io.NopCloser(compressedValuesReader), nil case datasetmd.COMPRESSION_TYPE_SNAPPY: - sr := snappy.NewReader(compressedDataReader) - return bitmapReader, io.NopCloser(sr), nil + sr := snappyPool.Get().(*snappy.Reader) + sr.Reset(compressedValuesReader) + return bitmapReader, &closerFunc{Reader: sr, onClose: func() error { + snappyPool.Put(sr) + return nil + }}, nil case datasetmd.COMPRESSION_TYPE_ZSTD: - zr, err := zstd.NewReader(compressedDataReader) - if err != nil { - return nil, nil, fmt.Errorf("opening zstd reader: %w", err) - } - return bitmapReader, newZstdReader(zr), nil + zr := &fixedZstdReader{page: p, data: compressedValuesData} + return bitmapReader, zr, nil } panic(fmt.Sprintf("dataset.MemPage.reader: unknown compression type %q", compression.String())) } -// zstdReader implements [io.ReadCloser] for a [zstd.Decoder]. -type zstdReader struct{ *zstd.Decoder } +var snappyPool = sync.Pool{ + New: func() interface{} { + return snappy.NewReader(nil) + }, +} + +type closerFunc struct { + io.Reader + onClose func() error +} + +func (c *closerFunc) Close() error { return c.onClose() } -// newZstdReader returns a new [io.ReadCloser] for a [zstd.Decoder]. -func newZstdReader(dec *zstd.Decoder) io.ReadCloser { - return &zstdReader{Decoder: dec} +// globalZstdDecoder is a shared zstd decoder for [fixedZstdReader]. Concurrent +// uses of globalZstdDecoder are only safe when using [zstd.Decoder.DecodeAll]. +var globalZstdDecoder = func() *zstd.Decoder { + d, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(1)) + if err != nil { + panic(err) + } + return d +}() + +// fixedZstdReader is an [io.ReadCloser] that decompresses a zstd buffer in a +// single pass. +type fixedZstdReader struct { + page *MemPage + data []byte + + uncompressedBuf *bytes.Buffer + closed bool } -// Close implements [io.Closer]. -func (r *zstdReader) Close() error { - r.Decoder.Close() +func (r *fixedZstdReader) Read(p []byte) (int, error) { + if r.closed { + return 0, io.ErrClosedPipe + } + + if r.uncompressedBuf != nil { + return r.uncompressedBuf.Read(p) + } + + // We decompress the entire buffer in a single pass. While a pooled zstd + // reader would require less memory and would allow us to stream values as we + // decompress, pooling zstd decoders is difficult to do properly, as it + // requires a finalizer to release resources, and the goroutines spawned by + // decoders prevent the finalizer from ever being called. + // + // To make efficient zstd decoding less error prone, we opt for this instead. 
+ r.uncompressedBuf = bufpool.Get(r.page.Info.UncompressedSize) + r.uncompressedBuf.Reset() + + buf, err := globalZstdDecoder.DecodeAll(r.data, r.uncompressedBuf.AvailableBuffer()) + if err != nil { + return 0, fmt.Errorf("decoding zstd: %w", err) + } + _, _ = r.uncompressedBuf.Write(buf) + + return r.uncompressedBuf.Read(p) +} + +func (r *fixedZstdReader) Close() error { + if r.uncompressedBuf != nil { + bufpool.Put(r.uncompressedBuf) + r.uncompressedBuf = nil + } + r.closed = true return nil } diff --git a/pkg/dataobj/internal/dataset/page_builder.go b/pkg/dataobj/internal/dataset/page_builder.go index 4222f3cae20a5..3f16e9e6b2174 100644 --- a/pkg/dataobj/internal/dataset/page_builder.go +++ b/pkg/dataobj/internal/dataset/page_builder.go @@ -56,7 +56,7 @@ func newPageBuilder(opts BuilderOptions) (*pageBuilder, error) { presenceBuffer = bytes.NewBuffer(nil) valuesBuffer = bytes.NewBuffer(make([]byte, 0, opts.PageSizeHint)) - valuesWriter = newCompressWriter(valuesBuffer, opts.Compression) + valuesWriter = newCompressWriter(valuesBuffer, opts.Compression, opts.CompressionOptions) ) presenceEnc := newBitmapEncoder(presenceBuffer) @@ -174,12 +174,18 @@ func (b *pageBuilder) Flush() (*MemPage, error) { return nil, fmt.Errorf("no data to flush") } - // Before we can build the page we need to finish flushing our encoders and writers. + // Before we can build the page we need to finish flushing our encoders and + // writers. + // + // We must call [compressWriter.Close] to ensure that Zstd writers write a + // proper EOF marker, otherwise synchronous decoding can't be used. + // compressWriters can continue to reset and reused after closing, so this is + // safe. if err := b.presenceEnc.Flush(); err != nil { return nil, fmt.Errorf("flushing presence encoder: %w", err) } else if err := b.valuesEnc.Flush(); err != nil { return nil, fmt.Errorf("flushing values encoder: %w", err) - } else if err := b.valuesWriter.Flush(); err != nil { + } else if err := b.valuesWriter.Close(); err != nil { return nil, fmt.Errorf("flushing values writer: %w", err) } diff --git a/pkg/dataobj/internal/dataset/page_compress_writer.go b/pkg/dataobj/internal/dataset/page_compress_writer.go index 3fad4a0edfe0f..a096ceb443449 100644 --- a/pkg/dataobj/internal/dataset/page_compress_writer.go +++ b/pkg/dataobj/internal/dataset/page_compress_writer.go @@ -20,14 +20,16 @@ type compressWriter struct { w io.WriteCloser // Compressing writer. buf *bufio.Writer // Buffered writer in front of w to be able to call WriteByte. + rawBytes int // Number of uncompressed bytes written. + compression datasetmd.CompressionType // Compression type being used. - rawBytes int // Number of uncompressed bytes written. + opts CompressionOptions // Options to customize compression. } var _ streamio.Writer = (*compressWriter)(nil) -func newCompressWriter(w io.Writer, ty datasetmd.CompressionType) *compressWriter { - c := compressWriter{compression: ty} +func newCompressWriter(w io.Writer, ty datasetmd.CompressionType, opts CompressionOptions) *compressWriter { + c := compressWriter{compression: ty, opts: opts} c.Reset(w) return &c } @@ -85,7 +87,7 @@ func (c *compressWriter) Reset(w io.Writer) { compressedWriter = snappy.NewBufferedWriter(w) case datasetmd.COMPRESSION_TYPE_ZSTD: - zw, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zstd.SpeedBestCompression)) + zw, err := zstd.NewWriter(w, c.opts.Zstd...) 
if err != nil { panic(fmt.Sprintf("compressWriter.Reset: creating zstd writer: %v", err)) } diff --git a/pkg/dataobj/internal/dataset/page_test.go b/pkg/dataobj/internal/dataset/page_test.go index b041285433ec8..869ace87bdc35 100644 --- a/pkg/dataobj/internal/dataset/page_test.go +++ b/pkg/dataobj/internal/dataset/page_test.go @@ -1,6 +1,7 @@ package dataset import ( + "io" "math/rand" "testing" "time" @@ -10,7 +11,7 @@ import ( "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" ) -func Test_pageBuilder_WriteRead(t *testing.T) { +func Benchmark_pageBuilder_WriteRead(b *testing.B) { in := []string{ "hello, world!", "", @@ -30,6 +31,54 @@ func Test_pageBuilder_WriteRead(t *testing.T) { Compression: datasetmd.COMPRESSION_TYPE_ZSTD, Encoding: datasetmd.ENCODING_TYPE_PLAIN, } + builder, err := newPageBuilder(opts) + require.NoError(b, err) + + for _, s := range in { + require.True(b, builder.Append(StringValue(s))) + } + + page, err := builder.Flush() + require.NoError(b, err) + require.Equal(b, len(in), page.Info.RowCount) + require.Equal(b, len(in)-2, page.Info.ValuesCount) // -2 for the empty strings + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, values, err := page.reader(datasetmd.COMPRESSION_TYPE_ZSTD) + if err != nil { + b.Fatal() + } + + if _, err := io.Copy(io.Discard, values); err != nil { + b.Fatal(err) + } else if err := values.Close(); err != nil { + b.Fatal(err) + } + } +} + +func Test_pageBuilder_WriteRead(t *testing.T) { + in := []string{ + "hello, world!", + "", + "this is a test of the emergency broadcast system", + "this is only a test", + "if this were a real emergency, you would be instructed to panic", + "but it's not, so don't", + "", + "this concludes the test", + "thank you for your cooperation", + "goodbye", + } + + opts := BuilderOptions{ + PageSizeHint: 1024, + Value: datasetmd.VALUE_TYPE_STRING, + Compression: datasetmd.COMPRESSION_TYPE_SNAPPY, + Encoding: datasetmd.ENCODING_TYPE_PLAIN, + } b, err := newPageBuilder(opts) require.NoError(t, err) diff --git a/pkg/dataobj/internal/encoding/decoder_metadata.go b/pkg/dataobj/internal/encoding/decoder_metadata.go index b4091110808ec..192ea4a5be4e1 100644 --- a/pkg/dataobj/internal/encoding/decoder_metadata.go +++ b/pkg/dataobj/internal/encoding/decoder_metadata.go @@ -1,7 +1,6 @@ package encoding import ( - "bytes" "encoding/binary" "fmt" "io" @@ -12,6 +11,7 @@ import ( "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd" "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/streamsmd" "github.com/grafana/loki/v3/pkg/dataobj/internal/streamio" + "github.com/grafana/loki/v3/pkg/dataobj/internal/util/bufpool" ) // decode* methods for metadata shared by Decoder implementations. @@ -108,9 +108,10 @@ func decodeProto(r streamio.Reader, pb proto.Message) error { return fmt.Errorf("read proto message size: %w", err) } - buf := bytesBufferPool.Get().(*bytes.Buffer) - buf.Reset() - defer bytesBufferPool.Put(buf) + // We know exactly how big of a buffer we need here, so we can get a bucketed + // buffer from bufpool. 
+ buf := bufpool.Get(int(size)) + defer bufpool.Put(buf) n, err := io.Copy(buf, io.LimitReader(r, int64(size))) if err != nil { diff --git a/pkg/dataobj/internal/encoding/encoder.go b/pkg/dataobj/internal/encoding/encoder.go index 681ec2ddd56bd..a022d1795a28a 100644 --- a/pkg/dataobj/internal/encoding/encoder.go +++ b/pkg/dataobj/internal/encoding/encoder.go @@ -12,6 +12,26 @@ import ( "github.com/grafana/loki/v3/pkg/dataobj/internal/streamio" ) +// TODO(rfratto): the memory footprint of [Encoder] can very slowly grow in +// memory as [bytesBufferPool] is filled with buffers with increasing capacity: +// each encoding pass has a different number of elements, shuffling which +// elements of the hierarchy get which pooled buffers. +// +// This means that elements that require more bytes will grow the capacity of +// the buffer and put the buffer back into the pool. Even if further encoding +// passes don't need that many bytes, the buffer is kept alive with its larger +// footprint. Given enough time, all buffers in the pool will have a large +// capacity. +// +// The bufpool package provides a solution to this (bucketing pools by +// capacity), but using bufpool properly requires knowing how many bytes are +// needed. +// +// Encoder can eventually be moved to the bufpool package by calculating a +// rolling maximum of encoding size used per element across usages of an +// Encoder instance. This would then allow larger buffers to be eventually +// reclaimed regardless of how often encoding is done. + // Encoder encodes a data object. Data objects are hierarchical, split into // distinct sections that contain their own hierarchy. // diff --git a/pkg/dataobj/internal/encoding/metrics.go b/pkg/dataobj/internal/encoding/metrics.go index ce5065241a927..e0506578d72e6 100644 --- a/pkg/dataobj/internal/encoding/metrics.go +++ b/pkg/dataobj/internal/encoding/metrics.go @@ -9,6 +9,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/filemd" "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd" "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/streamsmd" @@ -27,12 +28,14 @@ type Metrics struct { datasetColumnCount *prometheus.HistogramVec datasetColumnCompressedBytes *prometheus.HistogramVec datasetColumnUncompressedBytes *prometheus.HistogramVec + datasetColumnCompressionRatio *prometheus.HistogramVec datasetColumnRows *prometheus.HistogramVec datasetColumnValues *prometheus.HistogramVec datasetPageCount *prometheus.HistogramVec datasetPageCompressedBytes *prometheus.HistogramVec datasetPageUncompressedBytes *prometheus.HistogramVec + datasetPageCompressionRatio *prometheus.HistogramVec datasetPageRows *prometheus.HistogramVec datasetPageValues *prometheus.HistogramVec } @@ -100,6 +103,13 @@ func NewMetrics() *Metrics { Help: "Distribution of uncompressed bytes per encoded dataset column.", }, []string{"section", "column_type"}), + datasetColumnCompressionRatio: newNativeHistogramVec(prometheus.HistogramOpts{ + Namespace: "loki_dataobj", + Subsystem: "encoding", + Name: "dataset_column_compression_ratio", + Help: "Distribution of compression ratio per encoded dataset column. 
Not reported when compression is disabled.", + }, []string{"section", "column_type", "compression_type"}), + datasetColumnRows: newNativeHistogramVec(prometheus.HistogramOpts{ Namespace: "loki_dataobj", Subsystem: "encoding", @@ -135,6 +145,13 @@ func NewMetrics() *Metrics { Help: "Distribution of uncompressed bytes per encoded dataset page.", }, []string{"section", "column_type"}), + datasetPageCompressionRatio: newNativeHistogramVec(prometheus.HistogramOpts{ + Namespace: "loki_dataobj", + Subsystem: "encoding", + Name: "dataset_page_compression_ratio", + Help: "Distribution of compression ratio per encoded dataset page. Not reported when compression is disabled.", + }, []string{"section", "column_type", "compression_type"}), + datasetPageRows: newNativeHistogramVec(prometheus.HistogramOpts{ Namespace: "loki_dataobj", Subsystem: "encoding", @@ -178,11 +195,13 @@ func (m *Metrics) Register(reg prometheus.Registerer) error { errs = append(errs, reg.Register(m.datasetColumnCount)) errs = append(errs, reg.Register(m.datasetColumnCompressedBytes)) errs = append(errs, reg.Register(m.datasetColumnUncompressedBytes)) + errs = append(errs, reg.Register(m.datasetColumnCompressionRatio)) errs = append(errs, reg.Register(m.datasetColumnRows)) errs = append(errs, reg.Register(m.datasetColumnValues)) errs = append(errs, reg.Register(m.datasetPageCount)) errs = append(errs, reg.Register(m.datasetPageCompressedBytes)) errs = append(errs, reg.Register(m.datasetPageUncompressedBytes)) + errs = append(errs, reg.Register(m.datasetPageCompressionRatio)) errs = append(errs, reg.Register(m.datasetPageRows)) errs = append(errs, reg.Register(m.datasetPageValues)) return errors.Join(errs...) @@ -198,11 +217,13 @@ func (m *Metrics) Unregister(reg prometheus.Registerer) { reg.Unregister(m.datasetColumnCount) reg.Unregister(m.datasetColumnCompressedBytes) reg.Unregister(m.datasetColumnUncompressedBytes) + reg.Unregister(m.datasetColumnCompressionRatio) reg.Unregister(m.datasetColumnRows) reg.Unregister(m.datasetColumnValues) reg.Unregister(m.datasetPageCount) reg.Unregister(m.datasetPageCompressedBytes) reg.Unregister(m.datasetPageUncompressedBytes) + reg.Unregister(m.datasetPageCompressionRatio) reg.Unregister(m.datasetPageRows) reg.Unregister(m.datasetPageValues) } @@ -274,9 +295,13 @@ func (m *Metrics) observeStreamsSection(ctx context.Context, section *filemd.Sec for i, column := range columns { columnType := column.Type.String() pages := columnPages[i] + compression := column.Info.Compression m.datasetColumnCompressedBytes.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.CompressedSize)) m.datasetColumnUncompressedBytes.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.UncompressedSize)) + if compression != datasetmd.COMPRESSION_TYPE_NONE { + m.datasetColumnCompressionRatio.WithLabelValues(sectionType, columnType, compression.String()).Observe(float64(column.Info.UncompressedSize) / float64(column.Info.CompressedSize)) + } m.datasetColumnRows.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.RowsCount)) m.datasetColumnValues.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.ValuesCount)) @@ -285,6 +310,9 @@ func (m *Metrics) observeStreamsSection(ctx context.Context, section *filemd.Sec for _, page := range pages { m.datasetPageCompressedBytes.WithLabelValues(sectionType, columnType).Observe(float64(page.Info.CompressedSize)) m.datasetPageUncompressedBytes.WithLabelValues(sectionType, 
columnType).Observe(float64(page.Info.UncompressedSize)) + if compression != datasetmd.COMPRESSION_TYPE_NONE { + m.datasetPageCompressionRatio.WithLabelValues(sectionType, columnType, compression.String()).Observe(float64(page.Info.UncompressedSize) / float64(page.Info.CompressedSize)) + } m.datasetPageRows.WithLabelValues(sectionType, columnType).Observe(float64(page.Info.RowsCount)) m.datasetPageValues.WithLabelValues(sectionType, columnType).Observe(float64(page.Info.ValuesCount)) } @@ -323,9 +351,13 @@ func (m *Metrics) observeLogsSection(ctx context.Context, section *filemd.Sectio for i, column := range columns { columnType := column.Type.String() pages := columnPages[i] + compression := column.Info.Compression m.datasetColumnCompressedBytes.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.CompressedSize)) m.datasetColumnUncompressedBytes.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.UncompressedSize)) + if compression != datasetmd.COMPRESSION_TYPE_NONE { + m.datasetColumnCompressionRatio.WithLabelValues(sectionType, columnType, compression.String()).Observe(float64(column.Info.UncompressedSize) / float64(column.Info.CompressedSize)) + } m.datasetColumnRows.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.RowsCount)) m.datasetColumnValues.WithLabelValues(sectionType, columnType).Observe(float64(column.Info.ValuesCount)) @@ -334,6 +366,9 @@ func (m *Metrics) observeLogsSection(ctx context.Context, section *filemd.Sectio for _, page := range pages { m.datasetPageCompressedBytes.WithLabelValues(sectionType, columnType).Observe(float64(page.Info.CompressedSize)) m.datasetPageUncompressedBytes.WithLabelValues(sectionType, columnType).Observe(float64(page.Info.UncompressedSize)) + if compression != datasetmd.COMPRESSION_TYPE_NONE { + m.datasetPageCompressionRatio.WithLabelValues(sectionType, columnType, compression.String()).Observe(float64(page.Info.UncompressedSize) / float64(page.Info.CompressedSize)) + } m.datasetPageRows.WithLabelValues(sectionType, columnType).Observe(float64(page.Info.RowsCount)) m.datasetPageValues.WithLabelValues(sectionType, columnType).Observe(float64(page.Info.ValuesCount)) } diff --git a/pkg/dataobj/internal/sections/logs/logs.go b/pkg/dataobj/internal/sections/logs/logs.go index e23ecce7cf5a0..cf08e6455e446 100644 --- a/pkg/dataobj/internal/sections/logs/logs.go +++ b/pkg/dataobj/internal/sections/logs/logs.go @@ -3,22 +3,20 @@ package logs import ( - "cmp" "context" "errors" "fmt" - "slices" "time" + "github.com/klauspost/compress/zstd" "github.com/prometheus/client_golang/prometheus" "github.com/grafana/loki/pkg/push" "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset" "github.com/grafana/loki/v3/pkg/dataobj/internal/encoding" - "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd" - "github.com/grafana/loki/v3/pkg/dataobj/internal/result" + "github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear" ) // A Record is an individual log record within the logs section. @@ -29,141 +27,147 @@ type Record struct { Line string } +// Options configures the behavior of the logs section. +type Options struct { + // PageSizeHint is the size of pages to use when encoding the logs section. + PageSizeHint int + + // BufferSize is the size of the buffer to use when accumulating log records. + BufferSize int + + // SectionSizeHint is the size of the section to use when encoding the logs + // section. 
If the section size is exceeded, multiple sections will be + // created. + SectionSize int +} + // Logs accumulate a set of [Record]s within a data object. type Logs struct { - metrics *Metrics - rows int - pageSize int + metrics *Metrics + opts Options + + // Sorting the entire set of logs is very expensive, so we need to break it + // up into smaller pieces: + // + // 1. Records are accumulated in memory up to BufferSize; the current size is + // tracked by recordsSize. + // + // 2. Once the buffer is full, records are sorted and flushed to smaller + // [table]s called stripes. + // + // 3. Once the set of stripes reaches SectionSize, they are merged together + // into a final table that will be encoded as a single section. + // + // At the end of this process, there will be a set of sections that are + // encoded separately. - streamIDs *dataset.ColumnBuilder - timestamps *dataset.ColumnBuilder + records []Record // Buffered records to flush to a group. + recordsSize int - metadatas []*dataset.ColumnBuilder - metadataLookup map[string]int // map of metadata key to index in metadatas + stripes []*table // In-progress section; flushed with [mergeTables] into a single table. + stripeBuffer tableBuffer + stripesSize int // Estimated byte size of all elements in stripes. - messages *dataset.ColumnBuilder + sections []*table // Completed sections. + sectionBuffer tableBuffer } // Nwe creates a new Logs section. The pageSize argument specifies how large // pages should be. -func New(metrics *Metrics, pageSize int) *Logs { +func New(metrics *Metrics, opts Options) *Logs { if metrics == nil { metrics = NewMetrics() } - // We control the Value/Encoding tuple so creating column builders can't - // fail; if it does, we're left in an unrecoverable state where nothing can - // be encoded properly so we panic. - streamIDs, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{ - PageSizeHint: pageSize, - Value: datasetmd.VALUE_TYPE_INT64, - Encoding: datasetmd.ENCODING_TYPE_DELTA, - Compression: datasetmd.COMPRESSION_TYPE_NONE, - }) - if err != nil { - panic(fmt.Sprintf("creating stream ID column: %v", err)) + return &Logs{ + metrics: metrics, + opts: opts, } +} - timestamps, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{ - PageSizeHint: pageSize, - Value: datasetmd.VALUE_TYPE_INT64, - Encoding: datasetmd.ENCODING_TYPE_DELTA, - Compression: datasetmd.COMPRESSION_TYPE_NONE, - }) - if err != nil { - panic(fmt.Sprintf("creating timestamp column: %v", err)) - } +// Append adds a new entry to the set of Logs. +func (l *Logs) Append(entry Record) { + l.metrics.appendsTotal.Inc() - messages, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{ - PageSizeHint: pageSize, - Value: datasetmd.VALUE_TYPE_STRING, - Encoding: datasetmd.ENCODING_TYPE_PLAIN, - Compression: datasetmd.COMPRESSION_TYPE_ZSTD, - }) - if err != nil { - panic(fmt.Sprintf("creating message column: %v", err)) - } + l.records = append(l.records, entry) + l.recordsSize += recordSize(entry) - return &Logs{ - metrics: metrics, - pageSize: pageSize, + if l.recordsSize >= l.opts.BufferSize { + l.flushRecords() + } - streamIDs: streamIDs, - timestamps: timestamps, + l.metrics.recordCount.Inc() +} - metadataLookup: make(map[string]int), +func recordSize(record Record) int { + var size int - messages: messages, + size++ // One byte per stream ID (for uvarint). + size += 8 // Eight bytes for timestamp. 
+ for _, metadata := range record.Metadata { + size += len(metadata.Value) } + size += len(record.Line) + + return size } -// Append adds a new entry to the set of Logs. -func (l *Logs) Append(entry Record) { - l.metrics.appendsTotal.Inc() +func (l *Logs) flushRecords() { + if len(l.records) == 0 { + return + } - // Sort metadata to ensure consistent encoding. Metadata is sorted by key. - // While keys must be unique, we sort by value if two keys match; this - // ensures that the same value always gets encoded for duplicate keys. - slices.SortFunc(entry.Metadata, func(a, b push.LabelAdapter) int { - if res := cmp.Compare(a.Name, b.Name); res != 0 { - return res - } - return cmp.Compare(a.Value, b.Value) - }) + // Our stripes are intermediate tables that don't need to have the best + // compression. To maintain high throughput on appends, we use the fastest + // compression for a stripe. Better compression is then used for sections. + compressionOpts := dataset.CompressionOptions{ + Zstd: []zstd.EOption{zstd.WithEncoderLevel(zstd.SpeedFastest)}, + } - // We ignore the errors below; they only fail if given out-of-order data - // (where the row number is less than the previous row number), which can't - // ever happen here. + stripe := buildTable(&l.stripeBuffer, l.opts.PageSizeHint, compressionOpts, l.records) + l.stripes = append(l.stripes, stripe) + l.stripesSize += stripe.Size() - _ = l.streamIDs.Append(l.rows, dataset.Int64Value(entry.StreamID)) - _ = l.timestamps.Append(l.rows, dataset.Int64Value(entry.Timestamp.UnixNano())) - _ = l.messages.Append(l.rows, dataset.StringValue(entry.Line)) + l.records = sliceclear.Clear(l.records) + l.recordsSize = 0 - for _, m := range entry.Metadata { - col := l.getMetadataColumn(m.Name) - _ = col.Append(l.rows, dataset.StringValue(m.Value)) + if l.stripesSize >= l.opts.SectionSize { + l.flushSection() } - - l.rows++ - l.metrics.recordCount.Inc() } -// EstimatedSize returns the estimated size of the Logs section in bytes. -func (l *Logs) EstimatedSize() int { - var size int +func (l *Logs) flushSection() { + if len(l.stripes) == 0 { + return + } - size += l.streamIDs.EstimatedSize() - size += l.timestamps.EstimatedSize() - size += l.messages.EstimatedSize() + compressionOpts := dataset.CompressionOptions{ + Zstd: []zstd.EOption{zstd.WithEncoderLevel(zstd.SpeedDefault)}, + } - for _, md := range l.metadatas { - size += md.EstimatedSize() + section, err := mergeTables(&l.sectionBuffer, l.opts.PageSizeHint, compressionOpts, l.stripes) + if err != nil { + // We control the input to mergeTables, so this should never happen. + panic(fmt.Sprintf("merging tables: %v", err)) } - return size + l.sections = append(l.sections, section) + + l.stripes = sliceclear.Clear(l.stripes) + l.stripesSize = 0 } -func (l *Logs) getMetadataColumn(key string) *dataset.ColumnBuilder { - idx, ok := l.metadataLookup[key] - if !ok { - col, err := dataset.NewColumnBuilder(key, dataset.BuilderOptions{ - PageSizeHint: l.pageSize, - Value: datasetmd.VALUE_TYPE_STRING, - Encoding: datasetmd.ENCODING_TYPE_PLAIN, - Compression: datasetmd.COMPRESSION_TYPE_ZSTD, - }) - if err != nil { - // We control the Value/Encoding tuple so this can't fail; if it does, - // we're left in an unrecoverable state where nothing can be encoded - // properly so we panic. - panic(fmt.Sprintf("creating metadata column: %v", err)) - } +// EstimatedSize returns the estimated size of the Logs section in bytes. 
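The two flush paths above trade compression for throughput: buffered records become stripes under `zstd.SpeedFastest`, and stripes become sections under `zstd.SpeedDefault`. A deliberately reduced sketch of that two-level policy, with string joins standing in for the real sorting, encoding, and merging; all names here are illustrative.

```go
package logssketch

import "strings"

// accumulator mirrors the Logs flow: records buffer up to bufferSize, flush
// into sorted "stripes", and stripes merge into "sections" once their
// combined size reaches sectionSize.
type accumulator struct {
	bufferSize, sectionSize int

	records     []string
	recordsSize int

	stripes     []string
	stripesSize int

	sections []string
}

func (a *accumulator) append(rec string) {
	a.records = append(a.records, rec)
	a.recordsSize += len(rec)
	if a.recordsSize >= a.bufferSize {
		a.flushStripe()
	}
}

func (a *accumulator) flushStripe() {
	if len(a.records) == 0 {
		return
	}
	// The real flushRecords sorts the records and encodes them as a columnar
	// table with fast zstd; a joined string stands in for that here.
	stripe := strings.Join(a.records, "\n")
	a.stripes = append(a.stripes, stripe)
	a.stripesSize += len(stripe)

	a.records = a.records[:0]
	a.recordsSize = 0

	if a.stripesSize >= a.sectionSize {
		a.flushSection()
	}
}

func (a *accumulator) flushSection() {
	if len(a.stripes) == 0 {
		return
	}
	// The real flushSection k-way merges the sorted stripes and re-encodes
	// them with the default zstd level.
	a.sections = append(a.sections, strings.Join(a.stripes, "\n"))
	a.stripes = a.stripes[:0]
	a.stripesSize = 0
}
```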
+func (l *Logs) EstimatedSize() int { + var size int - l.metadatas = append(l.metadatas, col) - l.metadataLookup[key] = len(l.metadatas) - 1 - return col + size += l.recordsSize + size += l.stripesSize + for _, section := range l.sections { + size += section.Size() } - return l.metadatas[idx] + + return size } // EncodeTo encodes the set of logs to the provided encoder. Before encoding, @@ -179,23 +183,28 @@ func (l *Logs) EncodeTo(enc *encoding.Encoder) error { defer l.Reset() - // TODO(rfratto): handle one section becoming too large. This can happen when - // the number of columns is very wide, due to a lot of metadata columns. - // There are two approaches to handle this: - // - // 1. Split streams into multiple sections. - // 2. Move some columns into an aggregated column which holds multiple label - // keys and values. + // Flush any remaining buffered data. + l.flushRecords() + l.flushSection() - dset, err := l.buildDataset() - if err != nil { - return fmt.Errorf("building dataset: %w", err) - } - cols, err := result.Collect(dset.ListColumns(context.Background())) // dset is in memory; "real" context not needed. - if err != nil { - return fmt.Errorf("listing columns: %w", err) + // TODO(rfratto): handle individual sections having oversized metadata. This + // can happen when the number of columns is very wide, due to a lot of + // metadata columns. + // + // As we're already splitting data into separate sections, the best solution + // for this is to aggregate the lowest cardinality columns into a combined + // column. This will reduce the number of columns in the section and thus the + // metadata size. + for _, section := range l.sections { + if err := l.encodeSection(enc, section); err != nil { + return fmt.Errorf("encoding section: %w", err) + } } + return nil +} + +func (l *Logs) encodeSection(enc *encoding.Encoder, section *table) error { logsEnc, err := enc.OpenLogs() if err != nil { return fmt.Errorf("opening logs section: %w", err) @@ -206,16 +215,14 @@ func (l *Logs) EncodeTo(enc *encoding.Encoder) error { _ = logsEnc.Discard() }() - // Encode our columns. The slice order here *must* match the order in - // [Logs.buildDataset]! { - errs := make([]error, 0, len(cols)) - errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_STREAM_ID, cols[0])) - errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_TIMESTAMP, cols[1])) - for _, mdCol := range cols[2 : len(cols)-1] { - errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_METADATA, mdCol)) + errs := make([]error, 0, len(section.Metadatas)+3) + errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_STREAM_ID, section.StreamID)) + errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_TIMESTAMP, section.Timestamp)) + for _, md := range section.Metadatas { + errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_METADATA, md)) } - errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_MESSAGE, cols[len(cols)-1])) + errs = append(errs, encodeColumn(logsEnc, logsmd.COLUMN_TYPE_MESSAGE, section.Message)) if err := errors.Join(errs...); err != nil { return fmt.Errorf("encoding columns: %w", err) } @@ -224,51 +231,6 @@ func (l *Logs) EncodeTo(enc *encoding.Encoder) error { return logsEnc.Commit() } -func (l *Logs) buildDataset() (dataset.Dataset, error) { - // Our columns are ordered as follows: - // - // 1. StreamID - // 2. Timestamp - // 3. Metadata columns - // 4. Message - // - // Do *not* change this order without updating [Logs.EncodeTo]! 
- // - // TODO(rfratto): find a clean way to decorate columns with additional - // metadata so we don't have to rely on order. - columns := make([]*dataset.MemColumn, 0, 3+len(l.metadatas)) - - // Flush never returns an error so we ignore it here to keep the code simple. - // - // TODO(rfratto): remove error return from Flush to clean up code. - streamID, _ := l.streamIDs.Flush() - timestamp, _ := l.timestamps.Flush() - columns = append(columns, streamID, timestamp) - - for _, mdBuilder := range l.metadatas { - mdBuilder.Backfill(l.rows) - - mdColumn, _ := mdBuilder.Flush() - columns = append(columns, mdColumn) - } - - messages, _ := l.messages.Flush() - columns = append(columns, messages) - - // TODO(rfratto): We need to be able to sort the columns first by StreamID - // and then by timestamp, but as it is now this is way too slow; sorting a - // 20MB dataset took several minutes due to the number of page loads - // happening across streams. - // - // Sorting can be made more efficient by: - // - // 1. Separating streams into separate datasets while appending - // 2. Sorting each stream separately - // 3. Combining sorted streams into a single dataset, which will already be - // sorted. - return dataset.FromMemory(columns), nil -} - func encodeColumn(enc *encoding.LogsEncoder, columnType logsmd.ColumnType, column dataset.Column) error { columnEnc, err := enc.OpenColumn(columnType, column.ColumnInfo()) if err != nil { @@ -307,12 +269,15 @@ func encodeColumn(enc *encoding.LogsEncoder, columnType logsmd.ColumnType, colum // Reset resets all state, allowing Logs to be reused. func (l *Logs) Reset() { - l.rows = 0 l.metrics.recordCount.Set(0) - l.streamIDs.Reset() - l.timestamps.Reset() - l.metadatas = l.metadatas[:0] - clear(l.metadataLookup) - l.messages.Reset() + l.records = sliceclear.Clear(l.records) + l.recordsSize = 0 + + l.stripes = sliceclear.Clear(l.stripes) + l.stripeBuffer.Reset() + l.stripesSize = 0 + + l.sections = sliceclear.Clear(l.sections) + l.sectionBuffer.Reset() } diff --git a/pkg/dataobj/internal/sections/logs/logs_test.go b/pkg/dataobj/internal/sections/logs/logs_test.go index 6caa84ca6dc71..4f14329a77f21 100644 --- a/pkg/dataobj/internal/sections/logs/logs_test.go +++ b/pkg/dataobj/internal/sections/logs/logs_test.go @@ -15,8 +15,6 @@ import ( ) func Test(t *testing.T) { - t.Skip("Disabled until sorting is reimplemented") - records := []logs.Record{ { StreamID: 1, @@ -44,7 +42,13 @@ func Test(t *testing.T) { }, } - tracker := logs.New(nil, 1024) + opts := logs.Options{ + PageSizeHint: 1024, + BufferSize: 256, + SectionSize: 4096, + } + + tracker := logs.New(nil, opts) for _, record := range records { tracker.Append(record) } diff --git a/pkg/dataobj/internal/sections/logs/table.go b/pkg/dataobj/internal/sections/logs/table.go new file mode 100644 index 0000000000000..27508fd511403 --- /dev/null +++ b/pkg/dataobj/internal/sections/logs/table.go @@ -0,0 +1,312 @@ +package logs + +import ( + "cmp" + "context" + "fmt" + "slices" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset" + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/result" +) + +// A table is a collection of columns that form a logs section. 
+type table struct { + StreamID *tableColumn + Timestamp *tableColumn + Metadatas []*tableColumn + Message *tableColumn +} + +type tableColumn struct { + *dataset.MemColumn + + Type logsmd.ColumnType +} + +var _ dataset.Dataset = (*table)(nil) + +// ListColumns implements [dataset.Dataset]. +func (t *table) ListColumns(_ context.Context) result.Seq[dataset.Column] { + return result.Iter(func(yield func(dataset.Column) bool) error { + if !yield(t.StreamID) { + return nil + } + if !yield(t.Timestamp) { + return nil + } + for _, metadata := range t.Metadatas { + if !yield(metadata) { + return nil + } + } + if !yield(t.Message) { + return nil + } + + return nil + }) +} + +// ListPages implements [dataset.Dataset]. +func (t *table) ListPages(ctx context.Context, columns []dataset.Column) result.Seq[dataset.Pages] { + return result.Iter(func(yield func(dataset.Pages) bool) error { + for _, c := range columns { + pages, err := result.Collect(c.ListPages(ctx)) + if err != nil { + return err + } else if !yield(dataset.Pages(pages)) { + return nil + } + } + + return nil + }) +} + +// ReadPages implements [dataset.Dataset]. +func (t *table) ReadPages(ctx context.Context, pages []dataset.Page) result.Seq[dataset.PageData] { + return result.Iter(func(yield func(dataset.PageData) bool) error { + for _, p := range pages { + data, err := p.ReadPage(ctx) + if err != nil { + return err + } else if !yield(data) { + return nil + } + } + + return nil + }) + +} + +// Size returns the total size of the table in bytes. +func (t *table) Size() int { + var size int + + size += t.StreamID.ColumnInfo().CompressedSize + size += t.Timestamp.ColumnInfo().CompressedSize + for _, metadata := range t.Metadatas { + size += metadata.ColumnInfo().CompressedSize + } + size += t.Message.ColumnInfo().CompressedSize + + return size +} + +// A tableBuffer holds a set of column builders used for constructing tables. +// The zero value is ready for use. +type tableBuffer struct { + streamID *dataset.ColumnBuilder + timestamp *dataset.ColumnBuilder + + metadatas []*dataset.ColumnBuilder + metadataLookup map[string]int // map of metadata key to index in metadatas + usedMetadatas map[*dataset.ColumnBuilder]string // metadata with its name. + + message *dataset.ColumnBuilder +} + +// StreamID gets or creates a stream ID column for the buffer. +func (b *tableBuffer) StreamID(pageSize int) *dataset.ColumnBuilder { + if b.streamID != nil { + return b.streamID + } + + col, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{ + PageSizeHint: pageSize, + Value: datasetmd.VALUE_TYPE_INT64, + Encoding: datasetmd.ENCODING_TYPE_DELTA, + Compression: datasetmd.COMPRESSION_TYPE_NONE, + }) + if err != nil { + // We control the Value/Encoding tuple so this can't fail; if it does, + // we're left in an unrecoverable state where nothing can be encoded + // properly so we panic. + panic(fmt.Sprintf("creating stream ID column: %v", err)) + } + + b.streamID = col + return col +} + +// Timestamp gets or creates a timestamp column for the buffer. 
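The Metadata and Reset methods below hand out metadata builders get-or-create style and remember which ones were touched, so a reset can drop the builders that went unused (Test_table_metadataCleanup later in this diff exercises exactly that). A generic sketch of the "retain only what was used" cache, with illustrative names; it is not the tableBuffer API itself.

```go
package tablesketch

// usedCache creates entries on demand, marks every looked-up entry as used,
// and drops whatever went untouched since the previous cycle on reset.
type usedCache[V any] struct {
	newV    func(key string) V
	entries map[string]V
	used    map[string]struct{}
}

func (c *usedCache[V]) get(key string) V {
	if c.entries == nil {
		c.entries = make(map[string]V)
		c.used = make(map[string]struct{})
	}
	v, ok := c.entries[key]
	if !ok {
		v = c.newV(key)
		c.entries[key] = v
	}
	c.used[key] = struct{}{}
	return v
}

// reset keeps only the entries fetched since the last reset, so the cache
// cannot grow without bound across flushes.
func (c *usedCache[V]) reset() {
	for key := range c.entries {
		if _, ok := c.used[key]; !ok {
			delete(c.entries, key)
		}
	}
	clear(c.used)
}
```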
+func (b *tableBuffer) Timestamp(pageSize int) *dataset.ColumnBuilder { + if b.timestamp != nil { + return b.timestamp + } + + col, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{ + PageSizeHint: pageSize, + Value: datasetmd.VALUE_TYPE_INT64, + Encoding: datasetmd.ENCODING_TYPE_DELTA, + Compression: datasetmd.COMPRESSION_TYPE_NONE, + }) + if err != nil { + // We control the Value/Encoding tuple so this can't fail; if it does, + // we're left in an unrecoverable state where nothing can be encoded + // properly so we panic. + panic(fmt.Sprintf("creating timestamp column: %v", err)) + } + + b.timestamp = col + return col +} + +// Metadata gets or creates a metadata column for the buffer. To remove created +// metadata columns, call [tableBuffer.CleanupMetadatas]. +func (b *tableBuffer) Metadata(key string, pageSize int, compressionOpts dataset.CompressionOptions) *dataset.ColumnBuilder { + if b.usedMetadatas == nil { + b.usedMetadatas = make(map[*dataset.ColumnBuilder]string) + } + + index, ok := b.metadataLookup[key] + if ok { + builder := b.metadatas[index] + b.usedMetadatas[builder] = key + return builder + } + + col, err := dataset.NewColumnBuilder(key, dataset.BuilderOptions{ + PageSizeHint: pageSize, + Value: datasetmd.VALUE_TYPE_STRING, + Encoding: datasetmd.ENCODING_TYPE_PLAIN, + Compression: datasetmd.COMPRESSION_TYPE_ZSTD, + CompressionOptions: compressionOpts, + }) + if err != nil { + // We control the Value/Encoding tuple so this can't fail; if it does, + // we're left in an unrecoverable state where nothing can be encoded + // properly so we panic. + panic(fmt.Sprintf("creating metadata column: %v", err)) + } + + b.metadatas = append(b.metadatas, col) + + if b.metadataLookup == nil { + b.metadataLookup = make(map[string]int) + } + b.metadataLookup[key] = len(b.metadatas) - 1 + b.usedMetadatas[col] = key + return col +} + +// Message gets or creates a message column for the buffer. +func (b *tableBuffer) Message(pageSize int, compressionOpts dataset.CompressionOptions) *dataset.ColumnBuilder { + if b.message != nil { + return b.message + } + + col, err := dataset.NewColumnBuilder("", dataset.BuilderOptions{ + PageSizeHint: pageSize, + Value: datasetmd.VALUE_TYPE_STRING, + Encoding: datasetmd.ENCODING_TYPE_PLAIN, + Compression: datasetmd.COMPRESSION_TYPE_ZSTD, + CompressionOptions: compressionOpts, + }) + if err != nil { + // We control the Value/Encoding tuple so this can't fail; if it does, + // we're left in an unrecoverable state where nothing can be encoded + // properly so we panic. + panic(fmt.Sprintf("creating messages column: %v", err)) + } + + b.message = col + return col +} + +// Reset resets the buffer to its initial state. +func (b *tableBuffer) Reset() { + if b.streamID != nil { + b.streamID.Reset() + } + if b.timestamp != nil { + b.timestamp.Reset() + } + if b.message != nil { + b.message.Reset() + } + for _, md := range b.metadatas { + md.Reset() + } + + // We don't want to keep all metadata columns around forever, so we only + // retain the columns that were used in the last Flush. + var ( + newMetadatas = make([]*dataset.ColumnBuilder, 0, len(b.metadatas)) + newMetadataLookup = make(map[string]int, len(b.metadatas)) + ) + for _, md := range b.metadatas { + if b.usedMetadatas == nil { + break // Nothing was used. 
+ } + + key, used := b.usedMetadatas[md] + if !used { + continue + } + + newMetadatas = append(newMetadatas, md) + newMetadataLookup[key] = len(newMetadatas) - 1 + } + b.metadatas = newMetadatas + b.metadataLookup = newMetadataLookup + clear(b.usedMetadatas) // Reset the used cache for next time. +} + +// Flush flushes the buffer into a table. Flush returns an error if the stream, +// timestamp, or messages column was never appended to. +// +// Only metadata columns that were appended to since the last flush are included in the table. +func (b *tableBuffer) Flush() (*table, error) { + defer b.Reset() + + if b.streamID == nil { + return nil, fmt.Errorf("no stream column") + } else if b.timestamp == nil { + return nil, fmt.Errorf("no timestamp column") + } else if b.message == nil { + return nil, fmt.Errorf("no message column") + } + + var ( + // Flush never returns an error so we ignore it here to keep the code simple. + // + // TODO(rfratto): remove error return from Flush to clean up code. + + streamID, _ = b.streamID.Flush() + timestamp, _ = b.timestamp.Flush() + messages, _ = b.message.Flush() + + metadatas = make([]*tableColumn, 0, len(b.metadatas)) + ) + + for _, metadataBuilder := range b.metadatas { + if b.usedMetadatas == nil { + continue + } else if _, ok := b.usedMetadatas[metadataBuilder]; !ok { + continue + } + + // Each metadata column may have a different number of rows compared to + // other columns. Since adding NULLs isn't free, we don't call Backfill + // here. + metadata, _ := metadataBuilder.Flush() + metadatas = append(metadatas, &tableColumn{metadata, logsmd.COLUMN_TYPE_METADATA}) + } + + // Sort metadata columns by name for consistency. + slices.SortFunc(metadatas, func(a, b *tableColumn) int { + return cmp.Compare(a.ColumnInfo().Name, b.ColumnInfo().Name) + }) + + return &table{ + StreamID: &tableColumn{streamID, logsmd.COLUMN_TYPE_STREAM_ID}, + Timestamp: &tableColumn{timestamp, logsmd.COLUMN_TYPE_TIMESTAMP}, + Metadatas: metadatas, + Message: &tableColumn{messages, logsmd.COLUMN_TYPE_MESSAGE}, + }, nil +} diff --git a/pkg/dataobj/internal/sections/logs/table_build.go b/pkg/dataobj/internal/sections/logs/table_build.go new file mode 100644 index 0000000000000..edf74bbfd7537 --- /dev/null +++ b/pkg/dataobj/internal/sections/logs/table_build.go @@ -0,0 +1,54 @@ +package logs + +import ( + "cmp" + "slices" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset" +) + +// buildTable builds a table from the set of provided records. The records are +// sorted with [sortRecords] prior to building the table. +func buildTable(buf *tableBuffer, pageSize int, compressionOpts dataset.CompressionOptions, records []Record) *table { + sortRecords(records) + + buf.Reset() + + var ( + streamIDBuilder = buf.StreamID(pageSize) + timestampBuilder = buf.Timestamp(pageSize) + messageBuilder = buf.Message(pageSize, compressionOpts) + ) + + for i, record := range records { + // Append only fails if given out-of-order data, where the provided row + // number is less than the previous row number. That can't happen here, so + // to keep the code readable we ignore the error values. 
+ + _ = streamIDBuilder.Append(i, dataset.Int64Value(record.StreamID)) + _ = timestampBuilder.Append(i, dataset.Int64Value(record.Timestamp.UnixNano())) + _ = messageBuilder.Append(i, dataset.StringValue(record.Line)) + + for _, md := range record.Metadata { + metadataBuilder := buf.Metadata(md.Name, pageSize, compressionOpts) + _ = metadataBuilder.Append(i, dataset.StringValue(md.Value)) + } + } + + table, err := buf.Flush() + if err != nil { + // Unreachable; we always ensure every required column is created. + panic(err) + } + return table +} + +// sortRecords sorts the set of records by stream ID and timestamp. +func sortRecords(records []Record) { + slices.SortFunc(records, func(a, b Record) int { + if res := cmp.Compare(a.StreamID, b.StreamID); res != 0 { + return res + } + return a.Timestamp.Compare(b.Timestamp) + }) +} diff --git a/pkg/dataobj/internal/sections/logs/table_merge.go b/pkg/dataobj/internal/sections/logs/table_merge.go new file mode 100644 index 0000000000000..e54c896550dd4 --- /dev/null +++ b/pkg/dataobj/internal/sections/logs/table_merge.go @@ -0,0 +1,151 @@ +package logs + +import ( + "cmp" + "context" + "fmt" + "math" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset" + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/logsmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/result" + "github.com/grafana/loki/v3/pkg/util/loser" +) + +// mergeTables merges the provided sorted tables into a new single sorted table +// using k-way merge. +func mergeTables(buf *tableBuffer, pageSize int, compressionOpts dataset.CompressionOptions, tables []*table) (*table, error) { + buf.Reset() + + var ( + streamIDBuilder = buf.StreamID(pageSize) + timestampBuilder = buf.Timestamp(pageSize) + messageBuilder = buf.Message(pageSize, compressionOpts) + ) + + var ( + tableSequences = make([]*tableSequence, 0, len(tables)) + ) + for _, t := range tables { + dsetColumns, err := result.Collect(t.ListColumns(context.Background())) + if err != nil { + return nil, err + } + + seq := dataset.Iter(context.Background(), dsetColumns) + next, stop := result.Pull(seq) + defer stop() + + tableSequences = append(tableSequences, &tableSequence{ + columns: dsetColumns, + + pull: next, stop: stop, + }) + } + + maxValue := result.Value(dataset.Row{ + Index: math.MaxInt, + Values: []dataset.Value{ + dataset.Int64Value(math.MaxInt64), + dataset.Int64Value(math.MaxInt64), + }, + }) + + var rows int + + tree := loser.New(tableSequences, maxValue, tableSequenceValue, rowResultLess, tableSequenceStop) + for tree.Next() { + seq := tree.Winner() + + row, err := tableSequenceValue(seq).Value() + if err != nil { + return nil, err + } + + for i, column := range seq.columns { + // column is guaranteed to be a *tableColumn since we got it from *table. + column := column.(*tableColumn) + + // dataset.Iter returns values in the same order as the number of + // columns. 
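mergeTables leans on every stripe already being sorted by stream ID and then timestamp, so a loser tree yields a globally sorted section in a single pass instead of a re-sort. A reduced sketch of the same k-way merge over plain ints; the `loser.New`/`Next`/`Winner` calls mirror the usage in this file, while `intSeq` and the helper functions are illustrative.

```go
package mergesketch

import (
	"math"

	"github.com/grafana/loki/v3/pkg/util/loser"
)

// intSeq is a minimal loser.Sequence over an already-sorted []int.
type intSeq struct {
	values []int
	cur    int
}

func (s *intSeq) Next() bool {
	if len(s.values) == 0 {
		return false
	}
	s.cur, s.values = s.values[0], s.values[1:]
	return true
}

func at(s *intSeq) int   { return s.cur }
func less(a, b int) bool { return a < b }
func closeSeq(*intSeq)   {}

// mergeSorted merges sorted inputs into one sorted slice, the same shape of
// k-way merge mergeTables performs over table rows. math.MaxInt plays the
// role of the sentinel maxValue row above.
func mergeSorted(inputs ...[]int) []int {
	seqs := make([]*intSeq, 0, len(inputs))
	for _, in := range inputs {
		seqs = append(seqs, &intSeq{values: in})
	}

	var out []int
	tree := loser.New(seqs, math.MaxInt, at, less, closeSeq)
	for tree.Next() {
		out = append(out, at(tree.Winner()))
	}
	return out
}
```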
+ value := row.Values[i] + + switch column.Type { + case logsmd.COLUMN_TYPE_STREAM_ID: + _ = streamIDBuilder.Append(rows, value) + case logsmd.COLUMN_TYPE_TIMESTAMP: + _ = timestampBuilder.Append(rows, value) + case logsmd.COLUMN_TYPE_METADATA: + columnBuilder := buf.Metadata(column.Info.Name, pageSize, compressionOpts) + _ = columnBuilder.Append(rows, value) + case logsmd.COLUMN_TYPE_MESSAGE: + _ = messageBuilder.Append(rows, value) + default: + return nil, fmt.Errorf("unknown column type %s", column.Type) + } + } + + rows++ + } + + return buf.Flush() +} + +type tableSequence struct { + curValue result.Result[dataset.Row] + + columns []dataset.Column + + pull func() (result.Result[dataset.Row], bool) + stop func() +} + +var _ loser.Sequence = (*tableSequence)(nil) + +func (seq *tableSequence) Next() bool { + val, ok := seq.pull() + seq.curValue = val + return ok +} + +func tableSequenceValue(seq *tableSequence) result.Result[dataset.Row] { return seq.curValue } + +func tableSequenceStop(seq *tableSequence) { seq.stop() } + +func rowResultLess(a, b result.Result[dataset.Row]) bool { + var ( + aRow, aErr = a.Value() + bRow, bErr = b.Value() + ) + + // Put errors first so we return errors early. + if aErr != nil { + return true + } else if bErr != nil { + return false + } + + return compareRows(aRow, bRow) < 0 +} + +// compareRows compares two rows by their first two columns. compareRows panics +// if a or b doesn't have at least two columns, if the first column isn't a +// int64-encoded stream ID, or if the second column isn't an int64-encoded +// timestamp. +func compareRows(a, b dataset.Row) int { + // The first two columns of each row are *always* stream ID and timestamp. + // + // TODO(rfratto): Can we find a safer way of doing this? + var ( + aStreamID = a.Values[0].Int64() + bStreamID = b.Values[0].Int64() + + aTimestamp = a.Values[1].Int64() + bTimestamp = b.Values[1].Int64() + ) + + if res := cmp.Compare(aStreamID, bStreamID); res != 0 { + return res + } + return cmp.Compare(aTimestamp, bTimestamp) +} diff --git a/pkg/dataobj/internal/sections/logs/table_test.go b/pkg/dataobj/internal/sections/logs/table_test.go new file mode 100644 index 0000000000000..91f25c141d2f0 --- /dev/null +++ b/pkg/dataobj/internal/sections/logs/table_test.go @@ -0,0 +1,81 @@ +package logs + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/dataset" + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/result" +) + +func Test_table_metadataCleanup(t *testing.T) { + var buf tableBuffer + initBuffer(&buf) + + _ = buf.Metadata("foo", 1024, dataset.CompressionOptions{}) + _ = buf.Metadata("bar", 1024, dataset.CompressionOptions{}) + + table, err := buf.Flush() + require.NoError(t, err) + require.Equal(t, 2, len(table.Metadatas)) + + initBuffer(&buf) + _ = buf.Metadata("bar", 1024, dataset.CompressionOptions{}) + + table, err = buf.Flush() + require.NoError(t, err) + require.Equal(t, 1, len(table.Metadatas)) + require.Equal(t, "bar", table.Metadatas[0].Info.Name) +} + +func initBuffer(buf *tableBuffer) { + buf.StreamID(1024) + buf.Timestamp(1024) + buf.Message(1024, dataset.CompressionOptions{}) +} + +func Test_mergeTables(t *testing.T) { + var buf tableBuffer + + var ( + tableA = buildTable(&buf, 1024, dataset.CompressionOptions{}, []Record{ + {StreamID: 1, Timestamp: time.Unix(1, 0), Line: "hello"}, + {StreamID: 2, Timestamp: time.Unix(2, 0), 
Line: "are"}, + {StreamID: 3, Timestamp: time.Unix(3, 0), Line: "goodbye"}, + }) + + tableB = buildTable(&buf, 1024, dataset.CompressionOptions{}, []Record{ + {StreamID: 1, Timestamp: time.Unix(2, 0), Line: "world"}, + {StreamID: 3, Timestamp: time.Unix(1, 0), Line: "you"}, + }) + + tableC = buildTable(&buf, 1024, dataset.CompressionOptions{}, []Record{ + {StreamID: 2, Timestamp: time.Unix(1, 0), Line: "how"}, + {StreamID: 3, Timestamp: time.Unix(2, 0), Line: "doing?"}, + }) + ) + + mergedTable, err := mergeTables(&buf, 1024, dataset.CompressionOptions{}, []*table{tableA, tableB, tableC}) + require.NoError(t, err) + + mergedColumns, err := result.Collect(mergedTable.ListColumns(context.Background())) + require.NoError(t, err) + + var actual []string + + for result := range dataset.Iter(context.Background(), mergedColumns) { + row, err := result.Value() + require.NoError(t, err) + require.Len(t, row.Values, 3) + require.Equal(t, datasetmd.VALUE_TYPE_STRING, row.Values[2].Type()) + + actual = append(actual, row.Values[2].String()) + } + + require.Equal(t, "hello world how are you doing? goodbye", strings.Join(actual, " ")) +} diff --git a/pkg/dataobj/internal/sections/streams/streams.go b/pkg/dataobj/internal/sections/streams/streams.go index 3fc583342b3cb..f9f4aeabd8124 100644 --- a/pkg/dataobj/internal/sections/streams/streams.go +++ b/pkg/dataobj/internal/sections/streams/streams.go @@ -17,6 +17,7 @@ import ( "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/streamsmd" "github.com/grafana/loki/v3/pkg/dataobj/internal/streamio" + "github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear" ) // A Stream is an individual stream within a data object. @@ -333,7 +334,7 @@ func encodeColumn(enc *encoding.StreamsEncoder, columnType streamsmd.ColumnType, func (s *Streams) Reset() { s.lastID.Store(0) clear(s.lookup) - s.ordered = s.ordered[:0] + s.ordered = sliceclear.Clear(s.ordered) s.currentLabelsSize = 0 s.globalMinTimestamp = time.Time{} s.globalMaxTimestamp = time.Time{} diff --git a/pkg/dataobj/internal/util/bufpool/bucket.go b/pkg/dataobj/internal/util/bufpool/bucket.go new file mode 100644 index 0000000000000..c64ec44b9e5d8 --- /dev/null +++ b/pkg/dataobj/internal/util/bufpool/bucket.go @@ -0,0 +1,72 @@ +package bufpool + +import ( + "bytes" + "math" + "sync" +) + +type bucket struct { + size uint64 + pool sync.Pool +} + +var buckets []*bucket + +// Bucket sizes are exponentially sized from 1KiB to 64GiB. The max boundary is +// picked arbitrarily. +const ( + bucketMin uint64 = 1024 + bucketMax uint64 = 1 << 36 /* 64 GiB */ +) + +func init() { + nextBucket := bucketMin + + for { + // Capture the size so New refers to the correct size per bucket. + buckets = append(buckets, &bucket{ + size: nextBucket, + pool: sync.Pool{ + New: func() any { + // We don't preallocate the buffer here; this will help a bucket pool + // to be filled with buffers of varying sizes within that bucket. + // + // If we *did* preallocate the buffer, then any call to + // [bytes.Buffer.Grow] beyond the bucket size would immediately cause + // it to double in size, placing it in the next bucket. + return bytes.NewBuffer(nil) + }, + }, + }) + + // Exponentially grow the bucket size up to bucketMax. + nextBucket *= 2 + if nextBucket > bucketMax { + break + } + } + + // Catch-all for buffers bigger than bucketMax. 
+ buckets = append(buckets, &bucket{ + size: math.MaxUint64, + pool: sync.Pool{ + New: func() any { + return bytes.NewBuffer(nil) + }, + }, + }) +} + +// findBucket returns the first bucket that is large enough to hold size. +func findBucket(size uint64) *bucket { + for _, b := range buckets { + if b.size >= size { + return b + } + } + + // We shouldn't be able to reach this point; the final bucket is sized for + // anything, but if we do reach this we'll return the last bucket anyway. + return buckets[len(buckets)-1] +} diff --git a/pkg/dataobj/internal/util/bufpool/bufpool.go b/pkg/dataobj/internal/util/bufpool/bufpool.go new file mode 100644 index 0000000000000..7d048767ad68b --- /dev/null +++ b/pkg/dataobj/internal/util/bufpool/bufpool.go @@ -0,0 +1,41 @@ +// Package bufpool offers a pool of [*bytes.Buffer] objects that are placed +// into exponentially sized buckets. +// +// Bucketing prevents the memory cost of a pool from permanently increasing +// when a large buffer is placed into the pool. +package bufpool + +import ( + "bytes" +) + +// Get returns a buffer from the pool for the given size. Returned buffers are +// reset and ready for writes. +// +// The capacity of the returned buffer is guaranteed to be at least size. +func Get(size int) *bytes.Buffer { + if size < 0 { + size = 0 + } + + b := findBucket(uint64(size)) + + buf := b.pool.Get().(*bytes.Buffer) + buf.Reset() + buf.Grow(size) + return buf +} + +// Put returns a buffer to the pool. The buffer is placed into an appropriate +// bucket based on its current capacity. +func Put(buf *bytes.Buffer) { + if buf == nil { + return + } + + b := findBucket(uint64(buf.Cap())) + if b == nil { + return + } + b.pool.Put(buf) +} diff --git a/pkg/dataobj/internal/util/bufpool/bufpool_test.go b/pkg/dataobj/internal/util/bufpool/bufpool_test.go new file mode 100644 index 0000000000000..9727bab02ba65 --- /dev/null +++ b/pkg/dataobj/internal/util/bufpool/bufpool_test.go @@ -0,0 +1,36 @@ +package bufpool + +import ( + "fmt" + "math" + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_findBucket(t *testing.T) { + tt := []struct { + size uint64 + expect uint64 + }{ + {size: 0, expect: 1024}, + {size: 512, expect: 1024}, + {size: 1024, expect: 1024}, + {size: 1025, expect: 2048}, + {size: (1 << 36), expect: (1 << 36)}, + {size: (1 << 37), expect: math.MaxUint64}, + } + + for _, tc := range tt { + t.Run(fmt.Sprintf("size=%d", tc.size), func(t *testing.T) { + got := findBucket(tc.size).size + require.Equal(t, tc.expect, got) + }) + } +} + +func Test(t *testing.T) { + buf := Get(1_500_000) + require.NotNil(t, buf) + require.Less(t, buf.Cap(), 2<<20, "buffer should not have grown to next bucket size") +} diff --git a/pkg/dataobj/internal/util/sliceclear/sliceclear.go b/pkg/dataobj/internal/util/sliceclear/sliceclear.go new file mode 100644 index 0000000000000..6caaf52cbc45c --- /dev/null +++ b/pkg/dataobj/internal/util/sliceclear/sliceclear.go @@ -0,0 +1,11 @@ +// Package sliceclear provides a way to clear and truncate the length of a +// slice. +package sliceclear + +// Clear zeroes out all values in s and returns s[:0]. Clear allows memory of +// previous elements in the slice to be reclained by the garbage collector +// while still allowing the underlying slice memory to be reused. 
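sliceclear exists because `s[:0]` on its own keeps every element of the backing array reachable; zeroing first lets the garbage collector reclaim whatever the elements point to while the slice's capacity is still reused. A small illustration, treating the internal sliceclear package as importable for the sake of the example.

```go
package main

import (
	"fmt"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear"
)

func main() {
	bufs := make([][]byte, 0, 4)
	for i := 0; i < 4; i++ {
		bufs = append(bufs, make([]byte, 1<<20))
	}

	// A plain bufs[:0] would keep all four 1 MiB slices reachable through the
	// backing array until they happen to be overwritten.
	bufs = sliceclear.Clear(bufs)

	// Length is reset but capacity is retained for the next batch, and the
	// old elements are now collectable.
	fmt.Println(len(bufs), cap(bufs)) // 0 4
}
```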
+func Clear[Slice ~[]E, E any](s Slice) Slice { + clear(s) + return s[:0] +} diff --git a/pkg/dataobj/internal/util/sliceclear/sliceclear_test.go b/pkg/dataobj/internal/util/sliceclear/sliceclear_test.go new file mode 100644 index 0000000000000..fd7b7545b81ee --- /dev/null +++ b/pkg/dataobj/internal/util/sliceclear/sliceclear_test.go @@ -0,0 +1,28 @@ +package sliceclear_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/util/sliceclear" +) + +func Test(t *testing.T) { + s := make([]*int, 0, 10) + for i := 0; i < 10; i++ { + s = append(s, new(int)) + } + + s = sliceclear.Clear(s) + require.Equal(t, 10, cap(s)) + require.Equal(t, 0, len(s)) + + // Reexpand s to its full capacity and ensure that all elements have been + // zeroed out. + full := s[:cap(s)] + require.Equal(t, 10, len(full)) + for i := 0; i < 10; i++ { + require.Nil(t, full[i], "element %d was not zeroed; this can cause memory leaks", i) + } +} diff --git a/pkg/dataobj/metrics.go b/pkg/dataobj/metrics.go index cd8479d945fc3..fa18d5159d0b2 100644 --- a/pkg/dataobj/metrics.go +++ b/pkg/dataobj/metrics.go @@ -25,6 +25,7 @@ type metrics struct { flushTime prometheus.Histogram sizeEstimate prometheus.Gauge + builtSize prometheus.Histogram } // newMetrics creates a new set of [metrics] for instrumenting data objects. @@ -104,6 +105,18 @@ func newMetrics() *metrics { Help: "Current estimated size of the data object in bytes.", }), + + builtSize: prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "loki", + Subsystem: "dataobj", + Name: "built_size_bytes", + + Help: "Distribution of constructed data object sizes in bytes.", + + NativeHistogramBucketFactor: 1.1, + NativeHistogramMaxBucketNumber: 100, + NativeHistogramMinResetDuration: 0, + }), } } @@ -131,6 +144,7 @@ func (m *metrics) Register(reg prometheus.Registerer) error { errs = append(errs, reg.Register(m.flushTime)) errs = append(errs, reg.Register(m.sizeEstimate)) + errs = append(errs, reg.Register(m.builtSize)) return errors.Join(errs...) 
} @@ -141,9 +155,14 @@ func (m *metrics) Unregister(reg prometheus.Registerer) { m.streams.Unregister(reg) m.encoding.Unregister(reg) + reg.Unregister(m.shaPrefixSize) + reg.Unregister(m.targetPageSize) + reg.Unregister(m.targetObjectSize) + reg.Unregister(m.appendTime) reg.Unregister(m.buildTime) reg.Unregister(m.flushTime) reg.Unregister(m.sizeEstimate) + reg.Unregister(m.builtSize) } diff --git a/pkg/ingester/instance.go b/pkg/ingester/instance.go index c6afcacfbdfde..80905bff23505 100644 --- a/pkg/ingester/instance.go +++ b/pkg/ingester/instance.go @@ -1183,7 +1183,7 @@ func (i *instance) updateOwnedStreams(isOwnedStream func(*stream) (bool, error)) }() var err error - i.streams.WithLock(func() { + i.streams.WithRLock(func() { i.ownedStreamsSvc.resetStreamCounts() err = i.streams.ForEach(func(s *stream) (bool, error) { ownedStream, err := isOwnedStream(s) diff --git a/pkg/ingester/limiter_test.go b/pkg/ingester/limiter_test.go index b611db4d109e1..78e579187a502 100644 --- a/pkg/ingester/limiter_test.go +++ b/pkg/ingester/limiter_test.go @@ -130,7 +130,7 @@ func TestStreamCountLimiter_AssertNewStreamAllowed(t *testing.T) { ownedStreamSvc := &ownedStreamService{ fixedLimit: atomic.NewInt32(testData.fixedLimit), - ownedStreamCount: testData.ownedStreamCount, + ownedStreamCount: atomic.NewInt64(int64(testData.ownedStreamCount)), } strategy := &fixedStrategy{localLimit: testData.calculatedLocalLimit} limiter := NewLimiter(limits, NilMetrics, strategy, &TenantBasedStrategy{limits: limits}) diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index ff4db43747676..5f144038bc094 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -318,8 +318,8 @@ func newIngesterMetrics(r prometheus.Registerer, metricsNamespace string) *inges Namespace: constants.Loki, Name: "ingester_streams_ownership_check_duration_ms", Help: "Distribution of streams ownership check durations in milliseconds.", - // 100ms to 5s. 
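For reference, the replacement bucket layout introduced just below expands as shown here; a quick way to confirm the "1ms -> 16s" comment.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// prometheus.ExponentialBuckets(1, 4, 8) yields the upper bounds
	// 1, 4, 16, 64, 256, 1024, 4096, 16384 (milliseconds here),
	// i.e. roughly 1ms through ~16s.
	fmt.Println(prometheus.ExponentialBuckets(1, 4, 8))
}
```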
- Buckets: []float64{100, 250, 350, 500, 750, 1000, 1500, 2000, 5000}, + // 1ms -> 16s + Buckets: prometheus.ExponentialBuckets(1, 4, 8), }), duplicateLogBytesTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ diff --git a/pkg/ingester/owned_streams.go b/pkg/ingester/owned_streams.go index 3bb729815e718..56c5a77fa768e 100644 --- a/pkg/ingester/owned_streams.go +++ b/pkg/ingester/owned_streams.go @@ -21,17 +21,18 @@ type ownedStreamService struct { tenantID string limiter *Limiter fixedLimit *atomic.Int32 - ownedStreamCount int + ownedStreamCount *atomic.Int64 lock sync.RWMutex notOwnedStreams map[model.Fingerprint]any } func newOwnedStreamService(tenantID string, limiter *Limiter) *ownedStreamService { svc := &ownedStreamService{ - tenantID: tenantID, - limiter: limiter, - fixedLimit: atomic.NewInt32(0), - notOwnedStreams: make(map[model.Fingerprint]any), + tenantID: tenantID, + limiter: limiter, + fixedLimit: atomic.NewInt32(0), + ownedStreamCount: atomic.NewInt64(0), + notOwnedStreams: make(map[model.Fingerprint]any), } svc.updateFixedLimit() @@ -39,9 +40,7 @@ func newOwnedStreamService(tenantID string, limiter *Limiter) *ownedStreamServic } func (s *ownedStreamService) getOwnedStreamCount() int { - s.lock.RLock() - defer s.lock.RUnlock() - return s.ownedStreamCount + return int(s.ownedStreamCount.Load()) } func (s *ownedStreamService) updateFixedLimit() (old, new int32) { @@ -55,12 +54,15 @@ func (s *ownedStreamService) getFixedLimit() int { } func (s *ownedStreamService) trackStreamOwnership(fp model.Fingerprint, owned bool) { - s.lock.Lock() - defer s.lock.Unlock() + // only need to inc the owned count; can use sync atomics. if owned { - s.ownedStreamCount++ + s.ownedStreamCount.Inc() return } + + // need to update map; lock required + s.lock.Lock() + defer s.lock.Unlock() notOwnedStreamsMetric.Inc() s.notOwnedStreams[fp] = nil } @@ -74,13 +76,13 @@ func (s *ownedStreamService) trackRemovedStream(fp model.Fingerprint) { delete(s.notOwnedStreams, fp) return } - s.ownedStreamCount-- + s.ownedStreamCount.Dec() } func (s *ownedStreamService) resetStreamCounts() { s.lock.Lock() defer s.lock.Unlock() - s.ownedStreamCount = 0 + s.ownedStreamCount.Store(0) notOwnedStreamsMetric.Sub(float64(len(s.notOwnedStreams))) s.notOwnedStreams = make(map[model.Fingerprint]any) } diff --git a/pkg/ingester/recalculate_owned_streams_test.go b/pkg/ingester/recalculate_owned_streams_test.go index 3e531dcdef66f..f3bea57f69bae 100644 --- a/pkg/ingester/recalculate_owned_streams_test.go +++ b/pkg/ingester/recalculate_owned_streams_test.go @@ -37,7 +37,7 @@ func Test_recalculateOwnedStreams_newRecalculateOwnedStreamsIngester(t *testing. 
func Test_recalculateOwnedStreams_recalculateWithIngesterStrategy(t *testing.T) { tests := map[string]struct { featureEnabled bool - expectedOwnedStreamCount int + expectedOwnedStreamCount int64 expectedNotOwnedStreamCount int }{ "expected streams ownership to be recalculated": { @@ -101,7 +101,7 @@ func Test_recalculateOwnedStreams_recalculateWithIngesterStrategy(t *testing.T) mockRing.addMapping(createStream(t, tenant, 100), true) mockRing.addMapping(createStream(t, tenant, 250), true) - require.Equal(t, 7, tenant.ownedStreamsSvc.ownedStreamCount) + require.Equal(t, int64(7), tenant.ownedStreamsSvc.ownedStreamCount.Load()) require.Len(t, tenant.ownedStreamsSvc.notOwnedStreams, 0) mockTenantsSupplier := &mockTenantsSuplier{tenants: []*instance{tenant}} @@ -116,7 +116,7 @@ func Test_recalculateOwnedStreams_recalculateWithIngesterStrategy(t *testing.T) if testData.featureEnabled { require.Equal(t, 50, tenant.ownedStreamsSvc.getFixedLimit(), "fixed limit must be updated after recalculation") } - require.Equal(t, testData.expectedOwnedStreamCount, tenant.ownedStreamsSvc.ownedStreamCount) + require.Equal(t, testData.expectedOwnedStreamCount, tenant.ownedStreamsSvc.ownedStreamCount.Load()) require.Len(t, tenant.ownedStreamsSvc.notOwnedStreams, testData.expectedNotOwnedStreamCount) }) } diff --git a/pkg/loghttp/push/otlp.go b/pkg/loghttp/push/otlp.go index 584b45a833b71..dbb4ec8349e63 100644 --- a/pkg/loghttp/push/otlp.go +++ b/pkg/loghttp/push/otlp.go @@ -185,6 +185,7 @@ func otlpToLokiPushRequest(ctx context.Context, ld plog.Logs, userID string, ten labelsStr := streamLabels.String() lbs := modelLabelsSetToLabelsList(streamLabels) + totalBytesReceived := int64(0) if _, ok := pushRequestsByStream[labelsStr]; !ok { pushRequestsByStream[labelsStr] = logproto.Stream{ @@ -197,9 +198,7 @@ func otlpToLokiPushRequest(ctx context.Context, ld plog.Logs, userID string, ten retentionPeriodForUser := tenantsRetention.RetentionPeriodFor(userID, lbs) stats.StructuredMetadataBytes[retentionPeriodForUser] += int64(resourceAttributesAsStructuredMetadataSize) - if tracker != nil { - tracker.ReceivedBytesAdd(ctx, userID, retentionPeriodForUser, lbs, float64(resourceAttributesAsStructuredMetadataSize)) - } + totalBytesReceived += int64(resourceAttributesAsStructuredMetadataSize) stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser] = append(stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser], resourceAttributesAsStructuredMetadata...) @@ -252,9 +251,7 @@ func otlpToLokiPushRequest(ctx context.Context, ld plog.Logs, userID string, ten scopeAttributesAsStructuredMetadataSize := loki_util.StructuredMetadataSize(scopeAttributesAsStructuredMetadata) stats.StructuredMetadataBytes[retentionPeriodForUser] += int64(scopeAttributesAsStructuredMetadataSize) - if tracker != nil { - tracker.ReceivedBytesAdd(ctx, userID, retentionPeriodForUser, lbs, float64(scopeAttributesAsStructuredMetadataSize)) - } + totalBytesReceived += int64(scopeAttributesAsStructuredMetadataSize) stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser] = append(stats.ResourceAndSourceMetadataLabels[retentionPeriodForUser], scopeAttributesAsStructuredMetadata...) 
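The reshaping in this hunk, repeated for push.go further down, folds the per-entry and per-attribute-set tracker calls into one `ReceivedBytesAdd` per stream with an accumulated total, so the usage tracker is invoked far less often on large batches while the reported totals stay the same. A cut-down sketch of the batching; the `byteTracker` interface and `entry` type are stand-ins, only the batching idea is taken from the change.

```go
package pushsketch

import "context"

// byteTracker is a reduced stand-in for the usage tracker used above.
type byteTracker interface {
	ReceivedBytesAdd(ctx context.Context, tenant string, bytes float64)
}

type entry struct {
	line     string
	metadata int // structured-metadata size in bytes
}

// trackStream accumulates sizes across all entries of a stream and reports
// them with a single tracker call, instead of one call per entry.
func trackStream(ctx context.Context, tracker byteTracker, tenant string, entries []entry) {
	if tracker == nil {
		return
	}

	var total int64
	for _, e := range entries {
		total += int64(len(e.line)) + int64(e.metadata)
	}
	tracker.ReceivedBytesAdd(ctx, tenant, float64(total))
}
```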
for k := 0; k < logs.Len(); k++ { @@ -279,17 +276,18 @@ func otlpToLokiPushRequest(ctx context.Context, ld plog.Logs, userID string, ten metadataSize := int64(loki_util.StructuredMetadataSize(entry.StructuredMetadata) - resourceAttributesAsStructuredMetadataSize - scopeAttributesAsStructuredMetadataSize) stats.StructuredMetadataBytes[retentionPeriodForUser] += metadataSize stats.LogLinesBytes[retentionPeriodForUser] += int64(len(entry.Line)) - - if tracker != nil { - tracker.ReceivedBytesAdd(ctx, userID, retentionPeriodForUser, lbs, float64(len(entry.Line))) - tracker.ReceivedBytesAdd(ctx, userID, retentionPeriodForUser, lbs, float64(metadataSize)) - } + totalBytesReceived += metadataSize + totalBytesReceived += int64(len(entry.Line)) stats.NumLines++ if entry.Timestamp.After(stats.MostRecentEntryTimestamp) { stats.MostRecentEntryTimestamp = entry.Timestamp } } + + if tracker != nil { + tracker.ReceivedBytesAdd(ctx, userID, retentionPeriodForUser, lbs, float64(totalBytesReceived)) + } } } diff --git a/pkg/loghttp/push/push.go b/pkg/loghttp/push/push.go index 9da5b29722643..759e21f293ede 100644 --- a/pkg/loghttp/push/push.go +++ b/pkg/loghttp/push/push.go @@ -281,22 +281,25 @@ func ParseLokiRequest(userID string, r *http.Request, tenantsRetention TenantsRe if tenantsRetention != nil { retentionPeriod = tenantsRetention.RetentionPeriodFor(userID, lbs) } + totalBytesReceived := int64(0) + for _, e := range s.Entries { pushStats.NumLines++ entryLabelsSize := int64(util.StructuredMetadataSize(e.StructuredMetadata)) pushStats.LogLinesBytes[retentionPeriod] += int64(len(e.Line)) pushStats.StructuredMetadataBytes[retentionPeriod] += entryLabelsSize - - if tracker != nil { - tracker.ReceivedBytesAdd(r.Context(), userID, retentionPeriod, lbs, float64(len(e.Line))) - tracker.ReceivedBytesAdd(r.Context(), userID, retentionPeriod, lbs, float64(entryLabelsSize)) - } + totalBytesReceived += int64(len(e.Line)) + totalBytesReceived += entryLabelsSize if e.Timestamp.After(pushStats.MostRecentEntryTimestamp) { pushStats.MostRecentEntryTimestamp = e.Timestamp } } + if tracker != nil { + tracker.ReceivedBytesAdd(r.Context(), userID, retentionPeriod, lbs, float64(totalBytesReceived)) + } + req.Streams[i] = s } diff --git a/pkg/sizing/algorithm.go b/pkg/sizing/algorithm.go deleted file mode 100644 index dcd2d6bb89911..0000000000000 --- a/pkg/sizing/algorithm.go +++ /dev/null @@ -1,98 +0,0 @@ -package sizing - -import ( - "math" -) - -type ClusterSize struct { - TotalNodes int - TotalReadReplicas int - TotalWriteReplicas int - TotalCoresRequest float64 - TotalMemoryRequest int - - expectedMaxReadThroughputBytesSec float64 - expectedMaxIngestBytesDay float64 -} - -type QueryPerf string - -const ( - Basic QueryPerf = "basic" - Super QueryPerf = "super" -) - -func calculateClusterSize(nt NodeType, bytesDayIngest float64, qperf QueryPerf) ClusterSize { - - // 1 Petabyte per day is maximum. We use decimal prefix https://en.wikipedia.org/wiki/Binary_prefix - bytesDayIngest = math.Min(bytesDayIngest, 1e15) - bytesSecondIngest := bytesDayIngest / 86400 - numWriteReplicasNeeded := math.Ceil(bytesSecondIngest / nt.writePod.rateBytesSecond) - - // High availability requires at least 3 replicas. 
- numWriteReplicasNeeded = math.Max(3, numWriteReplicasNeeded) - - //Hack based on current 4-1 mem to cpu ratio and base machine w/ 4 cores and 1 write/read - writeReplicasPerNode := float64(nt.cores / 4) - fullyWritePackedNodes := math.Floor(numWriteReplicasNeeded / writeReplicasPerNode) - replicasOnLastNode := math.Mod(numWriteReplicasNeeded, writeReplicasPerNode) - - coresOnLastNode := 0.0 - if replicasOnLastNode > 0.0 { - coresOnLastNode = math.Max(float64(nt.cores)-replicasOnLastNode*nt.writePod.cpuRequest, 0.0) - } - - nodesNeededForWrites := math.Ceil(numWriteReplicasNeeded / writeReplicasPerNode) - - // Hack based on packing 1 read and 1 write per node - readReplicasPerNode := writeReplicasPerNode - readReplicasOnFullyPackedWriteNodes := readReplicasPerNode * fullyWritePackedNodes - readReplicasOnPartiallyPackedWriteNodes := math.Floor(coresOnLastNode / nt.readPod.cpuRequest) - - // Required read replicase without considering required query performance. - baselineReadReplicas := readReplicasOnFullyPackedWriteNodes + readReplicasOnPartiallyPackedWriteNodes - - scaleUp := 0.25 - additionalReadReplicas := 0.0 - if qperf != Basic { - additionalReadReplicas = baselineReadReplicas * scaleUp - } - - readReplicasPerEmptyNode := math.Floor(float64(nt.cores) / nt.readPod.cpuRequest) - additionalNodesNeededForReads := additionalReadReplicas / readReplicasPerEmptyNode - - actualNodesAddedForReads := calculateActualReadNodes(additionalNodesNeededForReads) - actualReadReplicasAdded := actualNodesAddedForReads * readReplicasPerEmptyNode - - totalReadReplicas := actualReadReplicasAdded + baselineReadReplicas - - // High availability requires at least 3 replicas. - totalReadReplicas = math.Max(3, totalReadReplicas) - - totalReadThroughputBytesSec := totalReadReplicas * nt.readPod.rateBytesSecond - - totalNodesNeeded := nodesNeededForWrites + actualNodesAddedForReads - totalCoresRequest := numWriteReplicasNeeded*nt.writePod.cpuRequest + totalReadReplicas*nt.readPod.cpuRequest - totalMemoryRequest := numWriteReplicasNeeded*float64(nt.writePod.memoryRequest) + totalReadReplicas*float64(nt.readPod.memoryRequest) - - return ClusterSize{ - TotalNodes: int(totalNodesNeeded), - TotalReadReplicas: int(totalReadReplicas), - TotalWriteReplicas: int(numWriteReplicasNeeded), - TotalCoresRequest: totalCoresRequest, - TotalMemoryRequest: int(totalMemoryRequest), - - expectedMaxReadThroughputBytesSec: totalReadThroughputBytesSec, - expectedMaxIngestBytesDay: (nt.writePod.rateBytesSecond * numWriteReplicasNeeded) * 86400, - } -} - -func calculateActualReadNodes(additionalNodesNeededForReads float64) float64 { - if additionalNodesNeededForReads == 0.0 { - return 0 - } - if 0.0 < additionalNodesNeededForReads && additionalNodesNeededForReads < 1.0 { - return 1 - } - return math.Floor(additionalNodesNeededForReads) -} diff --git a/pkg/sizing/algorithm_test.go b/pkg/sizing/algorithm_test.go deleted file mode 100644 index 4ef36ad274c71..0000000000000 --- a/pkg/sizing/algorithm_test.go +++ /dev/null @@ -1,66 +0,0 @@ -package sizing - -import ( - "testing" - "testing/quick" - - "github.com/stretchr/testify/require" -) - -func Test_Algorithm(t *testing.T) { - f := func(ingest float64) bool { - if ingest < 0 { - ingest = -ingest - } - postiveReplicas := true - for _, cloud := range NodeTypesByProvider { - for _, node := range cloud { - size := calculateClusterSize(node, ingest, Basic) - postiveReplicas = size.TotalNodes > 0.0 && size.TotalReadReplicas > 0.0 && size.TotalWriteReplicas > 0.0 - require.Truef(t, 
postiveReplicas, "Cluster size was empty: ingest=%d cluster=%v node=%v", ingest, size, node) - require.InDelta(t, size.TotalReadReplicas, size.TotalWriteReplicas, 5.0, "Replicas have different sizes: ingest=%d node=%s", ingest, node.name) - - size = calculateClusterSize(node, ingest, Super) - postiveReplicas = size.TotalNodes > 0.0 && size.TotalReadReplicas > 0.0 && size.TotalWriteReplicas > 0.0 - require.Truef(t, postiveReplicas, "Cluster size was empty: ingest=%d cluster=%v node=%v", ingest, size, node) - } - } - - return postiveReplicas - } - - if err := quick.Check(f, nil); err != nil { - t.Error(err) - } - - // Sanity check for 1TB/Day - size := calculateClusterSize(NodeTypesByProvider["AWS"]["t2.xlarge"], 1e12, Basic) - require.Equalf(t, 4, size.TotalNodes, "given ingest=1PB/Day totla nodes must be big") -} - -func Test_CoresNodeInvariant(t *testing.T) { - for _, queryPerformance := range []QueryPerf{Basic, Super} { - for _, ingest := range []float64{30, 300, 1000, 2000} { - for _, cloud := range NodeTypesByProvider { - for _, node := range cloud { - size := calculateClusterSize(node, ingest, queryPerformance) - require.LessOrEqualf(t, size.TotalCoresRequest, float64(size.TotalNodes*node.cores), "given ingest=%d node=%s total cores must be less than available cores", ingest, node.name) - } - } - } - } -} - -func Test_MinimumReplicas(t *testing.T) { - for _, queryPerformance := range []QueryPerf{Basic, Super} { - for _, ingest := range []float64{1, 1000} { - for _, cloud := range NodeTypesByProvider { - for _, node := range cloud { - size := calculateClusterSize(node, ingest, queryPerformance) - require.GreaterOrEqual(t, size.TotalReadReplicas, 3) - require.GreaterOrEqual(t, size.TotalWriteReplicas, 3) - } - } - } - } -} diff --git a/pkg/sizing/helm.go b/pkg/sizing/helm.go deleted file mode 100644 index 8e988c8b55b2e..0000000000000 --- a/pkg/sizing/helm.go +++ /dev/null @@ -1,91 +0,0 @@ -package sizing - -type Pod struct { - Replicas int `json:"replicas"` - Rate int `json:"rate"` - CPU struct { - Request float64 `json:"request"` - Limit float64 `json:"limit"` - } `json:"cpu"` - Memory struct { - Request int `json:"request"` - Limit int `json:"limit"` - } `json:"memory"` -} - -type Loki struct { - AuthEnabled bool `json:"auth_enabled"` -} - -type Read struct { - Replicas int `json:"replicas"` - Resources Resources `json:"resources"` -} - -type Write struct { - Replicas int `json:"replicas"` - Resources Resources `json:"resources"` -} - -type Resources struct { - Requests struct { - CPU float64 `json:"cpu"` - Memory int `json:"memory"` - } `json:"requests"` - Limits struct { - CPU float64 `json:"cpu"` - Memory int `json:"memory"` - } `json:"limits"` -} - -type Values struct { - Loki Loki `json:"loki"` - Read Read `json:"read"` - Write Write `json:"write"` -} - -func constructHelmValues(cluster ClusterSize, nodeType NodeType) Values { - return Values{ - Loki: Loki{ - AuthEnabled: false, - }, - Read: Read{ - Replicas: cluster.TotalReadReplicas, - Resources: Resources{ - Requests: struct { - CPU float64 `json:"cpu"` - Memory int `json:"memory"` - }{ - CPU: nodeType.readPod.cpuRequest, - Memory: nodeType.readPod.memoryRequest, - }, - Limits: struct { - CPU float64 `json:"cpu"` - Memory int `json:"memory"` - }{ - CPU: nodeType.readPod.cpuLimit, - Memory: nodeType.readPod.memoryLimit, - }, - }, - }, - Write: Write{ - Replicas: cluster.TotalWriteReplicas, - Resources: Resources{ - Requests: struct { - CPU float64 `json:"cpu"` - Memory int `json:"memory"` - }{ - CPU: 
nodeType.writePod.cpuRequest, - Memory: nodeType.writePod.memoryRequest, - }, - Limits: struct { - CPU float64 `json:"cpu"` - Memory int `json:"memory"` - }{ - CPU: nodeType.writePod.cpuLimit, - Memory: nodeType.writePod.memoryLimit, - }, - }, - }, - } -} diff --git a/pkg/sizing/http.go b/pkg/sizing/http.go deleted file mode 100644 index f0b755af32ead..0000000000000 --- a/pkg/sizing/http.go +++ /dev/null @@ -1,120 +0,0 @@ -package sizing - -import ( - "encoding/json" - "fmt" - "net/http" - "strconv" - "strings" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "gopkg.in/yaml.v2" -) - -type Message struct { - NodeType NodeType - Ingest int - Retention int - QueryPerformance QueryPerf -} - -func decodeMesage(req *http.Request, msg *Message) error { - var err error - types := strings.Split(req.FormValue("node-type"), " - ") - nodeTypes, ok := NodeTypesByProvider[types[0]] - if !ok { - return fmt.Errorf("unknown cloud provider %s", types[0]) - } - msg.NodeType, ok = nodeTypes[types[1]] - if !ok { - return fmt.Errorf("unknown node type %s", types[1]) - } - - msg.Ingest, err = strconv.Atoi(req.FormValue("ingest")) - if err != nil { - return fmt.Errorf("cannot read ingest: %w", err) - } - - msg.Retention, err = strconv.Atoi(req.FormValue("retention")) - if err != nil { - return fmt.Errorf("cannot read retention: %w", err) - } - - msg.QueryPerformance = QueryPerf(strings.ToLower(req.FormValue("queryperf"))) - - return nil -} - -// Handler defines the REST API of the sizing tool. -type Handler struct { - logger log.Logger -} - -func NewHandler(logger log.Logger) *Handler { - return &Handler{logger: logger} -} - -func (h *Handler) GenerateHelmValues(w http.ResponseWriter, req *http.Request) { - - var msg Message - err := decodeMesage(req, &msg) - if err != nil { - level.Error(h.logger).Log("error", err) - h.respondError(w, err) - return - } - - w.Header().Set("Content-Type", "application/x-yaml; charset=utf-8") - - cluster := calculateClusterSize(msg.NodeType, float64(msg.Ingest), msg.QueryPerformance) - helm := constructHelmValues(cluster, msg.NodeType) - - enc := yaml.NewEncoder(w) - err = enc.Encode(helm) - if err != nil { - level.Error(h.logger).Log("msg", "could not encode Helm Chart values", "error", err) - } -} - -func (h *Handler) Nodes(w http.ResponseWriter, _ *http.Request) { - var nodes []string - for cloud, n := range NodeTypesByProvider { - for nodeType := range n { - nodes = append(nodes, fmt.Sprintf("%s - %s", cloud, nodeType)) - } - } - - w.Header().Set("Content-Type", "application/json") - err := json.NewEncoder(w).Encode(nodes) - if err != nil { - level.Error(h.logger).Log("msg", "could not encode node values", "error", err) - } -} - -func (h *Handler) respondError(w http.ResponseWriter, err error) { - w.WriteHeader(http.StatusBadRequest) - _, err = w.Write([]byte(fmt.Sprintf("error: %q", err))) - if err != nil { - level.Error(h.logger).Log("msg", "could not write error message", "error", err) - } -} - -func (h *Handler) Cluster(w http.ResponseWriter, req *http.Request) { - var msg Message - - err := decodeMesage(req, &msg) - if err != nil { - level.Error(h.logger).Log("error", err) - h.respondError(w, err) - return - } - - cluster := calculateClusterSize(msg.NodeType, float64(msg.Ingest), msg.QueryPerformance) - - w.Header().Set("Content-Type", "application/json") - err = json.NewEncoder(w).Encode(cluster) - if err != nil { - level.Error(h.logger).Log("msg", "could not encode cluster size", "error", err) - } -} diff --git a/pkg/sizing/node.go b/pkg/sizing/node.go 
deleted file mode 100644 index 79899539ae972..0000000000000 --- a/pkg/sizing/node.go +++ /dev/null @@ -1,98 +0,0 @@ -package sizing - -type NodeType struct { - name string - cores int - memoryGB int - readPod NodePod - writePod NodePod -} - -type NodePod struct { - cpuRequest float64 - cpuLimit float64 // Or null - memoryRequest int - memoryLimit int - rateBytesSecond float64 -} - -var StandardWrite = NodePod{ - cpuRequest: 1, - cpuLimit: 2, - memoryRequest: 6, - memoryLimit: 12, - rateBytesSecond: 3 * 1024 * 1024, -} - -var StandardRead = NodePod{ - cpuRequest: 3, - cpuLimit: 3, // Undefined TODO: Is this a bug - memoryRequest: 6, - memoryLimit: 8, - rateBytesSecond: 768 * 1024 * 1024, -} - -var NodeTypesByProvider = map[string]map[string]NodeType{ - "AWS": { - "t2.xlarge": { - name: "t2.xlarge", - cores: 4, - memoryGB: 16, - readPod: StandardRead, - writePod: StandardWrite, - }, - "t2.2xlarge": { - name: "t2.2xlarge", - cores: 8, - memoryGB: 32, - readPod: StandardRead, - writePod: StandardWrite, - }, - }, - "GCP": { - "e2-standard-4": { - name: "e2-standard-4", - cores: 4, - memoryGB: 16, - readPod: StandardRead, - writePod: StandardWrite, - }, - "e2-standard-8": { - name: "e2-standard-8", - cores: 8, - memoryGB: 32, - readPod: StandardRead, - writePod: StandardWrite, - }, - "e2-standard-16": { - name: "e2-standard-16", - cores: 16, - memoryGB: 64, - readPod: StandardRead, - writePod: StandardWrite, - }, - }, - "OVHcloud": { - "b2-30": { - name: "b2-30", - cores: 8, - memoryGB: 30, - readPod: StandardRead, - writePod: StandardWrite, - }, - "b2-60": { - name: "b2-60", - cores: 16, - memoryGB: 60, - readPod: StandardRead, - writePod: StandardWrite, - }, - "b2-120": { - name: "b2-120", - cores: 32, - memoryGB: 120, - readPod: StandardRead, - writePod: StandardWrite, - }, - }, -} diff --git a/production/helm/loki/scenarios/README.md b/production/helm/loki/scenarios/README.md index b84c186e23684..496286bb2009d 100644 --- a/production/helm/loki/scenarios/README.md +++ b/production/helm/loki/scenarios/README.md @@ -61,3 +61,9 @@ As the last step you need to run a diff between both files: ```shell diff current-manifest.yaml release-manifest.yaml ``` + +### Known Issues + +* The Github Action won't be able to post the diff comment if the PR is coming from a fork, because of permissions the workflow run from a fork is not able to write in the PR content. + + In this case, to review the output we recommend to download the artifacts in the workflow run and check the outputs. 
diff --git a/production/helm/loki/scenarios/images/added.png b/production/helm/loki/scenarios/images/added.png deleted file mode 100644 index ced9f9554a8f8..0000000000000 Binary files a/production/helm/loki/scenarios/images/added.png and /dev/null differ diff --git a/production/helm/loki/scenarios/images/img.png b/production/helm/loki/scenarios/images/img.png deleted file mode 100644 index 81ba701da26a0..0000000000000 Binary files a/production/helm/loki/scenarios/images/img.png and /dev/null differ diff --git a/production/helm/loki/scenarios/images/modified.png b/production/helm/loki/scenarios/images/modified.png deleted file mode 100644 index 39a25bae35b20..0000000000000 Binary files a/production/helm/loki/scenarios/images/modified.png and /dev/null differ diff --git a/production/helm/loki/scenarios/images/removed.png b/production/helm/loki/scenarios/images/removed.png deleted file mode 100644 index 219d64c32c983..0000000000000 Binary files a/production/helm/loki/scenarios/images/removed.png and /dev/null differ diff --git a/tools/gcplog/main.tf b/tools/gcplog/main.tf index fe6b2d84d8c31..4860c44fc52e4 100644 --- a/tools/gcplog/main.tf +++ b/tools/gcplog/main.tf @@ -2,7 +2,7 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "6.16.0" + version = "6.17.0" } } } diff --git a/tools/lambda-promtail/go.mod b/tools/lambda-promtail/go.mod index 2bfdde52595f7..c70c14249caf8 100644 --- a/tools/lambda-promtail/go.mod +++ b/tools/lambda-promtail/go.mod @@ -5,8 +5,8 @@ go 1.22 require ( github.com/aws/aws-lambda-go v1.47.0 github.com/aws/aws-sdk-go-v2 v1.33.0 - github.com/aws/aws-sdk-go-v2/config v1.29.0 - github.com/aws/aws-sdk-go-v2/service/s3 v1.73.1 + github.com/aws/aws-sdk-go-v2/config v1.29.1 + github.com/aws/aws-sdk-go-v2/service/s3 v1.73.2 github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 @@ -25,19 +25,19 @@ require ( github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.53 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.54 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.24 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.28 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.28 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.28 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.2 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.9 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.24.10 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.9 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.33.8 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.24.11 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.10 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.33.9 // indirect github.com/aws/smithy-go v1.22.1 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 // indirect diff --git 
a/tools/lambda-promtail/go.sum b/tools/lambda-promtail/go.sum index 72367bf3c71e1..cefba43f6b738 100644 --- a/tools/lambda-promtail/go.sum +++ b/tools/lambda-promtail/go.sum @@ -52,10 +52,10 @@ github.com/aws/aws-sdk-go-v2 v1.33.0 h1:Evgm4DI9imD81V0WwD+TN4DCwjUMdc94TrduMLbg github.com/aws/aws-sdk-go-v2 v1.33.0/go.mod h1:P5WJBrYqqbWVaOxgH0X/FYYD47/nooaPOZPlQdmiN2U= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 h1:lL7IfaFzngfx0ZwUGOZdsFFnQ5uLvR0hWqqhyE7Q9M8= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7/go.mod h1:QraP0UcVlQJsmHfioCrveWOC1nbiWUl3ej08h4mXWoc= -github.com/aws/aws-sdk-go-v2/config v1.29.0 h1:Vk/u4jof33or1qAQLdofpjKV7mQQT7DcUpnYx8kdmxY= -github.com/aws/aws-sdk-go-v2/config v1.29.0/go.mod h1:iXAZK3Gxvpq3tA+B9WaDYpZis7M8KFgdrDPMmHrgbJM= -github.com/aws/aws-sdk-go-v2/credentials v1.17.53 h1:lwrVhiEDW5yXsuVKlFVUnR2R50zt2DklhOyeLETqDuE= -github.com/aws/aws-sdk-go-v2/credentials v1.17.53/go.mod h1:CkqM1bIw/xjEpBMhBnvqUXYZbpCFuj6dnCAyDk2AtAY= +github.com/aws/aws-sdk-go-v2/config v1.29.1 h1:JZhGawAyZ/EuJeBtbQYnaoftczcb2drR2Iq36Wgz4sQ= +github.com/aws/aws-sdk-go-v2/config v1.29.1/go.mod h1:7bR2YD5euaxBhzt2y/oDkt3uNRb6tjFp98GlTFueRwk= +github.com/aws/aws-sdk-go-v2/credentials v1.17.54 h1:4UmqeOqJPvdvASZWrKlhzpRahAulBfyTJQUaYy4+hEI= +github.com/aws/aws-sdk-go-v2/credentials v1.17.54/go.mod h1:RTdfo0P0hbbTxIhmQrOsC/PquBZGabEPnCaxxKRPSnI= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.24 h1:5grmdTdMsovn9kPZPI23Hhvp0ZyNm5cRO+IZFIYiAfw= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.24/go.mod h1:zqi7TVKTswH3Ozq28PkmBmgzG1tona7mo9G2IJg4Cis= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.28 h1:igORFSiH3bfq4lxKFkTSYDhJEUCYo6C8VKiWJjYwQuQ= @@ -68,20 +68,20 @@ github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.28 h1:7kpeALOUeThs2kEjlAxlADAVfxK github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.28/go.mod h1:pyaOYEdp1MJWgtXLy6q80r3DhsVdOIOZNB9hdTcJIvI= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 h1:iXtILhvDxB6kPvEXgsDhGaZCSC6LQET5ZHSdJozeI0Y= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1/go.mod h1:9nu0fVANtYiAePIBh2/pFUSwtJ402hLnp854CNoDOeE= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.1 h1:mJ9FRktB8v1Ihpqwfk0AWvYEd0FgQtLsshc2Qb2TVc8= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.1/go.mod h1:dIW8puxSbYLSPv/ju0d9A3CpwXdtqvJtYKDMVmPLOWE= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.2 h1:e6um6+DWYQP1XCa+E9YVtG/9v1qk5lyAOelMOVwSyO8= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.2/go.mod h1:dIW8puxSbYLSPv/ju0d9A3CpwXdtqvJtYKDMVmPLOWE= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9 h1:TQmKDyETFGiXVhZfQ/I0cCFziqqX58pi4tKJGYGFSz0= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9/go.mod h1:HVLPK2iHQBUx7HfZeOQSEu3v2ubZaAY2YPbAm5/WUyY= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.9 h1:2aInXbh02XsbO0KobPGMNXyv2QP73VDKsWPNJARj/+4= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.9/go.mod h1:dgXS1i+HgWnYkPXqNoPIPKeUsUUYHaUbThC90aDnNiE= -github.com/aws/aws-sdk-go-v2/service/s3 v1.73.1 h1:OzmyfYGiMCOIAq5pa0KWcaZoA9F8FqajOJevh+hhFdY= -github.com/aws/aws-sdk-go-v2/service/s3 v1.73.1/go.mod h1:K+0a0kWDHAUXBH8GvYGS3cQRwIuRjO9bMWUz6vpNCaU= -github.com/aws/aws-sdk-go-v2/service/sso v1.24.10 h1:DyZUj3xSw3FR3TXSwDhPhuZkkT14QHBiacdbUVcD0Dg= -github.com/aws/aws-sdk-go-v2/service/sso v1.24.10/go.mod h1:Ro744S4fKiCCuZECXgOi760TiYylUM8ZBf6OGiZzJtY= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.9 
h1:I1TsPEs34vbpOnR81GIcAq4/3Ud+jRHVGwx6qLQUHLs= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.9/go.mod h1:Fzsj6lZEb8AkTE5S68OhcbBqeWPsR8RnGuKPr8Todl8= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.8 h1:pqEJQtlKWvnv3B6VRt60ZmsHy3SotlEBvfUBPB1KVcM= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.8/go.mod h1:f6vjfZER1M17Fokn0IzssOTMT2N8ZSq+7jnNF0tArvw= +github.com/aws/aws-sdk-go-v2/service/s3 v1.73.2 h1:F3h8VYq9ZLBXYurmwrT8W0SPhgCcU0q+0WZJfT1dFt0= +github.com/aws/aws-sdk-go-v2/service/s3 v1.73.2/go.mod h1:jGJ/v7FIi7Ys9t54tmEFnrxuaWeJLpwNgKp2DXAVhOU= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.11 h1:kuIyu4fTT38Kj7YCC7ouNbVZSSpqkZ+LzIfhCr6Dg+I= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.11/go.mod h1:Ro744S4fKiCCuZECXgOi760TiYylUM8ZBf6OGiZzJtY= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.10 h1:l+dgv/64iVlQ3WsBbnn+JSbkj01jIi+SM0wYsj3y/hY= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.10/go.mod h1:Fzsj6lZEb8AkTE5S68OhcbBqeWPsR8RnGuKPr8Todl8= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.9 h1:BRVDbewN6VZcwr+FBOszDKvYeXY1kJ+GGMCcpghlw0U= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.9/go.mod h1:f6vjfZER1M17Fokn0IzssOTMT2N8ZSq+7jnNF0tArvw= github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro= github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= diff --git a/tools/querytee/proxy.go b/tools/querytee/proxy.go index d5d842e11c3c5..392bd008a439a 100644 --- a/tools/querytee/proxy.go +++ b/tools/querytee/proxy.go @@ -17,6 +17,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/gorilla/mux" + "github.com/grafana/dskit/flagext" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" ) @@ -34,6 +35,7 @@ type ProxyConfig struct { UseRelativeError bool PassThroughNonRegisteredRoutes bool SkipRecentSamples time.Duration + SkipSamplesBefore flagext.Time RequestURLFilter *regexp.Regexp InstrumentCompares bool } @@ -48,10 +50,11 @@ func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) { f.Float64Var(&cfg.ValueComparisonTolerance, "proxy.value-comparison-tolerance", 0.000001, "The tolerance to apply when comparing floating point values in the responses. 0 to disable tolerance and require exact match (not recommended).") f.BoolVar(&cfg.UseRelativeError, "proxy.compare-use-relative-error", false, "Use relative error tolerance when comparing floating point values.") f.DurationVar(&cfg.SkipRecentSamples, "proxy.compare-skip-recent-samples", 60*time.Second, "The window from now to skip comparing samples. 0 to disable.") + f.Var(&cfg.SkipSamplesBefore, "proxy.compare-skip-samples-before", "Skip the samples before the given time for comparison. The time can be in RFC3339 format (or) RFC3339 without the timezone and seconds (or) date only.") f.BoolVar(&cfg.PassThroughNonRegisteredRoutes, "proxy.passthrough-non-registered-routes", false, "Passthrough requests for non-registered routes to preferred backend.") - f.Func("backend.filter", "A request filter as a regular expression. Only matches are proxied to non-preferred backends.", func(raw string) error{ + f.Func("backend.filter", "A request filter as a regular expression. 
Only matches are proxied to non-preferred backends.", func(raw string) error { var err error - cfg.RequestURLFilter, err = regexp.Compile(raw) + cfg.RequestURLFilter, err = regexp.Compile(raw) return err }) f.BoolVar(&cfg.InstrumentCompares, "proxy.compare-instrument", false, "Reports metrics on comparisons of responses between preferred and non-preferred endpoints for supported routes.") diff --git a/tools/querytee/proxy_endpoint.go b/tools/querytee/proxy_endpoint.go index 7a6779b6ad880..e2fd53e52f4f4 100644 --- a/tools/querytee/proxy_endpoint.go +++ b/tools/querytee/proxy_endpoint.go @@ -15,10 +15,11 @@ import ( ) type ResponsesComparator interface { - Compare(expected, actual []byte) (*ComparisonSummary, error) + Compare(expected, actual []byte, queryEvaluationTime time.Time) (*ComparisonSummary, error) } type ComparisonSummary struct { + skipped bool missingMetrics int } @@ -175,13 +176,15 @@ func (p *ProxyEndpoint) executeBackendRequests(r *http.Request, resCh chan *back actualResponse := responses[i] result := comparisonSuccess - summary, err := p.compareResponses(expectedResponse, actualResponse) + summary, err := p.compareResponses(expectedResponse, actualResponse, time.Now().UTC()) if err != nil { level.Error(p.logger).Log("msg", "response comparison failed", "backend-name", p.backends[i].name, "route-name", p.routeName, "query", r.URL.RawQuery, "err", err) result = comparisonFailed + } else if summary != nil && summary.skipped { + result = comparisonSkipped } if p.instrumentCompares && summary != nil { @@ -227,10 +230,18 @@ func (p *ProxyEndpoint) waitBackendResponseForDownstream(resCh chan *backendResp return responses[0] } -func (p *ProxyEndpoint) compareResponses(expectedResponse, actualResponse *backendResponse) (*ComparisonSummary, error) { +func (p *ProxyEndpoint) compareResponses(expectedResponse, actualResponse *backendResponse, queryEvalTime time.Time) (*ComparisonSummary, error) { + if expectedResponse.err != nil { + return &ComparisonSummary{skipped: true}, nil + } + + if actualResponse.err != nil { + return nil, fmt.Errorf("skipped comparison of response because the request to the secondary backend failed: %w", actualResponse.err) + } + // compare response body only if we get a 200 if expectedResponse.status != 200 { - return nil, fmt.Errorf("skipped comparison of response because we got status code %d from preferred backend's response", expectedResponse.status) + return &ComparisonSummary{skipped: true}, nil } if actualResponse.status != 200 { @@ -241,7 +252,7 @@ func (p *ProxyEndpoint) compareResponses(expectedResponse, actualResponse *backe return nil, fmt.Errorf("expected status code %d but got %d", expectedResponse.status, actualResponse.status) } - return p.comparator.Compare(expectedResponse.body, actualResponse.body) + return p.comparator.Compare(expectedResponse.body, actualResponse.body, queryEvalTime) } type backendResponse struct { diff --git a/tools/querytee/proxy_endpoint_test.go b/tools/querytee/proxy_endpoint_test.go index 5cfee42b504dd..3c2553856fe33 100644 --- a/tools/querytee/proxy_endpoint_test.go +++ b/tools/querytee/proxy_endpoint_test.go @@ -421,6 +421,6 @@ func Test_backendResponse_statusCode(t *testing.T) { type mockComparator struct{} -func (c *mockComparator) Compare(_, _ []byte) (*ComparisonSummary, error) { +func (c *mockComparator) Compare(_, _ []byte, _ time.Time) (*ComparisonSummary, error) { return &ComparisonSummary{missingMetrics: 12}, nil } diff --git a/tools/querytee/proxy_metrics.go b/tools/querytee/proxy_metrics.go index 
eb2284517d47c..9637438be486d 100644 --- a/tools/querytee/proxy_metrics.go +++ b/tools/querytee/proxy_metrics.go @@ -8,6 +8,7 @@ import ( const ( comparisonSuccess = "success" comparisonFailed = "fail" + comparisonSkipped = "skipped" unknownIssuer = "unknown" canaryIssuer = "loki-canary" diff --git a/tools/querytee/proxy_test.go b/tools/querytee/proxy_test.go index 22b122bcc2aa7..6db5caeadb762 100644 --- a/tools/querytee/proxy_test.go +++ b/tools/querytee/proxy_test.go @@ -25,7 +25,7 @@ var testWriteRoutes = []Route{} type testComparator struct{} -func (testComparator) Compare(_, _ []byte) (*ComparisonSummary, error) { return nil, nil } +func (testComparator) Compare(_, _ []byte, _ time.Time) (*ComparisonSummary, error) { return nil, nil } func Test_NewProxy(t *testing.T) { cfg := ProxyConfig{} diff --git a/tools/querytee/response_comparator.go b/tools/querytee/response_comparator.go index 04a28fff85c1d..15b4bedf121aa 100644 --- a/tools/querytee/response_comparator.go +++ b/tools/querytee/response_comparator.go @@ -17,7 +17,7 @@ import ( ) // SamplesComparatorFunc helps with comparing different types of samples coming from /api/v1/query and /api/v1/query_range routes. -type SamplesComparatorFunc func(expected, actual json.RawMessage, opts SampleComparisonOptions) (*ComparisonSummary, error) +type SamplesComparatorFunc func(expected, actual json.RawMessage, evaluationTime time.Time, opts SampleComparisonOptions) (*ComparisonSummary, error) type SamplesResponse struct { Status string @@ -31,6 +31,20 @@ type SampleComparisonOptions struct { Tolerance float64 UseRelativeError bool SkipRecentSamples time.Duration + SkipSamplesBefore time.Time +} + +func (opts *SampleComparisonOptions) SkipSample(sampleTime, evaluationTime time.Time) bool { + // Skip if sample is too old + if !opts.SkipSamplesBefore.IsZero() && sampleTime.Before(opts.SkipSamplesBefore) { + return true + } + + // Skip if sample is too recent + if opts.SkipRecentSamples > 0 && sampleTime.After(evaluationTime.Add(-opts.SkipRecentSamples)) { + return true + } + return false } func NewSamplesComparator(opts SampleComparisonOptions) *SamplesComparator { @@ -55,7 +69,7 @@ func (s *SamplesComparator) RegisterSamplesType(samplesType string, comparator S s.sampleTypesComparator[samplesType] = comparator } -func (s *SamplesComparator) Compare(expectedResponse, actualResponse []byte) (*ComparisonSummary, error) { +func (s *SamplesComparator) Compare(expectedResponse, actualResponse []byte, evaluationTime time.Time) (*ComparisonSummary, error) { var expected, actual SamplesResponse err := json.Unmarshal(expectedResponse, &expected) @@ -81,10 +95,10 @@ func (s *SamplesComparator) Compare(expectedResponse, actualResponse []byte) (*C return nil, fmt.Errorf("resultType %s not registered for comparison", expected.Data.ResultType) } - return comparator(expected.Data.Result, actual.Data.Result, s.opts) + return comparator(expected.Data.Result, actual.Data.Result, evaluationTime, s.opts) } -func compareMatrix(expectedRaw, actualRaw json.RawMessage, opts SampleComparisonOptions) (*ComparisonSummary, error) { +func compareMatrix(expectedRaw, actualRaw json.RawMessage, evaluationTime time.Time, opts SampleComparisonOptions) (*ComparisonSummary, error) { var expected, actual model.Matrix err := json.Unmarshal(expectedRaw, &expected) @@ -96,7 +110,23 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage, opts SampleComparison return nil, errors.Wrap(err, "unable to unmarshal actual matrix") } + // Filter out samples outside the comparable window + 
if !opts.SkipSamplesBefore.IsZero() || opts.SkipRecentSamples > 0 { + expected = filterSamplesOutsideWindow(expected, func(sampleTime time.Time) bool { + return opts.SkipSample(sampleTime, evaluationTime) + }) + actual = filterSamplesOutsideWindow(actual, func(sampleTime time.Time) bool { + return opts.SkipSample(sampleTime, evaluationTime) + }) + } + + // If both matrices are empty after filtering, we can skip comparison + if len(expected) == 0 && len(actual) == 0 { + return &ComparisonSummary{skipped: true}, nil + } + if len(expected) != len(actual) { + // TODO: log the missing metrics return nil, fmt.Errorf("expected %d metrics but got %d", len(expected), len(actual)) } @@ -113,33 +143,64 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage, opts SampleComparison } actualMetric := actual[actualMetricIndex] - expectedMetricLen := len(expectedMetric.Values) - actualMetricLen := len(actualMetric.Values) - - if expectedMetricLen != actualMetricLen { - err := fmt.Errorf("expected %d samples for metric %s but got %d", expectedMetricLen, - expectedMetric.Metric, actualMetricLen) - if expectedMetricLen > 0 && actualMetricLen > 0 { - level.Error(util_log.Logger).Log("msg", err.Error(), "oldest-expected-ts", expectedMetric.Values[0].Timestamp, - "newest-expected-ts", expectedMetric.Values[expectedMetricLen-1].Timestamp, - "oldest-actual-ts", actualMetric.Values[0].Timestamp, "newest-actual-ts", actualMetric.Values[actualMetricLen-1].Timestamp) - } - return nil, err + + err := compareMatrixSamples(expectedMetric, actualMetric, opts) + if err != nil { + return nil, fmt.Errorf("%w\nExpected result for series:\n%v\n\nActual result for series:\n%v", err, expectedMetric, actualMetric) } + } + + return nil, nil +} + +func compareMatrixSamples(expected, actual *model.SampleStream, opts SampleComparisonOptions) error { + expectedEntriesCount := len(expected.Values) + actualEntriesCount := len(actual.Values) + + if expectedEntriesCount != actualEntriesCount { + err := fmt.Errorf("expected %d samples for metric %s but got %d", expectedEntriesCount, expected.Metric, actualEntriesCount) + if actualEntriesCount > 0 && expectedEntriesCount > 0 { + level.Error(util_log.Logger).Log("msg", err.Error(), + "oldest-expected-ts", expected.Values[0].Timestamp, + "newest-expected-ts", expected.Values[expectedEntriesCount-1].Timestamp, + "oldest-actual-ts", actual.Values[0].Timestamp, + "newest-actual-ts", actual.Values[actualEntriesCount-1].Timestamp) + } + return err + } + + for i := range expected.Values { + err := compareSamplePair(expected.Values[i], actual.Values[i], opts) + if err != nil { + return fmt.Errorf("float sample pair does not match for metric %s: %w", expected.Metric, err) + } + } + + return nil +} - for i, expectedSamplePair := range expectedMetric.Values { - actualSamplePair := actualMetric.Values[i] - err := compareSamplePair(expectedSamplePair, actualSamplePair, opts) - if err != nil { - return nil, errors.Wrapf(err, "sample pair not matching for metric %s", expectedMetric.Metric) +func filterSamplesOutsideWindow(matrix model.Matrix, skipSample func(time.Time) bool) model.Matrix { + result := matrix[:0] // Reuse the original slice capacity while starting with length 0 + + for _, series := range matrix { + // Reuse the original Values slice + filteredValues := series.Values[:0] + for _, sample := range series.Values { + if !skipSample(sample.Timestamp.Time()) { + filteredValues = append(filteredValues, sample) } } + + if len(filteredValues) > 0 { + series.Values = filteredValues + result = 
append(result, series) + } } - return nil, nil + return result } -func compareVector(expectedRaw, actualRaw json.RawMessage, opts SampleComparisonOptions) (*ComparisonSummary, error) { +func compareVector(expectedRaw, actualRaw json.RawMessage, evaluationTime time.Time, opts SampleComparisonOptions) (*ComparisonSummary, error) { var expected, actual model.Vector err := json.Unmarshal(expectedRaw, &expected) @@ -152,6 +213,29 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, opts SampleComparison return nil, errors.Wrap(err, "unable to unmarshal actual vector") } + // Filter out samples outside the comparable windows + if !opts.SkipSamplesBefore.IsZero() || opts.SkipRecentSamples > 0 { + filtered := expected[:0] + for i := range expected { + if !opts.SkipSample(expected[i].Timestamp.Time(), evaluationTime) { + filtered = append(filtered, expected[i]) + } + } + expected = filtered + + filtered = actual[:0] + for i := range actual { + if !opts.SkipSample(actual[i].Timestamp.Time(), evaluationTime) { + filtered = append(filtered, actual[i]) + } + } + actual = filtered + } + + if len(expected) == 0 && len(actual) == 0 { + return &ComparisonSummary{skipped: true}, nil + } + if len(expected) != len(actual) { return nil, fmt.Errorf("expected %d metrics but got %d", len(expected), len(actual)) @@ -198,7 +282,7 @@ func compareVector(expectedRaw, actualRaw json.RawMessage, opts SampleComparison return &ComparisonSummary{missingMetrics: len(missingMetrics)}, err } -func compareScalar(expectedRaw, actualRaw json.RawMessage, opts SampleComparisonOptions) (*ComparisonSummary, error) { +func compareScalar(expectedRaw, actualRaw json.RawMessage, evaluationTime time.Time, opts SampleComparisonOptions) (*ComparisonSummary, error) { var expected, actual model.Scalar err := json.Unmarshal(expectedRaw, &expected) if err != nil { @@ -210,6 +294,10 @@ func compareScalar(expectedRaw, actualRaw json.RawMessage, opts SampleComparison return nil, errors.Wrap(err, "unable to actual expected scalar") } + if opts.SkipSample(expected.Timestamp.Time(), evaluationTime) && opts.SkipSample(actual.Timestamp.Time(), evaluationTime) { + return &ComparisonSummary{skipped: true}, nil + } + return nil, compareSamplePair(model.SamplePair{ Timestamp: expected.Timestamp, Value: expected.Value, @@ -223,9 +311,6 @@ func compareSamplePair(expected, actual model.SamplePair, opts SampleComparisonO if expected.Timestamp != actual.Timestamp { return fmt.Errorf("expected timestamp %v but got %v", expected.Timestamp, actual.Timestamp) } - if opts.SkipRecentSamples > 0 && time.Since(expected.Timestamp.Time()) < opts.SkipRecentSamples { - return nil - } if !compareSampleValue(expected.Value, actual.Value, opts) { return fmt.Errorf("expected value %s for timestamp %v but got %s", expected.Value, expected.Timestamp, actual.Value) } @@ -250,7 +335,7 @@ func compareSampleValue(first, second model.SampleValue, opts SampleComparisonOp return math.Abs(f-s) <= opts.Tolerance } -func compareStreams(expectedRaw, actualRaw json.RawMessage, _ SampleComparisonOptions) (*ComparisonSummary, error) { +func compareStreams(expectedRaw, actualRaw json.RawMessage, evaluationTime time.Time, opts SampleComparisonOptions) (*ComparisonSummary, error) { var expected, actual loghttp.Streams err := jsoniter.Unmarshal(expectedRaw, &expected) @@ -262,7 +347,23 @@ func compareStreams(expectedRaw, actualRaw json.RawMessage, _ SampleComparisonOp return nil, errors.Wrap(err, "unable to unmarshal actual streams") } + // Filter out entries outside the comparable 
window + if !opts.SkipSamplesBefore.IsZero() || opts.SkipRecentSamples > 0 { + expected = filterStreamsOutsideWindow(expected, func(entryTime time.Time) bool { + return opts.SkipSample(entryTime, evaluationTime) + }) + actual = filterStreamsOutsideWindow(actual, func(entryTime time.Time) bool { + return opts.SkipSample(entryTime, evaluationTime) + }) + } + + // If both streams are empty after filtering, we can skip comparison + if len(expected) == 0 && len(actual) == 0 { + return &ComparisonSummary{skipped: true}, nil + } + if len(expected) != len(actual) { + // TODO: log the missing stream return nil, fmt.Errorf("expected %d streams but got %d", len(expected), len(actual)) } @@ -285,9 +386,10 @@ func compareStreams(expectedRaw, actualRaw json.RawMessage, _ SampleComparisonOp err := fmt.Errorf("expected %d values for stream %s but got %d", expectedValuesLen, expectedStream.Labels, actualValuesLen) if expectedValuesLen > 0 && actualValuesLen > 0 { - level.Error(util_log.Logger).Log("msg", err.Error(), "oldest-expected-ts", expectedStream.Entries[0].Timestamp.UnixNano(), - "newest-expected-ts", expectedStream.Entries[expectedValuesLen-1].Timestamp.UnixNano(), - "oldest-actual-ts", actualStream.Entries[0].Timestamp.UnixNano(), "newest-actual-ts", actualStream.Entries[actualValuesLen-1].Timestamp.UnixNano()) + // assuming BACKWARD search since that is the default ordering + level.Error(util_log.Logger).Log("msg", err.Error(), "newest-expected-ts", expectedStream.Entries[0].Timestamp.UnixNano(), + "oldest-expected-ts", expectedStream.Entries[expectedValuesLen-1].Timestamp.UnixNano(), + "newest-actual-ts", actualStream.Entries[0].Timestamp.UnixNano(), "oldest-actual-ts", actualStream.Entries[actualValuesLen-1].Timestamp.UnixNano()) } return nil, err } @@ -307,3 +409,25 @@ func compareStreams(expectedRaw, actualRaw json.RawMessage, _ SampleComparisonOp return nil, nil } + +// filterStreamsOutsideWindow filters out entries that are outside the comparable window +func filterStreamsOutsideWindow(streams loghttp.Streams, skipEntry func(time.Time) bool) loghttp.Streams { + result := streams[:0] // Reuse the original slice capacity while starting with length 0 + + for _, stream := range streams { + // Reuse the original Entries slice + filteredEntries := stream.Entries[:0] + for _, entry := range stream.Entries { + if !skipEntry(entry.Timestamp) { + filteredEntries = append(filteredEntries, entry) + } + } + + if len(filteredEntries) > 0 { + stream.Entries = filteredEntries + result = append(result, stream) + } + } + + return result +} diff --git a/tools/querytee/response_comparator_test.go b/tools/querytee/response_comparator_test.go index ce53fd15ce7ce..183a7f51e83be 100644 --- a/tools/querytee/response_comparator_test.go +++ b/tools/querytee/response_comparator_test.go @@ -70,7 +70,7 @@ func TestCompareMatrix(t *testing.T) { {"metric":{"foo":"bar"},"values":[[1,"1"],[3,"2"]]} ]`), // timestamps are parsed from seconds to ms which are then added to errors as is so adding 3 0s to expected error. 
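The comparator changes above hinge on two pieces: the `SkipSample` predicate, which drops a sample when it is older than `SkipSamplesBefore` or newer than the evaluation time minus `SkipRecentSamples`, and the filter helpers, which reuse the original slice (`s[:0]`) so filtering does not allocate. A minimal self-contained sketch of both, using a simplified sample type instead of the Prometheus model and loghttp types:

```go
package main

import (
	"fmt"
	"time"
)

// options mirrors the two window settings consulted by SkipSample above.
type options struct {
	SkipSamplesBefore time.Time
	SkipRecentSamples time.Duration
}

// skipSample reproduces the predicate: skip if too old, skip if too recent.
func (o options) skipSample(sampleTime, evaluationTime time.Time) bool {
	if !o.SkipSamplesBefore.IsZero() && sampleTime.Before(o.SkipSamplesBefore) {
		return true
	}
	if o.SkipRecentSamples > 0 && sampleTime.After(evaluationTime.Add(-o.SkipRecentSamples)) {
		return true
	}
	return false
}

// sample is a simplified stand-in for a model.SamplePair or stream entry.
type sample struct {
	ts    time.Time
	value string
}

// filterOutsideWindow keeps only comparable samples, reusing the backing
// array of the input slice, the same trick as filterSamplesOutsideWindow and
// filterStreamsOutsideWindow above. Note that it mutates the input slice.
func filterOutsideWindow(samples []sample, skip func(time.Time) bool) []sample {
	filtered := samples[:0]
	for _, s := range samples {
		if !skip(s.ts) {
			filtered = append(filtered, s)
		}
	}
	return filtered
}

func main() {
	opts := options{
		SkipSamplesBefore: time.Unix(10, 0),
		SkipRecentSamples: 10 * time.Second,
	}
	evaluationTime := time.Unix(100, 0)

	in := []sample{
		{time.Unix(5, 0), "too old"},
		{time.Unix(50, 0), "kept"},
		{time.Unix(95, 0), "too recent"},
	}
	out := filterOutsideWindow(in, func(t time.Time) bool { return opts.skipSample(t, evaluationTime) })
	fmt.Println(len(out), out[0].value) // 1 kept
}
```

When both sides end up empty after filtering, the comparators above report a skipped comparison rather than a success or failure, which is what the new `comparisonSkipped` result label captures.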
- err: errors.New("sample pair not matching for metric {foo=\"bar\"}: expected timestamp 2 but got 3"), + err: errors.New("float sample pair does not match for metric {foo=\"bar\"}: expected timestamp 2 but got 3"), }, { name: "difference in sample value", @@ -80,7 +80,7 @@ func TestCompareMatrix(t *testing.T) { actual: json.RawMessage(`[ {"metric":{"foo":"bar"},"values":[[1,"1"],[2,"3"]]} ]`), - err: errors.New("sample pair not matching for metric {foo=\"bar\"}: expected value 2 for timestamp 2 but got 3"), + err: errors.New("float sample pair does not match for metric {foo=\"bar\"}: expected value 2 for timestamp 2 but got 3"), }, { name: "correct samples", @@ -93,13 +93,113 @@ func TestCompareMatrix(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - _, err := compareMatrix(tc.expected, tc.actual, SampleComparisonOptions{}) + _, err := compareMatrix(tc.expected, tc.actual, time.Now(), SampleComparisonOptions{}) if tc.err == nil { require.NoError(t, err) return } require.Error(t, err) - require.Equal(t, tc.err.Error(), err.Error()) + require.ErrorContains(t, err, tc.err.Error()) + }) + } +} + +func TestCompareMatrix_SamplesOutsideComparableWindow(t *testing.T) { + for _, tc := range []struct { + name string + expected json.RawMessage + actual json.RawMessage + skipSamplesBefore time.Time + skipRecentSamples time.Duration + evaluationTime time.Time + err error + }{ + { + name: "skip samples before window", + expected: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[0,"1"],[5,"2"],[10,"3"],[20,"4"]]} + ]`), + actual: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[5,"1"],[10,"3"],[20,"4"]]} + ]`), + skipSamplesBefore: time.Unix(10, 0), + evaluationTime: time.Unix(100, 0), + }, + { + name: "skip recent samples", + expected: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[5,"1"],[25,"2"],[90,"3"],[94,"4"],[96,"5"]]} + ]`), + actual: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[5,"1"],[25,"2"],[90,"3"],[95, "4"]]} + ]`), + skipRecentSamples: 10 * time.Second, + evaluationTime: time.Unix(100, 0), + }, + { + name: "skip both recent and old samples", + expected: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[5,"1"],[25,"2"],[80,"3"],[94,"4"],[96,"5"]]} + ]`), + actual: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[5,"0"],[25,"2"],[80,"3"],[95, "4"]]} + ]`), + skipSamplesBefore: time.Unix(10, 0), + skipRecentSamples: 10 * time.Second, + evaluationTime: time.Unix(100, 0), + }, + { + name: "mismatch in sample value on the right boundary", + expected: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[5,"1"],[25,"2"],[90,"3"],[94,"4"],[96,"5"]]} + ]`), + actual: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[5,"1"],[25,"2"],[90,"4"],[95, "4"]]} + ]`), + skipSamplesBefore: time.Unix(10, 0), + skipRecentSamples: 10 * time.Second, + evaluationTime: time.Unix(100, 0), + err: errors.New("float sample pair does not match for metric {foo=\"bar\"}: expected value 3 for timestamp 90 but got 4"), + }, + { + name: "mismatch in sample value on the left boundary", + expected: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[10,"1"],[25,"2"],[90,"3"],[94,"4"],[96,"5"]]} + ]`), + actual: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[10,"0"],[25,"2"],[90,"3"],[95, "4"]]} + ]`), + skipSamplesBefore: time.Unix(10, 0), + skipRecentSamples: 10 * time.Second, + evaluationTime: time.Unix(100, 0), + err: errors.New("float sample pair does not match for metric {foo=\"bar\"}: expected value 1 for timestamp 10 but got 0"), + }, + { 
+ name: "skip entire series", + expected: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[50,"1"],[75,"2"]]}, + {"metric":{"foo":"buzz"},"values":[[5,"1"],[9,"4"],[96,"5"]]} + ]`), // skip comparing {"foo":"buzz"} + actual: json.RawMessage(`[ + {"metric":{"foo":"bar"},"values":[[50,"1"],[75,"2"],[95,"3"]]} + ]`), + skipSamplesBefore: time.Unix(10, 0), + skipRecentSamples: 10 * time.Second, + evaluationTime: time.Unix(100, 0), + }, + } { + t.Run(tc.name, func(t *testing.T) { + _, err := compareMatrix(tc.expected, tc.actual, tc.evaluationTime, SampleComparisonOptions{ + SkipSamplesBefore: tc.skipSamplesBefore, + SkipRecentSamples: tc.skipRecentSamples, + }) + + if tc.err == nil { + require.NoError(t, err) + return + } + require.Error(t, err) + require.ErrorContains(t, err, tc.err.Error()) }) } } @@ -176,7 +276,7 @@ func TestCompareVector(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - _, err := compareVector(tc.expected, tc.actual, SampleComparisonOptions{}) + _, err := compareVector(tc.expected, tc.actual, time.Now(), SampleComparisonOptions{}) if tc.err == nil { require.NoError(t, err) return @@ -213,7 +313,7 @@ func TestCompareScalar(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - _, err := compareScalar(tc.expected, tc.actual, SampleComparisonOptions{}) + _, err := compareScalar(tc.expected, tc.actual, time.Now(), SampleComparisonOptions{}) if tc.err == nil { require.NoError(t, err) return @@ -408,7 +508,7 @@ func TestCompareSamplesResponse(t *testing.T) { UseRelativeError: tc.useRelativeError, SkipRecentSamples: tc.skipRecentSamples, }) - _, err := samplesComparator.Compare(tc.expected, tc.actual) + _, err := samplesComparator.Compare(tc.expected, tc.actual, time.Now()) if tc.err == nil { require.NoError(t, err) return @@ -501,9 +601,100 @@ func TestCompareStreams(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - _, err := compareStreams(tc.expected, tc.actual, SampleComparisonOptions{Tolerance: 0}) + _, err := compareStreams(tc.expected, tc.actual, time.Now(), SampleComparisonOptions{Tolerance: 0}) + if tc.err == nil { + require.NoError(t, err) + return + } + require.Error(t, err) + require.Equal(t, tc.err.Error(), err.Error()) + }) + } +} + +func TestCompareStreams_SamplesOutsideComparableWindow(t *testing.T) { + for _, tc := range []struct { + name string + expected json.RawMessage + actual json.RawMessage + skipSamplesBefore time.Time + skipRecentSamples time.Duration + evaluationTime time.Time + err error + }{ + // stream entry timestamp is in ns + { + name: "skip samples before window", + expected: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["5","1"],["10","2"],["50","3"],["95","4"]]} + ]`), + actual: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["2","0"],["10","2"],["50","3"],["95","4"]]} + ]`), + skipSamplesBefore: time.Unix(0, 10), + evaluationTime: time.Unix(0, 100), + }, + { + name: "skip recent samples", + expected: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["5","1"],["15","2"],["90","3"],["95","4"]]} + ]`), + actual: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["5","1"],["15","2"],["90","3"]]} + ]`), + skipRecentSamples: 10 * time.Nanosecond, + evaluationTime: time.Unix(0, 100), + }, + { + name: "skip both recent and old samples", + expected: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["5","1"],["15","2"],["50","3"],["95","4"]]} + ]`), + actual: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["15","2"],["50","3"]]} + ]`), + skipRecentSamples: 10 * time.Nanosecond, 
+ skipSamplesBefore: time.Unix(0, 10), + evaluationTime: time.Unix(0, 100), + }, + { + name: "mismatch in sample value on the right boundary", + expected: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["5","1"],["15","2"],["50","3"],["90","4"]]} + ]`), + actual: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["15","2"],["50","3"],["90","5"]]} + ]`), + skipRecentSamples: 10 * time.Nanosecond, + skipSamplesBefore: time.Unix(0, 10), + evaluationTime: time.Unix(0, 100), + err: errors.New("expected line 4 for timestamp 90 but got 5 for stream {foo=\"bar\"}"), + }, + { + name: "mismatch in sample value on the left boundary", + expected: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["5","1"],["10","2"],["50","3"],["90","4"]]} + ]`), + actual: json.RawMessage(`[ + {"stream":{"foo":"bar"},"values":[["10","22"],["50","3"],["90","5"]]} + ]`), + skipRecentSamples: 10 * time.Nanosecond, + skipSamplesBefore: time.Unix(0, 10), + evaluationTime: time.Unix(0, 100), + err: errors.New("expected line 2 for timestamp 10 but got 22 for stream {foo=\"bar\"}"), + }, + } { + t.Run(tc.name, func(t *testing.T) { + summary, err := compareStreams(tc.expected, tc.actual, tc.evaluationTime, SampleComparisonOptions{ + SkipSamplesBefore: tc.skipSamplesBefore, + SkipRecentSamples: tc.skipRecentSamples, + }) + if tc.err == nil { require.NoError(t, err) + if summary != nil { + require.True(t, summary.skipped) + } return } require.Error(t, err) diff --git a/vendor/github.com/minio/minio-go/v7/api-copy-object.go b/vendor/github.com/minio/minio-go/v7/api-copy-object.go index 0c95d91ec7619..b6cadc86a929a 100644 --- a/vendor/github.com/minio/minio-go/v7/api-copy-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-copy-object.go @@ -68,7 +68,7 @@ func (c *Client) CopyObject(ctx context.Context, dst CopyDestOptions, src CopySr Bucket: dst.Bucket, Key: dst.Object, LastModified: cpObjRes.LastModified, - ETag: trimEtag(resp.Header.Get("ETag")), + ETag: trimEtag(cpObjRes.ETag), VersionID: resp.Header.Get(amzVersionID), Expiration: expTime, ExpirationRuleID: ruleID, diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go index cb46816d0db32..cc0ded2c7f2d5 100644 --- a/vendor/github.com/minio/minio-go/v7/api.go +++ b/vendor/github.com/minio/minio-go/v7/api.go @@ -133,7 +133,7 @@ type Options struct { // Global constants. const ( libraryName = "minio-go" - libraryVersion = "v7.0.83" + libraryVersion = "v7.0.84" ) // User Agent should always following the below style. 
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go index 0ba06e710662c..e3230bb186dab 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go @@ -153,9 +153,6 @@ func (m *IAM) RetrieveWithCredContext(cc *CredContext) (Value, error) { } endpoint := m.Endpoint - if endpoint == "" { - endpoint = cc.Endpoint - } switch { case identityFile != "": diff --git a/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go b/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go index 0e63ce2f7dc57..80fd029d83434 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go @@ -118,53 +118,53 @@ func GetRegionFromURL(endpointURL url.URL) string { if endpointURL == sentinelURL { return "" } - if endpointURL.Host == "s3-external-1.amazonaws.com" { + if endpointURL.Hostname() == "s3-external-1.amazonaws.com" { return "" } // if elb's are used we cannot calculate which region it may be, just return empty. - if elbAmazonRegex.MatchString(endpointURL.Host) || elbAmazonCnRegex.MatchString(endpointURL.Host) { + if elbAmazonRegex.MatchString(endpointURL.Hostname()) || elbAmazonCnRegex.MatchString(endpointURL.Hostname()) { return "" } // We check for FIPS dualstack matching first to avoid the non-greedy // regex for FIPS non-dualstack matching a dualstack URL - parts := amazonS3HostFIPSDualStack.FindStringSubmatch(endpointURL.Host) + parts := amazonS3HostFIPSDualStack.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } - parts = amazonS3HostFIPS.FindStringSubmatch(endpointURL.Host) + parts = amazonS3HostFIPS.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } - parts = amazonS3HostDualStack.FindStringSubmatch(endpointURL.Host) + parts = amazonS3HostDualStack.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } - parts = amazonS3HostHyphen.FindStringSubmatch(endpointURL.Host) + parts = amazonS3HostHyphen.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } - parts = amazonS3ChinaHost.FindStringSubmatch(endpointURL.Host) + parts = amazonS3ChinaHost.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } - parts = amazonS3ChinaHostDualStack.FindStringSubmatch(endpointURL.Host) + parts = amazonS3ChinaHostDualStack.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } - parts = amazonS3HostDot.FindStringSubmatch(endpointURL.Host) + parts = amazonS3HostDot.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } - parts = amazonS3HostPrivateLink.FindStringSubmatch(endpointURL.Host) + parts = amazonS3HostPrivateLink.FindStringSubmatch(endpointURL.Hostname()) if len(parts) > 1 { return parts[1] } diff --git a/vendor/github.com/twmb/franz-go/pkg/kadm/acls.go b/vendor/github.com/twmb/franz-go/pkg/kadm/acls.go index 62676b5b8c074..9d4fdbf0d3307 100644 --- a/vendor/github.com/twmb/franz-go/pkg/kadm/acls.go +++ b/vendor/github.com/twmb/franz-go/pkg/kadm/acls.go @@ -3,6 +3,7 @@ package kadm import ( "context" "fmt" + "math" "strings" "sync" @@ -1115,3 +1116,55 @@ func createDelDescACL(b *ACLBuilder) ([]kmsg.DeleteACLsRequestFilter, []*kmsg.De } return deletions, describes, nil } + +// DecodeACLOperations decodes an int32 bitfield into a slice of +// kmsg.ACLOperation values. 
+// +// This function is used to interpret the `AuthorizedOperations` field returned +// by the Kafka APIs, which specifies the operations a client is allowed to +// perform on a cluster, topic, or consumer group. It is utilized in multiple +// Kafka API responses, including Metadata, DescribeCluster, and +// DescribeGroupsResponseGroup. +// +// Caveats with Metadata API +// 1. To include authorized operations in the Metadata response, the client must explicitly +// opt in by setting `IncludeClusterAuthorizedOperations` and/or `IncludeTopicAuthorizedOperations`. +// These options were introduced in Kafka 2.3.0 as part of KIP-430. +// 2. In Kafka 2.8.0 (Metadata v11), the `AuthorizedOperations` for the cluster was removed from the +// Metadata response. Instead, clients should use the DescribeCluster API to retrieve cluster-level +// permissions. +// +// Function Behavior +// - If the bitfield equals `math.MinInt32` (-2147483648), it indicates that "AUTHORIZED_OPERATIONS_OMITTED" +// is set, and the function returns an empty slice. +// - For non-omitted values, the function iterates through all 32 bits of the bitfield. Each bit that +// is set (`1`) corresponds to an ACL operation, which is then mapped to its respective `kmsg.ACLOperation` value. +// - Undefined or unknown bits (e.g., bit 0 for `kmsg.ACLOperationUnknown`) are ignored. +// +// Supported Use Cases +// - Cluster Operations: Retrieved via the DescribeCluster API or older Metadata API versions (v8–v10). +// - Topic Operations: Retrieved via the Metadata API when `IncludeTopicAuthorizedOperations` is set. +// - Group Operations: Retrieved in the DescribeGroups API response. +func DecodeACLOperations(bitfield int32) []ACLOperation { + var operations []ACLOperation + + // MinInt32 represents "AUTHORIZED_OPERATIONS_OMITTED" + if bitfield == math.MinInt32 { + return operations + } + + // Helper function to determine if an operation is valid. + isValidOperation := func(op kmsg.ACLOperation) bool { + return op >= kmsg.ACLOperationRead && op <= kmsg.ACLOperationDescribeTokens + } + + for i := 0; i < 32; i++ { + if bitfield&(1< 0 { @@ -599,12 +612,10 @@ func (cl *Client) finishRecordPromise(pr promisedRec, err error, beforeBuffering p.mu.Lock() p.bufferedBytes -= userSize p.bufferedRecords-- - broadcast := p.blocked.Load() > 0 || p.bufferedRecords == 0 && p.flushing.Load() > 0 + broadcast = p.blocked.Load() > 0 || p.bufferedRecords == 0 && p.flushing.Load() > 0 p.mu.Unlock() - if broadcast { - p.c.Broadcast() - } + return broadcast } // partitionRecord loads the partitions for a topic and produce to them. If diff --git a/vendor/github.com/twmb/franz-go/pkg/kgo/source.go b/vendor/github.com/twmb/franz-go/pkg/kgo/source.go index 12732e90f3e87..a5aa849cfecc3 100644 --- a/vendor/github.com/twmb/franz-go/pkg/kgo/source.go +++ b/vendor/github.com/twmb/franz-go/pkg/kgo/source.go @@ -234,6 +234,7 @@ type cursorOffsetNext struct { type cursorOffsetPreferred struct { cursorOffsetNext preferredReplica int32 + ooor bool } // Moves a cursor from one source to another. 
This is done while handling @@ -268,12 +269,13 @@ func (cs cursorPreferreds) String() string { type pnext struct { p int32 next int32 + ooor bool } ts := make(map[string][]pnext) for _, c := range cs { t := c.from.topic p := c.from.partition - ts[t] = append(ts[t], pnext{p, c.preferredReplica}) + ts[t] = append(ts[t], pnext{p, c.preferredReplica, c.ooor}) } tsorted := make([]string, 0, len(ts)) for t, ps := range ts { @@ -303,9 +305,17 @@ func (cs cursorPreferreds) String() string { for j, p := range ps { if j < len(ps)-1 { - fmt.Fprintf(sb, "%d=>%d, ", p.p, p.next) + if p.ooor { + fmt.Fprintf(sb, "%d=>%d[ooor], ", p.p, p.next) + } else { + fmt.Fprintf(sb, "%d=>%d, ", p.p, p.next) + } } else { - fmt.Fprintf(sb, "%d=>%d", p.p, p.next) + if p.ooor { + fmt.Fprintf(sb, "%d=>%d[ooor]", p.p, p.next) + } else { + fmt.Fprintf(sb, "%d=>%d", p.p, p.next) + } } } @@ -489,8 +499,11 @@ func (s *source) discardBuffered() { // This returns the number of records taken and whether the source has been // completely drained. func (s *source) takeNBuffered(paused pausedTopics, n int) (Fetch, int, bool) { - var r Fetch - var taken int + var ( + r Fetch + rstrip Fetch + taken int + ) b := &s.buffered bf := &b.fetch @@ -500,6 +513,7 @@ func (s *source) takeNBuffered(paused pausedTopics, n int) (Fetch, int, bool) { // If the topic is outright paused, we allowUsable all // partitions in the topic and skip the topic entirely. if paused.has(t.Topic, -1) { + rstrip.Topics = append(rstrip.Topics, *t) bf.Topics = bf.Topics[1:] for _, pCursor := range b.usedOffsets[t.Topic] { pCursor.from.allowUsable() @@ -517,6 +531,15 @@ func (s *source) takeNBuffered(paused pausedTopics, n int) (Fetch, int, bool) { rt = &r.Topics[len(r.Topics)-1] rt.Partitions = nil } + var rtstrip *FetchTopic + ensureTopicStripped := func() { + if rtstrip != nil { + return + } + rstrip.Topics = append(rstrip.Topics, *t) + rtstrip = &rstrip.Topics[len(rstrip.Topics)-1] + rtstrip.Partitions = nil + } tCursors := b.usedOffsets[t.Topic] @@ -524,6 +547,8 @@ func (s *source) takeNBuffered(paused pausedTopics, n int) (Fetch, int, bool) { p := &t.Partitions[0] if paused.has(t.Topic, p.Partition) { + ensureTopicStripped() + rtstrip.Partitions = append(rtstrip.Partitions, *p) t.Partitions = t.Partitions[1:] pCursor := tCursors[p.Partition] pCursor.from.allowUsable() @@ -577,6 +602,9 @@ func (s *source) takeNBuffered(paused pausedTopics, n int) (Fetch, int, bool) { } } + if len(rstrip.Topics) > 0 { + s.hook(&rstrip, false, true) + } s.hook(&r, false, true) // unbuffered, polled drained := len(bf.Topics) == 0 @@ -1065,6 +1093,7 @@ func (s *source) handleReqResp(br *broker, req *fetchRequest, resp *kmsg.FetchRe preferreds = append(preferreds, cursorOffsetPreferred{ *partOffset, preferred, + false, }) continue } @@ -1134,6 +1163,9 @@ func (s *source) handleReqResp(br *broker, req *fetchRequest, resp *kmsg.FetchRe // KIP-392 (case 3) specifies that if we are consuming // from a follower, then if our offset request is before // the low watermark, we list offsets from the follower. + // However, Kafka does not actually implement handling + // ListOffsets from anything from the leader, so we + // need to redirect ourselves back to the leader. 
// // KIP-392 (case 4) specifies that if we are consuming // a follower and our request is larger than the high @@ -1187,7 +1219,22 @@ func (s *source) handleReqResp(br *broker, req *fetchRequest, resp *kmsg.FetchRe addList(-1, true) case partOffset.offset < fp.LogStartOffset: // KIP-392 case 3 - addList(s.nodeID, false) + // KIP-392 specifies that we should list offsets against the follower, + // but that actually is not supported and the Java client redirects + // back to the leader. The leader then does *not* direct the client + // back to the follower because the follower is not an in sync + // replica. If we did not redirect back to the leader, we would spin + // loop receiving offset_out_of_range from the follower for Fetch, and + // then not_leader_or_follower from the follower for ListOffsets + // (even though it is a follower). So, we just set the preferred replica + // back to the follower. We go directly back to fetching with the + // hope that the offset is available on the leader, and if not, we'll + // just get an OOOR error again and fall into case 1 just above. + preferreds = append(preferreds, cursorOffsetPreferred{ + *partOffset, + partOffset.from.leader, + true, + }) default: // partOffset.offset > fp.HighWatermark, KIP-392 case 4 if kip320 { @@ -1389,6 +1436,19 @@ func (o *cursorOffsetNext) processRespPartition(br *broker, rp *kmsg.FetchRespon h.OnFetchBatchRead(br.meta, o.from.topic, o.from.partition, m) } }) + + // If we encounter a decompression error BUT we have successfully decompressed + // one batch, it is likely that we have received a partial batch. Kafka returns + // UP TO the requested max partition bytes, sometimes truncating data at the end. + // It returns at least one valid batch, but everything after is copied as is + // (i.e. a quick slab copy). We set the error to nil and return what we have. + // + // If we have a decompression error immediately, we keep it and bubble it up. + // The client cannot progress, and the end user needs visibility. 
+ if isDecompressErr(fp.Err) && len(fp.Records) > 0 { + fp.Err = nil + break + } } return fp @@ -1476,6 +1536,7 @@ func (o *cursorOffsetNext) processRecordBatch( if compression := byte(batch.Attributes & 0x0007); compression != 0 { var err error if rawRecords, err = decompressor.decompress(rawRecords, compression); err != nil { + fp.Err = &errDecompress{err} return 0, 0 // truncated batch } } @@ -1542,6 +1603,7 @@ func (o *cursorOffsetNext) processV1OuterMessage( rawInner, err := decompressor.decompress(message.Value, compression) if err != nil { + fp.Err = &errDecompress{err} return 0, 0 // truncated batch } @@ -1653,6 +1715,7 @@ func (o *cursorOffsetNext) processV0OuterMessage( rawInner, err := decompressor.decompress(message.Value, compression) if err != nil { + fp.Err = &errDecompress{err} return 0, 0 // truncated batch } @@ -1773,7 +1836,11 @@ func recordToRecord( ProducerID: batch.ProducerID, ProducerEpoch: batch.ProducerEpoch, LeaderEpoch: batch.PartitionLeaderEpoch, - Offset: batch.FirstOffset + int64(record.OffsetDelta), + } + if batch.FirstOffset == -1 { + r.Offset = -1 + } else { + r.Offset = batch.FirstOffset + int64(record.OffsetDelta) } if r.Attrs.TimestampType() == 0 { r.Timestamp = timeFromMillis(batch.FirstTimestamp + record.TimestampDelta64) diff --git a/vendor/go.opentelemetry.io/otel/CHANGELOG.md b/vendor/go.opentelemetry.io/otel/CHANGELOG.md index a30988f25d0a5..599d59cd130d7 100644 --- a/vendor/go.opentelemetry.io/otel/CHANGELOG.md +++ b/vendor/go.opentelemetry.io/otel/CHANGELOG.md @@ -8,6 +8,21 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] + + + +## [1.34.0/0.56.0/0.10.0] 2025-01-17 + +### Changed + +- Remove the notices from `Logger` to make the whole Logs API user-facing in `go.opentelemetry.io/otel/log`. (#6167) + +### Fixed + +- Relax minimum Go version to 1.22.0 in various modules. (#6073) +- The `Type` name logged for the `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc` client is corrected from `otlphttpgrpc` to `otlptracegrpc`. (#6143) +- The `Type` name logged for the `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlphttpgrpc` client is corrected from `otlphttphttp` to `otlptracehttp`. (#6143) + ## [1.33.0/0.55.0/0.9.0/0.0.12] 2024-12-12 ### Added @@ -37,9 +52,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/trace`. (#5997) - Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/log`. (#6032) - - - ## [1.32.0/0.54.0/0.8.0/0.0.11] 2024-11-08 ### Added @@ -3185,7 +3197,8 @@ It contains api and sdk for trace and meter. - CircleCI build CI manifest files. - CODEOWNERS file to track owners of this project. 
-[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.33.0...HEAD +[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.34.0...HEAD +[1.34.0/0.56.0/0.10.0]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.34.0 [1.33.0/0.55.0/0.9.0/0.0.12]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.33.0 [1.32.0/0.54.0/0.8.0/0.0.11]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.32.0 [1.31.0/0.53.0/0.7.0/0.0.10]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.31.0 diff --git a/vendor/go.opentelemetry.io/otel/README.md b/vendor/go.opentelemetry.io/otel/README.md index efec278905bb8..d9a19207625a2 100644 --- a/vendor/go.opentelemetry.io/otel/README.md +++ b/vendor/go.opentelemetry.io/otel/README.md @@ -1,6 +1,6 @@ # OpenTelemetry-Go -[![CI](https://github.com/open-telemetry/opentelemetry-go/workflows/ci/badge.svg)](https://github.com/open-telemetry/opentelemetry-go/actions?query=workflow%3Aci+branch%3Amain) +[![ci](https://github.com/open-telemetry/opentelemetry-go/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/open-telemetry/opentelemetry-go/actions/workflows/ci.yml) [![codecov.io](https://codecov.io/gh/open-telemetry/opentelemetry-go/coverage.svg?branch=main)](https://app.codecov.io/gh/open-telemetry/opentelemetry-go?branch=main) [![PkgGoDev](https://pkg.go.dev/badge/go.opentelemetry.io/otel)](https://pkg.go.dev/go.opentelemetry.io/otel) [![Go Report Card](https://goreportcard.com/badge/go.opentelemetry.io/otel)](https://goreportcard.com/report/go.opentelemetry.io/otel) diff --git a/vendor/go.opentelemetry.io/otel/RELEASING.md b/vendor/go.opentelemetry.io/otel/RELEASING.md index ffa9b61258ab3..4ebef4f9ddff4 100644 --- a/vendor/go.opentelemetry.io/otel/RELEASING.md +++ b/vendor/go.opentelemetry.io/otel/RELEASING.md @@ -130,6 +130,6 @@ Importantly, bump any package versions referenced to be the latest one you just Bump the dependencies in the following Go services: -- [`accountingservice`](https://github.com/open-telemetry/opentelemetry-demo/tree/main/src/accountingservice) -- [`checkoutservice`](https://github.com/open-telemetry/opentelemetry-demo/tree/main/src/checkoutservice) -- [`productcatalogservice`](https://github.com/open-telemetry/opentelemetry-demo/tree/main/src/productcatalogservice) +- [`accounting`](https://github.com/open-telemetry/opentelemetry-demo/tree/main/src/accounting) +- [`checkoutservice`](https://github.com/open-telemetry/opentelemetry-demo/tree/main/src/checkout) +- [`productcatalogservice`](https://github.com/open-telemetry/opentelemetry-demo/tree/main/src/product-catalog) diff --git a/vendor/go.opentelemetry.io/otel/renovate.json b/vendor/go.opentelemetry.io/otel/renovate.json index 0a29a2f13d802..4f80c898a1dd4 100644 --- a/vendor/go.opentelemetry.io/otel/renovate.json +++ b/vendor/go.opentelemetry.io/otel/renovate.json @@ -14,12 +14,6 @@ "matchDepTypes": ["indirect"], "enabled": true }, - { - "matchFileNames": ["internal/tools/**"], - "matchManagers": ["gomod"], - "matchDepTypes": ["indirect"], - "enabled": false - }, { "matchPackageNames": ["google.golang.org/genproto/googleapis/**"], "groupName": "googleapis" diff --git a/vendor/go.opentelemetry.io/otel/version.go b/vendor/go.opentelemetry.io/otel/version.go index fb7d12673eb23..eb22002d82432 100644 --- a/vendor/go.opentelemetry.io/otel/version.go +++ b/vendor/go.opentelemetry.io/otel/version.go @@ -5,5 +5,5 @@ package otel // import "go.opentelemetry.io/otel" // Version is the 
current release version of OpenTelemetry in use. func Version() string { - return "1.33.0" + return "1.34.0" } diff --git a/vendor/go.opentelemetry.io/otel/versions.yaml b/vendor/go.opentelemetry.io/otel/versions.yaml index 9f878cd1fe763..ce4fe59b0e4ba 100644 --- a/vendor/go.opentelemetry.io/otel/versions.yaml +++ b/vendor/go.opentelemetry.io/otel/versions.yaml @@ -3,7 +3,7 @@ module-sets: stable-v1: - version: v1.33.0 + version: v1.34.0 modules: - go.opentelemetry.io/otel - go.opentelemetry.io/otel/bridge/opencensus @@ -23,11 +23,11 @@ module-sets: - go.opentelemetry.io/otel/sdk/metric - go.opentelemetry.io/otel/trace experimental-metrics: - version: v0.55.0 + version: v0.56.0 modules: - go.opentelemetry.io/otel/exporters/prometheus experimental-logs: - version: v0.9.0 + version: v0.10.0 modules: - go.opentelemetry.io/otel/log - go.opentelemetry.io/otel/sdk/log diff --git a/vendor/modules.txt b/vendor/modules.txt index 49a5712e06a59..b0530eafe9dce 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -586,7 +586,7 @@ github.com/distribution/reference ## explicit; go 1.13 github.com/dlclark/regexp2 github.com/dlclark/regexp2/syntax -# github.com/docker/docker v27.5.0+incompatible +# github.com/docker/docker v27.5.1+incompatible ## explicit github.com/docker/docker/api github.com/docker/docker/api/types @@ -1276,7 +1276,7 @@ github.com/miekg/dns # github.com/minio/md5-simd v1.1.2 ## explicit; go 1.14 github.com/minio/md5-simd -# github.com/minio/minio-go/v7 v7.0.83 +# github.com/minio/minio-go/v7 v7.0.84 ## explicit; go 1.22 github.com/minio/minio-go/v7 github.com/minio/minio-go/v7/pkg/cors @@ -1652,7 +1652,7 @@ github.com/tklauser/go-sysconf # github.com/tklauser/numcpus v0.7.0 ## explicit; go 1.18 github.com/tklauser/numcpus -# github.com/twmb/franz-go v1.18.0 +# github.com/twmb/franz-go v1.18.1 ## explicit; go 1.21 github.com/twmb/franz-go/pkg/kbin github.com/twmb/franz-go/pkg/kerr @@ -1661,7 +1661,7 @@ github.com/twmb/franz-go/pkg/kgo/internal/sticky github.com/twmb/franz-go/pkg/kversion github.com/twmb/franz-go/pkg/sasl github.com/twmb/franz-go/pkg/sasl/plain -# github.com/twmb/franz-go/pkg/kadm v1.14.0 +# github.com/twmb/franz-go/pkg/kadm v1.15.0 ## explicit; go 1.21 github.com/twmb/franz-go/pkg/kadm # github.com/twmb/franz-go/pkg/kfake v0.0.0-20241015013301-cea7aa5d8037 @@ -1782,7 +1782,7 @@ go.opencensus.io/tag ## explicit; go 1.22.0 go.opentelemetry.io/auto/sdk go.opentelemetry.io/auto/sdk/internal/telemetry -# go.opentelemetry.io/collector/pdata v1.23.0 +# go.opentelemetry.io/collector/pdata v1.24.0 ## explicit; go 1.22.0 go.opentelemetry.io/collector/pdata/internal go.opentelemetry.io/collector/pdata/internal/data @@ -1819,7 +1819,7 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp/internal/request go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp/internal/semconv go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp/internal/semconvutil -# go.opentelemetry.io/otel v1.33.0 +# go.opentelemetry.io/otel v1.34.0 ## explicit; go 1.22.0 go.opentelemetry.io/otel go.opentelemetry.io/otel/attribute @@ -1836,7 +1836,7 @@ go.opentelemetry.io/otel/semconv/v1.20.0 go.opentelemetry.io/otel/semconv/v1.21.0 go.opentelemetry.io/otel/semconv/v1.24.0 go.opentelemetry.io/otel/semconv/v1.26.0 -# go.opentelemetry.io/otel/metric v1.33.0 +# go.opentelemetry.io/otel/metric v1.34.0 ## explicit; go 1.22.0 go.opentelemetry.io/otel/metric go.opentelemetry.io/otel/metric/embedded @@ -1855,7 +1855,7 @@ 
go.opentelemetry.io/otel/sdk/metric/internal go.opentelemetry.io/otel/sdk/metric/internal/aggregate go.opentelemetry.io/otel/sdk/metric/internal/x go.opentelemetry.io/otel/sdk/metric/metricdata -# go.opentelemetry.io/otel/trace v1.33.0 +# go.opentelemetry.io/otel/trace v1.34.0 ## explicit; go 1.22.0 go.opentelemetry.io/otel/trace go.opentelemetry.io/otel/trace/embedded
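
A note on the minio-go `GetRegionFromURL` change above: `url.URL.Host` keeps any explicit port (and IPv6 brackets), while `Hostname()` strips them, so the region regexes now also match endpoints written with a port. A minimal standalone sketch of the difference; the endpoint string is only an example:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// An S3-style endpoint written with an explicit port.
	u, err := url.Parse("https://s3.dualstack.us-east-2.amazonaws.com:443")
	if err != nil {
		panic(err)
	}
	fmt.Println(u.Host)       // "s3.dualstack.us-east-2.amazonaws.com:443" - port kept
	fmt.Println(u.Hostname()) // "s3.dualstack.us-east-2.amazonaws.com"     - port stripped
}
```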
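For the new `kadm.DecodeACLOperations` helper vendored above, a hedged usage sketch: the bit positions follow `kmsg.ACLOperation` (3 = Read, 4 = Write), and the printed form depends on `kmsg`'s `String()` output.

```go
package main

import (
	"fmt"
	"math"

	"github.com/twmb/franz-go/pkg/kadm"
)

func main() {
	// Bits 3 (Read) and 4 (Write) set, as a broker might report in an
	// AuthorizedOperations field for a topic.
	ops := kadm.DecodeACLOperations(1<<3 | 1<<4)
	fmt.Println(ops) // expected to contain the READ and WRITE operations

	// math.MinInt32 means AUTHORIZED_OPERATIONS_OMITTED: an empty slice.
	fmt.Println(len(kadm.DecodeACLOperations(math.MinInt32))) // 0
}
```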
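The `kgo/source.go` changes above refine KIP-392 follower fetching (redirecting back to the leader on offset-out-of-range, and tolerating truncated compressed batches once at least one batch decoded). Follower fetching itself is opt-in on the client by advertising a rack; a minimal consumer sketch with hypothetical broker, topic, and rack names:

```go
package main

import (
	"context"
	"fmt"

	"github.com/twmb/franz-go/pkg/kgo"
)

func main() {
	// kgo.Rack advertises the client's rack so brokers may direct fetches
	// to a nearby follower replica (KIP-392). Names below are placeholders.
	cl, err := kgo.NewClient(
		kgo.SeedBrokers("localhost:9092"),
		kgo.ConsumeTopics("example-topic"),
		kgo.Rack("us-east-1a"),
	)
	if err != nil {
		panic(err)
	}
	defer cl.Close()

	// Poll once and print record values.
	fetches := cl.PollFetches(context.Background())
	fetches.EachRecord(func(r *kgo.Record) {
		fmt.Println(string(r.Value))
	})
}
```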