Skip to content

Commit

Permalink
Upgrade dskit and enable logging for DoUntilQuorum calls. (#5724)
Browse files Browse the repository at this point in the history
* Upgrade dskit and enable logging for DoUntilQuorum calls.

* Add changelog entry.
  • Loading branch information
charleskorn authored Aug 15, 2023
1 parent 8856cd4 commit 5c60e89
Show file tree
Hide file tree
Showing 33 changed files with 811 additions and 569 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
* [ENHANCEMENT] Compactor: Delete bucket-index, markers and debug files if there are no blocks left in the bucket index. This cleanup must be enabled by using `-compactor.no-blocks-file-cleanup-enabled` option. #5648
* [ENHANCEMENT] Ingester: reduce memory usage of active series tracker. #5665
* [ENHANCEMENT] Store-gateway: added `-store-gateway.sharding-ring.auto-forget-enabled` configuration parameter to control whether store-gateway auto-forget feature should be enabled or disabled (enabled by default). #5702
* [ENHANCEMENT] Querier: improved observability of calls to ingesters during queries. #5724
* [BUGFIX] Ingester: Handle when previous ring state is leaving and the number of tokens has changed. #5204
* [BUGFIX] Querier: fix issue where queries that use the `timestamp()` function fail with `execution: attempted to read series at index 0 from stream, but the stream has already been exhausted` if streaming chunks from ingesters to queriers is enabled. #5370
* [BUGFIX] memberlist: bring back `memberlist_client_kv_store_count` metric that used to exist in Cortex, but got lost during dskit updates before Mimir 2.0. #5377
Expand Down
109 changes: 109 additions & 0 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,33 @@
"required": false,
"desc": "",
"blockEntries": [
{
"kind": "field",
"name": "cert",
"required": false,
"desc": "Server TLS certificate. This configuration parameter is YAML only.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldType": "string"
},
{
"kind": "field",
"name": "key",
"required": false,
"desc": "Server TLS key. This configuration parameter is YAML only.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldType": "string"
},
{
"kind": "field",
"name": "client_ca",
"required": false,
"desc": "Root certificate authority used to verify client certificates. This configuration parameter is YAML only.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldType": "string"
},
{
"kind": "field",
"name": "cert_file",
Expand Down Expand Up @@ -281,6 +308,33 @@
"required": false,
"desc": "",
"blockEntries": [
{
"kind": "field",
"name": "cert",
"required": false,
"desc": "Server TLS certificate. This configuration parameter is YAML only.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldType": "string"
},
{
"kind": "field",
"name": "key",
"required": false,
"desc": "Server TLS key. This configuration parameter is YAML only.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldType": "string"
},
{
"kind": "field",
"name": "client_ca",
"required": false,
"desc": "Root certificate authority used to verify client certificates. This configuration parameter is YAML only.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldType": "string"
},
{
"kind": "field",
"name": "cert_file",
Expand Down Expand Up @@ -4681,6 +4735,17 @@
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "connection_pool_timeout",
"required": false,
"desc": "Maximum duration to wait to get a connection from pool.",
"fieldValue": null,
"fieldDefaultValue": 4000000000,
"fieldFlag": "query-frontend.results-cache.redis.connection-pool-timeout",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "min_idle_connections",
Expand Down Expand Up @@ -5928,6 +5993,17 @@
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "connection_pool_timeout",
"required": false,
"desc": "Maximum duration to wait to get a connection from pool.",
"fieldValue": null,
"fieldDefaultValue": 4000000000,
"fieldFlag": "blocks-storage.bucket-store.index-cache.redis.connection-pool-timeout",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "min_idle_connections",
Expand Down Expand Up @@ -6454,6 +6530,17 @@
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "connection_pool_timeout",
"required": false,
"desc": "Maximum duration to wait to get a connection from pool.",
"fieldValue": null,
"fieldDefaultValue": 4000000000,
"fieldFlag": "blocks-storage.bucket-store.chunks-cache.redis.connection-pool-timeout",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "min_idle_connections",
Expand Down Expand Up @@ -7015,6 +7102,17 @@
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "connection_pool_timeout",
"required": false,
"desc": "Maximum duration to wait to get a connection from pool.",
"fieldValue": null,
"fieldDefaultValue": 4000000000,
"fieldFlag": "blocks-storage.bucket-store.metadata-cache.redis.connection-pool-timeout",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "min_idle_connections",
Expand Down Expand Up @@ -11428,6 +11526,17 @@
"fieldType": "int",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "connection_pool_timeout",
"required": false,
"desc": "Maximum duration to wait to get a connection from pool.",
"fieldValue": null,
"fieldDefaultValue": 4000000000,
"fieldFlag": "ruler-storage.cache.redis.connection-pool-timeout",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "min_idle_connections",
Expand Down
10 changes: 10 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ Usage of ./cmd/mimir/mimir:
Override the expected name on the server certificate.
-blocks-storage.bucket-store.chunks-cache.redis.connection-pool-size int
Maximum number of connections in the pool. (default 100)
-blocks-storage.bucket-store.chunks-cache.redis.connection-pool-timeout duration
Maximum duration to wait to get a connection from pool. (default 4s)
-blocks-storage.bucket-store.chunks-cache.redis.db int
Database index.
-blocks-storage.bucket-store.chunks-cache.redis.dial-timeout duration
Expand Down Expand Up @@ -447,6 +449,8 @@ Usage of ./cmd/mimir/mimir:
Override the expected name on the server certificate.
-blocks-storage.bucket-store.index-cache.redis.connection-pool-size int
Maximum number of connections in the pool. (default 100)
-blocks-storage.bucket-store.index-cache.redis.connection-pool-timeout duration
Maximum duration to wait to get a connection from pool. (default 4s)
-blocks-storage.bucket-store.index-cache.redis.db int
Database index.
-blocks-storage.bucket-store.index-cache.redis.dial-timeout duration
Expand Down Expand Up @@ -573,6 +577,8 @@ Usage of ./cmd/mimir/mimir:
Maximum size of metafile content to cache in bytes. Caching will be skipped if the content exceeds this size. This is useful to avoid network round trip for large content if the configured caching backend has a hard limit on cached items size (in this case, you should set this limit to the same limit in the caching backend). (default 1048576)
-blocks-storage.bucket-store.metadata-cache.redis.connection-pool-size int
Maximum number of connections in the pool. (default 100)
-blocks-storage.bucket-store.metadata-cache.redis.connection-pool-timeout duration
Maximum duration to wait to get a connection from pool. (default 4s)
-blocks-storage.bucket-store.metadata-cache.redis.db int
Database index.
-blocks-storage.bucket-store.metadata-cache.redis.dial-timeout duration
Expand Down Expand Up @@ -1801,6 +1807,8 @@ Usage of ./cmd/mimir/mimir:
Override the expected name on the server certificate.
-query-frontend.results-cache.redis.connection-pool-size int
Maximum number of connections in the pool. (default 100)
-query-frontend.results-cache.redis.connection-pool-timeout duration
Maximum duration to wait to get a connection from pool. (default 4s)
-query-frontend.results-cache.redis.db int
Database index.
-query-frontend.results-cache.redis.dial-timeout duration
Expand Down Expand Up @@ -2031,6 +2039,8 @@ Usage of ./cmd/mimir/mimir:
Override the expected name on the server certificate.
-ruler-storage.cache.redis.connection-pool-size int
Maximum number of connections in the pool. (default 100)
-ruler-storage.cache.redis.connection-pool-timeout duration
Maximum duration to wait to get a connection from pool. (default 4s)
-ruler-storage.cache.redis.db int
Database index.
-ruler-storage.cache.redis.dial-timeout duration
Expand Down
24 changes: 24 additions & 0 deletions docs/sources/mimir/references/configuration-parameters/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,16 @@ The `server` block configures the HTTP and gRPC server of the launched service(s
[tls_min_version: <string> | default = ""]
http_tls_config:
# Server TLS certificate. This configuration parameter is YAML only.
[cert: <string> | default = ""]
# Server TLS key. This configuration parameter is YAML only.
[key: <string> | default = ""]
# Root certificate authority used to verify client certificates. This
# configuration parameter is YAML only.
[client_ca: <string> | default = ""]
# (advanced) HTTP server cert path.
# CLI flag: -server.http-tls-cert-path
[cert_file: <string> | default = ""]
Expand All @@ -470,6 +480,16 @@ http_tls_config:
[client_ca_file: <string> | default = ""]
grpc_tls_config:
# Server TLS certificate. This configuration parameter is YAML only.
[cert: <string> | default = ""]
# Server TLS key. This configuration parameter is YAML only.
[key: <string> | default = ""]
# Root certificate authority used to verify client certificates. This
# configuration parameter is YAML only.
[client_ca: <string> | default = ""]
# (advanced) GRPC TLS server cert path.
# CLI flag: -server.grpc-tls-cert-path
[cert_file: <string> | default = ""]
Expand Down Expand Up @@ -4134,6 +4154,10 @@ The `redis` block configures the Redis-based caching backend. The supported CLI
# CLI flag: -<prefix>.redis.connection-pool-size
[connection_pool_size: <int> | default = 100]
# (advanced) Maximum duration to wait to get a connection from pool.
# CLI flag: -<prefix>.redis.connection-pool-timeout
[connection_pool_timeout: <duration> | default = 4s]
# (advanced) Minimum number of idle connections.
# CLI flag: -<prefix>.redis.min-idle-connections
[min_idle_connections: <int> | default = 10]
Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ require (
github.com/golang/snappy v0.0.4
github.com/google/gopacket v1.1.19
github.com/gorilla/mux v1.8.0
github.com/grafana/dskit v0.0.0-20230808012443-3f5683fdb201
github.com/grafana/dskit v0.0.0-20230815014656-95e41c2a358a
github.com/grafana/e2e v0.1.1-0.20230221201045-21ebba73580b
github.com/hashicorp/golang-lru v1.0.2
github.com/json-iterator/go v1.1.12
Expand Down Expand Up @@ -170,7 +170,7 @@ require (
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
github.com/hashicorp/go-sockaddr v1.0.2 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.2 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.5 // indirect
github.com/hashicorp/memberlist v0.5.0 // indirect
github.com/hashicorp/serf v0.10.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down Expand Up @@ -201,7 +201,7 @@ require (
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/common/sigv4 v0.1.0 // indirect
github.com/prometheus/exporter-toolkit v0.10.0 // indirect
github.com/prometheus/exporter-toolkit v0.10.1-0.20230714054209-2f4150c63f97 // indirect
github.com/rainycape/unidecode v0.0.0-20150907023854-cb7f23ec59be // indirect
github.com/rs/cors v1.9.0 // indirect
github.com/rs/xid v1.5.0 // indirect
Expand Down
12 changes: 6 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -857,8 +857,8 @@ github.com/gosimple/slug v1.1.1 h1:fRu/digW+NMwBIP+RmviTK97Ho/bEj/C9swrCspN3D4=
github.com/gosimple/slug v1.1.1/go.mod h1:ER78kgg1Mv0NQGlXiDe57DpCyfbNywXXZ9mIorhxAf0=
github.com/grafana-tools/sdk v0.0.0-20211220201350-966b3088eec9 h1:LQAhgcUPnzdjU/OjCJaLlPQI7NmQCRlfjMPSA1VegvA=
github.com/grafana-tools/sdk v0.0.0-20211220201350-966b3088eec9/go.mod h1:AHHlOEv1+GGQ3ktHMlhuTUwo3zljV3QJbC0+8o2kn+4=
github.com/grafana/dskit v0.0.0-20230808012443-3f5683fdb201 h1:oad2aie0d6S/Dipv3Yiv/eKaq95o5dVZFuCAdRtMNs0=
github.com/grafana/dskit v0.0.0-20230808012443-3f5683fdb201/go.mod h1:rLsknCxlzGWMVtW39D0dArFpUY3McQJbIwi5T8wogFQ=
github.com/grafana/dskit v0.0.0-20230815014656-95e41c2a358a h1:XOOz49mJ0WHhvKlQcnVlp+BPuM7BqNO7wi5b3Wzj9wo=
github.com/grafana/dskit v0.0.0-20230815014656-95e41c2a358a/go.mod h1:SA90oxyODAYOFCW/O1HrS5Zu/zhNzh+yF4D+GoWmBEk=
github.com/grafana/e2e v0.1.1-0.20230221201045-21ebba73580b h1:dzle+89/D0hOxscjZlkb6ovYA52t9hl6h/S+hI8ek1Q=
github.com/grafana/e2e v0.1.1-0.20230221201045-21ebba73580b/go.mod h1:3UsooRp7yW5/NJQBlXcTsAHOoykEhNUYXkQ3r6ehEEY=
github.com/grafana/gomemcache v0.0.0-20230316202710-a081dae0aba9 h1:WB3bGH2f1UN6jkd6uAEWfHB8OD7dKJ0v2Oo6SNfhpfQ=
Expand Down Expand Up @@ -927,8 +927,8 @@ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c=
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU=
github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/golang-lru/v2 v2.0.5 h1:wW7h1TG88eUIJ2i69gaE3uNVtEPIagzhGvHgwfx2Vm4=
github.com/hashicorp/golang-lru/v2 v2.0.5/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hashicorp/hcl v1.0.1-vault-5 h1:kI3hhbbyzr4dldA8UdTb7ZlVVlI2DACdCfz31RPDgJM=
github.com/hashicorp/hcl v1.0.1-vault-5/go.mod h1:XYhtn6ijBSAj6n4YqAaf7RBPS4I06AItNorpy+MoQNM=
Expand Down Expand Up @@ -1144,8 +1144,8 @@ github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdO
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=
github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI=
github.com/prometheus/exporter-toolkit v0.10.0 h1:yOAzZTi4M22ZzVxD+fhy1URTuNRj/36uQJJ5S8IPza8=
github.com/prometheus/exporter-toolkit v0.10.0/go.mod h1:+sVFzuvV5JDyw+Ih6p3zFxZNVnKQa3x5qPmDSiPu4ZY=
github.com/prometheus/exporter-toolkit v0.10.1-0.20230714054209-2f4150c63f97 h1:oHcfzdJnM/SFppy2aUlvomk37GI33x9vgJULihE5Dt8=
github.com/prometheus/exporter-toolkit v0.10.1-0.20230714054209-2f4150c63f97/go.mod h1:LoBCZeRh+5hX+fSULNyFnagYlQG/gBsyA/deNzROkq8=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
Expand Down
20 changes: 14 additions & 6 deletions pkg/distributor/distributor.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
util_math "github.com/grafana/mimir/pkg/util/math"
"github.com/grafana/mimir/pkg/util/pool"
"github.com/grafana/mimir/pkg/util/push"
"github.com/grafana/mimir/pkg/util/spanlogger"
"github.com/grafana/mimir/pkg/util/validation"
)

Expand Down Expand Up @@ -88,8 +89,6 @@ type Distributor struct {
ingesterPool *ring_client.Pool
limits *validation.Overrides

queryQuorumConfig ring.DoUntilQuorumConfig

// The global rate limiter requires a distributors ring to count
// the number of healthy instances
distributorsLifecycler *ring.BasicLifecycler
Expand Down Expand Up @@ -244,7 +243,6 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove
ingesterPool: NewPool(cfg.PoolConfig, ingestersRing, cfg.IngesterClientFactory, log),
healthyInstancesCount: atomic.NewUint32(0),
limits: limits,
queryQuorumConfig: ring.DoUntilQuorumConfig{MinimizeRequests: cfg.MinimizeIngesterRequests, HedgingDelay: cfg.MinimiseIngesterRequestsHedgingDelay},
HATracker: haTracker,
ingestionRate: util_math.NewEWMARate(0.2, instanceIngestionRateTickInterval),

Expand Down Expand Up @@ -1249,7 +1247,17 @@ func forReplicationSet[T any](ctx context.Context, d *Distributor, replicationSe
// Nothing to do.
}

return ring.DoUntilQuorum(ctx, replicationSet, d.queryQuorumConfig, wrappedF, cleanup)
return ring.DoUntilQuorum(ctx, replicationSet, d.queryQuorumConfig(ctx), wrappedF, cleanup)
}

// queryQuorumConfig assembles the ring.DoUntilQuorumConfig used for a single
// DoUntilQuorum call made on behalf of ctx.
//
// MinimizeRequests and HedgingDelay are copied from the distributor's
// configuration. Logger is obtained from spanlogger.FromContext(ctx, d.log),
// so it depends on the request context and the config has to be built per
// call rather than once at construction time.
func (d *Distributor) queryQuorumConfig(ctx context.Context) ring.DoUntilQuorumConfig {
	quorumCfg := ring.DoUntilQuorumConfig{
		MinimizeRequests: d.cfg.MinimizeIngesterRequests,
		HedgingDelay:     d.cfg.MinimiseIngesterRequestsHedgingDelay,
	}

	// The logger is tied to ctx, which is why it is resolved here on every call.
	quorumCfg.Logger = spanlogger.FromContext(ctx, d.log)

	return quorumCfg
}

// LabelValuesForLabelName returns all of the label values that are associated with a given label name.
Expand Down Expand Up @@ -1456,7 +1464,7 @@ func (d *Distributor) labelValuesCardinality(ctx context.Context, labelNames []m
return nil, err
}

_, err = ring.DoUntilQuorum[struct{}](ctx, replicationSet, d.queryQuorumConfig, func(ctx context.Context, desc *ring.InstanceDesc) (struct{}, error) {
_, err = ring.DoUntilQuorum[struct{}](ctx, replicationSet, d.queryQuorumConfig(ctx), func(ctx context.Context, desc *ring.InstanceDesc) (struct{}, error) {
poolClient, err := d.ingesterPool.GetClientFor(desc.Addr)
if err != nil {
return struct{}{}, err
Expand Down Expand Up @@ -1747,7 +1755,7 @@ func (d *Distributor) UserStats(ctx context.Context, countMethod cardinality.Cou
req := &ingester_client.UserStatsRequest{
CountMethod: ingesterCountMethod,
}
resps, err := ring.DoUntilQuorum[zonedUserStatsResponse](ctx, replicationSet, d.queryQuorumConfig, func(ctx context.Context, desc *ring.InstanceDesc) (zonedUserStatsResponse, error) {
resps, err := ring.DoUntilQuorum[zonedUserStatsResponse](ctx, replicationSet, d.queryQuorumConfig(ctx), func(ctx context.Context, desc *ring.InstanceDesc) (zonedUserStatsResponse, error) {
poolClient, err := d.ingesterPool.GetClientFor(desc.Addr)
if err != nil {
return zonedUserStatsResponse{}, err
Expand Down
Loading

0 comments on commit 5c60e89

Please sign in to comment.