From bb5fcc929832f7bd2a6c2df348b387abcb8b961e Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 16 Sep 2020 11:52:29 +0200 Subject: [PATCH] Turn memcached circuit-breaker on by default (#3189) (#3190) * Turn memcached circuit-breaker on by default Will trip after 10 failures within 10 seconds Signed-off-by: Bryan Boreham * Change circuit-breaker log fields to avoid clash The names 'from' and 'to' are used elsewhere as dates, so avoid re-using them here as strings Signed-off-by: Bryan Boreham * Update CHANGELOG Signed-off-by: Bryan Boreham * Updated doc Signed-off-by: Marco Pracucci Co-authored-by: Marco Pracucci Co-authored-by: Bryan Boreham --- CHANGELOG.md | 2 +- docs/configuration/config-file-reference.md | 2 +- pkg/chunk/cache/memcached_client.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7418f9e993..0a58655f16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,7 +50,7 @@ * [ENHANCEMENT] Add "integration" as a label for `cortex_alertmanager_notifications_total` and `cortex_alertmanager_notifications_failed_total` metrics. #3056 * [ENHANCEMENT] Add `cortex_ruler_config_last_reload_successful` and `cortex_ruler_config_last_reload_successful_seconds` to check status of users rule manager. #3056 * [ENHANCEMENT] The configuration validation now fails if an empty YAML node has been set for a root YAML config property. #3080 -* [ENHANCEMENT] Memcached dial() calls now have an optional circuit-breaker to avoid hammering a broken cache #3051 +* [ENHANCEMENT] Memcached dial() calls now have a circuit-breaker to avoid hammering a broken cache. #3051, #3189 * [ENHANCEMENT] `-ruler.evaluation-delay-duration` is now overridable as a per-tenant limit, `ruler_evaluation_delay_duration`. #3098 * [ENHANCEMENT] Add TLS support to etcd client. 
#3102 * [ENHANCEMENT] When a tenant accesses the Alertmanager UI or its API, if we have valid `-alertmanager.configs.fallback` we'll use that to start the manager and avoid failing the request. #3073 diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 3d81a0a9d1..b357043aea 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2968,7 +2968,7 @@ The `memcached_client_config` configures the client used to connect to Memcached # Trip circuit-breaker after this number of consecutive dial failures (if zero # then circuit-breaker is disabled). # CLI flag: -<prefix>.memcached.circuit-breaker-consecutive-failures -[circuit_breaker_consecutive_failures: <int> | default = 0] +[circuit_breaker_consecutive_failures: <int> | default = 10] # Duration circuit-breaker remains open after tripping (if zero then 60 seconds # is used). diff --git a/pkg/chunk/cache/memcached_client.go b/pkg/chunk/cache/memcached_client.go index 231ab18f7b..ddb0897204 100644 --- a/pkg/chunk/cache/memcached_client.go +++ b/pkg/chunk/cache/memcached_client.go @@ -83,7 +83,7 @@ func (cfg *MemcachedClientConfig) RegisterFlagsWithPrefix(prefix, description st f.DurationVar(&cfg.Timeout, prefix+"memcached.timeout", 100*time.Millisecond, description+"Maximum time to wait before giving up on memcached requests.") f.DurationVar(&cfg.UpdateInterval, prefix+"memcached.update-interval", 1*time.Minute, description+"Period with which to poll DNS for memcache servers.") f.BoolVar(&cfg.ConsistentHash, prefix+"memcached.consistent-hash", true, description+"Use consistent hashing to distribute to memcache servers.") - f.UintVar(&cfg.CBFailures, prefix+"memcached.circuit-breaker-consecutive-failures", 0, description+"Trip circuit-breaker after this number of consecutive dial failures (if zero then circuit-breaker is disabled).") + f.UintVar(&cfg.CBFailures, prefix+"memcached.circuit-breaker-consecutive-failures", 10, 
description+"Trip circuit-breaker after this number of consecutive dial failures (if zero then circuit-breaker is disabled).") f.DurationVar(&cfg.CBTimeout, prefix+"memcached.circuit-breaker-timeout", 10*time.Second, description+"Duration circuit-breaker remains open after tripping (if zero then 60 seconds is used).") f.DurationVar(&cfg.CBInterval, prefix+"memcached.circuit-breaker-interval", 10*time.Second, description+"Reset circuit-breaker counts after this long (if zero then never reset).") } @@ -147,7 +147,7 @@ func NewMemcachedClient(cfg MemcachedClientConfig, name string, r prometheus.Reg } func (c *memcachedClient) circuitBreakerStateChange(name string, from gobreaker.State, to gobreaker.State) { - level.Info(c.logger).Log("msg", "circuit-breaker state change", "name", name, "from", from, "to", to) + level.Info(c.logger).Log("msg", "circuit-breaker state change", "name", name, "from-state", from, "to-state", to) } func (c *memcachedClient) dialViaCircuitBreaker(network, address string, timeout time.Duration) (net.Conn, error) {