From 487fa5837cb60d2a8de53111c97f14bd903333d2 Mon Sep 17 00:00:00 2001 From: Eric Lippmann Date: Fri, 5 Apr 2024 15:11:38 +0200 Subject: [PATCH] Consolidate default retry timeout and settings --- pkg/config/redis.go | 2 +- pkg/icingadb/cleanup.go | 18 +---------- pkg/icingadb/db.go | 70 +++++++++++++---------------------------- pkg/icingadb/driver.go | 2 +- pkg/icingadb/ha.go | 4 +-- pkg/retry/retry.go | 3 ++ 6 files changed, 30 insertions(+), 69 deletions(-) diff --git a/pkg/config/redis.go b/pkg/config/redis.go index dbb21a709..73884aed2 100644 --- a/pkg/config/redis.go +++ b/pkg/config/redis.go @@ -85,7 +85,7 @@ func dialWithLogging(dialer ctxDialerFunc, logger *logging.Logger) ctxDialerFunc retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), retry.Settings{ - Timeout: 5 * time.Minute, + Timeout: retry.DefaultTimeout, OnError: func(_ time.Duration, _ uint64, err, lastErr error) { if lastErr == nil || err.Error() != lastErr.Error() { logger.Warnw("Can't connect to Redis. Retrying", zap.Error(err)) diff --git a/pkg/icingadb/cleanup.go b/pkg/icingadb/cleanup.go index 412ae3048..22bf02d6e 100644 --- a/pkg/icingadb/cleanup.go +++ b/pkg/icingadb/cleanup.go @@ -8,7 +8,6 @@ import ( "github.com/icinga/icingadb/pkg/com" "github.com/icinga/icingadb/pkg/retry" "github.com/icinga/icingadb/pkg/types" - "go.uber.org/zap" "time" ) @@ -68,22 +67,7 @@ func (db *DB) CleanupOlderThan( }, retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), - retry.Settings{ - Timeout: 5 * time.Minute, - OnError: func(_ time.Duration, _ uint64, err, lastErr error) { - if lastErr == nil || err.Error() != lastErr.Error() { - db.logger.Warnw("Can't execute query. Retrying", zap.Error(err)) - } - }, - OnSuccess: func(elapsed time.Duration, attempt uint64, lastErr error) { - if attempt > 0 { - db.logger.Infow("Query retried successfully after error", - zap.Duration("after", elapsed), - zap.Uint64("attempts", attempt+1), - zap.NamedError("recovered_error", lastErr)) - } - }, - }, + db.getDefaultRetrySettings(), ) if err != nil { return 0, err diff --git a/pkg/icingadb/db.go b/pkg/icingadb/db.go index a28025f3a..c7bc178ac 100644 --- a/pkg/icingadb/db.go +++ b/pkg/icingadb/db.go @@ -347,22 +347,7 @@ func (db *DB) BulkExec( }, retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), - retry.Settings{ - Timeout: 5 * time.Minute, - OnError: func(_ time.Duration, _ uint64, err, lastErr error) { - if lastErr == nil || err.Error() != lastErr.Error() { - db.logger.Warnw("Can't execute query. Retrying", zap.Error(err)) - } - }, - OnSuccess: func(elapsed time.Duration, attempt uint64, lastErr error) { - if attempt > 0 { - db.logger.Infow("Query retried successfully after error", - zap.Duration("after", elapsed), - zap.Uint64("attempts", attempt+1), - zap.NamedError("recovered_error", lastErr)) - } - }, - }, + db.getDefaultRetrySettings(), ) } }(b)) @@ -427,22 +412,7 @@ func (db *DB) NamedBulkExec( }, retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), - retry.Settings{ - Timeout: 5 * time.Minute, - OnError: func(_ time.Duration, _ uint64, err, lastErr error) { - if lastErr == nil || err.Error() != lastErr.Error() { - db.logger.Warnw("Can't execute query. Retrying", zap.Error(err)) - } - }, - OnSuccess: func(elapsed time.Duration, attempt uint64, lastErr error) { - if attempt > 0 { - db.logger.Infow("Query retried successfully after error", - zap.Duration("after", elapsed), - zap.Uint64("attempts", attempt+1), - zap.NamedError("recovered_error", lastErr)) - } - }, - }, + db.getDefaultRetrySettings(), ) } }(b)) @@ -515,22 +485,7 @@ func (db *DB) NamedBulkExecTx( }, retry.Retryable, backoff.NewExponentialWithJitter(1*time.Millisecond, 1*time.Second), - retry.Settings{ - Timeout: 5 * time.Minute, - OnError: func(_ time.Duration, _ uint64, err, lastErr error) { - if lastErr == nil || err.Error() != lastErr.Error() { - db.logger.Warnw("Can't execute query. Retrying", zap.Error(err)) - } - }, - OnSuccess: func(elapsed time.Duration, attempt uint64, lastErr error) { - if attempt > 0 { - db.logger.Infow("Query retried successfully after error", - zap.Duration("after", elapsed), - zap.Uint64("attempts", attempt+1), - zap.NamedError("recovered_error", lastErr)) - } - }, - }, + db.getDefaultRetrySettings(), ) } }(b)) @@ -716,6 +671,25 @@ func (db *DB) GetSemaphoreForTable(table string) *semaphore.Weighted { } } +func (db *DB) getDefaultRetrySettings() retry.Settings { + return retry.Settings{ + Timeout: retry.DefaultTimeout, + OnError: func(_ time.Duration, _ uint64, err, lastErr error) { + if lastErr == nil || err.Error() != lastErr.Error() { + db.logger.Warnw("Can't execute query. Retrying", zap.Error(err)) + } + }, + OnSuccess: func(elapsed time.Duration, attempt uint64, lastErr error) { + if attempt > 0 { + db.logger.Infow("Query retried successfully after error", + zap.Duration("after", elapsed), + zap.Uint64("attempts", attempt+1), + zap.NamedError("recovered_error", lastErr)) + } + }, + } +} + func (db *DB) log(ctx context.Context, query string, counter *com.Counter) periodic.Stopper { return periodic.Start(ctx, db.logger.Interval(), func(tick periodic.Tick) { if count := counter.Reset(); count > 0 { diff --git a/pkg/icingadb/driver.go b/pkg/icingadb/driver.go index e2712ca83..2fde2f332 100644 --- a/pkg/icingadb/driver.go +++ b/pkg/icingadb/driver.go @@ -55,7 +55,7 @@ func (c RetryConnector) Connect(ctx context.Context) (driver.Conn, error) { shouldRetry, backoff.NewExponentialWithJitter(time.Millisecond*128, time.Minute*1), retry.Settings{ - Timeout: time.Minute * 5, + Timeout: retry.DefaultTimeout, OnError: func(_ time.Duration, _ uint64, err, lastErr error) { telemetry.UpdateCurrentDbConnErr(err) diff --git a/pkg/icingadb/ha.go b/pkg/icingadb/ha.go index 21f2e54c0..7d8e4f24d 100644 --- a/pkg/icingadb/ha.go +++ b/pkg/icingadb/ha.go @@ -154,7 +154,7 @@ func (h *HA) controller() { // expiration time. Therefore, we use a deadline ctx to retry.WithBackoff() in realize() which expires earlier // than our default timeout. // 2) Since we do not want to exit before our default timeout expires, we have to repeat step 1 until it does. - retryTimeout := time.NewTimer(5 * time.Minute) + retryTimeout := time.NewTimer(retry.DefaultTimeout) defer retryTimeout.Stop() for { @@ -246,7 +246,7 @@ func (h *HA) controller() { // But this is the best place to catch all scenarios where the timeout needs to be reset. // And since HA needs quite a bit of refactoring anyway to e.g. return immediately after calling h.abort(), // it's fine to have it here for now. - retryTimeout.Reset(5 * time.Minute) + retryTimeout.Reset(retry.DefaultTimeout) case <-h.heartbeat.Done(): if err := h.heartbeat.Err(); err != nil { h.abort(err) diff --git a/pkg/retry/retry.go b/pkg/retry/retry.go index 3c1c8e6fa..582f7c66b 100644 --- a/pkg/retry/retry.go +++ b/pkg/retry/retry.go @@ -12,6 +12,9 @@ import ( "time" ) +// DefaultTimeout is our opinionated default timeout for retrying database and Redis operations. +const DefaultTimeout = 5 * time.Minute + // RetryableFunc is a retryable function. type RetryableFunc func(context.Context) error