From 19f04a94bb50e3da1f64f8e8f0e878058d704ebc Mon Sep 17 00:00:00 2001 From: "d.anchikov" Date: Fri, 17 Jun 2022 16:54:25 +0400 Subject: [PATCH 01/28] feat: Added retry logic to remote api request --- api/handler/triggers.go | 2 +- clock/clock.go | 5 + cmd/checker/config.go | 8 +- cmd/config.go | 40 ++++- interfaces.go | 1 + local/checker.yml | 5 +- metric_source/remote/config.go | 17 +- metric_source/remote/remote.go | 37 +++-- metric_source/remote/remote_test.go | 185 +++++++++++++++++++--- metric_source/remote/request.go | 66 ++++++-- metric_source/remote/request_test.go | 222 ++++++++++++++++++++++++++- mock/clock/clock.go | 12 ++ 12 files changed, 537 insertions(+), 63 deletions(-) diff --git a/api/handler/triggers.go b/api/handler/triggers.go index f1a5987b6..017c62155 100644 --- a/api/handler/triggers.go +++ b/api/handler/triggers.go @@ -91,7 +91,7 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo return nil, api.ErrorInvalidRequest(err) case remote.ErrRemoteTriggerResponse: response := api.ErrorRemoteServerUnavailable(err) - middleware.GetLoggerEntry(request).Error("%s : %s : %s", response.StatusText, response.ErrorText, err) + middleware.GetLoggerEntry(request).Errorf("%s : %s : %s", response.StatusText, response.ErrorText, err) return nil, response default: return nil, api.ErrorInternalServer(err) diff --git a/clock/clock.go b/clock/clock.go index b5e253805..a2f80d37b 100644 --- a/clock/clock.go +++ b/clock/clock.go @@ -14,3 +14,8 @@ func NewSystemClock() *SystemClock { func (t *SystemClock) Now() time.Time { return time.Now().UTC() } + +// Sleep pauses the current goroutine for at least the passed duration +func (t *SystemClock) Sleep(duration time.Duration) { + time.Sleep(duration) +} diff --git a/cmd/checker/config.go b/cmd/checker/config.go index 6cac4b8a7..1e5d6b558 100644 --- a/cmd/checker/config.go +++ b/cmd/checker/config.go @@ -92,9 +92,11 @@ func getDefault() config { Pprof: cmd.ProfilerConfig{Enabled: false}, }, Remote: cmd.RemoteConfig{ - CheckInterval: "60s", - Timeout: "60s", - MetricsTTL: "7d", + CheckInterval: "60s", + Timeout: "60s", + MetricsTTL: "168h", + HealthCheckTimeout: "60s", + HealthCheckRetrySeconds: "1 1", }, } } diff --git a/cmd/config.go b/cmd/config.go index 02f7df4e2..84a303ecc 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -3,7 +3,9 @@ package cmd import ( "fmt" "io/ioutil" + "strconv" "strings" + "time" "github.com/moira-alert/moira/metrics" @@ -113,6 +115,12 @@ type RemoteConfig struct { MetricsTTL string `yaml:"metrics_ttl"` // Timeout for remote requests Timeout string `yaml:"timeout"` + // Retry seconds for remote requests divided by spaces + RetrySeconds string `yaml:"retry_seconds"` + // HealthCheckTimeout is timeout for remote api health check requests + HealthCheckTimeout string `yaml:"health_check_timeout"` + // Retry seconds for remote api health check requests divided by spaces + HealthCheckRetrySeconds string `yaml:"health_check_retry_seconds"` // Username for basic auth User string `yaml:"user"` // Password for basic auth @@ -129,16 +137,34 @@ type ImageStoreConfig struct { // GetRemoteSourceSettings returns remote config parsed from moira config files func (config *RemoteConfig) GetRemoteSourceSettings() *remoteSource.Config { return &remoteSource.Config{ - URL: config.URL, - CheckInterval: to.Duration(config.CheckInterval), - MetricsTTL: to.Duration(config.MetricsTTL), - Timeout: to.Duration(config.Timeout), - User: config.User, - Password: config.Password, - Enabled: config.Enabled, + URL: config.URL, + CheckInterval: to.Duration(config.CheckInterval), + MetricsTTL: to.Duration(config.MetricsTTL), + Timeout: to.Duration(config.Timeout), + RetrySeconds: ParseRetrySeconds(config.RetrySeconds), + HealthCheckTimeout: to.Duration(config.HealthCheckTimeout), + HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds), + User: config.User, + Password: config.Password, + Enabled: config.Enabled, } } +// ParseRetrySeconds parses config value string into array of integers +func ParseRetrySeconds(retrySecondsString string) []time.Duration { + secondsStringList := strings.Fields(retrySecondsString) + retrySecondsIntList := make([]time.Duration, len(secondsStringList)) + + for index, secondsString := range secondsStringList { + secondsInt, err := strconv.Atoi(secondsString) + if err != nil { + panic(err) + } + retrySecondsIntList[index] = time.Second * time.Duration(secondsInt) + } + return retrySecondsIntList +} + // ReadConfig parses config file by the given path into Moira-used type func ReadConfig(configFileName string, config interface{}) error { configYaml, err := ioutil.ReadFile(configFileName) diff --git a/interfaces.go b/interfaces.go index e61a3d480..9e078cdfa 100644 --- a/interfaces.go +++ b/interfaces.go @@ -227,4 +227,5 @@ type PlotTheme interface { // Clock is an interface to work with Time. type Clock interface { Now() time.Time + Sleep(duration time.Duration) } diff --git a/local/checker.yml b/local/checker.yml index c704183ad..1a09c3562 100644 --- a/local/checker.yml +++ b/local/checker.yml @@ -16,8 +16,11 @@ remote: enabled: true url: "http://graphite:80/render" check_interval: 60s + metrics_ttl: 168h timeout: 60s - metrics_ttl: 7d + retry_seconds: 1 1 1 + health_check_timeout: 6s + health_check_retry_seconds: 1 1 1 checker: nodata_check_interval: 60s check_interval: 10s diff --git a/metric_source/remote/config.go b/metric_source/remote/config.go index 63278e13d..115ef2787 100644 --- a/metric_source/remote/config.go +++ b/metric_source/remote/config.go @@ -4,13 +4,16 @@ import "time" // Config represents config from remote storage type Config struct { - URL string - CheckInterval time.Duration - MetricsTTL time.Duration - Timeout time.Duration - User string - Password string - Enabled bool + URL string + CheckInterval time.Duration + MetricsTTL time.Duration + Timeout time.Duration + RetrySeconds []time.Duration + HealthCheckTimeout time.Duration + HealthCheckRetrySeconds []time.Duration + User string + Password string + Enabled bool } // isEnabled checks that remote config is enabled (url is defined and enabled flag is set) diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index 6be143ecf..ea93385a5 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -5,6 +5,8 @@ import ( "net/http" "time" + "github.com/moira-alert/moira/clock" + "github.com/moira-alert/moira" metricSource "github.com/moira-alert/moira/metric_source" ) @@ -23,10 +25,22 @@ func (err ErrRemoteTriggerResponse) Error() string { return err.InternalError.Error() } +// ErrRemoteUnavailable is a custom error when remote trigger check fails +type ErrRemoteUnavailable struct { + InternalError error + Target string +} + +// Error is a representation of Error interface method +func (err ErrRemoteUnavailable) Error() string { + return err.InternalError.Error() +} + // Remote is implementation of MetricSource interface, which implements fetch metrics method from remote graphite installation type Remote struct { config *Config client *http.Client + clock moira.Clock } // Create configures remote metric source @@ -34,6 +48,7 @@ func Create(config *Config) metricSource.MetricSource { return &Remote{ config: config, client: &http.Client{Timeout: config.Timeout}, + clock: clock.NewSystemClock(), } } @@ -50,9 +65,15 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert Target: target, } } - body, err := remote.makeRequest(req) + body, isRemoteAvailable, err := remote.makeRequestWithRetries(req, remote.config.Timeout, remote.config.RetrySeconds) if err != nil { - return nil, ErrRemoteTriggerResponse{ + if isRemoteAvailable { + return nil, ErrRemoteTriggerResponse{ + InternalError: err, + Target: target, + } + } + return nil, ErrRemoteUnavailable{ InternalError: err, Target: target, } @@ -83,18 +104,14 @@ func (remote *Remote) IsConfigured() (bool, error) { // IsRemoteAvailable checks if graphite API is available and returns 200 response func (remote *Remote) IsRemoteAvailable() (bool, error) { - maxRetries := 3 until := time.Now().Unix() from := until - 600 //nolint req, err := remote.prepareRequest(from, until, "NonExistingTarget") if err != nil { return false, err } - for attempt := 0; attempt < maxRetries; attempt++ { - _, err = remote.makeRequest(req) - if err == nil { - return true, nil - } - } - return false, err + _, isRemoteAvailable, err := remote.makeRequestWithRetries( + req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds, + ) + return isRemoteAvailable, err } diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index 4828ef4ac..8a540bc6c 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -4,6 +4,11 @@ import ( "fmt" "net/http" "testing" + "time" + + "github.com/golang/mock/gomock" + + mock_clock "github.com/moira-alert/moira/mock/clock" metricSource "github.com/moira-alert/moira/metric_source" . "github.com/smartystreets/goconvey/convey" @@ -26,20 +31,79 @@ func TestIsConfigured(t *testing.T) { } func TestIsRemoteAvailable(t *testing.T) { - Convey("Is available", t, func() { - server := createServer([]byte("Some string"), http.StatusOK) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - isAvailable, err := remote.IsRemoteAvailable() - So(isAvailable, ShouldBeTrue) - So(err, ShouldBeEmpty) + mockCtrl := gomock.NewController(t) + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(0) + testConfigs := []Config{ + {}, + {HealthCheckRetrySeconds: []time.Duration{time.Second}}, + {HealthCheckRetrySeconds: []time.Duration{time.Second}}, + {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, + {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, + {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, + } + body := []byte("Some string") + + Convey("Given server returns OK response the remote is available", t, func() { + server := createServer(body, http.StatusOK) + for _, config := range testConfigs { + config.URL = server.URL + remote := Remote{client: server.Client(), config: &config} + isAvailable, err := remote.IsRemoteAvailable() + So(isAvailable, ShouldBeTrue) + So(err, ShouldBeEmpty) + } }) - Convey("Not available", t, func() { - server := createServer([]byte("Some string"), http.StatusInternalServerError) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - isAvailable, err := remote.IsRemoteAvailable() - So(isAvailable, ShouldBeFalse) - So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) + Convey("Given server returns Remote Unavailable responses permanently", t, func() { + for _, statusCode := range remoteUnavailableStatusCodes { + server := createTestServer(TestResponse{body, statusCode}) + + Convey(fmt.Sprintf( + "request failed with %d response status code and remote is unavailable", statusCode, + ), func() { + remote := Remote{client: server.Client()} + for _, config := range testConfigs { + config.URL = server.URL + remote.config = &config + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(len(config.HealthCheckRetrySeconds)) + remote.clock = systemClock + + isAvailable, err := remote.IsRemoteAvailable() + So(err, ShouldResemble, fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(body), + )) + So(isAvailable, ShouldBeFalse) + } + }) + } + }) + + Convey("Given server returns Remote Unavailable response temporary", t, func() { + for _, statusCode := range remoteUnavailableStatusCodes { + Convey(fmt.Sprintf( + "the remote is available with retry after %d response", statusCode, + ), func() { + for _, config := range testConfigs { + if len(config.HealthCheckRetrySeconds) == 0 { + continue + } + server := createTestServer( + TestResponse{body, statusCode}, + TestResponse{body, http.StatusOK}, + ) + config.URL = server.URL + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(1) + remote := Remote{client: server.Client(), config: &config, clock: systemClock} + + isAvailable, err := remote.IsRemoteAvailable() + So(err, ShouldBeNil) + So(isAvailable, ShouldBeTrue) + } + }) + } }) } @@ -47,6 +111,16 @@ func TestFetch(t *testing.T) { var from int64 = 300 var until int64 = 500 target := "foo.bar" //nolint + testConfigs := []Config{ + {}, + {RetrySeconds: []time.Duration{time.Second}}, + {RetrySeconds: []time.Duration{time.Second}}, + {RetrySeconds: []time.Duration{time.Second, time.Second}}, + {RetrySeconds: []time.Duration{time.Second, time.Second}}, + {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, + } + mockCtrl := gomock.NewController(t) + validBody := []byte("[{\"Target\": \"t1\",\"DataPoints\":[[1,2],[3,4]]}]") Convey("Request success but body is invalid", t, func() { server := createServer([]byte("[]"), http.StatusOK) @@ -62,21 +136,90 @@ func TestFetch(t *testing.T) { result, err := remote.Fetch(target, from, until, false) So(result, ShouldBeEmpty) So(err.Error(), ShouldResemble, "invalid character 'S' looking for beginning of value") + _, ok := err.(ErrRemoteTriggerResponse) + So(ok, ShouldBeTrue) }) Convey("Fail request with InternalServerError", t, func() { server := createServer([]byte("Some string"), http.StatusInternalServerError) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - result, err := remote.Fetch(target, from, until, false) - So(result, ShouldBeEmpty) - So(err.Error(), ShouldResemble, fmt.Sprintf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) + remote := Remote{client: server.Client()} + for _, config := range testConfigs { + config.URL = server.URL + remote.config = &config + result, err := remote.Fetch(target, from, until, false) + So(result, ShouldBeEmpty) + So(err.Error(), ShouldResemble, fmt.Sprintf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) + _, ok := err.(ErrRemoteTriggerResponse) + So(ok, ShouldBeTrue) + } }) - Convey("Fail make request", t, func() { + Convey("Client calls bad url", t, func() { url := "💩%$&TR" - remote := Remote{config: &Config{URL: url}} - result, err := remote.Fetch(target, from, until, false) - So(result, ShouldBeEmpty) - So(err.Error(), ShouldResemble, "parse \"💩%$&TR\": invalid URL escape \"%$&\"") + for _, config := range testConfigs { + config.URL = url + remote := Remote{config: &config} + result, err := remote.Fetch(target, from, until, false) + So(result, ShouldBeEmpty) + So(err.Error(), ShouldResemble, "parse \"💩%$&TR\": invalid URL escape \"%$&\"") + _, ok := err.(ErrRemoteTriggerResponse) + So(ok, ShouldBeTrue) + } + }) + + Convey("Given server returns Remote Unavailable responses permanently", t, func() { + for _, statusCode := range remoteUnavailableStatusCodes { + server := createTestServer(TestResponse{validBody, statusCode}) + + Convey(fmt.Sprintf( + "request failed with %d response status code and remote is unavailable", statusCode, + ), func() { + remote := Remote{client: server.Client()} + for _, config := range testConfigs { + config.URL = server.URL + remote.config = &config + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) + remote.clock = systemClock + + result, err := remote.Fetch(target, from, until, false) + So(err, ShouldResemble, ErrRemoteUnavailable{ + InternalError: fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(validBody), + ), Target: target, + }) + So(result, ShouldBeNil) + } + }) + } + }) + + Convey("Given server returns Remote Unavailable response temporary", t, func() { + for _, statusCode := range remoteUnavailableStatusCodes { + Convey(fmt.Sprintf( + "the remote is available with retry after %d response", statusCode, + ), func() { + for _, config := range testConfigs { + if len(config.RetrySeconds) == 0 { + continue + } + server := createTestServer( + TestResponse{validBody, statusCode}, + TestResponse{validBody, http.StatusOK}, + ) + config.URL = server.URL + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(1) + remote := Remote{client: server.Client(), config: &config, clock: systemClock} + + result, err := remote.Fetch(target, from, until, false) + So(err, ShouldBeNil) + So(result, ShouldNotBeNil) + metricsData := result.GetMetricsData() + So(len(metricsData), ShouldEqual, 1) + So(metricsData[0].Name, ShouldEqual, "t1") + } + }) + } }) } diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index c6eb72e56..3c22cc000 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -1,10 +1,12 @@ package remote import ( + "context" "fmt" "io/ioutil" "net/http" "strconv" + "time" ) func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Request, error) { @@ -24,27 +26,73 @@ func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Re return req, nil } -func (remote *Remote) makeRequest(req *http.Request) ([]byte, error) { - var body []byte - +func (remote *Remote) makeRequest(req *http.Request) (body []byte, isRemoteAvailable bool, err error) { resp, err := remote.client.Do(req) if resp != nil { defer resp.Body.Close() } if err != nil { - return body, fmt.Errorf("The remote server is not available or the response was reset by timeout. " + //nolint - "TTL: %s, PATH: %s, ERROR: %v ", remote.client.Timeout.String(), req.URL.RawPath, err) + return body, false, fmt.Errorf( + "the remote server is not available or the response was reset by timeout. Url: %s, Error: %v ", + req.URL.String(), + err, + ) } body, err = ioutil.ReadAll(resp.Body) if err != nil { - return body, err + return body, false, err + } + + if isRemoteUnavailableStatusCode(resp.StatusCode) { + return body, false, fmt.Errorf( + "the remote server is not available. Response status %d: %s", resp.StatusCode, string(body), + ) + } else if resp.StatusCode != http.StatusOK { + return body, true, fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body)) + } + + return body, true, nil +} + +func isRemoteUnavailableStatusCode(statusCode int) bool { + switch statusCode { + case http.StatusUnauthorized, + http.StatusBadGateway, + http.StatusServiceUnavailable, + http.StatusGatewayTimeout: + return true + default: + return false } +} - if resp.StatusCode != 200 { //nolint - return body, fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body)) +func (remote *Remote) makeRequestWithRetries( + req *http.Request, + requestTimeout time.Duration, + retrySeconds []time.Duration, +) (body []byte, isRemoteAvailable bool, err error) { + for attemptIndex := 0; attemptIndex < len(retrySeconds)+1; attemptIndex++ { + body, isRemoteAvailable, err = remote.makeRequestWithTimeout(req, requestTimeout) + if err == nil || isRemoteAvailable { + return body, true, err + } + if attemptIndex < len(retrySeconds) { + remote.clock.Sleep(retrySeconds[attemptIndex]) + } } + return nil, false, err +} - return body, nil +func (remote *Remote) makeRequestWithTimeout( + req *http.Request, + requestTimeout time.Duration, +) (body []byte, isRemoteAvailable bool, err error) { + if requestTimeout > 0 { + ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) + defer cancel() + req = req.WithContext(ctx) + } + return remote.makeRequest(req) } diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index 340a8cef9..ecd9e057f 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -5,6 +5,10 @@ import ( "net/http" "net/http/httptest" "testing" + "time" + + "github.com/golang/mock/gomock" + mock_clock "github.com/moira-alert/moira/mock/clock" . "github.com/smartystreets/goconvey/convey" ) @@ -54,8 +58,9 @@ func TestMakeRequest(t *testing.T) { server := createServer(body, http.StatusOK) remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} request, _ := remote.prepareRequest(from, until, target) - actual, err := remote.makeRequest(request) + actual, isRemoteAvailable, err := remote.makeRequest(request) So(err, ShouldBeNil) + So(isRemoteAvailable, ShouldBeTrue) So(actual, ShouldResemble, body) }) @@ -63,19 +68,189 @@ func TestMakeRequest(t *testing.T) { server := createServer(body, http.StatusInternalServerError) remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} request, _ := remote.prepareRequest(from, until, target) - actual, err := remote.makeRequest(request) + actual, isRemoteAvailable, err := remote.makeRequest(request) So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) + So(isRemoteAvailable, ShouldBeTrue) So(actual, ShouldResemble, body) }) Convey("Client calls bad url", t, func() { server := createServer(body, http.StatusOK) - remote := Remote{client: server.Client(), config: &Config{URL: "http://bad/"}} + client := server.Client() + client.Timeout = time.Millisecond + remote := Remote{client: client, config: &Config{URL: "http://bad/"}} request, _ := remote.prepareRequest(from, until, target) - actual, err := remote.makeRequest(request) + actual, isRemoteAvailable, err := remote.makeRequest(request) So(err, ShouldNotBeEmpty) + So(isRemoteAvailable, ShouldBeFalse) So(actual, ShouldBeEmpty) }) + + Convey("Client returns status Remote Unavailable status codes", t, func() { + for _, statusCode := range remoteUnavailableStatusCodes { + server := createServer(body, statusCode) + remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} + request, _ := remote.prepareRequest(from, until, target) + actual, isRemoteAvailable, err := remote.makeRequest(request) + So(err, ShouldResemble, fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(body), + )) + So(isRemoteAvailable, ShouldBeFalse) + So(actual, ShouldResemble, body) + } + }) +} + +func TestMakeRequestWithRetries(t *testing.T) { + var from int64 = 300 + var until int64 = 500 + target := "foo.bar" + body := []byte("Some string") + testConfigs := []Config{ + {}, + {RetrySeconds: []time.Duration{time.Second}}, + {RetrySeconds: []time.Duration{time.Second}}, + {RetrySeconds: []time.Duration{time.Second, time.Second}}, + {RetrySeconds: []time.Duration{time.Second, time.Second}}, + {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, + } + mockCtrl := gomock.NewController(t) + + Convey("Given server returns OK response", t, func() { + server := createTestServer(TestResponse{body, http.StatusOK}) + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(0) + + Convey("request is successful", func() { + remote := Remote{client: server.Client(), clock: systemClock} + + for _, config := range testConfigs { + config.URL = server.URL + remote.config = &config + request, _ := remote.prepareRequest(from, until, target) + actual, isRemoteAvailable, err := remote.makeRequestWithRetries( + request, + remote.config.Timeout, + remote.config.RetrySeconds, + ) + So(err, ShouldBeNil) + So(isRemoteAvailable, ShouldBeTrue) + So(actual, ShouldResemble, body) + } + }) + }) + + Convey("Given server returns 500 response", t, func() { + server := createTestServer(TestResponse{body, http.StatusInternalServerError}) + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(0) + + Convey("request failed with 500 response and remote is available", func() { + remote := Remote{client: server.Client(), clock: systemClock} + + for _, config := range testConfigs { + config.URL = server.URL + remote.config = &config + request, _ := remote.prepareRequest(from, until, target) + actual, isRemoteAvailable, err := remote.makeRequestWithRetries( + request, + remote.config.Timeout, + remote.config.RetrySeconds, + ) + So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) + So(isRemoteAvailable, ShouldBeTrue) + So(actual, ShouldResemble, body) + } + }) + }) + + Convey("Given client calls bad url", t, func() { + server := createTestServer(TestResponse{body, http.StatusOK}) + + Convey("request failed and remote is unavailable", func() { + remote := Remote{client: server.Client()} + for _, config := range testConfigs { + config.URL = "http://bad/" + remote.config = &config + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) + remote.clock = systemClock + + request, _ := remote.prepareRequest(from, until, target) + actual, isRemoteAvailable, err := remote.makeRequestWithRetries( + request, + time.Millisecond, + remote.config.RetrySeconds, + ) + So(err, ShouldNotBeEmpty) + So(isRemoteAvailable, ShouldBeFalse) + So(actual, ShouldBeEmpty) + } + }) + }) + + Convey("Given server returns Remote Unavailable responses permanently", t, func() { + for _, statusCode := range remoteUnavailableStatusCodes { + server := createTestServer(TestResponse{body, statusCode}) + + Convey(fmt.Sprintf( + "request failed with %d response status code and remote is unavailable", statusCode, + ), func() { + remote := Remote{client: server.Client()} + for _, config := range testConfigs { + config.URL = server.URL + remote.config = &config + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) + remote.clock = systemClock + + request, _ := remote.prepareRequest(from, until, target) + actual, isRemoteAvailable, err := remote.makeRequestWithRetries( + request, + remote.config.Timeout, + remote.config.RetrySeconds, + ) + So(err, ShouldResemble, fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(body), + )) + So(isRemoteAvailable, ShouldBeFalse) + So(actual, ShouldBeNil) + } + }) + } + }) + + Convey("Given server returns Remote Unavailable response temporary", t, func() { + for _, statusCode := range remoteUnavailableStatusCodes { + Convey(fmt.Sprintf( + "request is successful with retry after %d response and remote is available", statusCode, + ), func() { + for _, config := range testConfigs { + if len(config.RetrySeconds) == 0 { + continue + } + server := createTestServer( + TestResponse{body, statusCode}, + TestResponse{body, http.StatusOK}, + ) + config.URL = server.URL + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(1) + remote := Remote{client: server.Client(), config: &config, clock: systemClock} + + request, _ := remote.prepareRequest(from, until, target) + actual, isRemoteAvailable, err := remote.makeRequestWithRetries( + request, + remote.config.Timeout, + remote.config.RetrySeconds, + ) + So(err, ShouldBeNil) + So(isRemoteAvailable, ShouldBeTrue) + So(actual, ShouldResemble, body) + } + }) + } + }) } func createServer(body []byte, statusCode int) *httptest.Server { @@ -84,3 +259,42 @@ func createServer(body []byte, statusCode int) *httptest.Server { rw.Write(body) //nolint })) } + +func createTestServer(testResponses ...TestResponse) *httptest.Server { + responseWriter := NewTestResponseWriter(testResponses) + return httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + response := responseWriter.GetResponse() + rw.WriteHeader(response.statusCode) + rw.Write(response.body) //nolint + })) +} + +type TestResponse struct { + body []byte + statusCode int +} + +type TestResponseWriter struct { + responses []TestResponse + count int +} + +func NewTestResponseWriter(testResponses []TestResponse) *TestResponseWriter { + responseWriter := new(TestResponseWriter) + responseWriter.responses = testResponses + responseWriter.count = 0 + return responseWriter +} + +func (responseWriter *TestResponseWriter) GetResponse() TestResponse { + response := responseWriter.responses[responseWriter.count%len(responseWriter.responses)] + responseWriter.count++ + return response +} + +var remoteUnavailableStatusCodes = []int{ + http.StatusUnauthorized, + http.StatusBadGateway, + http.StatusServiceUnavailable, + http.StatusGatewayTimeout, +} diff --git a/mock/clock/clock.go b/mock/clock/clock.go index fbfb56afb..6b55a6f3a 100644 --- a/mock/clock/clock.go +++ b/mock/clock/clock.go @@ -47,3 +47,15 @@ func (mr *MockClockMockRecorder) Now() *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Now", reflect.TypeOf((*MockClock)(nil).Now)) } + +// Sleep mocks base method. +func (m *MockClock) Sleep(arg0 time.Duration) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "Sleep", arg0) +} + +// Sleep indicates an expected call of Sleep. +func (mr *MockClockMockRecorder) Sleep(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Sleep", reflect.TypeOf((*MockClock)(nil).Sleep), arg0) +} From cb081424d79e65f96500363b3d078b2ee2aee4ef Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 18 Sep 2024 14:34:56 +0700 Subject: [PATCH 02/28] refactor: local checker config --- local/checker.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/local/checker.yml b/local/checker.yml index 7655df519..ab764d816 100644 --- a/local/checker.yml +++ b/local/checker.yml @@ -14,18 +14,16 @@ telemetry: listen: ":8092" local: check_interval: 60s - metrics_ttl: 168h - timeout: 60s - retry_seconds: 1 1 1 - health_check_timeout: 6s - health_check_retry_seconds: 1 1 1 graphite_remote: - cluster_id: default cluster_name: Graphite 1 url: "http://graphite:80/render" check_interval: 60s - timeout: 60s metrics_ttl: 168h + timeout: 60s + retry_seconds: 1 1 1 + health_check_timeout: 6s + health_check_retry_seconds: 1 1 1 prometheus_remote: - cluster_id: default cluster_name: Prometheus 1 From c0890f71f672a09df235656d15ee0bf61827c4f4 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 18 Sep 2024 14:57:42 +0700 Subject: [PATCH 03/28] refactor: config structs, remove unused functions --- cmd/checker/config.go | 7 ----- cmd/config.go | 41 ++++++++++------------------- metric_source/remote/config.go | 10 ++----- metric_source/remote/remote.go | 16 +++-------- metric_source/remote/remote_test.go | 6 ++--- metric_source/remote/request.go | 3 +++ 6 files changed, 26 insertions(+), 57 deletions(-) diff --git a/cmd/checker/config.go b/cmd/checker/config.go index f749da8ae..5d0cf97ba 100644 --- a/cmd/checker/config.go +++ b/cmd/checker/config.go @@ -154,13 +154,6 @@ func getDefault() config { }, Pprof: cmd.ProfilerConfig{Enabled: false}, }, - //Remote: cmd.RemoteConfig{ - // CheckInterval: "60s", - // Timeout: "60s", - // MetricsTTL: "168h", - // HealthCheckTimeout: "60s", - // HealthCheckRetrySeconds: "1 1", - //}, Local: localCheckConfig{ CheckInterval: "60s", }, diff --git a/cmd/config.go b/cmd/config.go index 4d6badba4..9668c32e8 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -235,38 +235,22 @@ type GraphiteRemoteConfig struct { RemoteCommonConfig `yaml:",inline"` // Timeout for remote requests Timeout string `yaml:"timeout"` + // Username for basic auth + User string `yaml:"user"` + // Password for basic auth + Password string `yaml:"password"` // Retry seconds for remote requests divided by spaces RetrySeconds string `yaml:"retry_seconds"` // HealthCheckTimeout is timeout for remote api health check requests HealthCheckTimeout string `yaml:"health_check_timeout"` - // Retry seconds for remote api health check requests divided by spaces + // Retry seconds for remote api health check requests divided by spaces HealthCheckRetrySeconds string `yaml:"health_check_retry_seconds"` - // Username for basic auth - User string `yaml:"user"` - // Password for basic auth - Password string `yaml:"password"` } func (config GraphiteRemoteConfig) getRemoteCommon() *RemoteCommonConfig { return &config.RemoteCommonConfig } -//// GetRemoteSourceSettings returns remote config parsed from moira config files -//func (config *RemoteConfig) GetRemoteSourceSettings() *remoteSource.Config { -// return &remoteSource.Config{ -// URL: config.URL, -// CheckInterval: to.Duration(config.CheckInterval), -// MetricsTTL: to.Duration(config.MetricsTTL), -// Timeout: to.Duration(config.Timeout), -// RetrySeconds: ParseRetrySeconds(config.RetrySeconds), -// HealthCheckTimeout: to.Duration(config.HealthCheckTimeout), -// HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds), -// User: config.User, -// Password: config.Password, -// Enabled: config.Enabled, -// } -//} - // ParseRetrySeconds parses config value string into array of integers. func ParseRetrySeconds(retrySecondsString string) []time.Duration { secondsStringList := strings.Fields(retrySecondsString) @@ -285,12 +269,15 @@ func ParseRetrySeconds(retrySecondsString string) []time.Duration { // GetRemoteSourceSettings returns remote config parsed from moira config files. func (config *GraphiteRemoteConfig) GetRemoteSourceSettings() *graphiteRemoteSource.Config { return &graphiteRemoteSource.Config{ - URL: config.URL, - CheckInterval: to.Duration(config.CheckInterval), - MetricsTTL: to.Duration(config.MetricsTTL), - Timeout: to.Duration(config.Timeout), - User: config.User, - Password: config.Password, + URL: config.URL, + CheckInterval: to.Duration(config.CheckInterval), + MetricsTTL: to.Duration(config.MetricsTTL), + Timeout: to.Duration(config.Timeout), + User: config.User, + Password: config.Password, + RetrySeconds: ParseRetrySeconds(config.RetrySeconds), + HealthCheckTimeout: to.Duration(config.HealthCheckTimeout), + HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds), } } diff --git a/metric_source/remote/config.go b/metric_source/remote/config.go index 0a7234098..6f28bf2db 100644 --- a/metric_source/remote/config.go +++ b/metric_source/remote/config.go @@ -8,15 +8,9 @@ type Config struct { CheckInterval time.Duration MetricsTTL time.Duration Timeout time.Duration + User string + Password string RetrySeconds []time.Duration HealthCheckTimeout time.Duration HealthCheckRetrySeconds []time.Duration - User string - Password string - Enabled bool -} - -// isEnabled checks that remote config is enabled (url is defined and enabled flag is set). -func (c *Config) isEnabled() bool { - return c.Enabled && c.URL != "" } diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index 2204f5ee4..a89f8da24 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -97,26 +97,18 @@ func (remote *Remote) GetMetricsTTLSeconds() int64 { return int64(remote.config.MetricsTTL.Seconds()) } -// IsConfigured returns false in cases that user does not properly configure remote settings like graphite URL. -func (remote *Remote) IsConfigured() (bool, error) { - return true, nil -} - -// IsRemoteAvailable checks if graphite API is available and returns 200 response. -func (remote *Remote) IsRemoteAvailable() (bool, error) { - return remote.IsAvailable() -} - // IsAvailable checks if graphite API is available and returns 200 response. func (remote *Remote) IsAvailable() (bool, error) { until := time.Now().Unix() from := until - 600 //nolint + req, err := remote.prepareRequest(from, until, "NonExistingTarget") if err != nil { return false, err } + _, isRemoteAvailable, err := remote.makeRequestWithRetries( - req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds, - ) + req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds) + return isRemoteAvailable, err } diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index 683e46df8..b69ea68c9 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -33,7 +33,7 @@ func TestIsRemoteAvailable(t *testing.T) { for _, config := range testConfigs { config.URL = server.URL remote := Remote{client: server.Client(), config: config} - isAvailable, err := remote.IsRemoteAvailable() + isAvailable, err := remote.IsAvailable() So(isAvailable, ShouldBeTrue) So(err, ShouldBeEmpty) } @@ -54,7 +54,7 @@ func TestIsRemoteAvailable(t *testing.T) { systemClock.EXPECT().Sleep(time.Second).Times(len(config.HealthCheckRetrySeconds)) remote.clock = systemClock - isAvailable, err := remote.IsRemoteAvailable() + isAvailable, err := remote.IsAvailable() So(err, ShouldResemble, fmt.Errorf( "the remote server is not available. Response status %d: %s", statusCode, string(body), )) @@ -82,7 +82,7 @@ func TestIsRemoteAvailable(t *testing.T) { systemClock.EXPECT().Sleep(time.Second).Times(1) remote := Remote{client: server.Client(), config: config, clock: systemClock} - isAvailable, err := remote.IsRemoteAvailable() + isAvailable, err := remote.IsAvailable() So(err, ShouldBeNil) So(isAvailable, ShouldBeTrue) } diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 75a0028b3..db4b83c6c 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -101,9 +101,11 @@ func (remote *Remote) makeRequestWithRetries( ) (body []byte, isRemoteAvailable bool, err error) { for attemptIndex := 0; attemptIndex < len(retrySeconds)+1; attemptIndex++ { body, isRemoteAvailable, err = remote.makeRequestWithTimeout(req, requestTimeout) + if err == nil || isRemoteAvailable { return body, true, err } + if attemptIndex < len(retrySeconds) { remote.clock.Sleep(retrySeconds[attemptIndex]) } @@ -120,5 +122,6 @@ func (remote *Remote) makeRequestWithTimeout( defer cancel() req = req.WithContext(ctx) } + return remote.makeRequest(req) } From 8c52507561cb7933ed005ed1b7afe513324dc8bd Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 18 Sep 2024 15:02:25 +0700 Subject: [PATCH 04/28] refactor: remove comments, add new lines --- metric_source/remote/request.go | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index db4b83c6c..30cce80e3 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -14,44 +14,21 @@ func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Re if err != nil { return nil, err } + q := req.URL.Query() q.Add("format", "json") q.Add("from", strconv.FormatInt(from, 10)) q.Add("target", target) q.Add("until", strconv.FormatInt(until, 10)) req.URL.RawQuery = q.Encode() + if remote.config.User != "" && remote.config.Password != "" { req.SetBasicAuth(remote.config.User, remote.config.Password) } + return req, nil } -// Old make request -//func (remote *Remote) makeRequest(req *http.Request) ([]byte, error) { -// var body []byte -// -// resp, err := remote.client.Do(req) -// if resp != nil { -// defer resp.Body.Close() -// } -// -// if err != nil { -// return body, fmt.Errorf("The remote server is not available or the response was reset by timeout. "+ //nolint -// "TTL: %s, PATH: %s, ERROR: %v ", remote.client.Timeout.String(), req.URL.RawPath, err) -// } -// -// body, err = io.ReadAll(resp.Body) -// if err != nil { -// return body, err -// } -// -// if resp.StatusCode != http.StatusOK { -// return body, fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body)) -// } -// -// return body, nil -//} - func (remote *Remote) makeRequest(req *http.Request) (body []byte, isRemoteAvailable bool, err error) { resp, err := remote.client.Do(req) if resp != nil { @@ -73,8 +50,8 @@ func (remote *Remote) makeRequest(req *http.Request) (body []byte, isRemoteAvail if isRemoteUnavailableStatusCode(resp.StatusCode) { return body, false, fmt.Errorf( - "the remote server is not available. Response status %d: %s", resp.StatusCode, string(body), - ) + "the remote server is not available. Response status %d: %s", + resp.StatusCode, string(body)) } else if resp.StatusCode != http.StatusOK { return body, true, fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body)) } From 722a211d9ec7eaf9b88dbdd4bb921cbb20929d33 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 18 Sep 2024 17:18:48 +0700 Subject: [PATCH 05/28] refactor: remote graphite tests --- metric_source/remote/remote_test.go | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index b69ea68c9..31b8bae1a 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -89,24 +89,6 @@ func TestIsRemoteAvailable(t *testing.T) { }) } }) - - // tests from master - - Convey("Is available", t, func() { - server := createServer([]byte("Some string"), http.StatusOK) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - isAvailable, err := remote.IsAvailable() - So(isAvailable, ShouldBeTrue) - So(err, ShouldBeEmpty) - }) - - Convey("Not available", t, func() { - server := createServer([]byte("Some string"), http.StatusInternalServerError) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - isAvailable, err := remote.IsAvailable() - So(isAvailable, ShouldBeFalse) - So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) - }) } func TestFetch(t *testing.T) { @@ -138,8 +120,7 @@ func TestFetch(t *testing.T) { result, err := remote.Fetch(target, from, until, false) So(result, ShouldBeEmpty) So(err.Error(), ShouldResemble, "invalid character 'S' looking for beginning of value") - _, ok := err.(ErrRemoteTriggerResponse) - So(ok, ShouldBeTrue) + So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) }) Convey("Fail request with InternalServerError", t, func() { @@ -151,8 +132,7 @@ func TestFetch(t *testing.T) { result, err := remote.Fetch(target, from, until, false) So(result, ShouldBeEmpty) So(err.Error(), ShouldResemble, fmt.Sprintf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) - _, ok := err.(ErrRemoteTriggerResponse) - So(ok, ShouldBeTrue) + So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) } }) @@ -164,8 +144,7 @@ func TestFetch(t *testing.T) { result, err := remote.Fetch(target, from, until, false) So(result, ShouldBeEmpty) So(err.Error(), ShouldResemble, "parse \"💩%$&TR\": invalid URL escape \"%$&\"") - _, ok := err.(ErrRemoteTriggerResponse) - So(ok, ShouldBeTrue) + So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) } }) From 38cca14f0159d684c368ddd73573b382ba955cdc Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 18 Sep 2024 17:32:39 +0700 Subject: [PATCH 06/28] refactor: remove comment --- api/handler/triggers.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/api/handler/triggers.go b/api/handler/triggers.go index aee2ce6e7..cbbac99fd 100644 --- a/api/handler/triggers.go +++ b/api/handler/triggers.go @@ -163,8 +163,6 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo case remote.ErrRemoteTriggerResponse: response := api.ErrorRemoteServerUnavailable(err) - // middleware.GetLoggerEntry(request).Errorf("%s : %s : %s", response.StatusText, response.ErrorText, err) - middleware.GetLoggerEntry(request).Error(). String("status", response.StatusText). Error(err). From 85bc18c7892d93f9e3e96208e60015e29a709d38 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Thu, 19 Sep 2024 15:21:41 +0700 Subject: [PATCH 07/28] refactor: api config --- local/api.yml | 5 ++++- script.sh | 5 ----- 2 files changed, 4 insertions(+), 6 deletions(-) delete mode 100755 script.sh diff --git a/local/api.yml b/local/api.yml index a48bddd83..1ab328198 100644 --- a/local/api.yml +++ b/local/api.yml @@ -17,8 +17,11 @@ graphite_remote: cluster_name: Graphite 1 url: "http://graphite:80/render" check_interval: 60s - timeout: 60s metrics_ttl: 168h + timeout: 60s + retry_seconds: 1 1 1 + health_check_timeout: 6s + health_check_retry_seconds: 1 1 1 prometheus_remote: - cluster_id: default cluster_name: Prometheus 1 diff --git a/script.sh b/script.sh deleted file mode 100755 index 25b9c5a81..000000000 --- a/script.sh +++ /dev/null @@ -1,5 +0,0 @@ -while [ true ]; do - echo "matsko.test.notifications.long.tags 10 `date +%s`" | nc localhost 2003 & - echo "Sent `date +%s`" - sleep 10 -done From f5c96c1c2b6ed645fdb59ef5190075f40ed1e6ac Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Tue, 1 Oct 2024 16:30:18 +0700 Subject: [PATCH 08/28] refactor: recognising isUnavailable code, refactor tests --- metric_source/remote/remote.go | 5 +++++ metric_source/remote/remote_test.go | 8 ++++---- metric_source/remote/request.go | 18 +++++++++--------- metric_source/remote/request_test.go | 13 +++---------- metric_source/remote/response.go | 6 ++++++ 5 files changed, 27 insertions(+), 23 deletions(-) diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index a89f8da24..bc2130113 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -48,6 +48,7 @@ func Create(config *Config) (metricSource.MetricSource, error) { if config.URL == "" { return nil, fmt.Errorf("remote graphite URL should not be empty") } + return &Remote{ config: config, client: &http.Client{Timeout: config.Timeout}, @@ -68,6 +69,7 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert Target: target, } } + body, isRemoteAvailable, err := remote.makeRequestWithRetries(req, remote.config.Timeout, remote.config.RetrySeconds) if err != nil { if isRemoteAvailable { @@ -76,11 +78,13 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert Target: target, } } + return nil, ErrRemoteUnavailable{ InternalError: err, Target: target, } } + resp, err := decodeBody(body) if err != nil { return nil, ErrRemoteTriggerResponse{ @@ -88,6 +92,7 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert Target: target, } } + fetchResult := convertResponse(resp, allowRealTimeAlerting) return &fetchResult, nil } diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index 31b8bae1a..a120f2112 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -40,7 +40,7 @@ func TestIsRemoteAvailable(t *testing.T) { }) Convey("Given server returns Remote Unavailable responses permanently", t, func() { - for _, statusCode := range remoteUnavailableStatusCodes { + for statusCode := range remoteUnavailableStatusCodes { server := createTestServer(TestResponse{body, statusCode}) Convey(fmt.Sprintf( @@ -65,7 +65,7 @@ func TestIsRemoteAvailable(t *testing.T) { }) Convey("Given server returns Remote Unavailable response temporary", t, func() { - for _, statusCode := range remoteUnavailableStatusCodes { + for statusCode := range remoteUnavailableStatusCodes { Convey(fmt.Sprintf( "the remote is available with retry after %d response", statusCode, ), func() { @@ -149,7 +149,7 @@ func TestFetch(t *testing.T) { }) Convey("Given server returns Remote Unavailable responses permanently", t, func() { - for _, statusCode := range remoteUnavailableStatusCodes { + for statusCode := range remoteUnavailableStatusCodes { server := createTestServer(TestResponse{validBody, statusCode}) Convey(fmt.Sprintf( @@ -176,7 +176,7 @@ func TestFetch(t *testing.T) { }) Convey("Given server returns Remote Unavailable response temporary", t, func() { - for _, statusCode := range remoteUnavailableStatusCodes { + for statusCode := range remoteUnavailableStatusCodes { Convey(fmt.Sprintf( "the remote is available with retry after %d response", statusCode, ), func() { diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 30cce80e3..5167212d5 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -59,16 +59,16 @@ func (remote *Remote) makeRequest(req *http.Request) (body []byte, isRemoteAvail return body, true, nil } +var remoteUnavailableStatusCodes = map[int]struct{}{ + http.StatusUnauthorized: {}, + http.StatusBadGateway: {}, + http.StatusServiceUnavailable: {}, + http.StatusGatewayTimeout: {}, +} + func isRemoteUnavailableStatusCode(statusCode int) bool { - switch statusCode { - case http.StatusUnauthorized, - http.StatusBadGateway, - http.StatusServiceUnavailable, - http.StatusGatewayTimeout: - return true - default: - return false - } + _, isUnavailableCode := remoteUnavailableStatusCodes[statusCode] + return isUnavailableCode } func (remote *Remote) makeRequestWithRetries( diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index cd48a70ea..e6f1dbe05 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -87,7 +87,7 @@ func TestMakeRequest(t *testing.T) { }) Convey("Client returns status Remote Unavailable status codes", t, func() { - for _, statusCode := range remoteUnavailableStatusCodes { + for statusCode := range remoteUnavailableStatusCodes { server := createServer(body, statusCode) remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} request, _ := remote.prepareRequest(from, until, target) @@ -190,7 +190,7 @@ func TestMakeRequestWithRetries(t *testing.T) { }) Convey("Given server returns Remote Unavailable responses permanently", t, func() { - for _, statusCode := range remoteUnavailableStatusCodes { + for statusCode := range remoteUnavailableStatusCodes { server := createTestServer(TestResponse{body, statusCode}) Convey(fmt.Sprintf( @@ -221,7 +221,7 @@ func TestMakeRequestWithRetries(t *testing.T) { }) Convey("Given server returns Remote Unavailable response temporary", t, func() { - for _, statusCode := range remoteUnavailableStatusCodes { + for statusCode := range remoteUnavailableStatusCodes { Convey(fmt.Sprintf( "request is successful with retry after %d response and remote is available", statusCode, ), func() { @@ -291,10 +291,3 @@ func (responseWriter *TestResponseWriter) GetResponse() TestResponse { responseWriter.count++ return response } - -var remoteUnavailableStatusCodes = []int{ - http.StatusUnauthorized, - http.StatusBadGateway, - http.StatusServiceUnavailable, - http.StatusGatewayTimeout, -} diff --git a/metric_source/remote/response.go b/metric_source/remote/response.go index 0af2fbaa0..8cc296948 100644 --- a/metric_source/remote/response.go +++ b/metric_source/remote/response.go @@ -23,6 +23,7 @@ func convertResponse(metricsData []metricSource.MetricData, allowRealTimeAlertin metricData.Values = metricData.Values[:len(metricData.Values)-1] result = append(result, metricData) } + return FetchResult{MetricsData: result} } @@ -32,12 +33,14 @@ func decodeBody(body []byte) ([]metricSource.MetricData, error) { if err != nil { return nil, err } + res := make([]metricSource.MetricData, 0, len(tmp)) for _, m := range tmp { var stepTime int64 = 60 if len(m.DataPoints) > 1 { stepTime = int64(*m.DataPoints[1][1] - *m.DataPoints[0][1]) } + metricData := metricSource.MetricData{ Name: m.Target, StartTime: int64(*m.DataPoints[0][1]), @@ -45,6 +48,7 @@ func decodeBody(body []byte) ([]metricSource.MetricData, error) { StepTime: stepTime, Values: make([]float64, len(m.DataPoints)), } + for i, v := range m.DataPoints { if v[0] == nil { metricData.Values[i] = math.NaN() @@ -52,7 +56,9 @@ func decodeBody(body []byte) ([]metricSource.MetricData, error) { metricData.Values[i] = *v[0] } } + res = append(res, metricData) } + return res, nil } From 6842542d050fa6e01301963452ab71f8502ae7be Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 2 Oct 2024 15:53:45 +0700 Subject: [PATCH 09/28] refactor: notifier config --- local/notifier.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/local/notifier.yml b/local/notifier.yml index 6a5cf57a7..1188b4c47 100644 --- a/local/notifier.yml +++ b/local/notifier.yml @@ -17,8 +17,11 @@ graphite_remote: cluster_name: Graphite 1 url: "http://graphite:80/render" check_interval: 60s - timeout: 60s metrics_ttl: 168h + timeout: 60s + retry_seconds: 1 1 1 + health_check_timeout: 6s + health_check_retry_seconds: 1 1 1 prometheus_remote: - cluster_id: default cluster_name: Prometheus 1 From 497eb291800f0a5878b24f5f8f8fadce87d0494c Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 2 Oct 2024 16:01:30 +0700 Subject: [PATCH 10/28] refactor: remove empty line --- api/handler/triggers.go | 1 - 1 file changed, 1 deletion(-) diff --git a/api/handler/triggers.go b/api/handler/triggers.go index c1479020e..8ee2787a0 100644 --- a/api/handler/triggers.go +++ b/api/handler/triggers.go @@ -194,7 +194,6 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo return nil, api.ErrorInvalidRequest(err) case remote.ErrRemoteTriggerResponse: response := api.ErrorRemoteServerUnavailable(err) - middleware.GetLoggerEntry(request).Error(). String("status", response.StatusText). Error(err). From 2e54649dba1604a278e783501f6383e0df6420e2 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 9 Oct 2024 10:28:54 +0700 Subject: [PATCH 11/28] feat: retries config in cmd --- cmd/config.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cmd/config.go b/cmd/config.go index 680f5691f..6fa3b4c97 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -232,6 +232,28 @@ type remoteCommon interface { getRemoteCommon() *RemoteCommonConfig } +// RetriesConfig is a settings for retry policy when performing requests to remote sources. +// Stop retrying when ONE of the following conditions is satisfied: +// - Time passed since first try is greater than MaxElapsedTime; +// - Already MaxRetriesCount done. +type RetriesConfig struct { + // InitialInterval between requests. + InitialInterval string `yaml:"initial_interval"` + // RandomizationFactor is used in exponential backoff to add some randomization + // when calculating next interval between requests. + // It will be used in multiplication like: + // RandomizedInterval = RetryInterval * (random value in range [1 - RandomizationFactor, 1 + RandomizationFactor]) + RandomizationFactor float64 `yaml:"randomization_factor"` + // Each new RetryInterval will be multiplied on Multiplier. + Multiplier float64 `yaml:"multiplier"` + // MaxInterval is the cap for RetryInterval. Note that it doesn't cap the RandomizedInterval. + MaxInterval string `yaml:"max_interval"` + // MaxElapsedTime caps the time passed from first try. If time passed is greater than MaxElapsedTime than stop retrying. + MaxElapsedTime string `yaml:"max_elapsed_time"` + // MaxRetriesCount is the amount of allowed retries. So at most MaxRetriesCount will be performed. + MaxRetriesCount int `yaml:"max_retries_count"` +} + // GraphiteRemoteConfig is remote graphite settings structure. type GraphiteRemoteConfig struct { RemoteCommonConfig `yaml:",inline"` From 409b7349b1d6a560b1102d6ba8849c592ee9e7dd Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 9 Oct 2024 11:51:30 +0700 Subject: [PATCH 12/28] feat: retries logic --- cmd/config.go | 2 +- go.mod | 1 + go.sum | 2 ++ metric_source/retries/backoff_factory.go | 32 ++++++++++++++++++++ metric_source/retries/config.go | 21 +++++++++++++ metric_source/retries/retrier.go | 26 ++++++++++++++++ metric_source/retries/retryable_operation.go | 7 +++++ 7 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 metric_source/retries/backoff_factory.go create mode 100644 metric_source/retries/config.go create mode 100644 metric_source/retries/retrier.go create mode 100644 metric_source/retries/retryable_operation.go diff --git a/cmd/config.go b/cmd/config.go index 6fa3b4c97..2a145125f 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -251,7 +251,7 @@ type RetriesConfig struct { // MaxElapsedTime caps the time passed from first try. If time passed is greater than MaxElapsedTime than stop retrying. MaxElapsedTime string `yaml:"max_elapsed_time"` // MaxRetriesCount is the amount of allowed retries. So at most MaxRetriesCount will be performed. - MaxRetriesCount int `yaml:"max_retries_count"` + MaxRetriesCount uint64 `yaml:"max_retries_count"` } // GraphiteRemoteConfig is remote graphite settings structure. diff --git a/go.mod b/go.mod index a2d3a9c52..31f66497b 100644 --- a/go.mod +++ b/go.mod @@ -178,6 +178,7 @@ require ( github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.2.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/dyatlov/go-opengraph/opengraph v0.0.0-20220524092352-606d7b1e5f8a // indirect github.com/fatih/color v1.16.0 // indirect github.com/go-openapi/jsonpointer v0.20.0 // indirect diff --git a/go.sum b/go.sum index 0eacd324a..a2a205fca 100644 --- a/go.sum +++ b/go.sum @@ -509,6 +509,8 @@ github.com/bwmarrin/discordgo v0.25.0 h1:NXhdfHRNxtwso6FPdzW2i3uBvvU7UIQTghmV2T4 github.com/bwmarrin/discordgo v0.25.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY= github.com/carlosdp/twiliogo v0.0.0-20161027183705-b26045ebb9d1 h1:hXakhQtPnXH839q1pBl/GqfTSchqE+R5Fqn98Iu7UQM= github.com/carlosdp/twiliogo v0.0.0-20161027183705-b26045ebb9d1/go.mod h1:pAxCBpjl/0JxYZlWGP/Dyi8f/LQSCQD2WAsG/iNzqQ8= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= diff --git a/metric_source/retries/backoff_factory.go b/metric_source/retries/backoff_factory.go new file mode 100644 index 000000000..06bf495ea --- /dev/null +++ b/metric_source/retries/backoff_factory.go @@ -0,0 +1,32 @@ +package retries + +import "github.com/cenkalti/backoff/v4" + +type BackoffFactory interface { + NewBackOff() backoff.BackOff +} + +type ExponentialBackoffFactory struct { + config Config +} + +func NewExponentialBackoffFactory(config Config) BackoffFactory { + return ExponentialBackoffFactory{ + config: config, + } +} + +func (factory ExponentialBackoffFactory) NewBackOff() backoff.BackOff { + backoffPolicy := backoff.NewExponentialBackOff( + backoff.WithInitialInterval(factory.config.InitialInterval), + backoff.WithRandomizationFactor(factory.config.RandomizationFactor), + backoff.WithMultiplier(factory.config.Multiplier), + backoff.WithMaxInterval(factory.config.MaxInterval), + backoff.WithMaxElapsedTime(factory.config.MaxElapsedTime)) + + if factory.config.MaxRetriesCount > 0 { + return backoff.WithMaxRetries(backoffPolicy, factory.config.MaxRetriesCount) + } + + return backoffPolicy +} diff --git a/metric_source/retries/config.go b/metric_source/retries/config.go new file mode 100644 index 000000000..5c0d3617a --- /dev/null +++ b/metric_source/retries/config.go @@ -0,0 +1,21 @@ +package retries + +import "time" + +type Config struct { + // InitialInterval between requests. + InitialInterval time.Duration + // RandomizationFactor is used in exponential backoff to add some randomization + // when calculating next interval between requests. + // It will be used in multiplication like: + // RandomizedInterval = RetryInterval * (random value in range [1 - RandomizationFactor, 1 + RandomizationFactor]) + RandomizationFactor float64 + // Each new RetryInterval will be multiplied on Multiplier. + Multiplier float64 + // MaxInterval is the cap for RetryInterval. Note that it doesn't cap the RandomizedInterval. + MaxInterval time.Duration + // MaxElapsedTime caps the time passed from first try. If time passed is greater than MaxElapsedTime than stop retrying. + MaxElapsedTime time.Duration + // MaxRetriesCount is the amount of allowed retries. So at most MaxRetriesCount will be performed. + MaxRetriesCount uint64 +} diff --git a/metric_source/retries/retrier.go b/metric_source/retries/retrier.go new file mode 100644 index 000000000..8a7826c5c --- /dev/null +++ b/metric_source/retries/retrier.go @@ -0,0 +1,26 @@ +package retries + +import ( + "github.com/cenkalti/backoff/v4" +) + +// Retrier retries the given operation. +type Retrier[T any] interface { + Retry(op RetryableOperation[T]) (T, error) +} + +type standardRetrier[T any] struct { + backoffFactory BackoffFactory +} + +// NewStandardRetrier returns standard retrier which will perform retries +// according to backoff policy provided by BackoffFactory. +func NewStandardRetrier[T any](backoffFactory BackoffFactory) Retrier[T] { + return standardRetrier[T]{ + backoffFactory: backoffFactory, + } +} + +func (r standardRetrier[T]) Retry(op RetryableOperation[T]) (T, error) { + return backoff.RetryWithData[T](op.DoRetryableOperation, r.backoffFactory.NewBackOff()) +} diff --git a/metric_source/retries/retryable_operation.go b/metric_source/retries/retryable_operation.go new file mode 100644 index 000000000..6e6797d58 --- /dev/null +++ b/metric_source/retries/retryable_operation.go @@ -0,0 +1,7 @@ +package retries + +// RetryableOperation is an action that can be retried after some time interval. +// If there is an error than should not be retried wrap it into backoff.PermanentError. +type RetryableOperation[T any] interface { + DoRetryableOperation() (T, error) +} From a98dd11baf961b36cb348296fc8c6c23a444b148 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 9 Oct 2024 12:46:13 +0700 Subject: [PATCH 13/28] test: exponential backoff factory and add godocs --- metric_source/retries/backoff_factory.go | 5 ++ metric_source/retries/backoff_factory_test.go | 80 +++++++++++++++++++ metric_source/retries/config.go | 1 + metric_source/retries/retrier.go | 20 +++-- metric_source/retries/retryable_operation.go | 2 +- 5 files changed, 96 insertions(+), 12 deletions(-) create mode 100644 metric_source/retries/backoff_factory_test.go diff --git a/metric_source/retries/backoff_factory.go b/metric_source/retries/backoff_factory.go index 06bf495ea..718142161 100644 --- a/metric_source/retries/backoff_factory.go +++ b/metric_source/retries/backoff_factory.go @@ -2,20 +2,25 @@ package retries import "github.com/cenkalti/backoff/v4" +// BackoffFactory is used for creating backoff. It is expected that all backoffs created with one factory instance +// have the same behaviour. type BackoffFactory interface { NewBackOff() backoff.BackOff } +// ExponentialBackoffFactory is a factory that generates exponential backoffs based on given config. type ExponentialBackoffFactory struct { config Config } +// NewExponentialBackoffFactory creates new BackoffFactory which will generate exponential backoffs. func NewExponentialBackoffFactory(config Config) BackoffFactory { return ExponentialBackoffFactory{ config: config, } } +// NewBackOff creates new backoff. func (factory ExponentialBackoffFactory) NewBackOff() backoff.BackOff { backoffPolicy := backoff.NewExponentialBackOff( backoff.WithInitialInterval(factory.config.InitialInterval), diff --git a/metric_source/retries/backoff_factory_test.go b/metric_source/retries/backoff_factory_test.go new file mode 100644 index 000000000..ce78ffd2f --- /dev/null +++ b/metric_source/retries/backoff_factory_test.go @@ -0,0 +1,80 @@ +package retries + +import ( + "github.com/cenkalti/backoff/v4" + "testing" + "time" + + . "github.com/smartystreets/goconvey/convey" +) + +const ( + testInitialInterval = time.Millisecond * 5 + testRandomizationFactor = 0.0 + testMultiplier = 2.0 + testMaxInterval = time.Millisecond * 40 +) + +func TestExponentialBackoffFactory(t *testing.T) { + Convey("Test ExponentialBackoffFactory", t, func() { + conf := Config{ + InitialInterval: testInitialInterval, + RandomizationFactor: testRandomizationFactor, + Multiplier: testMultiplier, + MaxInterval: testMaxInterval, + } + + Convey("with maxRetriesCount != 0 and MaxElapsedTime = 0", func() { + Convey("with retry interval always lower then config.MaxInterval", func() { + conf.MaxRetriesCount = 3 + defer func() { + conf.MaxRetriesCount = 0 + }() + + expectedBackoffs := []time.Duration{ + testInitialInterval, + testInitialInterval * testMultiplier, + testInitialInterval * 4.0, + backoff.Stop, + backoff.Stop, + backoff.Stop, + } + + factory := NewExponentialBackoffFactory(conf) + + b := factory.NewBackOff() + + for i := range expectedBackoffs { + So(b.NextBackOff(), ShouldEqual, expectedBackoffs[i]) + } + }) + + Convey("with retry interval becomes config.MaxInterval", func() { + conf.MaxRetriesCount = 6 + defer func() { + conf.MaxRetriesCount = 0 + }() + + expectedBackoffs := []time.Duration{ + testInitialInterval, + testInitialInterval * testMultiplier, + testInitialInterval * 4.0, + testMaxInterval, + testMaxInterval, + testMaxInterval, + backoff.Stop, + backoff.Stop, + backoff.Stop, + } + + factory := NewExponentialBackoffFactory(conf) + + b := factory.NewBackOff() + + for i := range expectedBackoffs { + So(b.NextBackOff(), ShouldEqual, expectedBackoffs[i]) + } + }) + }) + }) +} diff --git a/metric_source/retries/config.go b/metric_source/retries/config.go index 5c0d3617a..9584a115c 100644 --- a/metric_source/retries/config.go +++ b/metric_source/retries/config.go @@ -2,6 +2,7 @@ package retries import "time" +// Config for exponential backoff retries. type Config struct { // InitialInterval between requests. InitialInterval time.Duration diff --git a/metric_source/retries/retrier.go b/metric_source/retries/retrier.go index 8a7826c5c..a7e4bd331 100644 --- a/metric_source/retries/retrier.go +++ b/metric_source/retries/retrier.go @@ -4,23 +4,21 @@ import ( "github.com/cenkalti/backoff/v4" ) -// Retrier retries the given operation. +// Retrier retries the given operation with given backoff. type Retrier[T any] interface { - Retry(op RetryableOperation[T]) (T, error) + // Retry the given operation until the op succeeds or op returns backoff.PermanentError or backoffPolicy returns backoff.Stop. + Retry(op RetryableOperation[T], backoffPolicy backoff.BackOff) (T, error) } -type standardRetrier[T any] struct { - backoffFactory BackoffFactory -} +type standardRetrier[T any] struct{} // NewStandardRetrier returns standard retrier which will perform retries // according to backoff policy provided by BackoffFactory. -func NewStandardRetrier[T any](backoffFactory BackoffFactory) Retrier[T] { - return standardRetrier[T]{ - backoffFactory: backoffFactory, - } +func NewStandardRetrier[T any]() Retrier[T] { + return standardRetrier[T]{} } -func (r standardRetrier[T]) Retry(op RetryableOperation[T]) (T, error) { - return backoff.RetryWithData[T](op.DoRetryableOperation, r.backoffFactory.NewBackOff()) +// Retry the given operation until the op succeeds or op returns backoff.PermanentError or backoffPolicy returns backoff.Stop. +func (r standardRetrier[T]) Retry(op RetryableOperation[T], backoffPolicy backoff.BackOff) (T, error) { + return backoff.RetryWithData[T](op.DoRetryableOperation, backoffPolicy) } diff --git a/metric_source/retries/retryable_operation.go b/metric_source/retries/retryable_operation.go index 6e6797d58..dfab3d17e 100644 --- a/metric_source/retries/retryable_operation.go +++ b/metric_source/retries/retryable_operation.go @@ -1,7 +1,7 @@ package retries // RetryableOperation is an action that can be retried after some time interval. -// If there is an error than should not be retried wrap it into backoff.PermanentError. +// If there is an error in DoRetryableOperation that should not be retried, wrap the error with backoff.PermanentError. type RetryableOperation[T any] interface { DoRetryableOperation() (T, error) } From 4e030757ebec8e3e90166d56fa4bd13815a7a3dd Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 9 Oct 2024 15:07:41 +0700 Subject: [PATCH 14/28] test: finish fot exponential backoff factory --- metric_source/retries/backoff_factory_test.go | 90 ++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/metric_source/retries/backoff_factory_test.go b/metric_source/retries/backoff_factory_test.go index ce78ffd2f..0799ea310 100644 --- a/metric_source/retries/backoff_factory_test.go +++ b/metric_source/retries/backoff_factory_test.go @@ -2,6 +2,7 @@ package retries import ( "github.com/cenkalti/backoff/v4" + "sync" "testing" "time" @@ -24,7 +25,7 @@ func TestExponentialBackoffFactory(t *testing.T) { MaxInterval: testMaxInterval, } - Convey("with maxRetriesCount != 0 and MaxElapsedTime = 0", func() { + Convey("with MaxRetriesCount != 0 and MaxElapsedTime = 0", func() { Convey("with retry interval always lower then config.MaxInterval", func() { conf.MaxRetriesCount = 3 defer func() { @@ -76,5 +77,92 @@ func TestExponentialBackoffFactory(t *testing.T) { } }) }) + + Convey("with MaxRetriesCount = 0 and MaxElapsedTime != 0", func() { + conf.MaxElapsedTime = time.Second + defer func() { + conf.MaxElapsedTime = 0 + }() + + once := sync.Once{} + + expectedBackoffs := []time.Duration{ + testInitialInterval, + backoff.Stop, + backoff.Stop, + backoff.Stop, + } + + factory := NewExponentialBackoffFactory(conf) + + b := factory.NewBackOff() + + for i := range expectedBackoffs { + So(b.NextBackOff(), ShouldEqual, expectedBackoffs[i]) + once.Do(func() { + time.Sleep(conf.MaxElapsedTime) + }) + } + }) + + Convey("with MaxRetriesCount != 0 and MaxElapsedTime != 0", func() { + Convey("MaxRetriesCount performed retries before MaxElapsedTime passed", func() { + conf.MaxElapsedTime = time.Second + conf.MaxRetriesCount = 6 + defer func() { + conf.MaxElapsedTime = 0 + conf.MaxRetriesCount = 0 + }() + + expectedBackoffs := []time.Duration{ + testInitialInterval, + testInitialInterval * testMultiplier, + testInitialInterval * 4.0, + testMaxInterval, + testMaxInterval, + testMaxInterval, + backoff.Stop, + backoff.Stop, + backoff.Stop, + } + + factory := NewExponentialBackoffFactory(conf) + + b := factory.NewBackOff() + + for i := range expectedBackoffs { + So(b.NextBackOff(), ShouldEqual, expectedBackoffs[i]) + } + }) + + Convey("MaxElapsedTime passed before MaxRetriesCount performed", func() { + conf.MaxElapsedTime = time.Second + conf.MaxRetriesCount = 6 + defer func() { + conf.MaxElapsedTime = 0 + conf.MaxRetriesCount = 0 + }() + + expectedBackoffs := []time.Duration{ + testInitialInterval, + backoff.Stop, + backoff.Stop, + backoff.Stop, + } + + once := sync.Once{} + + factory := NewExponentialBackoffFactory(conf) + + b := factory.NewBackOff() + + for i := range expectedBackoffs { + So(b.NextBackOff(), ShouldEqual, expectedBackoffs[i]) + once.Do(func() { + time.Sleep(conf.MaxElapsedTime) + }) + } + }) + }) }) } From cdace7db31e3ba671fd4060956ac8dc9394eb7b5 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 9 Oct 2024 17:06:41 +0700 Subject: [PATCH 15/28] test: for retrier --- metric_source/retries/retrier_test.go | 168 ++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 metric_source/retries/retrier_test.go diff --git a/metric_source/retries/retrier_test.go b/metric_source/retries/retrier_test.go new file mode 100644 index 000000000..612fd69a4 --- /dev/null +++ b/metric_source/retries/retrier_test.go @@ -0,0 +1,168 @@ +package retries + +import ( + "errors" + "github.com/cenkalti/backoff/v4" + "testing" + + . "github.com/smartystreets/goconvey/convey" +) + +func TestStandardRetrier(t *testing.T) { + var ( + maxRetriesCount uint64 = 6 + testErr = errors.New("some test err") + errInsidePermanent = errors.New("test err inside permanent") + permanentTestErr = backoff.Permanent(errInsidePermanent) + ) + + conf := Config{ + InitialInterval: testInitialInterval, + RandomizationFactor: testRandomizationFactor, + Multiplier: testMultiplier, + MaxInterval: testMaxInterval, + MaxRetriesCount: maxRetriesCount, + } + + retrier := NewStandardRetrier[int]() + + Convey("Test retrier", t, func() { + Convey("with successful RetryableOperation", func() { + retPairs := []retPair[int]{ + { + returnValue: 25, + err: nil, + }, + { + returnValue: 26, + err: nil, + }, + } + expectedCalls := 1 + + stub := newStubRetryableOperation[int](retPairs) + + backoffPolicy := NewExponentialBackoffFactory(conf).NewBackOff() + + gotRes, gotErr := retrier.Retry(stub, backoffPolicy) + + So(gotRes, ShouldEqual, retPairs[0].returnValue) + So(gotErr, ShouldBeNil) + So(stub.calls, ShouldEqual, expectedCalls) + }) + + Convey("with successful RetryableOperation after some retries", func() { + retPairs := []retPair[int]{ + { + returnValue: 25, + err: testErr, + }, + { + returnValue: 10, + err: testErr, + }, + { + returnValue: 42, + err: nil, + }, + { + returnValue: 41, + err: nil, + }, + } + expectedCalls := 3 + + stub := newStubRetryableOperation[int](retPairs) + + backoffPolicy := NewExponentialBackoffFactory(conf).NewBackOff() + + gotRes, gotErr := retrier.Retry(stub, backoffPolicy) + + So(gotRes, ShouldEqual, retPairs[2].returnValue) + So(gotErr, ShouldBeNil) + So(stub.calls, ShouldEqual, expectedCalls) + }) + + Convey("with permanent error from RetryableOperation after some retries", func() { + retPairs := []retPair[int]{ + { + returnValue: 25, + err: testErr, + }, + { + returnValue: 10, + err: permanentTestErr, + }, + { + returnValue: 42, + err: nil, + }, + { + returnValue: 41, + err: nil, + }, + } + expectedCalls := 2 + + stub := newStubRetryableOperation[int](retPairs) + + backoffPolicy := NewExponentialBackoffFactory(conf).NewBackOff() + + gotRes, gotErr := retrier.Retry(stub, backoffPolicy) + + So(gotRes, ShouldEqual, retPairs[1].returnValue) + So(gotErr, ShouldResemble, errInsidePermanent) + So(stub.calls, ShouldEqual, expectedCalls) + }) + + Convey("with RetryableOperation failed on each retry", func() { + expectedCalls := conf.MaxRetriesCount + 1 + + stub := newStubRetryableOperation[int](nil) + + backoffPolicy := NewExponentialBackoffFactory(conf).NewBackOff() + + gotRes, gotErr := retrier.Retry(stub, backoffPolicy) + + So(gotRes, ShouldEqual, 0) + So(gotErr, ShouldResemble, errStubValuesEnded) + So(stub.calls, ShouldEqual, expectedCalls) + }) + }) +} + +type retPair[T any] struct { + returnValue T + err error +} + +type stubRetryableOperation[T any] struct { + retPairs []retPair[T] + idx int + calls int +} + +func newStubRetryableOperation[T any](pairs []retPair[T]) *stubRetryableOperation[T] { + return &stubRetryableOperation[T]{ + retPairs: pairs, + idx: 0, + calls: 0, + } +} + +var ( + errStubValuesEnded = errors.New("prepared return values and errors for stub ended") +) + +func (stub *stubRetryableOperation[T]) DoRetryableOperation() (T, error) { + stub.calls += 1 + + if stub.idx >= len(stub.retPairs) { + return *new(T), errStubValuesEnded + } + + res := stub.retPairs[stub.idx] + stub.idx += 1 + + return res.returnValue, res.err +} From f83caceb3fe99ef492035bfe40e8136520c2a0cd Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 9 Oct 2024 19:10:42 +0700 Subject: [PATCH 16/28] refactor: (work in progress) remote metric source to use retrier --- metric_source/remote/config.go | 7 +- metric_source/remote/remote.go | 51 +++++++----- metric_source/remote/request.go | 132 ++++++++++++++++++++----------- metric_source/retries/retrier.go | 3 +- 4 files changed, 125 insertions(+), 68 deletions(-) diff --git a/metric_source/remote/config.go b/metric_source/remote/config.go index 6f28bf2db..78f66f30e 100644 --- a/metric_source/remote/config.go +++ b/metric_source/remote/config.go @@ -1,6 +1,9 @@ package remote -import "time" +import ( + "github.com/moira-alert/moira/metric_source/retries" + "time" +) // Config represents config from remote storage. type Config struct { @@ -13,4 +16,6 @@ type Config struct { RetrySeconds []time.Duration HealthCheckTimeout time.Duration HealthCheckRetrySeconds []time.Duration + Retries retries.Config + HealthcheckRetries *retries.Config } diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index bc2130113..fc06e6a14 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -2,6 +2,7 @@ package remote import ( "fmt" + "github.com/moira-alert/moira/metric_source/retries" "net/http" "time" @@ -38,9 +39,12 @@ func (err ErrRemoteUnavailable) Error() string { // Remote is implementation of MetricSource interface, which implements fetch metrics method from remote graphite installation. type Remote struct { - config *Config - client *http.Client - clock moira.Clock + config *Config + client *http.Client + clock moira.Clock + retrier retries.Retrier[[]byte] + requestBackoffFactory retries.BackoffFactory + healthcheckBackoffFactory retries.BackoffFactory } // Create configures remote metric source. @@ -49,10 +53,25 @@ func Create(config *Config) (metricSource.MetricSource, error) { return nil, fmt.Errorf("remote graphite URL should not be empty") } + var ( + requestBackoffFactory retries.BackoffFactory + healthcheckBackoffFactory retries.BackoffFactory + ) + + requestBackoffFactory = retries.NewExponentialBackoffFactory(config.Retries) + if config.HealthcheckRetries != nil { + healthcheckBackoffFactory = retries.NewExponentialBackoffFactory(*config.HealthcheckRetries) + } else { + healthcheckBackoffFactory = requestBackoffFactory + } + return &Remote{ - config: config, - client: &http.Client{Timeout: config.Timeout}, - clock: clock.NewSystemClock(), + config: config, + client: &http.Client{Timeout: config.Timeout}, + clock: clock.NewSystemClock(), + retrier: retries.NewStandardRetrier[[]byte](), + requestBackoffFactory: requestBackoffFactory, + healthcheckBackoffFactory: healthcheckBackoffFactory, }, nil } @@ -70,19 +89,9 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert } } - body, isRemoteAvailable, err := remote.makeRequestWithRetries(req, remote.config.Timeout, remote.config.RetrySeconds) + body, err := remote.makeRequest(req) if err != nil { - if isRemoteAvailable { - return nil, ErrRemoteTriggerResponse{ - InternalError: err, - Target: target, - } - } - - return nil, ErrRemoteUnavailable{ - InternalError: err, - Target: target, - } + return nil, internalErrToPublicErr(err, target) } resp, err := decodeBody(body) @@ -112,8 +121,8 @@ func (remote *Remote) IsAvailable() (bool, error) { return false, err } - _, isRemoteAvailable, err := remote.makeRequestWithRetries( - req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds) + _, err = remote.makeRequest(req) + err = internalErrToPublicErr(err, "") - return isRemoteAvailable, err + return !isRemoteUnavailable(err), err } diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 5167212d5..443a71bcc 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -2,7 +2,9 @@ package remote import ( "context" + "errors" "fmt" + "github.com/cenkalti/backoff/v4" "io" "net/http" "strconv" @@ -29,76 +31,118 @@ func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Re return req, nil } -func (remote *Remote) makeRequest(req *http.Request) (body []byte, isRemoteAvailable bool, err error) { - resp, err := remote.client.Do(req) +func (remote *Remote) makeRequest(req *http.Request) ([]byte, error) { + return remote.retrier.Retry( + requestToRemoteGraphite{ + client: remote.client, + request: req, + requestTimeout: remote.config.Timeout, + }, + remote.requestBackoffFactory.NewBackOff()) +} + +type requestToRemoteGraphite struct { + client *http.Client + request *http.Request + requestTimeout time.Duration +} + +func (r requestToRemoteGraphite) DoRetryableOperation() ([]byte, error) { + req := r.request + if r.requestTimeout > 0 { + ctx, cancel := context.WithTimeout(context.Background(), r.requestTimeout) + defer cancel() + req = r.request.WithContext(ctx) + } + + resp, err := r.client.Do(req) if resp != nil { defer resp.Body.Close() } if err != nil { - return body, false, fmt.Errorf( - "the remote server is not available or the response was reset by timeout. Url: %s, Error: %w ", - req.URL.String(), - err, - ) + return nil, errRemoteUnavailable{ + internalErr: fmt.Errorf( + "the remote server is not available or the response was reset by timeout. Url: %s, Error: %w ", + req.URL.String(), + err), + } } - body, err = io.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) if err != nil { - return body, false, err + return body, errRemoteUnavailable{internalErr: err} } if isRemoteUnavailableStatusCode(resp.StatusCode) { - return body, false, fmt.Errorf( - "the remote server is not available. Response status %d: %s", - resp.StatusCode, string(body)) + return body, errRemoteUnavailable{ + internalErr: fmt.Errorf( + "the remote server is not available. Response status %d: %s", + resp.StatusCode, string(body)), + } } else if resp.StatusCode != http.StatusOK { - return body, true, fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body)) + return body, backoff.Permanent( + errInvalidRequest{ + internalErr: fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body)), + }) } - return body, true, nil + return body, nil } -var remoteUnavailableStatusCodes = map[int]struct{}{ - http.StatusUnauthorized: {}, - http.StatusBadGateway: {}, - http.StatusServiceUnavailable: {}, - http.StatusGatewayTimeout: {}, +type errInvalidRequest struct { + internalErr error } -func isRemoteUnavailableStatusCode(statusCode int) bool { - _, isUnavailableCode := remoteUnavailableStatusCodes[statusCode] - return isUnavailableCode +func (err errInvalidRequest) Error() string { + return err.internalErr.Error() } -func (remote *Remote) makeRequestWithRetries( - req *http.Request, - requestTimeout time.Duration, - retrySeconds []time.Duration, -) (body []byte, isRemoteAvailable bool, err error) { - for attemptIndex := 0; attemptIndex < len(retrySeconds)+1; attemptIndex++ { - body, isRemoteAvailable, err = remote.makeRequestWithTimeout(req, requestTimeout) +type errRemoteUnavailable struct { + internalErr error +} - if err == nil || isRemoteAvailable { - return body, true, err +func (err errRemoteUnavailable) Error() string { + return err.internalErr.Error() +} + +func isRemoteUnavailable(err error) bool { + var errUnavailable ErrRemoteUnavailable + return errors.As(err, &errUnavailable) +} + +func internalErrToPublicErr(err error, target string) error { + if err == nil { + return nil + } + + var invalidReqErr errInvalidRequest + if errors.As(err, &invalidReqErr) { + return ErrRemoteTriggerResponse{ + InternalError: invalidReqErr.internalErr, + Target: target, } + } - if attemptIndex < len(retrySeconds) { - remote.clock.Sleep(retrySeconds[attemptIndex]) + var errUnavailable errRemoteUnavailable + if errors.As(err, &errUnavailable) { + return ErrRemoteUnavailable{ + InternalError: errUnavailable.internalErr, + Target: target, } } - return nil, false, err + + return ErrRemoteTriggerResponse{} } -func (remote *Remote) makeRequestWithTimeout( - req *http.Request, - requestTimeout time.Duration, -) (body []byte, isRemoteAvailable bool, err error) { - if requestTimeout > 0 { - ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) - defer cancel() - req = req.WithContext(ctx) - } +var remoteUnavailableStatusCodes = map[int]struct{}{ + http.StatusUnauthorized: {}, + http.StatusBadGateway: {}, + http.StatusServiceUnavailable: {}, + http.StatusGatewayTimeout: {}, +} - return remote.makeRequest(req) +func isRemoteUnavailableStatusCode(statusCode int) bool { + _, isUnavailableCode := remoteUnavailableStatusCodes[statusCode] + return isUnavailableCode } diff --git a/metric_source/retries/retrier.go b/metric_source/retries/retrier.go index a7e4bd331..24a8b38c7 100644 --- a/metric_source/retries/retrier.go +++ b/metric_source/retries/retrier.go @@ -12,8 +12,7 @@ type Retrier[T any] interface { type standardRetrier[T any] struct{} -// NewStandardRetrier returns standard retrier which will perform retries -// according to backoff policy provided by BackoffFactory. +// NewStandardRetrier returns standard retrier. func NewStandardRetrier[T any]() Retrier[T] { return standardRetrier[T]{} } From 7826e24dad51d426b4f8a7948b401084cb796765 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Fri, 11 Oct 2024 17:17:00 +0700 Subject: [PATCH 17/28] refactor: configs add tests for configs --- cmd/config.go | 70 +++-- local/api.yml | 14 +- local/checker.yml | 14 +- local/notifier.yml | 14 +- metric_source/remote/config.go | 53 +++- metric_source/remote/config_test.go | 57 ++++ metric_source/remote/remote.go | 26 +- metric_source/remote/remote_test.go | 394 ++++++++++++------------- metric_source/remote/request.go | 6 +- metric_source/remote/request_test.go | 414 +++++++++++++-------------- metric_source/retries/config.go | 30 +- metric_source/retries/config_test.go | 78 +++++ 12 files changed, 678 insertions(+), 492 deletions(-) create mode 100644 metric_source/remote/config_test.go create mode 100644 metric_source/retries/config_test.go diff --git a/cmd/config.go b/cmd/config.go index 2a145125f..6b74b98bb 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -3,13 +3,11 @@ package cmd import ( "errors" "fmt" - "os" - "strconv" - "strings" - "time" - "github.com/moira-alert/moira" + "github.com/moira-alert/moira/metric_source/retries" "github.com/moira-alert/moira/metrics" + "os" + "strings" "github.com/moira-alert/moira/image_store/s3" prometheusRemoteSource "github.com/moira-alert/moira/metric_source/prometheus" @@ -254,54 +252,50 @@ type RetriesConfig struct { MaxRetriesCount uint64 `yaml:"max_retries_count"` } +func (config RetriesConfig) getRetriesSettings() retries.Config { + return retries.Config{ + InitialInterval: to.Duration(config.InitialInterval), + RandomizationFactor: config.RandomizationFactor, + Multiplier: config.Multiplier, + MaxInterval: to.Duration(config.MaxInterval), + MaxElapsedTime: to.Duration(config.MaxElapsedTime), + MaxRetriesCount: config.MaxRetriesCount, + } +} + // GraphiteRemoteConfig is remote graphite settings structure. type GraphiteRemoteConfig struct { RemoteCommonConfig `yaml:",inline"` - // Timeout for remote requests + // Timeout for remote requests. Timeout string `yaml:"timeout"` - // Username for basic auth + // Username for basic auth. User string `yaml:"user"` - // Password for basic auth + // Password for basic auth. Password string `yaml:"password"` - // Retry seconds for remote requests divided by spaces - RetrySeconds string `yaml:"retry_seconds"` - // HealthCheckTimeout is timeout for remote api health check requests - HealthCheckTimeout string `yaml:"health_check_timeout"` - // Retry seconds for remote api health check requests divided by spaces - HealthCheckRetrySeconds string `yaml:"health_check_retry_seconds"` + // Retries configuration for general requests to remote graphite. + Retries RetriesConfig `yaml:"retries"` + // HealthcheckTimeout is timeout for remote api health check requests. + HealthcheckTimeout string `yaml:"health_check_timeout"` + // HealthCheckRetries configuration for healthcheck requests to remote graphite. + HealthCheckRetries RetriesConfig `yaml:"health_check_retries"` } func (config GraphiteRemoteConfig) getRemoteCommon() *RemoteCommonConfig { return &config.RemoteCommonConfig } -// ParseRetrySeconds parses config value string into array of integers. -func ParseRetrySeconds(retrySecondsString string) []time.Duration { - secondsStringList := strings.Fields(retrySecondsString) - retrySecondsIntList := make([]time.Duration, len(secondsStringList)) - - for index, secondsString := range secondsStringList { - secondsInt, err := strconv.Atoi(secondsString) - if err != nil { - panic(err) - } - retrySecondsIntList[index] = time.Second * time.Duration(secondsInt) - } - return retrySecondsIntList -} - // GetRemoteSourceSettings returns remote config parsed from moira config files. func (config *GraphiteRemoteConfig) GetRemoteSourceSettings() *graphiteRemoteSource.Config { return &graphiteRemoteSource.Config{ - URL: config.URL, - CheckInterval: to.Duration(config.CheckInterval), - MetricsTTL: to.Duration(config.MetricsTTL), - Timeout: to.Duration(config.Timeout), - User: config.User, - Password: config.Password, - RetrySeconds: ParseRetrySeconds(config.RetrySeconds), - HealthCheckTimeout: to.Duration(config.HealthCheckTimeout), - HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds), + URL: config.URL, + CheckInterval: to.Duration(config.CheckInterval), + MetricsTTL: to.Duration(config.MetricsTTL), + Timeout: to.Duration(config.Timeout), + User: config.User, + Password: config.Password, + Retries: config.Retries.getRetriesSettings(), + HealthcheckTimeout: to.Duration(config.HealthcheckTimeout), + HealthcheckRetries: config.HealthCheckRetries.getRetriesSettings(), } } diff --git a/local/api.yml b/local/api.yml index 1ab328198..4c057c974 100644 --- a/local/api.yml +++ b/local/api.yml @@ -19,9 +19,19 @@ graphite_remote: check_interval: 60s metrics_ttl: 168h timeout: 60s - retry_seconds: 1 1 1 + retries: + initial_interval: 60s + randomization_factor: 0.5 + multiplier: 1.5 + max_interval: 120s + max_retries_count: 3 health_check_timeout: 6s - health_check_retry_seconds: 1 1 1 + health_check_retries: + initial_interval: 20s + randomization_factor: 0.5 + multiplier: 1.5 + max_interval: 80s + max_retries_count: 3 prometheus_remote: - cluster_id: default cluster_name: Prometheus 1 diff --git a/local/checker.yml b/local/checker.yml index ab764d816..30f7e0bb9 100644 --- a/local/checker.yml +++ b/local/checker.yml @@ -21,9 +21,19 @@ graphite_remote: check_interval: 60s metrics_ttl: 168h timeout: 60s - retry_seconds: 1 1 1 + retries: + initial_interval: 60s + randomization_factor: 0.5 + multiplier: 1.5 + max_interval: 120s + max_retries_count: 3 health_check_timeout: 6s - health_check_retry_seconds: 1 1 1 + health_check_retries: + initial_interval: 20s + randomization_factor: 0.5 + multiplier: 1.5 + max_interval: 80s + max_retries_count: 3 prometheus_remote: - cluster_id: default cluster_name: Prometheus 1 diff --git a/local/notifier.yml b/local/notifier.yml index 1188b4c47..d23f24955 100644 --- a/local/notifier.yml +++ b/local/notifier.yml @@ -19,9 +19,19 @@ graphite_remote: check_interval: 60s metrics_ttl: 168h timeout: 60s - retry_seconds: 1 1 1 + retries: + initial_interval: 60s + randomization_factor: 0.5 + multiplier: 1.5 + max_interval: 120s + max_retries_count: 3 health_check_timeout: 6s - health_check_retry_seconds: 1 1 1 + health_check_retries: + initial_interval: 20s + randomization_factor: 0.5 + multiplier: 1.5 + max_interval: 80s + max_retries_count: 3 prometheus_remote: - cluster_id: default cluster_name: Prometheus 1 diff --git a/metric_source/remote/config.go b/metric_source/remote/config.go index 78f66f30e..2fbb0c29d 100644 --- a/metric_source/remote/config.go +++ b/metric_source/remote/config.go @@ -1,21 +1,52 @@ package remote import ( + "errors" "github.com/moira-alert/moira/metric_source/retries" "time" ) // Config represents config from remote storage. type Config struct { - URL string - CheckInterval time.Duration - MetricsTTL time.Duration - Timeout time.Duration - User string - Password string - RetrySeconds []time.Duration - HealthCheckTimeout time.Duration - HealthCheckRetrySeconds []time.Duration - Retries retries.Config - HealthcheckRetries *retries.Config + URL string + CheckInterval time.Duration + MetricsTTL time.Duration + Timeout time.Duration + User string + Password string + HealthcheckTimeout time.Duration + Retries retries.Config + HealthcheckRetries retries.Config +} + +var ( + errBadRemoteUrl = errors.New("remote graphite URL should not be empty") + errNoTimeout = errors.New("timeout must be specified and can't be 0") + errNoHealthcheckTimeout = errors.New("healthcheck_timeout must be specified and can't be 0") +) + +func (conf Config) validate() error { + resErrors := make([]error, 0) + + if conf.URL == "" { + resErrors = append(resErrors, errBadRemoteUrl) + } + + if conf.Timeout == 0 { + resErrors = append(resErrors, errNoTimeout) + } + + if conf.HealthcheckTimeout == 0 { + resErrors = append(resErrors, errNoHealthcheckTimeout) + } + + if errRetriesValidate := conf.Retries.Validate(); errRetriesValidate != nil { + resErrors = append(resErrors, errRetriesValidate) + } + + if errHealthcheckRetriesValidate := conf.HealthcheckRetries.Validate(); errHealthcheckRetriesValidate != nil { + resErrors = append(resErrors, errHealthcheckRetriesValidate) + } + + return errors.Join(resErrors...) } diff --git a/metric_source/remote/config_test.go b/metric_source/remote/config_test.go new file mode 100644 index 000000000..ddf5f41ee --- /dev/null +++ b/metric_source/remote/config_test.go @@ -0,0 +1,57 @@ +package remote + +import ( + "errors" + "fmt" + "github.com/moira-alert/moira/metric_source/retries" + "testing" + "time" + + . "github.com/smartystreets/goconvey/convey" +) + +func TestConfig_validate(t *testing.T) { + Convey("Test validating retries config", t, func() { + type testcase struct { + caseDesc string + conf Config + expectedErr error + } + + var ( + testInitialInterval = time.Second * 5 + testMaxInterval = time.Second * 10 + testRetriesCount uint64 = 10 + ) + + testRetriesConf := retries.Config{ + InitialInterval: testInitialInterval, + MaxInterval: testMaxInterval, + MaxRetriesCount: testRetriesCount, + } + + cases := []testcase{ + { + caseDesc: "with empty config", + conf: Config{}, + expectedErr: errors.Join(errBadRemoteUrl, errNoTimeout, errNoHealthcheckTimeout, retries.Config{}.Validate(), retries.Config{}.Validate()), + }, + { + caseDesc: "with retries config set", + conf: Config{ + Retries: testRetriesConf, + HealthcheckRetries: testRetriesConf, + }, + expectedErr: errors.Join(errBadRemoteUrl, errNoTimeout, errNoHealthcheckTimeout), + }, + } + + for i := range cases { + Convey(fmt.Sprintf("Case %d: %s", i+1, cases[i].caseDesc), func() { + err := cases[i].conf.validate() + + So(err, ShouldResemble, cases[i].expectedErr) + }) + } + }) +} diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index fc06e6a14..80c437383 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -49,29 +49,17 @@ type Remote struct { // Create configures remote metric source. func Create(config *Config) (metricSource.MetricSource, error) { - if config.URL == "" { - return nil, fmt.Errorf("remote graphite URL should not be empty") - } - - var ( - requestBackoffFactory retries.BackoffFactory - healthcheckBackoffFactory retries.BackoffFactory - ) - - requestBackoffFactory = retries.NewExponentialBackoffFactory(config.Retries) - if config.HealthcheckRetries != nil { - healthcheckBackoffFactory = retries.NewExponentialBackoffFactory(*config.HealthcheckRetries) - } else { - healthcheckBackoffFactory = requestBackoffFactory + if err := config.validate(); err != nil { + return nil, err } return &Remote{ config: config, - client: &http.Client{Timeout: config.Timeout}, + client: &http.Client{}, clock: clock.NewSystemClock(), retrier: retries.NewStandardRetrier[[]byte](), - requestBackoffFactory: requestBackoffFactory, - healthcheckBackoffFactory: healthcheckBackoffFactory, + requestBackoffFactory: retries.NewExponentialBackoffFactory(config.Retries), + healthcheckBackoffFactory: retries.NewExponentialBackoffFactory(config.HealthcheckRetries), }, nil } @@ -89,7 +77,7 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert } } - body, err := remote.makeRequest(req) + body, err := remote.makeRequest(req, remote.config.Timeout, remote.requestBackoffFactory.NewBackOff()) if err != nil { return nil, internalErrToPublicErr(err, target) } @@ -121,7 +109,7 @@ func (remote *Remote) IsAvailable() (bool, error) { return false, err } - _, err = remote.makeRequest(req) + _, err = remote.makeRequest(req, remote.config.HealthcheckTimeout, remote.healthcheckBackoffFactory.NewBackOff()) err = internalErrToPublicErr(err, "") return !isRemoteUnavailable(err), err diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index a120f2112..714431a10 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -1,206 +1,192 @@ package remote -import ( - "fmt" - "net/http" - "testing" - "time" - - "go.uber.org/mock/gomock" - - mock_clock "github.com/moira-alert/moira/mock/clock" - - metricSource "github.com/moira-alert/moira/metric_source" - . "github.com/smartystreets/goconvey/convey" -) - -func TestIsRemoteAvailable(t *testing.T) { - mockCtrl := gomock.NewController(t) - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(0) - testConfigs := []*Config{ - {}, - {HealthCheckRetrySeconds: []time.Duration{time.Second}}, - {HealthCheckRetrySeconds: []time.Duration{time.Second}}, - {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, - {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, - {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, - } - body := []byte("Some string") - - Convey("Given server returns OK response the remote is available", t, func() { - server := createServer(body, http.StatusOK) - for _, config := range testConfigs { - config.URL = server.URL - remote := Remote{client: server.Client(), config: config} - isAvailable, err := remote.IsAvailable() - So(isAvailable, ShouldBeTrue) - So(err, ShouldBeEmpty) - } - }) - - Convey("Given server returns Remote Unavailable responses permanently", t, func() { - for statusCode := range remoteUnavailableStatusCodes { - server := createTestServer(TestResponse{body, statusCode}) - - Convey(fmt.Sprintf( - "request failed with %d response status code and remote is unavailable", statusCode, - ), func() { - remote := Remote{client: server.Client()} - for _, config := range testConfigs { - config.URL = server.URL - remote.config = config - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(len(config.HealthCheckRetrySeconds)) - remote.clock = systemClock - - isAvailable, err := remote.IsAvailable() - So(err, ShouldResemble, fmt.Errorf( - "the remote server is not available. Response status %d: %s", statusCode, string(body), - )) - So(isAvailable, ShouldBeFalse) - } - }) - } - }) - - Convey("Given server returns Remote Unavailable response temporary", t, func() { - for statusCode := range remoteUnavailableStatusCodes { - Convey(fmt.Sprintf( - "the remote is available with retry after %d response", statusCode, - ), func() { - for _, config := range testConfigs { - if len(config.HealthCheckRetrySeconds) == 0 { - continue - } - server := createTestServer( - TestResponse{body, statusCode}, - TestResponse{body, http.StatusOK}, - ) - config.URL = server.URL - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(1) - remote := Remote{client: server.Client(), config: config, clock: systemClock} - - isAvailable, err := remote.IsAvailable() - So(err, ShouldBeNil) - So(isAvailable, ShouldBeTrue) - } - }) - } - }) -} - -func TestFetch(t *testing.T) { - var from int64 = 300 - var until int64 = 500 - target := "foo.bar" //nolint - testConfigs := []*Config{ - {}, - {RetrySeconds: []time.Duration{time.Second}}, - {RetrySeconds: []time.Duration{time.Second}}, - {RetrySeconds: []time.Duration{time.Second, time.Second}}, - {RetrySeconds: []time.Duration{time.Second, time.Second}}, - {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, - } - mockCtrl := gomock.NewController(t) - validBody := []byte("[{\"Target\": \"t1\",\"DataPoints\":[[1,2],[3,4]]}]") - - Convey("Request success but body is invalid", t, func() { - server := createServer([]byte("[]"), http.StatusOK) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - result, err := remote.Fetch(target, from, until, false) - So(result, ShouldResemble, &FetchResult{MetricsData: []metricSource.MetricData{}}) - So(err, ShouldBeEmpty) - }) - - Convey("Request success but body is invalid", t, func() { - server := createServer([]byte("Some string"), http.StatusOK) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - result, err := remote.Fetch(target, from, until, false) - So(result, ShouldBeEmpty) - So(err.Error(), ShouldResemble, "invalid character 'S' looking for beginning of value") - So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) - }) - - Convey("Fail request with InternalServerError", t, func() { - server := createServer([]byte("Some string"), http.StatusInternalServerError) - remote := Remote{client: server.Client()} - for _, config := range testConfigs { - config.URL = server.URL - remote.config = config - result, err := remote.Fetch(target, from, until, false) - So(result, ShouldBeEmpty) - So(err.Error(), ShouldResemble, fmt.Sprintf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) - So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) - } - }) - - Convey("Client calls bad url", t, func() { - url := "💩%$&TR" - for _, config := range testConfigs { - config.URL = url - remote := Remote{config: config} - result, err := remote.Fetch(target, from, until, false) - So(result, ShouldBeEmpty) - So(err.Error(), ShouldResemble, "parse \"💩%$&TR\": invalid URL escape \"%$&\"") - So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) - } - }) - - Convey("Given server returns Remote Unavailable responses permanently", t, func() { - for statusCode := range remoteUnavailableStatusCodes { - server := createTestServer(TestResponse{validBody, statusCode}) - - Convey(fmt.Sprintf( - "request failed with %d response status code and remote is unavailable", statusCode, - ), func() { - remote := Remote{client: server.Client()} - for _, config := range testConfigs { - config.URL = server.URL - remote.config = config - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) - remote.clock = systemClock - - result, err := remote.Fetch(target, from, until, false) - So(err, ShouldResemble, ErrRemoteUnavailable{ - InternalError: fmt.Errorf( - "the remote server is not available. Response status %d: %s", statusCode, string(validBody), - ), Target: target, - }) - So(result, ShouldBeNil) - } - }) - } - }) - - Convey("Given server returns Remote Unavailable response temporary", t, func() { - for statusCode := range remoteUnavailableStatusCodes { - Convey(fmt.Sprintf( - "the remote is available with retry after %d response", statusCode, - ), func() { - for _, config := range testConfigs { - if len(config.RetrySeconds) == 0 { - continue - } - server := createTestServer( - TestResponse{validBody, statusCode}, - TestResponse{validBody, http.StatusOK}, - ) - config.URL = server.URL - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(1) - remote := Remote{client: server.Client(), config: config, clock: systemClock} - - result, err := remote.Fetch(target, from, until, false) - So(err, ShouldBeNil) - So(result, ShouldNotBeNil) - metricsData := result.GetMetricsData() - So(len(metricsData), ShouldEqual, 1) - So(metricsData[0].Name, ShouldEqual, "t1") - } - }) - } - }) -} +//func TestIsRemoteAvailable(t *testing.T) { +// mockCtrl := gomock.NewController(t) +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(0) +// testConfigs := []*Config{ +// {}, +// {HealthCheckRetrySeconds: []time.Duration{time.Second}}, +// {HealthCheckRetrySeconds: []time.Duration{time.Second}}, +// {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, +// {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, +// {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, +// } +// body := []byte("Some string") +// +// Convey("Given server returns OK response the remote is available", t, func() { +// server := createServer(body, http.StatusOK) +// for _, config := range testConfigs { +// config.URL = server.URL +// remote := Remote{client: server.Client(), config: config} +// isAvailable, err := remote.IsAvailable() +// So(isAvailable, ShouldBeTrue) +// So(err, ShouldBeEmpty) +// } +// }) +// +// Convey("Given server returns Remote Unavailable responses permanently", t, func() { +// for statusCode := range remoteUnavailableStatusCodes { +// server := createTestServer(TestResponse{body, statusCode}) +// +// Convey(fmt.Sprintf( +// "request failed with %d response status code and remote is unavailable", statusCode, +// ), func() { +// remote := Remote{client: server.Client()} +// for _, config := range testConfigs { +// config.URL = server.URL +// remote.config = config +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(len(config.HealthCheckRetrySeconds)) +// remote.clock = systemClock +// +// isAvailable, err := remote.IsAvailable() +// So(err, ShouldResemble, fmt.Errorf( +// "the remote server is not available. Response status %d: %s", statusCode, string(body), +// )) +// So(isAvailable, ShouldBeFalse) +// } +// }) +// } +// }) +// +// Convey("Given server returns Remote Unavailable response temporary", t, func() { +// for statusCode := range remoteUnavailableStatusCodes { +// Convey(fmt.Sprintf( +// "the remote is available with retry after %d response", statusCode, +// ), func() { +// for _, config := range testConfigs { +// if len(config.HealthCheckRetrySeconds) == 0 { +// continue +// } +// server := createTestServer( +// TestResponse{body, statusCode}, +// TestResponse{body, http.StatusOK}, +// ) +// config.URL = server.URL +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(1) +// remote := Remote{client: server.Client(), config: config, clock: systemClock} +// +// isAvailable, err := remote.IsAvailable() +// So(err, ShouldBeNil) +// So(isAvailable, ShouldBeTrue) +// } +// }) +// } +// }) +//} +// +//func TestFetch(t *testing.T) { +// var from int64 = 300 +// var until int64 = 500 +// target := "foo.bar" //nolint +// testConfigs := []*Config{ +// {}, +// {RetrySeconds: []time.Duration{time.Second}}, +// {RetrySeconds: []time.Duration{time.Second}}, +// {RetrySeconds: []time.Duration{time.Second, time.Second}}, +// {RetrySeconds: []time.Duration{time.Second, time.Second}}, +// {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, +// } +// mockCtrl := gomock.NewController(t) +// validBody := []byte("[{\"Target\": \"t1\",\"DataPoints\":[[1,2],[3,4]]}]") +// +// Convey("Request success but body is invalid", t, func() { +// server := createServer([]byte("[]"), http.StatusOK) +// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} +// result, err := remote.Fetch(target, from, until, false) +// So(result, ShouldResemble, &FetchResult{MetricsData: []metricSource.MetricData{}}) +// So(err, ShouldBeEmpty) +// }) +// +// Convey("Request success but body is invalid", t, func() { +// server := createServer([]byte("Some string"), http.StatusOK) +// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} +// result, err := remote.Fetch(target, from, until, false) +// So(result, ShouldBeEmpty) +// So(err.Error(), ShouldResemble, "invalid character 'S' looking for beginning of value") +// So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) +// }) +// +// Convey("Fail request with InternalServerError", t, func() { +// server := createServer([]byte("Some string"), http.StatusInternalServerError) +// remote := Remote{client: server.Client()} +// for _, config := range testConfigs { +// config.URL = server.URL +// remote.config = config +// result, err := remote.Fetch(target, from, until, false) +// So(result, ShouldBeEmpty) +// So(err.Error(), ShouldResemble, fmt.Sprintf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) +// So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) +// } +// }) +// +// Convey("Client calls bad url", t, func() { +// url := "💩%$&TR" +// for _, config := range testConfigs { +// config.URL = url +// remote := Remote{config: config} +// result, err := remote.Fetch(target, from, until, false) +// So(result, ShouldBeEmpty) +// So(err.Error(), ShouldResemble, "parse \"💩%$&TR\": invalid URL escape \"%$&\"") +// So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) +// } +// }) +// +// Convey("Given server returns Remote Unavailable responses permanently", t, func() { +// for statusCode := range remoteUnavailableStatusCodes { +// server := createTestServer(TestResponse{validBody, statusCode}) +// +// Convey(fmt.Sprintf( +// "request failed with %d response status code and remote is unavailable", statusCode, +// ), func() { +// remote := Remote{client: server.Client()} +// for _, config := range testConfigs { +// config.URL = server.URL +// remote.config = config +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) +// remote.clock = systemClock +// +// result, err := remote.Fetch(target, from, until, false) +// So(err, ShouldResemble, ErrRemoteUnavailable{ +// InternalError: fmt.Errorf( +// "the remote server is not available. Response status %d: %s", statusCode, string(validBody), +// ), Target: target, +// }) +// So(result, ShouldBeNil) +// } +// }) +// } +// }) +// +// Convey("Given server returns Remote Unavailable response temporary", t, func() { +// for statusCode := range remoteUnavailableStatusCodes { +// Convey(fmt.Sprintf( +// "the remote is available with retry after %d response", statusCode, +// ), func() { +// for _, config := range testConfigs { +// if len(config.RetrySeconds) == 0 { +// continue +// } +// server := createTestServer( +// TestResponse{validBody, statusCode}, +// TestResponse{validBody, http.StatusOK}, +// ) +// config.URL = server.URL +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(1) +// remote := Remote{client: server.Client(), config: config, clock: systemClock} +// +// result, err := remote.Fetch(target, from, until, false) +// So(err, ShouldBeNil) +// So(result, ShouldNotBeNil) +// metricsData := result.GetMetricsData() +// So(len(metricsData), ShouldEqual, 1) +// So(metricsData[0].Name, ShouldEqual, "t1") +// } +// }) +// } +// }) +//} diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 443a71bcc..42b1ee7e8 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -31,14 +31,14 @@ func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Re return req, nil } -func (remote *Remote) makeRequest(req *http.Request) ([]byte, error) { +func (remote *Remote) makeRequest(req *http.Request, timeout time.Duration, backoffPolicy backoff.BackOff) ([]byte, error) { return remote.retrier.Retry( requestToRemoteGraphite{ client: remote.client, request: req, - requestTimeout: remote.config.Timeout, + requestTimeout: timeout, }, - remote.requestBackoffFactory.NewBackOff()) + backoffPolicy) } type requestToRemoteGraphite struct { diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index e6f1dbe05..42c155ae9 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -1,16 +1,10 @@ package remote import ( - "fmt" + . "github.com/smartystreets/goconvey/convey" "net/http" "net/http/httptest" "testing" - "time" - - mock_clock "github.com/moira-alert/moira/mock/clock" - "go.uber.org/mock/gomock" - - . "github.com/smartystreets/goconvey/convey" ) func TestPrepareRequest(t *testing.T) { @@ -48,210 +42,210 @@ func TestPrepareRequest(t *testing.T) { }) } -func TestMakeRequest(t *testing.T) { - var from int64 = 300 - var until int64 = 500 - target := "foo.bar" - body := []byte("Some string") - - Convey("Client returns status OK", t, func() { - server := createServer(body, http.StatusOK) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequest(request) - So(err, ShouldBeNil) - So(isRemoteAvailable, ShouldBeTrue) - So(actual, ShouldResemble, body) - }) - - Convey("Client returns status InternalServerError", t, func() { - server := createServer(body, http.StatusInternalServerError) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequest(request) - So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) - So(isRemoteAvailable, ShouldBeTrue) - So(actual, ShouldResemble, body) - }) - - Convey("Client calls bad url", t, func() { - server := createServer(body, http.StatusOK) - client := server.Client() - client.Timeout = time.Millisecond - remote := Remote{client: client, config: &Config{URL: "http://bad/"}} - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequest(request) - So(err, ShouldNotBeEmpty) - So(isRemoteAvailable, ShouldBeFalse) - So(actual, ShouldBeEmpty) - }) - - Convey("Client returns status Remote Unavailable status codes", t, func() { - for statusCode := range remoteUnavailableStatusCodes { - server := createServer(body, statusCode) - remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequest(request) - So(err, ShouldResemble, fmt.Errorf( - "the remote server is not available. Response status %d: %s", statusCode, string(body), - )) - So(isRemoteAvailable, ShouldBeFalse) - So(actual, ShouldResemble, body) - } - }) -} - -func TestMakeRequestWithRetries(t *testing.T) { - var from int64 = 300 - var until int64 = 500 - target := "foo.bar" - body := []byte("Some string") - testConfigs := []*Config{ - {}, - {RetrySeconds: []time.Duration{time.Second}}, - {RetrySeconds: []time.Duration{time.Second}}, - {RetrySeconds: []time.Duration{time.Second, time.Second}}, - {RetrySeconds: []time.Duration{time.Second, time.Second}}, - {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, - } - mockCtrl := gomock.NewController(t) - - Convey("Given server returns OK response", t, func() { - server := createTestServer(TestResponse{body, http.StatusOK}) - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(0) +//func TestMakeRequest(t *testing.T) { +// var from int64 = 300 +// var until int64 = 500 +// target := "foo.bar" +// body := []byte("Some string") +// +// Convey("Client returns status OK", t, func() { +// server := createServer(body, http.StatusOK) +// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequest(request) +// So(err, ShouldBeNil) +// So(isRemoteAvailable, ShouldBeTrue) +// So(actual, ShouldResemble, body) +// }) +// +// Convey("Client returns status InternalServerError", t, func() { +// server := createServer(body, http.StatusInternalServerError) +// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequest(request) +// So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) +// So(isRemoteAvailable, ShouldBeTrue) +// So(actual, ShouldResemble, body) +// }) +// +// Convey("Client calls bad url", t, func() { +// server := createServer(body, http.StatusOK) +// client := server.Client() +// client.Timeout = time.Millisecond +// remote := Remote{client: client, config: &Config{URL: "http://bad/"}} +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequest(request) +// So(err, ShouldNotBeEmpty) +// So(isRemoteAvailable, ShouldBeFalse) +// So(actual, ShouldBeEmpty) +// }) +// +// Convey("Client returns status Remote Unavailable status codes", t, func() { +// for statusCode := range remoteUnavailableStatusCodes { +// server := createServer(body, statusCode) +// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequest(request) +// So(err, ShouldResemble, fmt.Errorf( +// "the remote server is not available. Response status %d: %s", statusCode, string(body), +// )) +// So(isRemoteAvailable, ShouldBeFalse) +// So(actual, ShouldResemble, body) +// } +// }) +//} - Convey("request is successful", func() { - remote := Remote{client: server.Client(), clock: systemClock} - - for _, config := range testConfigs { - config.URL = server.URL - remote.config = config - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequestWithRetries( - request, - remote.config.Timeout, - remote.config.RetrySeconds, - ) - So(err, ShouldBeNil) - So(isRemoteAvailable, ShouldBeTrue) - So(actual, ShouldResemble, body) - } - }) - }) - - Convey("Given server returns 500 response", t, func() { - server := createTestServer(TestResponse{body, http.StatusInternalServerError}) - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(0) - - Convey("request failed with 500 response and remote is available", func() { - remote := Remote{client: server.Client(), clock: systemClock} - - for _, config := range testConfigs { - config.URL = server.URL - remote.config = config - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequestWithRetries( - request, - remote.config.Timeout, - remote.config.RetrySeconds, - ) - So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) - So(isRemoteAvailable, ShouldBeTrue) - So(actual, ShouldResemble, body) - } - }) - }) - - Convey("Given client calls bad url", t, func() { - server := createTestServer(TestResponse{body, http.StatusOK}) - - Convey("request failed and remote is unavailable", func() { - remote := Remote{client: server.Client()} - for _, config := range testConfigs { - config.URL = "http://bad/" - remote.config = config - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) - remote.clock = systemClock - - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequestWithRetries( - request, - time.Millisecond, - remote.config.RetrySeconds, - ) - So(err, ShouldNotBeEmpty) - So(isRemoteAvailable, ShouldBeFalse) - So(actual, ShouldBeEmpty) - } - }) - }) - - Convey("Given server returns Remote Unavailable responses permanently", t, func() { - for statusCode := range remoteUnavailableStatusCodes { - server := createTestServer(TestResponse{body, statusCode}) - - Convey(fmt.Sprintf( - "request failed with %d response status code and remote is unavailable", statusCode, - ), func() { - remote := Remote{client: server.Client()} - for _, config := range testConfigs { - config.URL = server.URL - remote.config = config - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) - remote.clock = systemClock - - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequestWithRetries( - request, - remote.config.Timeout, - remote.config.RetrySeconds, - ) - So(err, ShouldResemble, fmt.Errorf( - "the remote server is not available. Response status %d: %s", statusCode, string(body), - )) - So(isRemoteAvailable, ShouldBeFalse) - So(actual, ShouldBeNil) - } - }) - } - }) - - Convey("Given server returns Remote Unavailable response temporary", t, func() { - for statusCode := range remoteUnavailableStatusCodes { - Convey(fmt.Sprintf( - "request is successful with retry after %d response and remote is available", statusCode, - ), func() { - for _, config := range testConfigs { - if len(config.RetrySeconds) == 0 { - continue - } - server := createTestServer( - TestResponse{body, statusCode}, - TestResponse{body, http.StatusOK}, - ) - config.URL = server.URL - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(1) - remote := Remote{client: server.Client(), config: config, clock: systemClock} - - request, _ := remote.prepareRequest(from, until, target) - actual, isRemoteAvailable, err := remote.makeRequestWithRetries( - request, - remote.config.Timeout, - remote.config.RetrySeconds, - ) - So(err, ShouldBeNil) - So(isRemoteAvailable, ShouldBeTrue) - So(actual, ShouldResemble, body) - } - }) - } - }) -} +//func TestMakeRequestWithRetries(t *testing.T) { +// var from int64 = 300 +// var until int64 = 500 +// target := "foo.bar" +// body := []byte("Some string") +// testConfigs := []*Config{ +// {}, +// {RetrySeconds: []time.Duration{time.Second}}, +// {RetrySeconds: []time.Duration{time.Second}}, +// {RetrySeconds: []time.Duration{time.Second, time.Second}}, +// {RetrySeconds: []time.Duration{time.Second, time.Second}}, +// {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, +// } +// mockCtrl := gomock.NewController(t) +// +// Convey("Given server returns OK response", t, func() { +// server := createTestServer(TestResponse{body, http.StatusOK}) +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(0) +// +// Convey("request is successful", func() { +// remote := Remote{client: server.Client(), clock: systemClock} +// +// for _, config := range testConfigs { +// config.URL = server.URL +// remote.config = config +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( +// request, +// remote.config.Timeout, +// remote.config.RetrySeconds, +// ) +// So(err, ShouldBeNil) +// So(isRemoteAvailable, ShouldBeTrue) +// So(actual, ShouldResemble, body) +// } +// }) +// }) +// +// Convey("Given server returns 500 response", t, func() { +// server := createTestServer(TestResponse{body, http.StatusInternalServerError}) +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(0) +// +// Convey("request failed with 500 response and remote is available", func() { +// remote := Remote{client: server.Client(), clock: systemClock} +// +// for _, config := range testConfigs { +// config.URL = server.URL +// remote.config = config +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( +// request, +// remote.config.Timeout, +// remote.config.RetrySeconds, +// ) +// So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) +// So(isRemoteAvailable, ShouldBeTrue) +// So(actual, ShouldResemble, body) +// } +// }) +// }) +// +// Convey("Given client calls bad url", t, func() { +// server := createTestServer(TestResponse{body, http.StatusOK}) +// +// Convey("request failed and remote is unavailable", func() { +// remote := Remote{client: server.Client()} +// for _, config := range testConfigs { +// config.URL = "http://bad/" +// remote.config = config +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) +// remote.clock = systemClock +// +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( +// request, +// time.Millisecond, +// remote.config.RetrySeconds, +// ) +// So(err, ShouldNotBeEmpty) +// So(isRemoteAvailable, ShouldBeFalse) +// So(actual, ShouldBeEmpty) +// } +// }) +// }) +// +// Convey("Given server returns Remote Unavailable responses permanently", t, func() { +// for statusCode := range remoteUnavailableStatusCodes { +// server := createTestServer(TestResponse{body, statusCode}) +// +// Convey(fmt.Sprintf( +// "request failed with %d response status code and remote is unavailable", statusCode, +// ), func() { +// remote := Remote{client: server.Client()} +// for _, config := range testConfigs { +// config.URL = server.URL +// remote.config = config +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) +// remote.clock = systemClock +// +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( +// request, +// remote.config.Timeout, +// remote.config.RetrySeconds, +// ) +// So(err, ShouldResemble, fmt.Errorf( +// "the remote server is not available. Response status %d: %s", statusCode, string(body), +// )) +// So(isRemoteAvailable, ShouldBeFalse) +// So(actual, ShouldBeNil) +// } +// }) +// } +// }) +// +// Convey("Given server returns Remote Unavailable response temporary", t, func() { +// for statusCode := range remoteUnavailableStatusCodes { +// Convey(fmt.Sprintf( +// "request is successful with retry after %d response and remote is available", statusCode, +// ), func() { +// for _, config := range testConfigs { +// if len(config.RetrySeconds) == 0 { +// continue +// } +// server := createTestServer( +// TestResponse{body, statusCode}, +// TestResponse{body, http.StatusOK}, +// ) +// config.URL = server.URL +// systemClock := mock_clock.NewMockClock(mockCtrl) +// systemClock.EXPECT().Sleep(time.Second).Times(1) +// remote := Remote{client: server.Client(), config: config, clock: systemClock} +// +// request, _ := remote.prepareRequest(from, until, target) +// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( +// request, +// remote.config.Timeout, +// remote.config.RetrySeconds, +// ) +// So(err, ShouldBeNil) +// So(isRemoteAvailable, ShouldBeTrue) +// So(actual, ShouldResemble, body) +// } +// }) +// } +// }) +//} func createServer(body []byte, statusCode int) *httptest.Server { return httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { diff --git a/metric_source/retries/config.go b/metric_source/retries/config.go index 9584a115c..f32cd0d44 100644 --- a/metric_source/retries/config.go +++ b/metric_source/retries/config.go @@ -1,6 +1,9 @@ package retries -import "time" +import ( + "errors" + "time" +) // Config for exponential backoff retries. type Config struct { @@ -20,3 +23,28 @@ type Config struct { // MaxRetriesCount is the amount of allowed retries. So at most MaxRetriesCount will be performed. MaxRetriesCount uint64 } + +var ( + errNoInitialInterval = errors.New("initial_interval must be specified and can't be 0") + errNoMaxInterval = errors.New("max_interval must be specified and can't be 0") + errNoMaxElapsedTimeAndMaxRetriesCount = errors.New("at least one of max_elapsed_time, max_retries_count must be specified") +) + +// Validate checks that retries Config has all necessary fields. +func (conf Config) Validate() error { + resErrors := make([]error, 0) + + if conf.InitialInterval == 0 { + resErrors = append(resErrors, errNoInitialInterval) + } + + if conf.MaxInterval == 0 { + resErrors = append(resErrors, errNoMaxInterval) + } + + if conf.MaxElapsedTime == 0 && conf.MaxRetriesCount == 0 { + resErrors = append(resErrors, errNoMaxElapsedTimeAndMaxRetriesCount) + } + + return errors.Join(resErrors...) +} diff --git a/metric_source/retries/config_test.go b/metric_source/retries/config_test.go new file mode 100644 index 000000000..a20e75a12 --- /dev/null +++ b/metric_source/retries/config_test.go @@ -0,0 +1,78 @@ +package retries + +import ( + "errors" + "fmt" + "testing" + "time" + + . "github.com/smartystreets/goconvey/convey" +) + +func TestConfig_Validate(t *testing.T) { + Convey("Test validating retries config", t, func() { + type testcase struct { + caseDesc string + conf Config + expectedErr error + } + + var ( + testRetriesCount uint64 = 10 + testMaxElapsedTIme = time.Second * 10 + ) + + cases := []testcase{ + { + caseDesc: "with empty config", + conf: Config{}, + expectedErr: errors.Join(errNoInitialInterval, errNoMaxInterval, errNoMaxElapsedTimeAndMaxRetriesCount), + }, + { + caseDesc: "with only InitialInterval set", + conf: Config{ + InitialInterval: testInitialInterval, + }, + expectedErr: errors.Join(errNoMaxInterval, errNoMaxElapsedTimeAndMaxRetriesCount), + }, + { + caseDesc: "with only MaxInterval set", + conf: Config{ + MaxInterval: testMaxInterval, + }, + expectedErr: errors.Join(errNoInitialInterval, errNoMaxElapsedTimeAndMaxRetriesCount), + }, + { + caseDesc: "with only MaxRetriesCount set", + conf: Config{ + MaxRetriesCount: testRetriesCount, + }, + expectedErr: errors.Join(errNoInitialInterval, errNoMaxInterval), + }, + { + caseDesc: "with only MaxElapsedTime set", + conf: Config{ + MaxElapsedTime: testMaxElapsedTIme, + }, + expectedErr: errors.Join(errNoInitialInterval, errNoMaxInterval), + }, + { + caseDesc: "with valid config", + conf: Config{ + InitialInterval: testInitialInterval, + MaxInterval: testMaxInterval, + MaxElapsedTime: testMaxElapsedTIme, + }, + expectedErr: nil, + }, + } + + for i := range cases { + Convey(fmt.Sprintf("Case %d: %s", i+1, cases[i].caseDesc), func() { + err := cases[i].conf.Validate() + + So(err, ShouldResemble, cases[i].expectedErr) + }) + } + }) +} From 0edd1c561b9f53f7782514760f6eb4ab07357e7e Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Fri, 11 Oct 2024 19:57:58 +0700 Subject: [PATCH 18/28] tests: fix for requests --- metric_source/remote/request.go | 10 +- metric_source/remote/request_test.go | 471 +++++++++++++++------------ 2 files changed, 271 insertions(+), 210 deletions(-) diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 42b1ee7e8..13f3c5cf1 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -48,12 +48,10 @@ type requestToRemoteGraphite struct { } func (r requestToRemoteGraphite) DoRetryableOperation() ([]byte, error) { - req := r.request - if r.requestTimeout > 0 { - ctx, cancel := context.WithTimeout(context.Background(), r.requestTimeout) - defer cancel() - req = r.request.WithContext(ctx) - } + ctx, cancel := context.WithTimeout(context.Background(), r.requestTimeout) + defer cancel() + + req := r.request.WithContext(ctx) resp, err := r.client.Do(req) if resp != nil { diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index 42c155ae9..8d88bb455 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -1,10 +1,16 @@ package remote import ( + "fmt" + "github.com/cenkalti/backoff/v4" + "github.com/moira-alert/moira/metric_source/retries" + mock_clock "github.com/moira-alert/moira/mock/clock" . "github.com/smartystreets/goconvey/convey" + "go.uber.org/mock/gomock" "net/http" "net/http/httptest" "testing" + "time" ) func TestPrepareRequest(t *testing.T) { @@ -42,210 +48,267 @@ func TestPrepareRequest(t *testing.T) { }) } -//func TestMakeRequest(t *testing.T) { -// var from int64 = 300 -// var until int64 = 500 -// target := "foo.bar" -// body := []byte("Some string") -// -// Convey("Client returns status OK", t, func() { -// server := createServer(body, http.StatusOK) -// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequest(request) -// So(err, ShouldBeNil) -// So(isRemoteAvailable, ShouldBeTrue) -// So(actual, ShouldResemble, body) -// }) -// -// Convey("Client returns status InternalServerError", t, func() { -// server := createServer(body, http.StatusInternalServerError) -// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequest(request) -// So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) -// So(isRemoteAvailable, ShouldBeTrue) -// So(actual, ShouldResemble, body) -// }) -// -// Convey("Client calls bad url", t, func() { -// server := createServer(body, http.StatusOK) -// client := server.Client() -// client.Timeout = time.Millisecond -// remote := Remote{client: client, config: &Config{URL: "http://bad/"}} -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequest(request) -// So(err, ShouldNotBeEmpty) -// So(isRemoteAvailable, ShouldBeFalse) -// So(actual, ShouldBeEmpty) -// }) -// -// Convey("Client returns status Remote Unavailable status codes", t, func() { -// for statusCode := range remoteUnavailableStatusCodes { -// server := createServer(body, statusCode) -// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequest(request) -// So(err, ShouldResemble, fmt.Errorf( -// "the remote server is not available. Response status %d: %s", statusCode, string(body), -// )) -// So(isRemoteAvailable, ShouldBeFalse) -// So(actual, ShouldResemble, body) -// } -// }) -//} - -//func TestMakeRequestWithRetries(t *testing.T) { -// var from int64 = 300 -// var until int64 = 500 -// target := "foo.bar" -// body := []byte("Some string") -// testConfigs := []*Config{ -// {}, -// {RetrySeconds: []time.Duration{time.Second}}, -// {RetrySeconds: []time.Duration{time.Second}}, -// {RetrySeconds: []time.Duration{time.Second, time.Second}}, -// {RetrySeconds: []time.Duration{time.Second, time.Second}}, -// {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, -// } -// mockCtrl := gomock.NewController(t) -// -// Convey("Given server returns OK response", t, func() { -// server := createTestServer(TestResponse{body, http.StatusOK}) -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(0) -// -// Convey("request is successful", func() { -// remote := Remote{client: server.Client(), clock: systemClock} -// -// for _, config := range testConfigs { -// config.URL = server.URL -// remote.config = config -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( -// request, -// remote.config.Timeout, -// remote.config.RetrySeconds, -// ) -// So(err, ShouldBeNil) -// So(isRemoteAvailable, ShouldBeTrue) -// So(actual, ShouldResemble, body) -// } -// }) -// }) -// -// Convey("Given server returns 500 response", t, func() { -// server := createTestServer(TestResponse{body, http.StatusInternalServerError}) -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(0) -// -// Convey("request failed with 500 response and remote is available", func() { -// remote := Remote{client: server.Client(), clock: systemClock} -// -// for _, config := range testConfigs { -// config.URL = server.URL -// remote.config = config -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( -// request, -// remote.config.Timeout, -// remote.config.RetrySeconds, -// ) -// So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body))) -// So(isRemoteAvailable, ShouldBeTrue) -// So(actual, ShouldResemble, body) -// } -// }) -// }) -// -// Convey("Given client calls bad url", t, func() { -// server := createTestServer(TestResponse{body, http.StatusOK}) -// -// Convey("request failed and remote is unavailable", func() { -// remote := Remote{client: server.Client()} -// for _, config := range testConfigs { -// config.URL = "http://bad/" -// remote.config = config -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) -// remote.clock = systemClock -// -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( -// request, -// time.Millisecond, -// remote.config.RetrySeconds, -// ) -// So(err, ShouldNotBeEmpty) -// So(isRemoteAvailable, ShouldBeFalse) -// So(actual, ShouldBeEmpty) -// } -// }) -// }) -// -// Convey("Given server returns Remote Unavailable responses permanently", t, func() { -// for statusCode := range remoteUnavailableStatusCodes { -// server := createTestServer(TestResponse{body, statusCode}) -// -// Convey(fmt.Sprintf( -// "request failed with %d response status code and remote is unavailable", statusCode, -// ), func() { -// remote := Remote{client: server.Client()} -// for _, config := range testConfigs { -// config.URL = server.URL -// remote.config = config -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) -// remote.clock = systemClock -// -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( -// request, -// remote.config.Timeout, -// remote.config.RetrySeconds, -// ) -// So(err, ShouldResemble, fmt.Errorf( -// "the remote server is not available. Response status %d: %s", statusCode, string(body), -// )) -// So(isRemoteAvailable, ShouldBeFalse) -// So(actual, ShouldBeNil) -// } -// }) -// } -// }) -// -// Convey("Given server returns Remote Unavailable response temporary", t, func() { -// for statusCode := range remoteUnavailableStatusCodes { -// Convey(fmt.Sprintf( -// "request is successful with retry after %d response and remote is available", statusCode, -// ), func() { -// for _, config := range testConfigs { -// if len(config.RetrySeconds) == 0 { -// continue -// } -// server := createTestServer( -// TestResponse{body, statusCode}, -// TestResponse{body, http.StatusOK}, -// ) -// config.URL = server.URL -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(1) -// remote := Remote{client: server.Client(), config: config, clock: systemClock} -// -// request, _ := remote.prepareRequest(from, until, target) -// actual, isRemoteAvailable, err := remote.makeRequestWithRetries( -// request, -// remote.config.Timeout, -// remote.config.RetrySeconds, -// ) -// So(err, ShouldBeNil) -// So(isRemoteAvailable, ShouldBeTrue) -// So(actual, ShouldResemble, body) -// } -// }) -// } -// }) -//} +func Test_requestToRemoteGraphite_DoRetryableOperation(t *testing.T) { + var from int64 = 300 + var until int64 = 500 + target := "foo.bar" + body := []byte("Some string") + + testTimeout := time.Millisecond * 10 + + Convey("Client returns status OK", t, func() { + server := createServer(body, http.StatusOK) + remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} + request, _ := remote.prepareRequest(from, until, target) + + retryableOp := requestToRemoteGraphite{ + request: request, + client: remote.client, + requestTimeout: testTimeout, + } + + actual, err := retryableOp.DoRetryableOperation() + + So(err, ShouldBeNil) + So(actual, ShouldResemble, body) + }) + + Convey("Client returns status InternalServerError", t, func() { + server := createServer(body, http.StatusInternalServerError) + remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} + request, _ := remote.prepareRequest(from, until, target) + + retryableOp := requestToRemoteGraphite{ + request: request, + client: remote.client, + requestTimeout: testTimeout, + } + + actual, err := retryableOp.DoRetryableOperation() + + So(err, ShouldResemble, backoff.Permanent(errInvalidRequest{ + internalErr: fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body)), + })) + So(actual, ShouldResemble, body) + }) + + Convey("Client calls bad url", t, func() { + server := createServer(body, http.StatusOK) + client := server.Client() + remote := Remote{client: client, config: &Config{URL: "http://bad/"}} + request, _ := remote.prepareRequest(from, until, target) + + retryableOp := requestToRemoteGraphite{ + request: request, + client: remote.client, + requestTimeout: testTimeout, + } + + actual, err := retryableOp.DoRetryableOperation() + + So(err, ShouldHaveSameTypeAs, errRemoteUnavailable{}) + So(actual, ShouldBeEmpty) + }) + + Convey("Client returns status Remote Unavailable status codes", t, func() { + for statusCode := range remoteUnavailableStatusCodes { + server := createServer(body, statusCode) + remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} + request, _ := remote.prepareRequest(from, until, target) + + retryableOp := requestToRemoteGraphite{ + request: request, + client: remote.client, + requestTimeout: testTimeout, + } + + actual, err := retryableOp.DoRetryableOperation() + + So(err, ShouldResemble, errRemoteUnavailable{ + internalErr: fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(body)), + }) + So(actual, ShouldResemble, body) + } + }) +} + +func TestMakeRequestWithRetries(t *testing.T) { + var from int64 = 300 + var until int64 = 500 + target := "foo.bar" + body := []byte("Some string") + testConfigs := []*Config{ + { + Timeout: time.Second, + Retries: retries.Config{ + InitialInterval: time.Millisecond, + RandomizationFactor: 0.5, + Multiplier: 2, + MaxInterval: time.Millisecond * 20, + MaxRetriesCount: 2, + }, + }, + } + mockCtrl := gomock.NewController(t) + retrier := retries.NewStandardRetrier[[]byte]() + + Convey("Given server returns OK response", t, func() { + server := createTestServer(TestResponse{body, http.StatusOK}) + systemClock := mock_clock.NewMockClock(mockCtrl) + systemClock.EXPECT().Sleep(time.Second).Times(0) + + Convey("request is successful", func() { + remote := Remote{ + client: server.Client(), + retrier: retrier, + } + + for _, config := range testConfigs { + config.URL = server.URL + remote.config = config + request, _ := remote.prepareRequest(from, until, target) + backoffPolicy := retries.NewExponentialBackoffFactory(config.Retries).NewBackOff() + + actual, err := remote.makeRequest( + request, + remote.config.Timeout, + backoffPolicy, + ) + + So(err, ShouldBeNil) + So(actual, ShouldResemble, body) + } + }) + }) + + Convey("Given server returns 500 response", t, func() { + server := createTestServer(TestResponse{body, http.StatusInternalServerError}) + expectedErr := errInvalidRequest{ + internalErr: fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body)), + } + //systemClock.EXPECT().Sleep(time.Second).Times(0) + + Convey("request failed with 500 response and remote is available", func() { + remote := Remote{ + client: server.Client(), + retrier: retrier, + } + + for _, config := range testConfigs { + config.URL = server.URL + remote.config = config + request, _ := remote.prepareRequest(from, until, target) + backoffPolicy := retries.NewExponentialBackoffFactory(config.Retries).NewBackOff() + + actual, err := remote.makeRequest( + request, + remote.config.Timeout, + backoffPolicy, + ) + + So(err, ShouldResemble, expectedErr) + So(actual, ShouldResemble, body) + } + }) + }) + + Convey("Given client calls bad url", t, func() { + server := createTestServer(TestResponse{body, http.StatusOK}) + + Convey("request failed and remote is unavailable", func() { + remote := Remote{ + client: server.Client(), + retrier: retrier, + } + + for _, config := range testConfigs { + config.URL = "http://bad/" + remote.config = config + + request, _ := remote.prepareRequest(from, until, target) + backoffPolicy := retries.NewExponentialBackoffFactory(config.Retries).NewBackOff() + + actual, err := remote.makeRequest( + request, + remote.config.Timeout, + backoffPolicy, + ) + + So(err, ShouldHaveSameTypeAs, errRemoteUnavailable{}) + So(actual, ShouldBeEmpty) + } + }) + }) + + Convey("Given server returns Remote Unavailable responses permanently", t, func() { + for statusCode := range remoteUnavailableStatusCodes { + server := createTestServer(TestResponse{body, statusCode}) + + Convey(fmt.Sprintf( + "request failed with %d response status code and remote is unavailable", statusCode, + ), func() { + remote := Remote{ + client: server.Client(), + retrier: retrier, + } + + for _, config := range testConfigs { + config.URL = server.URL + remote.config = config + + request, _ := remote.prepareRequest(from, until, target) + backoffPolicy := retries.NewExponentialBackoffFactory(config.Retries).NewBackOff() + + actual, err := remote.makeRequest( + request, + remote.config.Timeout, + backoffPolicy, + ) + + So(err, ShouldResemble, errRemoteUnavailable{ + internalErr: fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(body), + )}) + So(actual, ShouldResemble, body) + } + }) + } + }) + + Convey("Given server returns Remote Unavailable response temporary", t, func() { + for statusCode := range remoteUnavailableStatusCodes { + Convey(fmt.Sprintf( + "request is successful with retry after %d response and remote is available", statusCode, + ), func() { + for _, config := range testConfigs { + server := createTestServer( + TestResponse{body, statusCode}, + TestResponse{body, http.StatusOK}, + ) + config.URL = server.URL + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + } + + request, _ := remote.prepareRequest(from, until, target) + backoffPolicy := retries.NewExponentialBackoffFactory(config.Retries).NewBackOff() + + actual, err := remote.makeRequest( + request, + remote.config.Timeout, + backoffPolicy, + ) + + So(err, ShouldBeNil) + So(actual, ShouldResemble, body) + } + }) + } + }) +} func createServer(body []byte, statusCode int) *httptest.Server { return httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { From ab989634fe53809085418a00f459e993df284740 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Tue, 15 Oct 2024 10:18:04 +0700 Subject: [PATCH 19/28] refactor: add testcases to config_test for remote source, use linter --- metric_source/remote/config.go | 3 +- metric_source/remote/config_test.go | 33 ++++++++++++++++++- metric_source/remote/remote.go | 7 ++-- metric_source/remote/request.go | 5 ++- metric_source/remote/request_test.go | 14 ++++---- metric_source/retries/backoff_factory_test.go | 3 +- metric_source/retries/retrier_test.go | 7 ++-- 7 files changed, 53 insertions(+), 19 deletions(-) diff --git a/metric_source/remote/config.go b/metric_source/remote/config.go index 2fbb0c29d..4eb7f43c3 100644 --- a/metric_source/remote/config.go +++ b/metric_source/remote/config.go @@ -2,8 +2,9 @@ package remote import ( "errors" - "github.com/moira-alert/moira/metric_source/retries" "time" + + "github.com/moira-alert/moira/metric_source/retries" ) // Config represents config from remote storage. diff --git a/metric_source/remote/config_test.go b/metric_source/remote/config_test.go index ddf5f41ee..3d58fd270 100644 --- a/metric_source/remote/config_test.go +++ b/metric_source/remote/config_test.go @@ -3,10 +3,11 @@ package remote import ( "errors" "fmt" - "github.com/moira-alert/moira/metric_source/retries" "testing" "time" + "github.com/moira-alert/moira/metric_source/retries" + . "github.com/smartystreets/goconvey/convey" ) @@ -44,6 +45,36 @@ func TestConfig_validate(t *testing.T) { }, expectedErr: errors.Join(errBadRemoteUrl, errNoTimeout, errNoHealthcheckTimeout), }, + { + caseDesc: "with retries config set and some url", + conf: Config{ + URL: "http://test-graphite", + Retries: testRetriesConf, + HealthcheckRetries: testRetriesConf, + }, + expectedErr: errors.Join(errNoTimeout, errNoHealthcheckTimeout), + }, + { + caseDesc: "with retries config set, some url, timeout", + conf: Config{ + Timeout: time.Second, + URL: "http://test-graphite", + Retries: testRetriesConf, + HealthcheckRetries: testRetriesConf, + }, + expectedErr: errors.Join(errNoHealthcheckTimeout), + }, + { + caseDesc: "with valid config", + conf: Config{ + Timeout: time.Second, + HealthcheckTimeout: time.Millisecond, + URL: "http://test-graphite", + Retries: testRetriesConf, + HealthcheckRetries: testRetriesConf, + }, + expectedErr: nil, + }, } for i := range cases { diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index 80c437383..e68fa055d 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -1,20 +1,17 @@ package remote import ( - "fmt" - "github.com/moira-alert/moira/metric_source/retries" "net/http" "time" + "github.com/moira-alert/moira/metric_source/retries" + "github.com/moira-alert/moira/clock" "github.com/moira-alert/moira" metricSource "github.com/moira-alert/moira/metric_source" ) -// ErrRemoteStorageDisabled is used to prevent remote.Fetch calls when remote storage is disabled. -var ErrRemoteStorageDisabled = fmt.Errorf("remote graphite storage is not enabled") - // ErrRemoteTriggerResponse is a custom error when remote trigger check fails. type ErrRemoteTriggerResponse struct { InternalError error diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 13f3c5cf1..54696f7da 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -4,11 +4,12 @@ import ( "context" "errors" "fmt" - "github.com/cenkalti/backoff/v4" "io" "net/http" "strconv" "time" + + "github.com/cenkalti/backoff/v4" ) func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Request, error) { @@ -41,12 +42,14 @@ func (remote *Remote) makeRequest(req *http.Request, timeout time.Duration, back backoffPolicy) } +// requestToRemoteGraphite implements retries.RetryableOperation. type requestToRemoteGraphite struct { client *http.Client request *http.Request requestTimeout time.Duration } +// DoRetryableOperation is a one attempt of performing request to remote graphite. func (r requestToRemoteGraphite) DoRetryableOperation() ([]byte, error) { ctx, cancel := context.WithTimeout(context.Background(), r.requestTimeout) defer cancel() diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index 8d88bb455..7010c15c7 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -2,15 +2,16 @@ package remote import ( "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" + "github.com/cenkalti/backoff/v4" "github.com/moira-alert/moira/metric_source/retries" mock_clock "github.com/moira-alert/moira/mock/clock" . "github.com/smartystreets/goconvey/convey" "go.uber.org/mock/gomock" - "net/http" - "net/http/httptest" - "testing" - "time" ) func TestPrepareRequest(t *testing.T) { @@ -187,7 +188,7 @@ func TestMakeRequestWithRetries(t *testing.T) { expectedErr := errInvalidRequest{ internalErr: fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body)), } - //systemClock.EXPECT().Sleep(time.Second).Times(0) + // systemClock.EXPECT().Sleep(time.Second).Times(0) Convey("request failed with 500 response and remote is available", func() { remote := Remote{ @@ -269,7 +270,8 @@ func TestMakeRequestWithRetries(t *testing.T) { So(err, ShouldResemble, errRemoteUnavailable{ internalErr: fmt.Errorf( "the remote server is not available. Response status %d: %s", statusCode, string(body), - )}) + ), + }) So(actual, ShouldResemble, body) } }) diff --git a/metric_source/retries/backoff_factory_test.go b/metric_source/retries/backoff_factory_test.go index 0799ea310..50d12cef1 100644 --- a/metric_source/retries/backoff_factory_test.go +++ b/metric_source/retries/backoff_factory_test.go @@ -1,11 +1,12 @@ package retries import ( - "github.com/cenkalti/backoff/v4" "sync" "testing" "time" + "github.com/cenkalti/backoff/v4" + . "github.com/smartystreets/goconvey/convey" ) diff --git a/metric_source/retries/retrier_test.go b/metric_source/retries/retrier_test.go index 612fd69a4..360771d86 100644 --- a/metric_source/retries/retrier_test.go +++ b/metric_source/retries/retrier_test.go @@ -2,9 +2,10 @@ package retries import ( "errors" - "github.com/cenkalti/backoff/v4" "testing" + "github.com/cenkalti/backoff/v4" + . "github.com/smartystreets/goconvey/convey" ) @@ -150,9 +151,7 @@ func newStubRetryableOperation[T any](pairs []retPair[T]) *stubRetryableOperatio } } -var ( - errStubValuesEnded = errors.New("prepared return values and errors for stub ended") -) +var errStubValuesEnded = errors.New("prepared return values and errors for stub ended") func (stub *stubRetryableOperation[T]) DoRetryableOperation() (T, error) { stub.calls += 1 From acc1c6798056d619fc7a218a9f285d1e20cdc4c7 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Tue, 15 Oct 2024 11:09:06 +0700 Subject: [PATCH 20/28] refactor: fix remote tests, clock interface --- clock/clock.go | 7 +- cmd/config.go | 5 +- interfaces.go | 1 - metric_source/remote/remote.go | 4 - metric_source/remote/remote_test.go | 451 ++++++++++++++++----------- metric_source/remote/request_test.go | 6 +- mock/clock/clock.go | 12 - 7 files changed, 266 insertions(+), 220 deletions(-) diff --git a/clock/clock.go b/clock/clock.go index d912534d3..b527ea462 100644 --- a/clock/clock.go +++ b/clock/clock.go @@ -15,12 +15,7 @@ func (t *SystemClock) NowUTC() time.Time { return time.Now().UTC() } -// Sleep pauses the current goroutine for at least the passed duration. -func (t *SystemClock) Sleep(duration time.Duration) { - time.Sleep(duration) -} - -// NowUnix returns now time.Time as a Unix time. +// NowUnix returns current time in a Unix time format. func (t *SystemClock) NowUnix() int64 { return time.Now().Unix() } diff --git a/cmd/config.go b/cmd/config.go index 6b74b98bb..8bc59b07d 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -3,11 +3,12 @@ package cmd import ( "errors" "fmt" + "os" + "strings" + "github.com/moira-alert/moira" "github.com/moira-alert/moira/metric_source/retries" "github.com/moira-alert/moira/metrics" - "os" - "strings" "github.com/moira-alert/moira/image_store/s3" prometheusRemoteSource "github.com/moira-alert/moira/metric_source/prometheus" diff --git a/interfaces.go b/interfaces.go index f23f415d5..94fc776bd 100644 --- a/interfaces.go +++ b/interfaces.go @@ -229,6 +229,5 @@ type PlotTheme interface { // Clock is an interface to work with Time. type Clock interface { NowUTC() time.Time - Sleep(duration time.Duration) NowUnix() int64 } diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index e68fa055d..274a6630f 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -6,8 +6,6 @@ import ( "github.com/moira-alert/moira/metric_source/retries" - "github.com/moira-alert/moira/clock" - "github.com/moira-alert/moira" metricSource "github.com/moira-alert/moira/metric_source" ) @@ -38,7 +36,6 @@ func (err ErrRemoteUnavailable) Error() string { type Remote struct { config *Config client *http.Client - clock moira.Clock retrier retries.Retrier[[]byte] requestBackoffFactory retries.BackoffFactory healthcheckBackoffFactory retries.BackoffFactory @@ -53,7 +50,6 @@ func Create(config *Config) (metricSource.MetricSource, error) { return &Remote{ config: config, client: &http.Client{}, - clock: clock.NewSystemClock(), retrier: retries.NewStandardRetrier[[]byte](), requestBackoffFactory: retries.NewExponentialBackoffFactory(config.Retries), healthcheckBackoffFactory: retries.NewExponentialBackoffFactory(config.HealthcheckRetries), diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index 714431a10..b2758b479 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -1,192 +1,263 @@ package remote -//func TestIsRemoteAvailable(t *testing.T) { -// mockCtrl := gomock.NewController(t) -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(0) -// testConfigs := []*Config{ -// {}, -// {HealthCheckRetrySeconds: []time.Duration{time.Second}}, -// {HealthCheckRetrySeconds: []time.Duration{time.Second}}, -// {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, -// {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second}}, -// {HealthCheckRetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, -// } -// body := []byte("Some string") -// -// Convey("Given server returns OK response the remote is available", t, func() { -// server := createServer(body, http.StatusOK) -// for _, config := range testConfigs { -// config.URL = server.URL -// remote := Remote{client: server.Client(), config: config} -// isAvailable, err := remote.IsAvailable() -// So(isAvailable, ShouldBeTrue) -// So(err, ShouldBeEmpty) -// } -// }) -// -// Convey("Given server returns Remote Unavailable responses permanently", t, func() { -// for statusCode := range remoteUnavailableStatusCodes { -// server := createTestServer(TestResponse{body, statusCode}) -// -// Convey(fmt.Sprintf( -// "request failed with %d response status code and remote is unavailable", statusCode, -// ), func() { -// remote := Remote{client: server.Client()} -// for _, config := range testConfigs { -// config.URL = server.URL -// remote.config = config -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(len(config.HealthCheckRetrySeconds)) -// remote.clock = systemClock -// -// isAvailable, err := remote.IsAvailable() -// So(err, ShouldResemble, fmt.Errorf( -// "the remote server is not available. Response status %d: %s", statusCode, string(body), -// )) -// So(isAvailable, ShouldBeFalse) -// } -// }) -// } -// }) -// -// Convey("Given server returns Remote Unavailable response temporary", t, func() { -// for statusCode := range remoteUnavailableStatusCodes { -// Convey(fmt.Sprintf( -// "the remote is available with retry after %d response", statusCode, -// ), func() { -// for _, config := range testConfigs { -// if len(config.HealthCheckRetrySeconds) == 0 { -// continue -// } -// server := createTestServer( -// TestResponse{body, statusCode}, -// TestResponse{body, http.StatusOK}, -// ) -// config.URL = server.URL -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(1) -// remote := Remote{client: server.Client(), config: config, clock: systemClock} -// -// isAvailable, err := remote.IsAvailable() -// So(err, ShouldBeNil) -// So(isAvailable, ShouldBeTrue) -// } -// }) -// } -// }) -//} -// -//func TestFetch(t *testing.T) { -// var from int64 = 300 -// var until int64 = 500 -// target := "foo.bar" //nolint -// testConfigs := []*Config{ -// {}, -// {RetrySeconds: []time.Duration{time.Second}}, -// {RetrySeconds: []time.Duration{time.Second}}, -// {RetrySeconds: []time.Duration{time.Second, time.Second}}, -// {RetrySeconds: []time.Duration{time.Second, time.Second}}, -// {RetrySeconds: []time.Duration{time.Second, time.Second, time.Second}}, -// } -// mockCtrl := gomock.NewController(t) -// validBody := []byte("[{\"Target\": \"t1\",\"DataPoints\":[[1,2],[3,4]]}]") -// -// Convey("Request success but body is invalid", t, func() { -// server := createServer([]byte("[]"), http.StatusOK) -// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} -// result, err := remote.Fetch(target, from, until, false) -// So(result, ShouldResemble, &FetchResult{MetricsData: []metricSource.MetricData{}}) -// So(err, ShouldBeEmpty) -// }) -// -// Convey("Request success but body is invalid", t, func() { -// server := createServer([]byte("Some string"), http.StatusOK) -// remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} -// result, err := remote.Fetch(target, from, until, false) -// So(result, ShouldBeEmpty) -// So(err.Error(), ShouldResemble, "invalid character 'S' looking for beginning of value") -// So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) -// }) -// -// Convey("Fail request with InternalServerError", t, func() { -// server := createServer([]byte("Some string"), http.StatusInternalServerError) -// remote := Remote{client: server.Client()} -// for _, config := range testConfigs { -// config.URL = server.URL -// remote.config = config -// result, err := remote.Fetch(target, from, until, false) -// So(result, ShouldBeEmpty) -// So(err.Error(), ShouldResemble, fmt.Sprintf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) -// So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) -// } -// }) -// -// Convey("Client calls bad url", t, func() { -// url := "💩%$&TR" -// for _, config := range testConfigs { -// config.URL = url -// remote := Remote{config: config} -// result, err := remote.Fetch(target, from, until, false) -// So(result, ShouldBeEmpty) -// So(err.Error(), ShouldResemble, "parse \"💩%$&TR\": invalid URL escape \"%$&\"") -// So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) -// } -// }) -// -// Convey("Given server returns Remote Unavailable responses permanently", t, func() { -// for statusCode := range remoteUnavailableStatusCodes { -// server := createTestServer(TestResponse{validBody, statusCode}) -// -// Convey(fmt.Sprintf( -// "request failed with %d response status code and remote is unavailable", statusCode, -// ), func() { -// remote := Remote{client: server.Client()} -// for _, config := range testConfigs { -// config.URL = server.URL -// remote.config = config -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(len(config.RetrySeconds)) -// remote.clock = systemClock -// -// result, err := remote.Fetch(target, from, until, false) -// So(err, ShouldResemble, ErrRemoteUnavailable{ -// InternalError: fmt.Errorf( -// "the remote server is not available. Response status %d: %s", statusCode, string(validBody), -// ), Target: target, -// }) -// So(result, ShouldBeNil) -// } -// }) -// } -// }) -// -// Convey("Given server returns Remote Unavailable response temporary", t, func() { -// for statusCode := range remoteUnavailableStatusCodes { -// Convey(fmt.Sprintf( -// "the remote is available with retry after %d response", statusCode, -// ), func() { -// for _, config := range testConfigs { -// if len(config.RetrySeconds) == 0 { -// continue -// } -// server := createTestServer( -// TestResponse{validBody, statusCode}, -// TestResponse{validBody, http.StatusOK}, -// ) -// config.URL = server.URL -// systemClock := mock_clock.NewMockClock(mockCtrl) -// systemClock.EXPECT().Sleep(time.Second).Times(1) -// remote := Remote{client: server.Client(), config: config, clock: systemClock} -// -// result, err := remote.Fetch(target, from, until, false) -// So(err, ShouldBeNil) -// So(result, ShouldNotBeNil) -// metricsData := result.GetMetricsData() -// So(len(metricsData), ShouldEqual, 1) -// So(metricsData[0].Name, ShouldEqual, "t1") -// } -// }) -// } -// }) -//} +import ( + "fmt" + "net/http" + "testing" + "time" + + metricSource "github.com/moira-alert/moira/metric_source" + "github.com/moira-alert/moira/metric_source/retries" + + . "github.com/smartystreets/goconvey/convey" +) + +func TestIsRemoteAvailable(t *testing.T) { + testConfigs := []*Config{ + { + HealthcheckTimeout: time.Second, + HealthcheckRetries: retries.Config{ + InitialInterval: time.Millisecond, + RandomizationFactor: 0.5, + Multiplier: 2, + MaxInterval: time.Millisecond * 20, + MaxRetriesCount: 2, + }, + }, + } + body := []byte("Some string") + + retrier := retries.NewStandardRetrier[[]byte]() + + Convey("Given server returns OK response the remote is available", t, func() { + server := createServer(body, http.StatusOK) + for _, config := range testConfigs { + config.URL = server.URL + + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + healthcheckBackoffFactory: retries.NewExponentialBackoffFactory(config.HealthcheckRetries), + } + + isAvailable, err := remote.IsAvailable() + So(isAvailable, ShouldBeTrue) + So(err, ShouldBeEmpty) + } + }) + + Convey("Given server returns Remote Unavailable responses permanently", t, func() { + for statusCode := range remoteUnavailableStatusCodes { + server := createTestServer(TestResponse{body, statusCode}) + + Convey(fmt.Sprintf( + "request failed with %d response status code and remote is unavailable", statusCode, + ), func() { + for _, config := range testConfigs { + config.URL = server.URL + + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + healthcheckBackoffFactory: retries.NewExponentialBackoffFactory(config.HealthcheckRetries), + } + + isAvailable, err := remote.IsAvailable() + So(err, ShouldResemble, ErrRemoteUnavailable{ + InternalError: fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(body), + ), + }) + So(isAvailable, ShouldBeFalse) + } + }) + } + }) + + Convey("Given server returns Remote Unavailable response temporary", t, func() { + for statusCode := range remoteUnavailableStatusCodes { + Convey(fmt.Sprintf( + "the remote is available with retry after %d response", statusCode, + ), func() { + for _, config := range testConfigs { + server := createTestServer( + TestResponse{body, statusCode}, + TestResponse{body, http.StatusOK}, + ) + config.URL = server.URL + + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + healthcheckBackoffFactory: retries.NewExponentialBackoffFactory(config.HealthcheckRetries), + } + + isAvailable, err := remote.IsAvailable() + So(err, ShouldBeNil) + So(isAvailable, ShouldBeTrue) + } + }) + } + }) +} + +func TestFetch(t *testing.T) { + var from int64 = 300 + var until int64 = 500 + target := "foo.bar" //nolint + testConfigs := []*Config{ + { + Timeout: time.Second, + Retries: retries.Config{ + InitialInterval: time.Millisecond, + RandomizationFactor: 0.5, + Multiplier: 2, + MaxInterval: time.Millisecond * 20, + MaxRetriesCount: 2, + }, + }, + } + + retrier := retries.NewStandardRetrier[[]byte]() + validBody := []byte("[{\"Target\": \"t1\",\"DataPoints\":[[1,2],[3,4]]}]") + + Convey("Request success but body is invalid", t, func() { + server := createServer([]byte("[]"), http.StatusOK) + + conf := testConfigs[0] + conf.URL = server.URL + + remote := Remote{ + client: server.Client(), + config: conf, + retrier: retrier, + requestBackoffFactory: retries.NewExponentialBackoffFactory(conf.Retries), + } + + result, err := remote.Fetch(target, from, until, false) + So(result, ShouldResemble, &FetchResult{MetricsData: []metricSource.MetricData{}}) + So(err, ShouldBeEmpty) + }) + + Convey("Request success but body is invalid", t, func() { + server := createServer([]byte("Some string"), http.StatusOK) + + conf := testConfigs[0] + conf.URL = server.URL + + remote := Remote{ + client: server.Client(), + config: conf, + retrier: retrier, + requestBackoffFactory: retries.NewExponentialBackoffFactory(conf.Retries), + } + + result, err := remote.Fetch(target, from, until, false) + So(result, ShouldBeEmpty) + So(err.Error(), ShouldResemble, "invalid character 'S' looking for beginning of value") + So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) + }) + + Convey("Fail request with InternalServerError", t, func() { + server := createServer([]byte("Some string"), http.StatusInternalServerError) + for _, config := range testConfigs { + config.URL = server.URL + + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + requestBackoffFactory: retries.NewExponentialBackoffFactory(config.Retries), + } + + result, err := remote.Fetch(target, from, until, false) + + So(result, ShouldBeEmpty) + So(err.Error(), ShouldResemble, fmt.Sprintf("bad response status %d: %s", http.StatusInternalServerError, "Some string")) + So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) + } + }) + + Convey("Client calls bad url", t, func() { + server := createTestServer(TestResponse{[]byte("Some string"), http.StatusOK}) + url := "💩%$&TR" + + for _, config := range testConfigs { + config.URL = url + + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + requestBackoffFactory: retries.NewExponentialBackoffFactory(config.Retries), + } + + result, err := remote.Fetch(target, from, until, false) + So(result, ShouldBeEmpty) + So(err.Error(), ShouldResemble, "parse \"💩%$&TR\": invalid URL escape \"%$&\"") + So(err, ShouldHaveSameTypeAs, ErrRemoteTriggerResponse{}) + } + }) + + Convey("Given server returns Remote Unavailable responses permanently", t, func() { + for statusCode := range remoteUnavailableStatusCodes { + server := createTestServer(TestResponse{validBody, statusCode}) + + Convey(fmt.Sprintf( + "request failed with %d response status code and remote is unavailable", statusCode, + ), func() { + for _, config := range testConfigs { + config.URL = server.URL + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + requestBackoffFactory: retries.NewExponentialBackoffFactory(config.Retries), + } + + result, err := remote.Fetch(target, from, until, false) + So(err, ShouldResemble, ErrRemoteUnavailable{ + InternalError: fmt.Errorf( + "the remote server is not available. Response status %d: %s", statusCode, string(validBody), + ), Target: target, + }) + So(result, ShouldBeNil) + } + }) + } + }) + + Convey("Given server returns Remote Unavailable response temporary", t, func() { + for statusCode := range remoteUnavailableStatusCodes { + Convey(fmt.Sprintf( + "the remote is available with retry after %d response", statusCode, + ), func() { + for _, config := range testConfigs { + server := createTestServer( + TestResponse{validBody, statusCode}, + TestResponse{validBody, http.StatusOK}, + ) + config.URL = server.URL + + remote := Remote{ + client: server.Client(), + config: config, + retrier: retrier, + requestBackoffFactory: retries.NewExponentialBackoffFactory(config.Retries), + } + + result, err := remote.Fetch(target, from, until, false) + So(err, ShouldBeNil) + So(result, ShouldNotBeNil) + + metricsData := result.GetMetricsData() + So(len(metricsData), ShouldEqual, 1) + So(metricsData[0].Name, ShouldEqual, "t1") + } + }) + } + }) +} diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index 7010c15c7..96adc0fa2 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -9,9 +9,7 @@ import ( "github.com/cenkalti/backoff/v4" "github.com/moira-alert/moira/metric_source/retries" - mock_clock "github.com/moira-alert/moira/mock/clock" . "github.com/smartystreets/goconvey/convey" - "go.uber.org/mock/gomock" ) func TestPrepareRequest(t *testing.T) { @@ -151,13 +149,11 @@ func TestMakeRequestWithRetries(t *testing.T) { }, }, } - mockCtrl := gomock.NewController(t) + retrier := retries.NewStandardRetrier[[]byte]() Convey("Given server returns OK response", t, func() { server := createTestServer(TestResponse{body, http.StatusOK}) - systemClock := mock_clock.NewMockClock(mockCtrl) - systemClock.EXPECT().Sleep(time.Second).Times(0) Convey("request is successful", func() { remote := Remote{ diff --git a/mock/clock/clock.go b/mock/clock/clock.go index 0b4db69f7..8ed030da3 100644 --- a/mock/clock/clock.go +++ b/mock/clock/clock.go @@ -66,15 +66,3 @@ func (mr *MockClockMockRecorder) NowUnix() *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "NowUnix", reflect.TypeOf((*MockClock)(nil).NowUnix)) } - -// Sleep mocks base method. -func (m *MockClock) Sleep(arg0 time.Duration) { - m.ctrl.T.Helper() - m.ctrl.Call(m, "Sleep", arg0) -} - -// Sleep indicates an expected call of Sleep. -func (mr *MockClockMockRecorder) Sleep(arg0 any) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Sleep", reflect.TypeOf((*MockClock)(nil).Sleep), arg0) -} From 139d701adcd9632d063b8483a00ce74c8d3beec6 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Tue, 15 Oct 2024 15:56:24 +0700 Subject: [PATCH 21/28] refactor: tests --- metric_source/remote/remote_test.go | 76 ++++++++++++++++++---------- metric_source/remote/request_test.go | 28 +++++----- 2 files changed, 65 insertions(+), 39 deletions(-) diff --git a/metric_source/remote/remote_test.go b/metric_source/remote/remote_test.go index b2758b479..ecbe47764 100644 --- a/metric_source/remote/remote_test.go +++ b/metric_source/remote/remote_test.go @@ -12,26 +12,47 @@ import ( . "github.com/smartystreets/goconvey/convey" ) -func TestIsRemoteAvailable(t *testing.T) { - testConfigs := []*Config{ - { - HealthcheckTimeout: time.Second, - HealthcheckRetries: retries.Config{ - InitialInterval: time.Millisecond, - RandomizationFactor: 0.5, - Multiplier: 2, - MaxInterval: time.Millisecond * 20, - MaxRetriesCount: 2, - }, +var testConfigs = []*Config{ + { + Timeout: time.Second, + Retries: retries.Config{ + InitialInterval: time.Millisecond, + RandomizationFactor: 0.5, + Multiplier: 2, + MaxInterval: time.Millisecond * 20, + MaxRetriesCount: 2, }, - } + }, + { + Timeout: time.Millisecond * 200, + Retries: retries.Config{ + InitialInterval: time.Millisecond, + RandomizationFactor: 0.5, + Multiplier: 2, + MaxInterval: time.Second, + MaxElapsedTime: time.Second * 2, + }, + }, +} + +func TestIsAvailable(t *testing.T) { body := []byte("Some string") + isAvailableTestConfigs := make([]*Config, 0, len(testConfigs)) + for _, conf := range testConfigs { + isAvailableTestConfigs = append(isAvailableTestConfigs, &Config{ + HealthcheckTimeout: conf.Timeout, + HealthcheckRetries: conf.Retries, + }) + } + retrier := retries.NewStandardRetrier[[]byte]() Convey("Given server returns OK response the remote is available", t, func() { server := createServer(body, http.StatusOK) - for _, config := range testConfigs { + defer server.Close() + + for _, config := range isAvailableTestConfigs { config.URL = server.URL remote := Remote{ @@ -54,7 +75,7 @@ func TestIsRemoteAvailable(t *testing.T) { Convey(fmt.Sprintf( "request failed with %d response status code and remote is unavailable", statusCode, ), func() { - for _, config := range testConfigs { + for _, config := range isAvailableTestConfigs { config.URL = server.URL remote := Remote{ @@ -73,6 +94,8 @@ func TestIsRemoteAvailable(t *testing.T) { So(isAvailable, ShouldBeFalse) } }) + + server.Close() } }) @@ -81,7 +104,7 @@ func TestIsRemoteAvailable(t *testing.T) { Convey(fmt.Sprintf( "the remote is available with retry after %d response", statusCode, ), func() { - for _, config := range testConfigs { + for _, config := range isAvailableTestConfigs { server := createTestServer( TestResponse{body, statusCode}, TestResponse{body, http.StatusOK}, @@ -98,6 +121,8 @@ func TestIsRemoteAvailable(t *testing.T) { isAvailable, err := remote.IsAvailable() So(err, ShouldBeNil) So(isAvailable, ShouldBeTrue) + + server.Close() } }) } @@ -108,18 +133,6 @@ func TestFetch(t *testing.T) { var from int64 = 300 var until int64 = 500 target := "foo.bar" //nolint - testConfigs := []*Config{ - { - Timeout: time.Second, - Retries: retries.Config{ - InitialInterval: time.Millisecond, - RandomizationFactor: 0.5, - Multiplier: 2, - MaxInterval: time.Millisecond * 20, - MaxRetriesCount: 2, - }, - }, - } retrier := retries.NewStandardRetrier[[]byte]() validBody := []byte("[{\"Target\": \"t1\",\"DataPoints\":[[1,2],[3,4]]}]") @@ -144,6 +157,7 @@ func TestFetch(t *testing.T) { Convey("Request success but body is invalid", t, func() { server := createServer([]byte("Some string"), http.StatusOK) + defer server.Close() conf := testConfigs[0] conf.URL = server.URL @@ -163,6 +177,8 @@ func TestFetch(t *testing.T) { Convey("Fail request with InternalServerError", t, func() { server := createServer([]byte("Some string"), http.StatusInternalServerError) + defer server.Close() + for _, config := range testConfigs { config.URL = server.URL @@ -183,6 +199,8 @@ func TestFetch(t *testing.T) { Convey("Client calls bad url", t, func() { server := createTestServer(TestResponse{[]byte("Some string"), http.StatusOK}) + defer server.Close() + url := "💩%$&TR" for _, config := range testConfigs { @@ -227,6 +245,8 @@ func TestFetch(t *testing.T) { So(result, ShouldBeNil) } }) + + server.Close() } }) @@ -256,6 +276,8 @@ func TestFetch(t *testing.T) { metricsData := result.GetMetricsData() So(len(metricsData), ShouldEqual, 1) So(metricsData[0].Name, ShouldEqual, "t1") + + server.Close() } }) } diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index 96adc0fa2..da575685f 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -57,6 +57,8 @@ func Test_requestToRemoteGraphite_DoRetryableOperation(t *testing.T) { Convey("Client returns status OK", t, func() { server := createServer(body, http.StatusOK) + defer server.Close() + remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} request, _ := remote.prepareRequest(from, until, target) @@ -74,6 +76,8 @@ func Test_requestToRemoteGraphite_DoRetryableOperation(t *testing.T) { Convey("Client returns status InternalServerError", t, func() { server := createServer(body, http.StatusInternalServerError) + defer server.Close() + remote := Remote{client: server.Client(), config: &Config{URL: server.URL}} request, _ := remote.prepareRequest(from, until, target) @@ -93,6 +97,8 @@ func Test_requestToRemoteGraphite_DoRetryableOperation(t *testing.T) { Convey("Client calls bad url", t, func() { server := createServer(body, http.StatusOK) + defer server.Close() + client := server.Client() remote := Remote{client: client, config: &Config{URL: "http://bad/"}} request, _ := remote.prepareRequest(from, until, target) @@ -128,6 +134,8 @@ func Test_requestToRemoteGraphite_DoRetryableOperation(t *testing.T) { "the remote server is not available. Response status %d: %s", statusCode, string(body)), }) So(actual, ShouldResemble, body) + + server.Close() } }) } @@ -137,23 +145,12 @@ func TestMakeRequestWithRetries(t *testing.T) { var until int64 = 500 target := "foo.bar" body := []byte("Some string") - testConfigs := []*Config{ - { - Timeout: time.Second, - Retries: retries.Config{ - InitialInterval: time.Millisecond, - RandomizationFactor: 0.5, - Multiplier: 2, - MaxInterval: time.Millisecond * 20, - MaxRetriesCount: 2, - }, - }, - } retrier := retries.NewStandardRetrier[[]byte]() Convey("Given server returns OK response", t, func() { server := createTestServer(TestResponse{body, http.StatusOK}) + defer server.Close() Convey("request is successful", func() { remote := Remote{ @@ -181,6 +178,8 @@ func TestMakeRequestWithRetries(t *testing.T) { Convey("Given server returns 500 response", t, func() { server := createTestServer(TestResponse{body, http.StatusInternalServerError}) + defer server.Close() + expectedErr := errInvalidRequest{ internalErr: fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body)), } @@ -212,6 +211,7 @@ func TestMakeRequestWithRetries(t *testing.T) { Convey("Given client calls bad url", t, func() { server := createTestServer(TestResponse{body, http.StatusOK}) + defer server.Close() Convey("request failed and remote is unavailable", func() { remote := Remote{ @@ -271,6 +271,8 @@ func TestMakeRequestWithRetries(t *testing.T) { So(actual, ShouldResemble, body) } }) + + server.Close() } }) @@ -302,6 +304,8 @@ func TestMakeRequestWithRetries(t *testing.T) { So(err, ShouldBeNil) So(actual, ShouldResemble, body) + + server.Close() } }) } From 8be8caaee6cc006df9846a109ebb154799abe5aa Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Tue, 15 Oct 2024 16:48:01 +0700 Subject: [PATCH 22/28] refactor: error initialization --- metric_source/remote/request.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 54696f7da..6a8dba1ac 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -133,7 +133,9 @@ func internalErrToPublicErr(err error, target string) error { } } - return ErrRemoteTriggerResponse{} + return ErrRemoteTriggerResponse{ + InternalError: err, + Target: target} } var remoteUnavailableStatusCodes = map[int]struct{}{ From 2c87158f424b0a40ea511e8531b9ce188cc30a98 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Tue, 15 Oct 2024 18:56:24 +0700 Subject: [PATCH 23/28] style: use linter --- metric_source/remote/request.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 6a8dba1ac..2f4df448d 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -135,7 +135,8 @@ func internalErrToPublicErr(err error, target string) error { return ErrRemoteTriggerResponse{ InternalError: err, - Target: target} + Target: target, + } } var remoteUnavailableStatusCodes = map[int]struct{}{ From 76477a6b5085e88c057005e103458c1533a9b072 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Wed, 23 Oct 2024 13:11:39 +0700 Subject: [PATCH 24/28] style: rename var --- metric_source/remote/remote.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index 274a6630f..81547df89 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -103,7 +103,7 @@ func (remote *Remote) IsAvailable() (bool, error) { } _, err = remote.makeRequest(req, remote.config.HealthcheckTimeout, remote.healthcheckBackoffFactory.NewBackOff()) - err = internalErrToPublicErr(err, "") + publicErr := internalErrToPublicErr(err, "") - return !isRemoteUnavailable(err), err + return !isRemoteUnavailable(publicErr), publicErr } From 1bde467aa00195edeb67b14f093b909098dd5c92 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Fri, 25 Oct 2024 12:30:05 +0700 Subject: [PATCH 25/28] refactor: deferring closing response body --- metric_source/remote/request.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/metric_source/remote/request.go b/metric_source/remote/request.go index 2f4df448d..d90884ba4 100644 --- a/metric_source/remote/request.go +++ b/metric_source/remote/request.go @@ -57,10 +57,6 @@ func (r requestToRemoteGraphite) DoRetryableOperation() ([]byte, error) { req := r.request.WithContext(ctx) resp, err := r.client.Do(req) - if resp != nil { - defer resp.Body.Close() - } - if err != nil { return nil, errRemoteUnavailable{ internalErr: fmt.Errorf( @@ -69,6 +65,7 @@ func (r requestToRemoteGraphite) DoRetryableOperation() ([]byte, error) { err), } } + defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { From eabeaed01703240df73cc3bf8f14fea913f2f89e Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Fri, 25 Oct 2024 19:23:22 +0700 Subject: [PATCH 26/28] refactor: use moira.ValidateStruct for retries and graphite remote configs validation --- metric_source/remote/config.go | 39 ++--------------------- metric_source/remote/config_test.go | 33 ++++++++++++-------- metric_source/remote/remote.go | 2 +- metric_source/retries/config.go | 34 +++----------------- metric_source/retries/config_test.go | 46 +++++++++++++++++++--------- 5 files changed, 59 insertions(+), 95 deletions(-) diff --git a/metric_source/remote/config.go b/metric_source/remote/config.go index 4eb7f43c3..53b2f90cc 100644 --- a/metric_source/remote/config.go +++ b/metric_source/remote/config.go @@ -1,7 +1,6 @@ package remote import ( - "errors" "time" "github.com/moira-alert/moira/metric_source/retries" @@ -9,45 +8,13 @@ import ( // Config represents config from remote storage. type Config struct { - URL string + URL string `validate:"required,url"` CheckInterval time.Duration MetricsTTL time.Duration - Timeout time.Duration + Timeout time.Duration `validate:"required,gt=0s"` User string Password string - HealthcheckTimeout time.Duration + HealthcheckTimeout time.Duration `validate:"required,gt=0s"` Retries retries.Config HealthcheckRetries retries.Config } - -var ( - errBadRemoteUrl = errors.New("remote graphite URL should not be empty") - errNoTimeout = errors.New("timeout must be specified and can't be 0") - errNoHealthcheckTimeout = errors.New("healthcheck_timeout must be specified and can't be 0") -) - -func (conf Config) validate() error { - resErrors := make([]error, 0) - - if conf.URL == "" { - resErrors = append(resErrors, errBadRemoteUrl) - } - - if conf.Timeout == 0 { - resErrors = append(resErrors, errNoTimeout) - } - - if conf.HealthcheckTimeout == 0 { - resErrors = append(resErrors, errNoHealthcheckTimeout) - } - - if errRetriesValidate := conf.Retries.Validate(); errRetriesValidate != nil { - resErrors = append(resErrors, errRetriesValidate) - } - - if errHealthcheckRetriesValidate := conf.HealthcheckRetries.Validate(); errHealthcheckRetriesValidate != nil { - resErrors = append(resErrors, errHealthcheckRetriesValidate) - } - - return errors.Join(resErrors...) -} diff --git a/metric_source/remote/config_test.go b/metric_source/remote/config_test.go index 3d58fd270..a04beca85 100644 --- a/metric_source/remote/config_test.go +++ b/metric_source/remote/config_test.go @@ -3,6 +3,8 @@ package remote import ( "errors" "fmt" + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira" "testing" "time" @@ -11,18 +13,19 @@ import ( . "github.com/smartystreets/goconvey/convey" ) -func TestConfig_validate(t *testing.T) { +func TestConfigWithValidateStruct(t *testing.T) { Convey("Test validating retries config", t, func() { type testcase struct { - caseDesc string - conf Config - expectedErr error + caseDesc string + conf Config + errIsNil bool } var ( testInitialInterval = time.Second * 5 testMaxInterval = time.Second * 10 testRetriesCount uint64 = 10 + validatorErr = validator.ValidationErrors{} ) testRetriesConf := retries.Config{ @@ -33,9 +36,9 @@ func TestConfig_validate(t *testing.T) { cases := []testcase{ { - caseDesc: "with empty config", - conf: Config{}, - expectedErr: errors.Join(errBadRemoteUrl, errNoTimeout, errNoHealthcheckTimeout, retries.Config{}.Validate(), retries.Config{}.Validate()), + caseDesc: "with empty config", + conf: Config{}, + errIsNil: false, }, { caseDesc: "with retries config set", @@ -43,7 +46,7 @@ func TestConfig_validate(t *testing.T) { Retries: testRetriesConf, HealthcheckRetries: testRetriesConf, }, - expectedErr: errors.Join(errBadRemoteUrl, errNoTimeout, errNoHealthcheckTimeout), + errIsNil: false, }, { caseDesc: "with retries config set and some url", @@ -52,7 +55,7 @@ func TestConfig_validate(t *testing.T) { Retries: testRetriesConf, HealthcheckRetries: testRetriesConf, }, - expectedErr: errors.Join(errNoTimeout, errNoHealthcheckTimeout), + errIsNil: false, }, { caseDesc: "with retries config set, some url, timeout", @@ -62,7 +65,7 @@ func TestConfig_validate(t *testing.T) { Retries: testRetriesConf, HealthcheckRetries: testRetriesConf, }, - expectedErr: errors.Join(errNoHealthcheckTimeout), + errIsNil: false, }, { caseDesc: "with valid config", @@ -73,15 +76,19 @@ func TestConfig_validate(t *testing.T) { Retries: testRetriesConf, HealthcheckRetries: testRetriesConf, }, - expectedErr: nil, + errIsNil: true, //nil, }, } for i := range cases { Convey(fmt.Sprintf("Case %d: %s", i+1, cases[i].caseDesc), func() { - err := cases[i].conf.validate() + err := moira.ValidateStruct(cases[i].conf) - So(err, ShouldResemble, cases[i].expectedErr) + if cases[i].errIsNil { + So(err, ShouldBeNil) + } else { + So(errors.As(err, &validatorErr), ShouldBeTrue) + } }) } }) diff --git a/metric_source/remote/remote.go b/metric_source/remote/remote.go index 81547df89..654437fbd 100644 --- a/metric_source/remote/remote.go +++ b/metric_source/remote/remote.go @@ -43,7 +43,7 @@ type Remote struct { // Create configures remote metric source. func Create(config *Config) (metricSource.MetricSource, error) { - if err := config.validate(); err != nil { + if err := moira.ValidateStruct(config); err != nil { return nil, err } diff --git a/metric_source/retries/config.go b/metric_source/retries/config.go index f32cd0d44..a7a72cc66 100644 --- a/metric_source/retries/config.go +++ b/metric_source/retries/config.go @@ -1,14 +1,13 @@ package retries import ( - "errors" "time" ) // Config for exponential backoff retries. type Config struct { // InitialInterval between requests. - InitialInterval time.Duration + InitialInterval time.Duration `validate:"required,gt=0s"` // RandomizationFactor is used in exponential backoff to add some randomization // when calculating next interval between requests. // It will be used in multiplication like: @@ -17,34 +16,9 @@ type Config struct { // Each new RetryInterval will be multiplied on Multiplier. Multiplier float64 // MaxInterval is the cap for RetryInterval. Note that it doesn't cap the RandomizedInterval. - MaxInterval time.Duration + MaxInterval time.Duration `validate:"required,gt=0s"` // MaxElapsedTime caps the time passed from first try. If time passed is greater than MaxElapsedTime than stop retrying. - MaxElapsedTime time.Duration + MaxElapsedTime time.Duration `validate:"required_if=MaxRetriesCount 0"` // MaxRetriesCount is the amount of allowed retries. So at most MaxRetriesCount will be performed. - MaxRetriesCount uint64 -} - -var ( - errNoInitialInterval = errors.New("initial_interval must be specified and can't be 0") - errNoMaxInterval = errors.New("max_interval must be specified and can't be 0") - errNoMaxElapsedTimeAndMaxRetriesCount = errors.New("at least one of max_elapsed_time, max_retries_count must be specified") -) - -// Validate checks that retries Config has all necessary fields. -func (conf Config) Validate() error { - resErrors := make([]error, 0) - - if conf.InitialInterval == 0 { - resErrors = append(resErrors, errNoInitialInterval) - } - - if conf.MaxInterval == 0 { - resErrors = append(resErrors, errNoMaxInterval) - } - - if conf.MaxElapsedTime == 0 && conf.MaxRetriesCount == 0 { - resErrors = append(resErrors, errNoMaxElapsedTimeAndMaxRetriesCount) - } - - return errors.Join(resErrors...) + MaxRetriesCount uint64 `validate:"required_if=MaxElapsedTime 0"` } diff --git a/metric_source/retries/config_test.go b/metric_source/retries/config_test.go index a20e75a12..f2de17b03 100644 --- a/metric_source/retries/config_test.go +++ b/metric_source/retries/config_test.go @@ -3,75 +3,91 @@ package retries import ( "errors" "fmt" + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira" "testing" "time" . "github.com/smartystreets/goconvey/convey" ) -func TestConfig_Validate(t *testing.T) { +func TestConfigWithValidateStruct(t *testing.T) { Convey("Test validating retries config", t, func() { type testcase struct { - caseDesc string - conf Config - expectedErr error + caseDesc string + conf Config + errIsNil bool } var ( testRetriesCount uint64 = 10 testMaxElapsedTIme = time.Second * 10 + validatorErr = validator.ValidationErrors{} ) cases := []testcase{ { - caseDesc: "with empty config", - conf: Config{}, - expectedErr: errors.Join(errNoInitialInterval, errNoMaxInterval, errNoMaxElapsedTimeAndMaxRetriesCount), + caseDesc: "with empty config", + conf: Config{}, + errIsNil: false, }, { caseDesc: "with only InitialInterval set", conf: Config{ InitialInterval: testInitialInterval, }, - expectedErr: errors.Join(errNoMaxInterval, errNoMaxElapsedTimeAndMaxRetriesCount), + errIsNil: false, }, { caseDesc: "with only MaxInterval set", conf: Config{ MaxInterval: testMaxInterval, }, - expectedErr: errors.Join(errNoInitialInterval, errNoMaxElapsedTimeAndMaxRetriesCount), + errIsNil: false, }, { caseDesc: "with only MaxRetriesCount set", conf: Config{ MaxRetriesCount: testRetriesCount, }, - expectedErr: errors.Join(errNoInitialInterval, errNoMaxInterval), + errIsNil: false, }, { caseDesc: "with only MaxElapsedTime set", conf: Config{ MaxElapsedTime: testMaxElapsedTIme, }, - expectedErr: errors.Join(errNoInitialInterval, errNoMaxInterval), + errIsNil: false, }, { - caseDesc: "with valid config", + caseDesc: "with valid config but only MaxElapsedTime set", conf: Config{ InitialInterval: testInitialInterval, MaxInterval: testMaxInterval, MaxElapsedTime: testMaxElapsedTIme, }, - expectedErr: nil, + errIsNil: true, + }, + { + caseDesc: "with valid config but only MaxRetriesCount set", + conf: Config{ + InitialInterval: testInitialInterval, + MaxInterval: testMaxInterval, + MaxRetriesCount: testRetriesCount, + }, + errIsNil: true, }, } for i := range cases { Convey(fmt.Sprintf("Case %d: %s", i+1, cases[i].caseDesc), func() { - err := cases[i].conf.Validate() + err := moira.ValidateStruct(cases[i].conf) - So(err, ShouldResemble, cases[i].expectedErr) + if cases[i].errIsNil { + So(err, ShouldBeNil) + } else { + So(errors.As(err, &validatorErr), ShouldBeTrue) + } }) } }) From 459c9e75c2b6a99d92479cddd00a6259136cef7f Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Fri, 25 Oct 2024 19:27:03 +0700 Subject: [PATCH 27/28] style: use linter --- metric_source/remote/config_test.go | 7 ++++--- metric_source/retries/config_test.go | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/metric_source/remote/config_test.go b/metric_source/remote/config_test.go index a04beca85..6e1ab0c4c 100644 --- a/metric_source/remote/config_test.go +++ b/metric_source/remote/config_test.go @@ -3,11 +3,12 @@ package remote import ( "errors" "fmt" - "github.com/go-playground/validator/v10" - "github.com/moira-alert/moira" "testing" "time" + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira" + "github.com/moira-alert/moira/metric_source/retries" . "github.com/smartystreets/goconvey/convey" @@ -76,7 +77,7 @@ func TestConfigWithValidateStruct(t *testing.T) { Retries: testRetriesConf, HealthcheckRetries: testRetriesConf, }, - errIsNil: true, //nil, + errIsNil: true, // nil, }, } diff --git a/metric_source/retries/config_test.go b/metric_source/retries/config_test.go index f2de17b03..976630e2f 100644 --- a/metric_source/retries/config_test.go +++ b/metric_source/retries/config_test.go @@ -3,11 +3,12 @@ package retries import ( "errors" "fmt" - "github.com/go-playground/validator/v10" - "github.com/moira-alert/moira" "testing" "time" + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira" + . "github.com/smartystreets/goconvey/convey" ) From 61f61fa94065fe81ac18cad9548894d752c1df68 Mon Sep 17 00:00:00 2001 From: AleksandrMatsko Date: Thu, 31 Oct 2024 15:54:20 +0700 Subject: [PATCH 28/28] style: remove comments --- metric_source/remote/request_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/metric_source/remote/request_test.go b/metric_source/remote/request_test.go index da575685f..aa98b15fa 100644 --- a/metric_source/remote/request_test.go +++ b/metric_source/remote/request_test.go @@ -183,7 +183,6 @@ func TestMakeRequestWithRetries(t *testing.T) { expectedErr := errInvalidRequest{ internalErr: fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, string(body)), } - // systemClock.EXPECT().Sleep(time.Second).Times(0) Convey("request failed with 500 response and remote is available", func() { remote := Remote{