Skip to content

Commit

Permalink
feat: Added retry logic to remote api request
Browse files Browse the repository at this point in the history
  • Loading branch information
d.anchikov committed Jun 22, 2022
1 parent e4b222f commit 3ee81e6
Show file tree
Hide file tree
Showing 11 changed files with 361 additions and 40 deletions.
2 changes: 1 addition & 1 deletion api/handler/triggers.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo
return nil, api.ErrorInvalidRequest(err)
case remote.ErrRemoteTriggerResponse:
response := api.ErrorRemoteServerUnavailable(err)
middleware.GetLoggerEntry(request).Error("%s : %s : %s", response.StatusText, response.ErrorText, err)
middleware.GetLoggerEntry(request).Errorf("%s : %s : %s", response.StatusText, response.ErrorText, err)
return nil, response
default:
return nil, api.ErrorInternalServer(err)
Expand Down
5 changes: 5 additions & 0 deletions clock/clock.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ func NewSystemClock() *SystemClock {
func (t *SystemClock) Now() time.Time {
return time.Now().UTC()
}

// Sleep pauses the current goroutine for at least the passed duration.
// It delegates directly to time.Sleep, so it blocks the caller until the
// duration has elapsed.
func (t *SystemClock) Sleep(duration time.Duration) {
time.Sleep(duration)
}
40 changes: 33 additions & 7 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package cmd
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
"time"

"github.com/moira-alert/moira/metrics"

Expand Down Expand Up @@ -110,6 +112,12 @@ type RemoteConfig struct {
MetricsTTL string `yaml:"metrics_ttl"`
// Timeout for remote requests
Timeout string `yaml:"timeout"`
// Delays between retries of remote requests, in whole seconds, separated by spaces (e.g. "1 1 1")
RetrySeconds string `yaml:"retry_seconds"`
// HealthCheckTimeout is timeout for remote api health check requests
HealthCheckTimeout string `yaml:"health_check_timeout"`
// Delays between retries of remote api health check requests, in whole seconds, separated by spaces (e.g. "1 1 1")
HealthCheckRetrySeconds string `yaml:"health_check_retry_seconds"`
// Username for basic auth
User string `yaml:"user"`
// Password for basic auth
Expand All @@ -126,16 +134,34 @@ type ImageStoreConfig struct {
// GetRemoteSourceSettings returns remote config parsed from moira config files.
//
// Duration-like settings are converted with to.Duration and the retry lists
// with ParseRetrySeconds, which panics when a list contains a token that is
// not an integer. HealthCheckTimeout is taken from its own
// health_check_timeout setting, independently of the regular request Timeout.
func (config *RemoteConfig) GetRemoteSourceSettings() *remoteSource.Config {
	return &remoteSource.Config{
		URL:                     config.URL,
		CheckInterval:           to.Duration(config.CheckInterval),
		MetricsTTL:              to.Duration(config.MetricsTTL),
		Timeout:                 to.Duration(config.Timeout),
		RetrySeconds:            ParseRetrySeconds(config.RetrySeconds),
		HealthCheckTimeout:      to.Duration(config.HealthCheckTimeout),
		HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds),
		User:                    config.User,
		Password:                config.Password,
		Enabled:                 config.Enabled,
	}
}

// ParseRetrySeconds converts a whitespace-separated list of whole seconds
// (for example "1 1 1") into a slice of durations, one per token and in the
// same order. An empty or blank string yields an empty slice.
//
// It panics when a token is not a valid integer, so a malformed config value
// is reported as soon as the settings are built.
func ParseRetrySeconds(retrySecondsString string) []time.Duration {
	secondsStringList := strings.Fields(retrySecondsString)
	// Length 0 with capacity reserved: the values are appended below, so a
	// non-zero initial length would leave zero-valued delays at the front.
	retryDurations := make([]time.Duration, 0, len(secondsStringList))

	for _, secondsString := range secondsStringList {
		seconds, err := strconv.Atoi(secondsString)
		if err != nil {
			panic(fmt.Errorf("invalid retry seconds value %q: %w", secondsString, err))
		}
		retryDurations = append(retryDurations, time.Duration(seconds)*time.Second)
	}
	return retryDurations
}

// ReadConfig parses config file by the given path into Moira-used type
func ReadConfig(configFileName string, config interface{}) error {
configYaml, err := ioutil.ReadFile(configFileName)
Expand Down
1 change: 1 addition & 0 deletions interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,4 +222,5 @@ type PlotTheme interface {
// Clock is an interface to work with Time.
// It abstracts both reading the current time and sleeping behind one type.
type Clock interface {
// Now reports the current time.
Now() time.Time
// Sleep blocks the calling goroutine for the given duration.
Sleep(duration time.Duration)
}
5 changes: 4 additions & 1 deletion local/checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ remote:
url: "http://graphite:80/render"
check_interval: 60s
timeout: 60s
metrics_ttl: 7d
metrics_ttl: 168h
retry_seconds: 1 1 1
health_check_timeout: 6s
health_check_retry_seconds: 1 1 1
checker:
nodata_check_interval: 60s
check_interval: 10s
Expand Down
17 changes: 10 additions & 7 deletions metric_source/remote/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@ import "time"

// Config represents config from remote storage.
type Config struct {
	// URL is the address of the remote API endpoint.
	URL string
	// CheckInterval is the check interval configured for the remote source.
	CheckInterval time.Duration
	// MetricsTTL is the metrics TTL configured for the remote source.
	MetricsTTL time.Duration
	// Timeout is the timeout for remote requests.
	Timeout time.Duration
	// RetrySeconds lists the delays to wait between retries of remote requests.
	RetrySeconds []time.Duration
	// HealthCheckTimeout is the timeout for remote API health check requests.
	HealthCheckTimeout time.Duration
	// HealthCheckRetrySeconds lists the delays to wait between retries of
	// remote API health check requests.
	HealthCheckRetrySeconds []time.Duration
	// User is the username for basic auth.
	User string
	// Password is the password for basic auth.
	Password string
	// Enabled is the flag that switches the remote source on.
	Enabled bool
}

// isEnabled checks that remote config is enabled (url is defined and enabled flag is set)
Expand Down
37 changes: 27 additions & 10 deletions metric_source/remote/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"net/http"
"time"

"github.com/moira-alert/moira/clock"

"github.com/moira-alert/moira"
metricSource "github.com/moira-alert/moira/metric_source"
)
Expand All @@ -23,17 +25,30 @@ func (err ErrRemoteTriggerResponse) Error() string {
return err.InternalError.Error()
}

// ErrRemoteUnavailable is the error reported when a request to the remote
// source failed and the remote was considered unavailable. It wraps the
// underlying error together with the target that was being requested.
type ErrRemoteUnavailable struct {
	InternalError error
	Target        string
}

// Error implements the error interface by returning the message of the
// wrapped InternalError.
func (err ErrRemoteUnavailable) Error() string {
	cause := err.InternalError
	return cause.Error()
}

// Remote is implementation of MetricSource interface, which implements fetch metrics method from remote graphite installation
type Remote struct {
// config holds the remote source settings (URL, timeouts, retry delays).
config *Config
// client is the HTTP client used to send requests to the remote API.
client *http.Client
// clock is used to sleep between retry attempts.
clock moira.Clock
}

// Create builds a remote metric source from the given config. The HTTP client
// gets config.Timeout as its timeout and the source uses the system clock.
func Create(config *Config) metricSource.MetricSource {
	httpClient := &http.Client{Timeout: config.Timeout}
	source := &Remote{
		config: config,
		client: httpClient,
		clock:  clock.NewSystemClock(),
	}
	return source
}

Expand All @@ -50,9 +65,15 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert
Target: target,
}
}
body, err := remote.makeRequest(req)
body, isRemoteAvailable, err := remote.makeRequestWithRetries(req, remote.config.Timeout, remote.config.RetrySeconds)
if err != nil {
return nil, ErrRemoteTriggerResponse{
if isRemoteAvailable {
return nil, ErrRemoteTriggerResponse{
InternalError: err,
Target: target,
}
}
return nil, ErrRemoteUnavailable{
InternalError: err,
Target: target,
}
Expand Down Expand Up @@ -83,18 +104,14 @@ func (remote *Remote) IsConfigured() (bool, error) {

// IsRemoteAvailable probes the remote API and reports whether it is reachable.
//
// It requests the last 600 seconds of the target "NonExistingTarget" using the
// health check timeout and retry delays from the config. The boolean is true
// when the remote answered and its status is not one of the "unavailable"
// statuses; the error may still be non-nil in that case (for example when the
// remote answered with a status other than 200).
func (remote *Remote) IsRemoteAvailable() (bool, error) {
	until := time.Now().Unix()
	from := until - 600 //nolint
	req, err := remote.prepareRequest(from, until, "NonExistingTarget")
	if err != nil {
		return false, err
	}
	_, isRemoteAvailable, err := remote.makeRequestWithRetries(
		req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds,
	)
	return isRemoteAvailable, err
}
2 changes: 1 addition & 1 deletion metric_source/remote/remote_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func TestIsRemoteAvailable(t *testing.T) {
server := createServer([]byte("Some string"), http.StatusInternalServerError)
remote := Remote{client: server.Client(), config: &Config{URL: server.URL}}
isAvailable, err := remote.IsRemoteAvailable()
So(isAvailable, ShouldBeFalse)
So(isAvailable, ShouldBeTrue)
So(err, ShouldResemble, fmt.Errorf("bad response status %d: %s", http.StatusInternalServerError, "Some string"))
})
}
Expand Down
58 changes: 49 additions & 9 deletions metric_source/remote/request.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package remote

import (
"context"
"fmt"
"io/ioutil"
"net/http"
"strconv"
"time"
)

func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Request, error) {
Expand All @@ -24,27 +26,65 @@ func (remote *Remote) prepareRequest(from, until int64, target string) (*http.Re
return req, nil
}

func (remote *Remote) makeRequest(req *http.Request) ([]byte, error) {
var body []byte

func (remote *Remote) makeRequest(req *http.Request) (body []byte, isRemoteAvailable bool, err error) {
resp, err := remote.client.Do(req)
if resp != nil {
defer resp.Body.Close()
}

if err != nil {
return body, fmt.Errorf("The remote server is not available or the response was reset by timeout. " + //nolint
"TTL: %s, PATH: %s, ERROR: %v ", remote.client.Timeout.String(), req.URL.RawPath, err)
return body, false, fmt.Errorf(
"the remote server is not available or the response was reset by timeout. "+
"TTL: %s, PATH: %s, ERROR: %v ", remote.client.Timeout.String(), req.URL.RawPath, err,
)
}

body, err = ioutil.ReadAll(resp.Body)
if err != nil {
return body, err
return body, false, err
}

if isRemoteUnavailableStatusCode(resp.StatusCode) {
return body, false, fmt.Errorf(
"the remote server is not available. Response status %d: %s", resp.StatusCode, string(body),
)
} else if resp.StatusCode != http.StatusOK {
return body, true, fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body))
}

if resp.StatusCode != 200 { //nolint
return body, fmt.Errorf("bad response status %d: %s", resp.StatusCode, string(body))
return body, true, nil
}

// isRemoteUnavailableStatusCode reports whether the HTTP status code is one of
// those treated as "remote is unavailable": 401, 502, 503 or 504.
func isRemoteUnavailableStatusCode(statusCode int) bool {
	return statusCode == http.StatusUnauthorized ||
		statusCode == http.StatusBadGateway ||
		statusCode == http.StatusServiceUnavailable ||
		statusCode == http.StatusGatewayTimeout
}

return body, nil
// makeRequestWithRetries sends req and retries while the remote is unavailable.
//
// At most len(retrySeconds)+1 attempts are made; after failed attempt i the
// clock sleeps for retrySeconds[i] before the next one. Each attempt gets its
// own requestTimeout deadline, so an attempt that timed out does not make the
// following attempts fail immediately on an already expired context.
//
// The loop stops as soon as an attempt succeeds or the remote answers at all
// (isRemoteAvailable), in which case the body, true and that attempt's error
// are returned. If every attempt finds the remote unavailable, it returns
// nil, false and the last error.
func (remote *Remote) makeRequestWithRetries(
	req *http.Request,
	requestTimeout time.Duration,
	retrySeconds []time.Duration,
) (body []byte, isRemoteAvailable bool, err error) {
	for attemptIndex := 0; attemptIndex <= len(retrySeconds); attemptIndex++ {
		body, isRemoteAvailable, err = remote.makeRequestWithTimeout(req, requestTimeout)
		if err == nil || isRemoteAvailable {
			return body, true, err
		}
		// No sleep after the final attempt.
		if attemptIndex < len(retrySeconds) {
			remote.clock.Sleep(retrySeconds[attemptIndex])
		}
	}
	return nil, false, err
}

// makeRequestWithTimeout performs a single attempt of req, bounded by its own
// requestTimeout deadline when requestTimeout is positive.
func (remote *Remote) makeRequestWithTimeout(
	req *http.Request,
	requestTimeout time.Duration,
) ([]byte, bool, error) {
	if requestTimeout > 0 {
		// Derive from the request's context so any caller cancellation is kept.
		ctx, cancel := context.WithTimeout(req.Context(), requestTimeout)
		defer cancel()
		req = req.WithContext(ctx)
	}
	return remote.makeRequest(req)
}
Loading

0 comments on commit 3ee81e6

Please sign in to comment.