Skip to content

Commit

Permalink
feat: add parameter to allow return expired cache in case of errors (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
fgouteroux authored Jul 20, 2024
1 parent 7a2e2e2 commit 247ba66
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 17 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ scripts:
max_timeout: <float>
enforced: <boolean>
cacheDuration: <duration>
useExpiredCacheOnError: <boolean>
discovery:
params:
<string>: <string>
Expand Down Expand Up @@ -160,7 +161,9 @@ Prometheus will normally provide an indication of its scrape timeout to the scri

For testing purposes, the timeout can be specified directly as a URL parameter (`timeout`). If present, the URL parameter takes priority over the Prometheus HTTP header.

The `cacheDuration` config can be used to cache the results from an execution of the script for the provided time. The provided duration must be parsable by the [`time.ParseDuration`](https://pkg.go.dev/time#ParseDuration) function. If no cache duration is provided or the provided cache duration can not be parsed, the output of an script will not be cached.
The `cacheDuration` config can be used to cache the results from an execution of the script for the provided time. The provided duration must be parsable by the [`time.ParseDuration`](https://pkg.go.dev/time#ParseDuration) function. If no cache duration is provided or the provided cache duration cannot be parsed, the output of a script will not be cached. The exporter also produces the metric `script_use_cache`, which lets you track over time when the returned results come from the cache (0 = no, 1 = yes).

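The `useExpiredCacheOnError` config allows the exporter to return an expired cache entry when the script execution fails; it only takes effect when `cacheDuration` is set. The exporter also produces the metric `script_use_expired_cache` (0 = no, 1 = yes), which lets you track over time whether an expired cache entry is being served. A value of 1 means that something is wrong with the script execution.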

You can fine tune the script discovery options via optional script `discovery`. All these options will go through prometheus configuration where you can change them via relabel mechanism.
There are `params` to define dynamic script parameters (with reserved keys: `params`, `prefix`, `script` and `timeout`) where only value will be used during script invoking (similar to `args`), `prefix` to define prefix for all script metrics, `scrape_interval` to define how often the script scrape should run and `scrape_timeout` to define the scrape timeout for prometheus (similar to `timeout`).
Expand Down
31 changes: 21 additions & 10 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,17 @@ type Config struct {

// ScriptConfig is the configuration for a single script.
type ScriptConfig struct {
Name string `yaml:"name"`
Script string `yaml:"script"`
Command string `yaml:"command"`
Args []string `yaml:"args"`
Env map[string]string `yaml:"env"`
AllowEnvOverwrite bool `yaml:"allowEnvOverwrite"`
IgnoreOutputOnFail bool `yaml:"ignoreOutputOnFail"`
Timeout timeout `yaml:"timeout"`
CacheDuration string `yaml:"cacheDuration"`
Discovery scriptDiscovery `yaml:"discovery"`
Name string `yaml:"name"`
Script string `yaml:"script"`
Command string `yaml:"command"`
Args []string `yaml:"args"`
Env map[string]string `yaml:"env"`
AllowEnvOverwrite bool `yaml:"allowEnvOverwrite"`
IgnoreOutputOnFail bool `yaml:"ignoreOutputOnFail"`
Timeout timeout `yaml:"timeout"`
UseExpiredCacheOnError bool `yaml:"useExpiredCacheOnError"`
CacheDuration string `yaml:"cacheDuration"`
Discovery scriptDiscovery `yaml:"discovery"`
}

// LoadConfig reads the configuration file and unmarshals the data into the config struct
Expand Down Expand Up @@ -251,6 +252,16 @@ func (c *Config) GetCacheDuration(scriptName string) *time.Duration {
return nil
}

// GetUseExpiredCacheOnError looks up the script named scriptName in the
// configured scripts and reports the value of its UseExpiredCacheOnError
// setting. The first script whose Name matches wins; if no script matches,
// false is returned.
func (c *Config) GetUseExpiredCacheOnError(scriptName string) bool {
	for _, candidate := range c.Scripts {
		// Skip entries that belong to other scripts.
		if candidate.Name != scriptName {
			continue
		}
		return candidate.UseExpiredCacheOnError
	}

	// Unknown script: behave as if the option were disabled.
	return false
}

// GetDiscoveryScrapeInterval returns the scrape_interval if it is valid duration, otherwise empty string.
func (sc *ScriptConfig) GetDiscoveryScrapeInterval() string {
_, err := time.ParseDuration(sc.Discovery.ScrapeInterval)
Expand Down
4 changes: 2 additions & 2 deletions pkg/exporter/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ type cacheEntry struct {
successStatus int
}

func getCacheResult(scriptName string, paramValues []string, cacheDuration time.Duration) (*string, *int, *int) {
func getCacheResult(scriptName string, paramValues []string, cacheDuration time.Duration, expCacheOnTimeout bool) (*string, *int, *int) {
if entry, ok := cache[fmt.Sprintf("%s--%s", scriptName, strings.Join(paramValues, "-"))]; ok {
if entry.cacheTime.Add(cacheDuration).After(time.Now()) {
if entry.cacheTime.Add(cacheDuration).After(time.Now()) || expCacheOnTimeout {
return &entry.formattedOutput, &entry.successStatus, &entry.exitCode
}
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ const (
scriptDurationSecondsType = "# TYPE script_duration_seconds gauge"
scriptExitCodeHelp = "# HELP script_exit_code The exit code of the script."
scriptExitCodeType = "# TYPE script_exit_code gauge"
scriptCacheHelp = "# HELP script_use_cache Script use cache (0 = no, 1 = yes)."
scriptCacheType = "# TYPE script_use_cache gauge"
scriptExpCacheHelp = "# HELP script_use_expired_cache Script re-use expired cache (0 = no, 1 = yes)."
scriptExpCacheType = "# TYPE script_use_expired_cache gauge"
)

type Exporter struct {
Expand Down
17 changes: 13 additions & 4 deletions pkg/exporter/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ func (e *Exporter) metricsHandler(scriptName string, params url.Values, promethe
// stale.
cacheDuration := e.Config.GetCacheDuration(scriptName)
if cacheDuration != nil {
formattedOutput, successStatus, exitCode := getCacheResult(scriptName, paramValues, *cacheDuration)
formattedOutput, successStatus, exitCode := getCacheResult(scriptName, paramValues, *cacheDuration, false)
if formattedOutput != nil && successStatus != nil && exitCode != nil {
level.Debug(e.Logger).Log("msg", "Returning cached result", "script", scriptName)
return fmt.Sprintf("%s\n%s\n%s_success{script=\"%s\"} %d\n%s\n%s\n%s_duration_seconds{script=\"%s\"} %f\n%s\n%s\n%s_exit_code{script=\"%s\"} %d\n%s\n", scriptSuccessHelp, scriptSuccessType, namespace, scriptName, *successStatus, scriptDurationSecondsHelp, scriptDurationSecondsType, namespace, scriptName, time.Since(scriptStartTime).Seconds(), scriptExitCodeHelp, scriptExitCodeType, namespace, scriptName, *exitCode, *formattedOutput), nil
return fmt.Sprintf("%s\n%s\n%s_success{script=\"%s\"} %d\n%s\n%s\n%s_duration_seconds{script=\"%s\"} %f\n%s\n%s\n%s_exit_code{script=\"%s\"} %d\n%s\n%s\n%s_use_cache{script=\"%s\"} %d\n%s\n%s\n%s_use_expired_cache{script=\"%s\"} %d\n%s\n", scriptSuccessHelp, scriptSuccessType, namespace, scriptName, *successStatus, scriptDurationSecondsHelp, scriptDurationSecondsType, namespace, scriptName, time.Since(scriptStartTime).Seconds(), scriptExitCodeHelp, scriptExitCodeType, namespace, scriptName, *exitCode, scriptCacheHelp, scriptCacheType, namespace, scriptName, 1, scriptExpCacheHelp, scriptExpCacheType, namespace, scriptName, 0, *formattedOutput), nil
}
}

Expand Down Expand Up @@ -81,14 +81,23 @@ func (e *Exporter) metricsHandler(scriptName string, params url.Values, promethe
output, exitCode, err := runScript(scriptName, e.Logger, e.logEnv, timeout, e.Config.GetTimeoutEnforced(scriptName), runArgs, runEnv)
if err != nil {
successStatus = 0

useExpiredCacheOnError := e.Config.GetUseExpiredCacheOnError(scriptName)
if cacheDuration != nil && useExpiredCacheOnError {
formattedOutput, successStatus, exitCode := getCacheResult(scriptName, paramValues, *cacheDuration, useExpiredCacheOnError)
if formattedOutput != nil && successStatus != nil && exitCode != nil {
level.Debug(e.Logger).Log("msg", "Returning expired cache result", "script", scriptName)
return fmt.Sprintf("%s\n%s\n%s_success{script=\"%s\"} %d\n%s\n%s\n%s_duration_seconds{script=\"%s\"} %f\n%s\n%s\n%s_exit_code{script=\"%s\"} %d\n%s\n%s\n%s_use_cache{script=\"%s\"} %d\n%s\n%s\n%s_use_expired_cache{script=\"%s\"} %d\n%s\n", scriptSuccessHelp, scriptSuccessType, namespace, scriptName, *successStatus, scriptDurationSecondsHelp, scriptDurationSecondsType, namespace, scriptName, time.Since(scriptStartTime).Seconds(), scriptExitCodeHelp, scriptExitCodeType, namespace, scriptName, *exitCode, scriptCacheHelp, scriptCacheType, namespace, scriptName, 1, scriptExpCacheHelp, scriptExpCacheType, namespace, scriptName, 1, *formattedOutput), nil
}
}
}

// Get ignore output parameter and only return success and duration seconds if 'output=ignore'. If the script failed
// we also have to check the ignoreOutputOnFail setting of the script to only return the output when it is set to
// true.
outputParam := params.Get("output")
if outputParam == "ignore" || (successStatus == 0 && e.Config.GetIgnoreOutputOnFail(scriptName)) {
return fmt.Sprintf("%s\n%s\n%s_success{script=\"%s\"} %d\n%s\n%s\n%s_duration_seconds{script=\"%s\"} %f\n%s\n%s\n%s_exit_code{script=\"%s\"} %d\n", scriptSuccessHelp, scriptSuccessType, namespace, scriptName, successStatus, scriptDurationSecondsHelp, scriptDurationSecondsType, namespace, scriptName, time.Since(scriptStartTime).Seconds(), scriptExitCodeHelp, scriptExitCodeType, namespace, scriptName, exitCode), nil
return fmt.Sprintf("%s\n%s\n%s_success{script=\"%s\"} %d\n%s\n%s\n%s_duration_seconds{script=\"%s\"} %f\n%s\n%s\n%s_exit_code{script=\"%s\"} %d\n%s\n%s\n%s_use_cache{script=\"%s\"} %d\n%s\n%s\n%s_use_expired_cache{script=\"%s\"} %d\n", scriptSuccessHelp, scriptSuccessType, namespace, scriptName, successStatus, scriptDurationSecondsHelp, scriptDurationSecondsType, namespace, scriptName, time.Since(scriptStartTime).Seconds(), scriptExitCodeHelp, scriptExitCodeType, namespace, scriptName, exitCode, scriptCacheHelp, scriptCacheType, namespace, scriptName, 0, scriptExpCacheHelp, scriptExpCacheType, namespace, scriptName, 0), nil
}

// Format output
Expand Down Expand Up @@ -128,7 +137,7 @@ func (e *Exporter) metricsHandler(scriptName string, params url.Values, promethe
setCacheResult(scriptName, paramValues, formattedOutput, successStatus, exitCode)
}

return fmt.Sprintf("%s\n%s\n%s_success{script=\"%s\"} %d\n%s\n%s\n%s_duration_seconds{script=\"%s\"} %f\n%s\n%s\n%s_exit_code{script=\"%s\"} %d\n%s\n", scriptSuccessHelp, scriptSuccessType, namespace, scriptName, successStatus, scriptDurationSecondsHelp, scriptDurationSecondsType, namespace, scriptName, time.Since(scriptStartTime).Seconds(), scriptExitCodeHelp, scriptExitCodeType, namespace, scriptName, exitCode, formattedOutput), nil
return fmt.Sprintf("%s\n%s\n%s_success{script=\"%s\"} %d\n%s\n%s\n%s_duration_seconds{script=\"%s\"} %f\n%s\n%s\n%s_exit_code{script=\"%s\"} %d\n%s\n%s\n%s_use_cache{script=\"%s\"} %d\n%s\n%s\n%s_use_expired_cache{script=\"%s\"} %d\n%s\n", scriptSuccessHelp, scriptSuccessType, namespace, scriptName, successStatus, scriptDurationSecondsHelp, scriptDurationSecondsType, namespace, scriptName, time.Since(scriptStartTime).Seconds(), scriptExitCodeHelp, scriptExitCodeType, namespace, scriptName, exitCode, scriptCacheHelp, scriptCacheType, namespace, scriptName, 0, scriptExpCacheHelp, scriptExpCacheType, namespace, scriptName, 0, formattedOutput), nil
}

func (e *Exporter) MetricsHandler(w http.ResponseWriter, r *http.Request) {
Expand Down

0 comments on commit 247ba66

Please sign in to comment.