diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 0d6f8500b..5d37754e9 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -53,6 +53,7 @@ func main() { flags := cmd.Flags() flags.Bool("verbose", false, "Enable verbose logging") + flags.IntVar(&config.BackoffLimit, "backoff-limit", 1, "The number of times we are willing to allow a terraform job to error before marking as a failure") flags.BoolVar(&config.EnableContextInjection, "enable-context-injection", false, "Indicates the controller should inject Configuration context into the terraform variables") flags.BoolVar(&config.EnableNamespaceProtection, "enable-namespace-protection", false, "Indicates the controller should protect the controller namespace from being deleted") flags.BoolVar(&config.EnableRevisionUpdateProtection, "enable-revision-update-protection", false, "Indicates we should protect the revisions in use from being updated") diff --git a/pkg/assets/job.yaml.tpl b/pkg/assets/job.yaml.tpl index 6a4627e12..49e3f478c 100644 --- a/pkg/assets/job.yaml.tpl +++ b/pkg/assets/job.yaml.tpl @@ -9,7 +9,7 @@ metadata: {{ $key }}: "{{ $value }}" {{- end }} spec: - backoffLimit: 2 + backoffLimit: {{ default 1 .BackoffLimit }} completions: 1 parallelism: 1 # retain the jobs for 6 hours diff --git a/pkg/controller/configuration/controller.go b/pkg/controller/configuration/controller.go index 4bff909e8..3ef294862 100644 --- a/pkg/controller/configuration/controller.go +++ b/pkg/controller/configuration/controller.go @@ -65,6 +65,9 @@ type Controller struct { // BackendTemplate is the name of the secret in the controller namespace which holds a // template used to generate the state backend BackendTemplate string + // BackoffLimit is the number of times we allow a job to fail before deeming + // it a failure + BackoffLimit int // EnableContextInjection enables the injection of the context into the terraform configuration // variables. 
This means we shall inject an number of default variables into the configuration // such as namespace, name and labels diff --git a/pkg/controller/configuration/delete.go b/pkg/controller/configuration/delete.go index db5353901..18b52ea35 100644 --- a/pkg/controller/configuration/delete.go +++ b/pkg/controller/configuration/delete.go @@ -86,6 +86,7 @@ func (c *Controller) ensureTerraformDestroy(configuration *terraformv1alpha1.Con map[string]string{ terraformv1alpha1.RetryAnnotation: configuration.GetAnnotations()[terraformv1alpha1.RetryAnnotation], }), + BackoffLimit: c.BackoffLimit, EnableInfraCosts: c.EnableInfracosts, ExecutorImage: c.ExecutorImage, ExecutorSecrets: c.ExecutorSecrets, diff --git a/pkg/controller/configuration/ensure.go b/pkg/controller/configuration/ensure.go index c8fabb6ed..174ea54bd 100644 --- a/pkg/controller/configuration/ensure.go +++ b/pkg/controller/configuration/ensure.go @@ -712,6 +712,7 @@ func (c *Controller) ensureTerraformPlan(configuration *terraformv1alpha1.Config terraformv1alpha1.DriftAnnotation: configuration.GetAnnotations()[terraformv1alpha1.DriftAnnotation], terraformv1alpha1.RetryAnnotation: configuration.GetAnnotations()[terraformv1alpha1.RetryAnnotation], }), + BackoffLimit: c.BackoffLimit, EnableInfraCosts: c.EnableInfracosts, ExecutorImage: c.ExecutorImage, ExecutorSecrets: c.ExecutorSecrets, @@ -1149,6 +1150,7 @@ func (c *Controller) ensureTerraformApply(configuration *terraformv1alpha1.Confi state.provider.JobLabels(), configuration.GetLabels(), ), + BackoffLimit: c.BackoffLimit, EnableInfraCosts: c.EnableInfracosts, ExecutorImage: c.ExecutorImage, ExecutorSecrets: c.ExecutorSecrets, diff --git a/pkg/controller/configuration/reconcile_test.go b/pkg/controller/configuration/reconcile_test.go index 120d6eed6..c60d76e7e 100644 --- a/pkg/controller/configuration/reconcile_test.go +++ b/pkg/controller/configuration/reconcile_test.go @@ -60,6 +60,7 @@ func makeFakeController(cc client.Client) *Controller { kc: 
kfake.NewSimpleClientset(), cache: cache.New(5*time.Minute, 10*time.Minute), recorder: recorder, + BackoffLimit: 2, EnableInfracosts: false, EnableWatchers: true, ExecutorImage: "ghcr.io/appvia/terranetes-executor", diff --git a/pkg/server/server.go b/pkg/server/server.go index 569089655..d39c19914 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -171,6 +171,8 @@ func New(cfg *rest.Config, config Config) (*Server, error) { // @step: ensure the configuration controller is enabled if err := (&configuration.Controller{ BackendTemplate: config.BackendTemplate, + BackoffLimit: config.BackoffLimit, + ControllerJobLabels: jobLabels, ControllerNamespace: config.Namespace, EnableInfracosts: (config.InfracostsSecretName != ""), EnableTerraformVersions: config.EnableTerraformVersions, @@ -181,7 +183,6 @@ func New(cfg *rest.Config, config Config) (*Server, error) { InfracostsImage: config.InfracostsImage, InfracostsSecretName: config.InfracostsSecretName, JobTemplate: config.JobTemplate, - ControllerJobLabels: jobLabels, PolicyImage: config.PolicyImage, TerraformImage: config.TerraformImage, }).Add(mgr); err != nil { diff --git a/pkg/server/types.go b/pkg/server/types.go index 5c7b6728f..023b4b4a9 100644 --- a/pkg/server/types.go +++ b/pkg/server/types.go @@ -27,6 +27,8 @@ type Config struct { // contains an optional template to use for the backend state - unless this // is set we use the default backend state i.e. 
kubernetes state BackendTemplate string + // BackoffLimit is the number of times we are willing to allow a job to fail + BackoffLimit int // DriftControllerInterval is the interval for the controller to check for drift DriftControllerInterval time.Duration // DriftInterval is the minimum interval between drift checks diff --git a/pkg/utils/jobs/jobs.go b/pkg/utils/jobs/jobs.go index 71ed601d4..a003db82c 100644 --- a/pkg/utils/jobs/jobs.go +++ b/pkg/utils/jobs/jobs.go @@ -48,6 +48,9 @@ type Options struct { AdditionalJobSecrets []string // AdditionalJobLabels are additional labels added to the job AdditionalJobLabels map[string]string + // BackoffLimit is the number of times we are willing to allow a job to fail + // before we give up + BackoffLimit int // EnableInfraCosts is the flag to enable cost analysis EnableInfraCosts bool // ExecutorImage is the image to use for the terraform jobs