From b488ca7f02bcadd11876028cf3a7ae0dea588012 Mon Sep 17 00:00:00 2001 From: Rohith Jayawardene Date: Sat, 31 Aug 2024 17:10:09 +0100 Subject: [PATCH] [FEATURE] - Executor Resource Requests & Limits (#1501) * [FEAT] - Executor Resource Requests & Limits Currently consumers are not permitted to set the executor resource requests/limits - this has created an error in the past, due to higher than expected requirements, hence it was bumped to 1Gi. For certain modules though this may not be enough, hence the feature to support controling the fields from the controller arguments * feat: ensure the inputs are valid quantities * fix: adjusting due to formatting issues * feat: removing the limitation by default and placing the decision on the administrator as to limits --- cmd/controller/main.go | 4 ++ pkg/assets/job.yaml.tpl | 14 +++-- pkg/controller/configuration/controller.go | 24 +++++++++ pkg/controller/configuration/ensure.go | 28 +++++----- .../configuration/reconcile_test.go | 54 +++++++++++-------- pkg/server/server.go | 34 ++++++------ pkg/server/types.go | 8 +++ pkg/utils/jobs/jobs.go | 12 +++++ 8 files changed, 124 insertions(+), 54 deletions(-) diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 446369c8e..b29c5e6f9 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -75,6 +75,10 @@ func main() { flags.IntVar(&config.MetricsPort, "metrics-port", 9090, "The port the metric endpoint binds to") flags.IntVar(&config.WebhookPort, "webhooks-port", 10081, "The port the webhook endpoint binds to") flags.StringSliceVar(&config.ExecutorSecrets, "executor-secret", []string{}, "Name of a secret in controller namespace which should be added to the job") + flags.StringVar(&config.ExecutorMemoryRequest, "executor-memory-request", "32Mi", "The default memory request for the executor container") + flags.StringVar(&config.ExecutorMemoryLimit, "executor-memory-limit", "", "The default memory limit for the executor container (default is no limit)") + flags.StringVar(&config.ExecutorCPURequest, "executor-cpu-request", "5m", "The default CPU request for the executor container") + flags.StringVar(&config.ExecutorCPULimit, "executor-cpu-limit", "", "The default CPU limit for the executor container (default is no limit)") flags.StringVar(&config.BackendTemplate, "backend-template", "", "Name of secret in the controller namespace containing a template for the terraform state") flags.StringVar(&config.ExecutorImage, "executor-image", fmt.Sprintf("ghcr.io/appvia/terranetes-executor:%s", version.Version), "The image to use for the executor") flags.StringVar(&config.InfracostsImage, "infracost-image", "infracosts/infracost:latest", "The image to use for the infracosts") diff --git a/pkg/assets/job.yaml.tpl b/pkg/assets/job.yaml.tpl index 77526c1f8..8ac8dfe7c 100644 --- a/pkg/assets/job.yaml.tpl +++ b/pkg/assets/job.yaml.tpl @@ -259,12 +259,18 @@ spec: optional: false {{- end }} resources: + {{- if or (ne .DefaultExecutorCPULimit "") (ne .DefaultExecutorMemoryLimit "") }} limits: - cpu: 1 - memory: 1Gi + {{- if ne .DefaultExecutorCPULimit "" }} + cpu: {{ .DefaultExecutorCPULimit }} + {{- end }} + {{- if ne .DefaultExecutorMemoryLimit "" }} + memory: {{ .DefaultExecutorMemoryLimit }} + {{- end }} + {{- end }} requests: - cpu: 5m - memory: 32Mi + cpu: {{ .DefaultExecutorCPURequest }} + memory: {{ .DefaultExecutorMemoryRequest }} securityContext: capabilities: drop: [ALL] diff --git a/pkg/controller/configuration/controller.go b/pkg/controller/configuration/controller.go index ad8074df1..da6fb97c5 100644 --- a/pkg/controller/configuration/controller.go +++ b/pkg/controller/configuration/controller.go @@ -27,6 +27,7 @@ import ( log "github.com/sirupsen/logrus" batchv1 "k8s.io/api/batch/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" cache "k8s.io/client-go/tools/cache" @@ -89,6 +90,14 @@ type Controller struct { // executors job everytime - these are configured by the platform team on the // cli options ExecutorSecrets []string + // DefaultExecutorMemoryRequest is the default memory request for the executor container + DefaultExecutorMemoryRequest string + // DefaultExecutorMemoryLimit is the default memory limit for the executor container + DefaultExecutorMemoryLimit string + // DefaultExecutorCPURequest is the default CPU request for the executor container + DefaultExecutorCPURequest string + // DefaultExecutorCPULimit is the default CPU limit for the executor container + DefaultExecutorCPULimit string // InfracostsImage is the image to use for all infracost jobs InfracostsImage string // InfracostsSecretName is the name of the secret containing the api and token @@ -161,6 +170,21 @@ func (c *Controller) Add(mgr manager.Manager) error { "terraform_image": c.TerraformImage, }).Info("adding the configuration controller") + // @step: ensure the resource limits are valid + for _, c := range []string{ + c.DefaultExecutorCPULimit, + c.DefaultExecutorCPURequest, + c.DefaultExecutorMemoryLimit, + c.DefaultExecutorMemoryRequest, + } { + if c == "" { + continue + } + if _, err := resource.ParseQuantity(c); err != nil { + return fmt.Errorf("invalid resource quantity: %q, error: %w", c, err) + } + } + switch { case c.ControllerNamespace == "": return errors.New("job namespace is required") diff --git a/pkg/controller/configuration/ensure.go b/pkg/controller/configuration/ensure.go index caf46c702..4c8e94cc5 100644 --- a/pkg/controller/configuration/ensure.go +++ b/pkg/controller/configuration/ensure.go @@ -815,18 +815,22 @@ func (c *Controller) ensureTerraformPlan(configuration *terraformv1alpha1.Config terraformv1alpha1.RetryAnnotation: configuration.GetAnnotations()[terraformv1alpha1.RetryAnnotation], terraformv1alpha1.JobTemplateHashLabel: state.jobTemplateHash, }), - BackoffLimit: c.BackoffLimit, - EnableInfraCosts: c.EnableInfracosts, - ExecutorImage: c.ExecutorImage, - ExecutorSecrets: c.ExecutorSecrets, - InfracostsImage: c.InfracostsImage, - InfracostsSecret: c.InfracostsSecretName, - Namespace: c.ControllerNamespace, - PolicyConstraint: state.checkovConstraint, - PolicyImage: c.PolicyImage, - SaveTerraformState: saveState, - Template: state.jobTemplate, - TerraformImage: GetTerraformImage(configuration, c.TerraformImage), + BackoffLimit: c.BackoffLimit, + DefaultExecutorCPULimit: c.DefaultExecutorCPULimit, + DefaultExecutorCPURequest: c.DefaultExecutorCPURequest, + DefaultExecutorMemoryLimit: c.DefaultExecutorMemoryLimit, + DefaultExecutorMemoryRequest: c.DefaultExecutorMemoryRequest, + EnableInfraCosts: c.EnableInfracosts, + ExecutorImage: c.ExecutorImage, + ExecutorSecrets: c.ExecutorSecrets, + InfracostsImage: c.InfracostsImage, + InfracostsSecret: c.InfracostsSecretName, + Namespace: c.ControllerNamespace, + PolicyConstraint: state.checkovConstraint, + PolicyImage: c.PolicyImage, + SaveTerraformState: saveState, + Template: state.jobTemplate, + TerraformImage: GetTerraformImage(configuration, c.TerraformImage), } // @step: use the options to generate the job diff --git a/pkg/controller/configuration/reconcile_test.go b/pkg/controller/configuration/reconcile_test.go index 873187987..1793d0f90 100644 --- a/pkg/controller/configuration/reconcile_test.go +++ b/pkg/controller/configuration/reconcile_test.go @@ -56,18 +56,22 @@ func TestController(t *testing.T) { func makeFakeController(cc client.Client) *Controller { recorder := &controllertests.FakeRecorder{} ctrl := &Controller{ - cc: cc, - kc: kfake.NewSimpleClientset(), - cache: cache.New(5*time.Minute, 10*time.Minute), - recorder: recorder, - BackoffLimit: 2, - EnableInfracosts: false, - EnableWatchers: true, - ExecutorImage: "ghcr.io/appvia/terranetes-executor", - InfracostsImage: "infracosts/infracost:latest", - ControllerNamespace: "terraform-system", - PolicyImage: "bridgecrew/checkov:2.0.1140", - TerraformImage: "hashicorp/terraform:1.1.9", + cc: cc, + kc: kfake.NewSimpleClientset(), + cache: cache.New(5*time.Minute, 10*time.Minute), + recorder: recorder, + BackoffLimit: 2, + ControllerNamespace: "terraform-system", + DefaultExecutorCPULimit: "1", + DefaultExecutorCPURequest: "5m", + DefaultExecutorMemoryLimit: "1Gi", + DefaultExecutorMemoryRequest: "32Mi", + EnableInfracosts: false, + EnableWatchers: true, + ExecutorImage: "ghcr.io/appvia/terranetes-executor", + InfracostsImage: "infracosts/infracost:latest", + PolicyImage: "bridgecrew/checkov:2.0.1140", + TerraformImage: "hashicorp/terraform:1.1.9", } return ctrl @@ -928,17 +932,21 @@ var _ = Describe("Configuration Controller", func() { recorder = &controllertests.FakeRecorder{} ctrl = &Controller{ - cc: cc, - kc: kfake.NewSimpleClientset(), - cache: cache.New(5*time.Minute, 10*time.Minute), - recorder: recorder, - EnableInfracosts: false, - EnableWatchers: true, - ExecutorImage: "ghcr.io/appvia/terranetes-executor", - InfracostsImage: "infracosts/infracost:latest", - ControllerNamespace: "default", - PolicyImage: "bridgecrew/checkov:2.0.1140", - TerraformImage: "hashicorp/terraform:1.1.9", + cc: cc, + kc: kfake.NewSimpleClientset(), + cache: cache.New(5*time.Minute, 10*time.Minute), + recorder: recorder, + DefaultExecutorCPULimit: "1", + DefaultExecutorCPURequest: "5m", + DefaultExecutorMemoryLimit: "1Gi", + DefaultExecutorMemoryRequest: "32Mi", + EnableInfracosts: false, + EnableWatchers: true, + ExecutorImage: "ghcr.io/appvia/terranetes-executor", + InfracostsImage: "infracosts/infracost:latest", + ControllerNamespace: "default", + PolicyImage: "bridgecrew/checkov:2.0.1140", + TerraformImage: "hashicorp/terraform:1.1.9", } ctrl.cache.SetDefault(cfgNamespace, fixtures.NewNamespace(cfgNamespace)) } diff --git a/pkg/server/server.go b/pkg/server/server.go index c7f9aee79..15ad418da 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -171,21 +171,25 @@ func New(cfg *rest.Config, config Config) (*Server, error) { // @step: ensure the configuration controller is enabled if err := (&configuration.Controller{ - BackendTemplate: config.BackendTemplate, - BackoffLimit: config.BackoffLimit, - ControllerJobLabels: jobLabels, - ControllerNamespace: config.Namespace, - EnableInfracosts: (config.InfracostsSecretName != ""), - EnableTerraformVersions: config.EnableTerraformVersions, - EnableWatchers: config.EnableWatchers, - EnableWebhooks: config.EnableWebhooks, - ExecutorImage: config.ExecutorImage, - ExecutorSecrets: config.ExecutorSecrets, - InfracostsImage: config.InfracostsImage, - InfracostsSecretName: config.InfracostsSecretName, - JobTemplate: config.JobTemplate, - PolicyImage: config.PolicyImage, - TerraformImage: config.TerraformImage, + BackendTemplate: config.BackendTemplate, + BackoffLimit: config.BackoffLimit, + ControllerJobLabels: jobLabels, + ControllerNamespace: config.Namespace, + DefaultExecutorCPULimit: config.ExecutorCPULimit, + DefaultExecutorCPURequest: config.ExecutorCPURequest, + DefaultExecutorMemoryLimit: config.ExecutorMemoryLimit, + DefaultExecutorMemoryRequest: config.ExecutorMemoryRequest, + EnableInfracosts: (config.InfracostsSecretName != ""), + EnableTerraformVersions: config.EnableTerraformVersions, + EnableWatchers: config.EnableWatchers, + EnableWebhooks: config.EnableWebhooks, + ExecutorImage: config.ExecutorImage, + ExecutorSecrets: config.ExecutorSecrets, + InfracostsImage: config.InfracostsImage, + InfracostsSecretName: config.InfracostsSecretName, + JobTemplate: config.JobTemplate, + PolicyImage: config.PolicyImage, + TerraformImage: config.TerraformImage, }).Add(mgr); err != nil { return nil, fmt.Errorf("failed to create the configuration controller, error: %w", err) } diff --git a/pkg/server/types.go b/pkg/server/types.go index e0a54f681..d1f971162 100644 --- a/pkg/server/types.go +++ b/pkg/server/types.go @@ -59,6 +59,14 @@ type Config struct { EnableWebhookPrefix bool // ExecutorImage is the image to use for the executor ExecutorImage string + // ExecutorMemoryRequest is the memory request for the executor container (e.g. 1Gi, 2Gi) + ExecutorMemoryRequest string + // ExecutorMemoryLimit is the memory limit for the executor container (e.g. 1Gi, 2Gi) + ExecutorMemoryLimit string + // ExecutorCPURequest is the CPU request for the executor container (e.g. 1, 2) + ExecutorCPURequest string + // ExecutorCPULimit is the CPU limit for the executor container (e.g. 1, 2) + ExecutorCPULimit string // ExecutorSecrets is a list of additional secrets to be added to the executor ExecutorSecrets []string // InfracostsSecretName is the name of the secret that contains the cost token and endpoint diff --git a/pkg/utils/jobs/jobs.go b/pkg/utils/jobs/jobs.go index 4a96c7c1c..bb4434a79 100644 --- a/pkg/utils/jobs/jobs.go +++ b/pkg/utils/jobs/jobs.go @@ -53,6 +53,14 @@ type Options struct { // BackoffLimit is the number of times we are willing to allow a job to fail // before we give up BackoffLimit int + // DefaultExecutorMemoryRequest is the default memory request for the executor + DefaultExecutorMemoryRequest string + // DefaultExecutorMemoryLimit is the default memory limit for the executor + DefaultExecutorMemoryLimit string + // DefaultExecutorCPURequest is the default CPU request for the executor + DefaultExecutorCPURequest string + // DefaultExecutorCPULimit is the default CPU limit for the executor + DefaultExecutorCPULimit string // EnableInfraCosts is the flag to enable cost analysis EnableInfraCosts bool // ExecutorImage is the image to use for the terraform jobs @@ -206,6 +214,10 @@ func (r *Render) createTerraformFromTemplate(options Options, stage string) (*ba terraformv1alpha1.ConfigurationStageLabel: stage, terraformv1alpha1.ConfigurationUIDLabel: string(r.configuration.GetUID()), }), + "DefaultExecutorMemoryRequest": options.DefaultExecutorMemoryRequest, + "DefaultExecutorMemoryLimit": options.DefaultExecutorMemoryLimit, + "DefaultExecutorCPURequest": options.DefaultExecutorCPURequest, + "DefaultExecutorCPULimit": options.DefaultExecutorCPULimit, "Provider": map[string]interface{}{ "Name": r.provider.Name, "Namespace": r.provider.Namespace,