Skip to content

Commit

Permalink
Limit pod resources
Browse files Browse the repository at this point in the history
This patch introduces back limits for the pods spawned by the
test-operator after they were increased and later removed with these
two PRs [1][2].

The problem with the previous two patches was that they only set the
Resources.Limits field and not the Resources.Requests field. When
Resources.Limits is set and Resources.Requests is empty then it
inherrits the value from Resources.Limits.

Therefore, we first hit the OOM killed issue when we set the
Resources.Limits too low and later when we increased the value we hit
the "Insufficient memory" error (due to high value in
Resources.Requests field)

This patch addresses the above mentioned issue by:
  - setting sane default values for Resource.Limits
  - setting sane default values for Resource.Requests and
  - introduces new parameter called .Spec.Resources which can be used
    to change the default values.

[1] #222
[2] #224
  • Loading branch information
lpiwowar committed Oct 30, 2024
1 parent eae3554 commit 1f7805e
Show file tree
Hide file tree
Showing 11 changed files with 111 additions and 0 deletions.
10 changes: 10 additions & 0 deletions api/v1beta1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ type CommonOptions struct {
// This value contains a toleration that is applied to pods spawned by the
// test pods that are spawned by the test-operator.
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`

// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec
// Specify Resource Requirements for a each test pod spawned
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
}

type CommonOpenstackConfig struct {
Expand Down Expand Up @@ -179,4 +184,9 @@ type WorkflowCommonParameters struct {
// This value contains a toleration that is applied to pods spawned by the
// test pods that are spawned by the test-operator.
Tolerations *[]corev1.Toleration `json:"tolerations,omitempty"`

// +kubebuilder:validation:Optional
// +operator-sdk:csv:customresourcedefinitions:type=spec
// Specify Resource Requirements for a each test pod spawned
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
}
6 changes: 6 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions controllers/ansibletest_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,10 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
if instance.Spec.Workflow[externalWorkflowCounter].SELinuxLevel != nil {
instance.Spec.SELinuxLevel = *instance.Spec.Workflow[externalWorkflowCounter].SELinuxLevel
}

if instance.Spec.Workflow[externalWorkflowCounter].Resources != nil {
instance.Spec.Resources = *instance.Spec.Workflow[externalWorkflowCounter].Resources
}
}

// Service account, role, binding
Expand All @@ -263,6 +267,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
externalWorkflowCounter,
containerImage,
privileged,
instance.Spec.Resources,
)
ansibleTestsJob := job.NewJob(
jobDef,
Expand Down
1 change: 1 addition & 0 deletions controllers/horizontest_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
mountKubeconfig,
envVars,
containerImage,
instance.Spec.Resources,
)
horizontestJob := job.NewJob(
jobDef,
Expand Down
5 changes: 5 additions & 0 deletions controllers/tempest_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,10 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re
if instance.Spec.Workflow[externalWorkflowCounter].SELinuxLevel != nil {
instance.Spec.SELinuxLevel = *instance.Spec.Workflow[externalWorkflowCounter].SELinuxLevel
}

if instance.Spec.Workflow[externalWorkflowCounter].Resources != nil {
instance.Spec.Resources = *instance.Spec.Workflow[externalWorkflowCounter].Resources
}
}

jobDef := tempest.Job(
Expand All @@ -344,6 +348,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re
mountCerts,
mountSSHKey,
containerImage,
instance.Spec.Resources,
)
tempestJob := job.NewJob(
jobDef,
Expand Down
5 changes: 5 additions & 0 deletions controllers/tobiko_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res
envVars,
containerImage,
privileged,
instance.Spec.Resources,
)
tobikoJob := job.NewJob(
jobDef,
Expand Down Expand Up @@ -426,6 +427,10 @@ func (r *TobikoReconciler) PrepareTobikoEnvVars(
if instance.Spec.Workflow[step].SELinuxLevel != nil {
instance.Spec.SELinuxLevel = *instance.Spec.Workflow[step].SELinuxLevel
}

if instance.Spec.Workflow[step].Resources != nil {
instance.Spec.Resources = *instance.Spec.Workflow[step].Resources
}
}

// Prepare env vars
Expand Down
15 changes: 15 additions & 0 deletions pkg/ansibletest/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
util "github.com/openstack-k8s-operators/test-operator/pkg/util"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -22,6 +23,7 @@ func Job(
externalWorkflowCounter int,
containerImage string,
privileged bool,
resources corev1.ResourceRequirements,
) *batchv1.Job {

runAsUser := int64(227)
Expand All @@ -32,6 +34,18 @@ func Job(
capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"}
securityContext := util.GetSecurityContext(runAsUser, capabilities, privileged)

defaultResources := corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("4000Mi"),
corev1.ResourceMemory: resource.MustParse("10Gi"),
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2000Mi"),
corev1.ResourceMemory: resource.MustParse("5Gi"),
},
}
resources = util.GetResourceRequirements(resources, defaultResources)

job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: jobName,
Expand Down Expand Up @@ -64,6 +78,7 @@ func Job(
Env: env.MergeEnvs([]corev1.EnvVar{}, envVars),
VolumeMounts: GetVolumeMounts(mountCerts, instance, externalWorkflowCounter),
SecurityContext: &securityContext,
Resources: resources,
},
},
Volumes: GetVolumes(
Expand Down
15 changes: 15 additions & 0 deletions pkg/horizontest/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
util "github.com/openstack-k8s-operators/test-operator/pkg/util"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -21,6 +22,7 @@ func Job(
mountKubeconfig bool,
envVars map[string]env.Setter,
containerImage string,
resources corev1.ResourceRequirements,
) *batchv1.Job {

runAsUser := int64(42455)
Expand All @@ -31,6 +33,18 @@ func Job(
capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"}
securityContext := util.GetSecurityContext(runAsUser, capabilities, instance.Spec.Privileged)

defaultResources := corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("4000Mi"),
corev1.ResourceMemory: resource.MustParse("10Gi"),
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2000Mi"),
corev1.ResourceMemory: resource.MustParse("5Gi"),
},
}
resources = util.GetResourceRequirements(resources, defaultResources)

job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: jobName,
Expand Down Expand Up @@ -63,6 +77,7 @@ func Job(
Env: env.MergeEnvs([]corev1.EnvVar{}, envVars),
VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance),
SecurityContext: &securityContext,
Resources: resources,
},
},
Volumes: GetVolumes(
Expand Down
15 changes: 15 additions & 0 deletions pkg/tempest/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
util "github.com/openstack-k8s-operators/test-operator/pkg/util"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -22,13 +23,26 @@ func Job(
mountCerts bool,
mountSSHKey bool,
containerImage string,
resources corev1.ResourceRequirements,
) *batchv1.Job {

envVars := map[string]env.Setter{}
runAsUser := int64(42480)
runAsGroup := int64(42480)
securityContext := util.GetSecurityContext(runAsUser, []corev1.Capability{}, instance.Spec.Privileged)

defaultResources := corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("4000Mi"),
corev1.ResourceMemory: resource.MustParse("10Gi"),
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2000Mi"),
corev1.ResourceMemory: resource.MustParse("5Gi"),
},
}
resources = util.GetResourceRequirements(resources, defaultResources)

job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: jobName,
Expand Down Expand Up @@ -60,6 +74,7 @@ func Job(
Env: env.MergeEnvs([]corev1.EnvVar{}, envVars),
VolumeMounts: GetVolumeMounts(mountCerts, mountSSHKey, instance),
SecurityContext: &securityContext,
Resources: resources,
EnvFrom: []corev1.EnvFromSource{
{
ConfigMapRef: &corev1.ConfigMapEnvSource{
Expand Down
15 changes: 15 additions & 0 deletions pkg/tobiko/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
util "github.com/openstack-k8s-operators/test-operator/pkg/util"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -23,6 +24,7 @@ func Job(
envVars map[string]env.Setter,
containerImage string,
privileged bool,
resources corev1.ResourceRequirements,
) *batchv1.Job {

runAsUser := int64(42495)
Expand All @@ -33,6 +35,18 @@ func Job(
capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"}
securityContext := util.GetSecurityContext(runAsUser, capabilities, privileged)

defaultResources := corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("4000Mi"),
corev1.ResourceMemory: resource.MustParse("10Gi"),
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2000Mi"),
corev1.ResourceMemory: resource.MustParse("5Gi"),
},
}
resources = util.GetResourceRequirements(resources, defaultResources)

// Note(lpiwowar): Once the webhook is implemented move all the logic of merging
// the workflows there.
job := &batchv1.Job{
Expand Down Expand Up @@ -68,6 +82,7 @@ func Job(
Env: env.MergeEnvs([]corev1.EnvVar{}, envVars),
VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance),
SecurityContext: &securityContext,
Resources: resources,
},
},
Volumes: GetVolumes(
Expand Down
19 changes: 19 additions & 0 deletions pkg/util/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,22 @@ func GetSecurityContext(

return securityContext
}

func GetResourceRequirements(
resourceRequirements corev1.ResourceRequirements,
defaultResourceRequirements corev1.ResourceRequirements,
) corev1.ResourceRequirements {
if resourceRequirements.Limits == nil {
resourceRequirements.Limits = defaultResourceRequirements.Limits
}

if resourceRequirements.Requests == nil {
resourceRequirements.Requests = defaultResourceRequirements.Requests
}

if resourceRequirements.Claims == nil {
resourceRequirements.Claims = defaultResourceRequirements.Claims
}

return resourceRequirements
}

0 comments on commit 1f7805e

Please sign in to comment.