Merge pull request #1425 from a7i/automated-cherry-pick-of-#1378-#1390-#1412-#1413-#1416-#1395-upstream-release-1.30

Automated cherry pick of #1378: Fix the replicas type for the helm-chart
#1390: allow 'falsey' value in cmdOption
#1412: fix helm's default deschedulerPolicy
#1413: fix TOC location in Readme
#1416: use cmd context instead of using context.Background()
#1395: fix the issue that the pod anti-filtering rules are not
k8s-ci-robot authored Jun 5, 2024
2 parents 7999094 + 7e85b79 commit 9f7e7fd
Showing 10 changed files with 140 additions and 77 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@
![Release Charts](https://github.com/kubernetes-sigs/descheduler/workflows/Release%20Charts/badge.svg)

<p align="left">
↖️ Click at the [bullet list icon] at the top left corner of the Readme visualization for the github generated table of contents.
↗️️ Click at the [bullet list icon] at the top right corner of the Readme visualization for the github generated table of contents.
</p>

<p align="center">
2 changes: 1 addition & 1 deletion charts/descheduler/templates/NOTES.txt
@@ -1,7 +1,7 @@
Descheduler installed as a {{ .Values.kind }}.

{{- if eq .Values.kind "Deployment" }}
{{- if eq .Values.replicas 1.0}}
{{- if eq (.Values.replicas | int) 1 }}
WARNING: You set replica count as 1 and workload kind as Deployment however leaderElection is not enabled. Consider enabling Leader Election for HA mode.
{{- end}}
{{- if .Values.leaderElection }}
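Why pipe through `int` first: Helm typically decodes numbers from a values file as floats (the YAML goes through a JSON round-trip), while `--set replicas=1` produces an int64, so a direct `eq` against `1.0` or `1` only matches one of the two forms. Normalizing makes the check independent of how the value was supplied. A minimal, illustrative sketch of the pattern:

```
{{- /* Illustrative only: works whether replicas came from values.yaml or --set. */ -}}
{{- if eq (.Values.replicas | int) 1 }}
exactly one replica
{{- end }}
```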
6 changes: 5 additions & 1 deletion charts/descheduler/templates/cronjob.yaml
@@ -81,7 +81,11 @@ spec:
args:
- --policy-config-file=/policy-dir/policy.yaml
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
{{- if ne $value nil }}
- {{ printf "--%s=%s" $key (toString $value) }}
{{- else }}
- {{ printf "--%s" $key }}
{{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 16 }}
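The practical difference of the `ne $value nil` check is easiest to see with a small values excerpt. Previously any falsey value (0, false, an empty string) was rendered as a bare flag, silently dropping the user's explicit setting; now only a null value omits the `=value` part. The option names below are illustrative, not chart defaults:

```yaml
cmdOptions:
  v: 0          # explicitly set, but falsey
  dry-run:      # deliberately left null

# old template renders:  --v        --dry-run   (the 0 is lost)
# new template renders:  --v=0      --dry-run   (only null drops the value)
```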
8 changes: 6 additions & 2 deletions charts/descheduler/templates/deployment.yaml
@@ -7,7 +7,7 @@ metadata:
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
{{- if gt .Values.replicas 1.0}}
{{- if gt (.Values.replicas | int) 1 }}
{{- if not .Values.leaderElection.enabled }}
{{- fail "You must set leaderElection to use more than 1 replica"}}
{{- end}}
@@ -53,7 +53,11 @@ spec:
- --policy-config-file=/policy-dir/policy.yaml
- --descheduling-interval={{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
{{- if ne $value nil }}
- {{ printf "--%s=%s" $key (toString $value) }}
{{- else }}
- {{ printf "--%s" $key }}
{{- end }}
{{- end }}
{{- include "descheduler.leaderElection" . | nindent 12 }}
ports:
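deployment.yaml gets the same cmdOptions treatment, and the `gt (.Values.replicas | int) 1` guard now compares integers regardless of whether `replicas` came from a values file (decoded as a float) or from `--set` (an int64). Combined with the existing `fail`, asking for several replicas without leader election is rejected at render time. A rough illustration, assuming kind is switched to Deployment, leader election stays disabled, and the chart's other Deployment requirements (such as deschedulingInterval) are already satisfied:

```
helm template my-release ./charts/descheduler \
  --set kind=Deployment \
  --set replicas=2
# Error: ... You must set leaderElection to use more than 1 replica
```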
8 changes: 3 additions & 5 deletions charts/descheduler/values.yaml
@@ -111,14 +111,13 @@ deschedulerPolicy:
args:
podRestartThreshold: 100
includingInitContainers: true
- name: RemovePodsViolatingNodeTaints
- name: RemovePodsViolatingNodeAffinity
args:
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
- requiredDuringSchedulingIgnoredDuringExecution
- name: RemovePodsViolatingNodeTaints
- name: RemovePodsViolatingInterPodAntiAffinity
- name: RemovePodsViolatingTopologySpreadConstraint
args:
includeSoftConstraints: false
- name: LowNodeUtilization
args:
thresholds:
@@ -133,7 +132,6 @@ deschedulerPolicy:
balance:
enabled:
- RemoveDuplicates
- RemovePodsViolatingNodeAffinity
- RemovePodsViolatingTopologySpreadConstraint
- LowNodeUtilization
deschedule:
2 changes: 1 addition & 1 deletion cmd/descheduler/app/server.go
@@ -77,7 +77,7 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {

secureServing.DisableHTTP2 = !s.EnableHTTP2

ctx, done := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
ctx, done := signal.NotifyContext(cmd.Context(), syscall.SIGINT, syscall.SIGTERM)

pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !s.DisableMetrics {
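Switching the parent context from `context.Background()` to `cmd.Context()` lets whatever context the caller passes to cobra's `ExecuteContext` flow into the descheduler, while `signal.NotifyContext` still layers SIGINT/SIGTERM handling on top. A minimal, self-contained sketch of that pattern (not the descheduler's actual wiring):

```go
package main

import (
	"context"
	"fmt"
	"os/signal"
	"syscall"

	"github.com/spf13/cobra"
)

func main() {
	cmd := &cobra.Command{
		Use: "demo",
		RunE: func(cmd *cobra.Command, args []string) error {
			// cmd.Context() is whatever context the caller passed to
			// ExecuteContext; layering signal.NotifyContext on top keeps
			// both cancellation paths: caller cancellation and SIGINT/SIGTERM.
			ctx, done := signal.NotifyContext(cmd.Context(), syscall.SIGINT, syscall.SIGTERM)
			defer done()

			<-ctx.Done() // block until the caller cancels or a signal arrives
			fmt.Println("shutting down:", ctx.Err())
			return nil
		},
	}

	// The caller controls the root context; cancelling it stops the command
	// without sending a real OS signal (useful in tests).
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	if err := cmd.ExecuteContext(ctx); err != nil {
		fmt.Println(err)
	}
}
```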
27 changes: 24 additions & 3 deletions pkg/descheduler/node/node.go
@@ -343,9 +343,30 @@ func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFun
if err != nil {
return false, fmt.Errorf("error listing all pods: %v", err)
}
assignedPodsInNamespace := podutil.GroupByNamespace(podsOnNode)

podsInANamespace := podutil.GroupByNamespace(podsOnNode)
nodeMap := utils.CreateNodeMap([]*v1.Node{node})
for _, term := range utils.GetPodAntiAffinityTerms(pod.Spec.Affinity.PodAntiAffinity) {
namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false, err
}

for namespace := range namespaces {
for _, assignedPod := range assignedPodsInNamespace[namespace] {
if assignedPod.Name == pod.Name || !utils.PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) {
klog.V(4).InfoS("Pod doesn't match inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
continue
}

if _, ok := node.Labels[term.TopologyKey]; ok {
klog.V(1).InfoS("Pod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
return true, nil
}
}
}
}

return utils.CheckPodsWithAntiAffinityExist(pod, podsInANamespace, nodeMap), nil
return false, nil
}
38 changes: 33 additions & 5 deletions pkg/descheduler/node/node_test.go
@@ -759,6 +759,9 @@ func TestNodeFit(t *testing.T) {
"region": "main-region",
}
})

nodeNolabel := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, nil)

tests := []struct {
description string
pod *v1.Pod
@@ -767,7 +770,7 @@
err error
}{
{
description: "insufficient cpu",
description: "Insufficient cpu",
pod: test.BuildTestPod("p1", 10000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -776,7 +779,7 @@
err: errors.New("insufficient cpu"),
},
{
description: "insufficient pod num",
description: "Insufficient pod num",
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -786,7 +789,7 @@
err: errors.New("insufficient pods"),
},
{
description: "matches inter-pod anti-affinity rule of pod on node",
description: "Pod matches inter-pod anti-affinity rule of other pod on node",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
@@ -795,11 +798,36 @@
err: errors.New("pod matches inter-pod anti-affinity rule of other pod on node"),
},
{
description: "pod fits on node",
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because pod and other pod is not same namespace",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, func(pod *v1.Pod) {
pod.Namespace = "test"
}), "foo", "bar"),
},
},
{
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because other pod not match labels of pod",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo1", "bar1"),
},
},
{
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because node have no topologyKey",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, "node1", nil), "foo", "bar"),
node: nodeNolabel,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo", "bar"),
},
},
{
description: "Pod fits on node",
pod: test.BuildTestPod("p1", 1000, 1000, "", func(pod *v1.Pod) {}),
node: node,
podsOnNode: []*v1.Pod{},
err: nil,
},
}

95 changes: 66 additions & 29 deletions pkg/utils/predicates.go
@@ -24,10 +24,37 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/component-helpers/scheduling/corev1"
"k8s.io/klog/v2"
)

// GetNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
names := sets.New[string]()
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}

// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}

if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}

// The following code has been copied from predicates package to avoid the
// huge vendoring issues, mostly copied from
// k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/
@@ -309,42 +336,52 @@ func CreateNodeMap(nodes []*v1.Node) map[string]*v1.Node {
return m
}

// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
func CheckPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
affinity := pod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := getNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for namespace := range namespaces {
for _, existingPod := range pods[namespace] {
if existingPod.Name != pod.Name && podMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
node, ok := nodeMap[pod.Spec.NodeName]
if !ok {
continue
}
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
if !ok {
continue
}
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
klog.V(1).InfoS("Found Pods matching PodAntiAffinity", "pod with anti-affinity", klog.KObj(pod))
return true
}
}
// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current candidate pod cannot tolerate.
func CheckPodsWithAntiAffinityExist(candidatePod *v1.Pod, assignedPods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
nodeHavingCandidatePod, ok := nodeMap[candidatePod.Spec.NodeName]
if !ok {
klog.Warningf("CandidatePod %s does not exist in nodeMap", klog.KObj(candidatePod))
return false
}

affinity := candidatePod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return false
}

for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := GetNamespacesFromPodAffinityTerm(candidatePod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}

for namespace := range namespaces {
for _, assignedPod := range assignedPods[namespace] {
if assignedPod.Name == candidatePod.Name || !PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) {
klog.V(4).InfoS("CandidatePod doesn't matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(assignedPod))
continue
}

nodeHavingAssignedPod, ok := nodeMap[assignedPod.Spec.NodeName]
if !ok {
continue
}

if hasSameLabelValue(nodeHavingCandidatePod, nodeHavingAssignedPod, term.TopologyKey) {
klog.V(1).InfoS("CandidatePod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(assignedPod))
return true
}
}
}
}

return false
}

// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
// GetPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
if podAntiAffinity != nil {
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
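GetNamespacesFromPodAffinityTerm and PodMatchesTermsNamespaceAndSelector are now exported from pkg/utils so that node.go can evaluate anti-affinity terms directly. A rough usage sketch built only from the signatures shown above; the import path and the surrounding scaffolding are assumptions:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"sigs.k8s.io/descheduler/pkg/utils"
)

func main() {
	// Candidate pod with a required anti-affinity term against foo=bar pods.
	candidate := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "default"},
		Spec: v1.PodSpec{
			Affinity: &v1.Affinity{
				PodAntiAffinity: &v1.PodAntiAffinity{
					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
						LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}},
						TopologyKey:   "kubernetes.io/hostname",
					}},
				},
			},
		},
	}
	// A pod already assigned to the node, carrying the conflicting label.
	assigned := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "p2",
			Namespace: "default",
			Labels:    map[string]string{"foo": "bar"},
		},
	}

	for _, term := range utils.GetPodAntiAffinityTerms(candidate.Spec.Affinity.PodAntiAffinity) {
		// The term lists no namespaces, so the candidate's own namespace is used.
		namespaces := utils.GetNamespacesFromPodAffinityTerm(candidate, &term)
		selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
		if err != nil {
			panic(err)
		}
		// Prints true: p2 is in a matching namespace and carries the foo=bar label.
		fmt.Println(utils.PodMatchesTermsNamespaceAndSelector(assigned, namespaces, selector))
	}
}
```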
29 changes: 0 additions & 29 deletions pkg/utils/priority.go
@@ -4,42 +4,13 @@ import (
"context"
"fmt"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/descheduler/pkg/api"
)

const SystemCriticalPriority = 2 * int32(1000000000)

// getNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func getNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
names := sets.New[string]()
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}

// podMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func podMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}

if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}

// GetPriorityFromPriorityClass gets priority from the given priority class.
// If no priority class is provided, it will return SystemCriticalPriority by default.
func GetPriorityFromPriorityClass(ctx context.Context, client clientset.Interface, name string) (int32, error) {
