From d8a54e104191ba62c6be5b67ed333e757f2abdef Mon Sep 17 00:00:00 2001 From: Ilja Weis Date: Thu, 25 Jan 2018 17:25:55 +0100 Subject: [PATCH] fix node check --- check_kubernetes.go | 39 +++++++++++++++++++++++++++++++++++++-- check_kubernetes_test.go | 22 ++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/check_kubernetes.go b/check_kubernetes.go index fc34991..bc4b275 100644 --- a/check_kubernetes.go +++ b/check_kubernetes.go @@ -252,6 +252,10 @@ func checkNode(name string, kube kubernetes.Interface) (nrpe.Result, string) { nosched := node.Spec.Unschedulable ready := false + unknown := true + + resourceTroubleMsg := "" + resourceTrouble := false if len(node.Status.Conditions) < 1 { return nrpe.UNKNOWN, "node in unknown state" @@ -260,14 +264,41 @@ func checkNode(name string, kube kubernetes.Interface) (nrpe.Result, string) { for _, cond := range node.Status.Conditions { switch cond.Type { case corev1.NodeReady: + unknown = false if cond.Status == corev1.ConditionTrue { ready = true } - default: // invalid value for Type - return nrpe.UNKNOWN, "node in unknown state (invalid condition)" + case corev1.NodeOutOfDisk: + if cond.Status == corev1.ConditionTrue { + resourceTrouble = true + resourceTroubleMsg = resourceTroubleMsg + "out of disk " + } + case corev1.NodeDiskPressure: + if cond.Status == corev1.ConditionTrue { + resourceTrouble = true + resourceTroubleMsg = resourceTroubleMsg + "has disk pressure " + } + case corev1.NodeMemoryPressure: + if cond.Status == corev1.ConditionTrue { + resourceTrouble = true + resourceTroubleMsg = resourceTroubleMsg + "has memory pressure " + } + case corev1.NodeNetworkUnavailable: + if cond.Status == corev1.ConditionTrue { + resourceTrouble = true + resourceTroubleMsg = resourceTroubleMsg + "network unavailable " + } } } + if unknown { + return nrpe.UNKNOWN, "node in unknown state (invalid condition)" + } + + if ready && resourceTrouble { + return nrpe.WARNING, "node ready but hits resource limits ( " + resourceTroubleMsg + ")" + } + if ready && nosched { return nrpe.WARNING, "node ready but no scheduling allowed" } @@ -276,6 +307,10 @@ func checkNode(name string, kube kubernetes.Interface) (nrpe.Result, string) { return nrpe.OK, "node ready" } + if !ready && resourceTrouble { + return nrpe.CRITICAL, "node not ready ( " + resourceTroubleMsg + ")" + } + return nrpe.CRITICAL, "node not ready" } diff --git a/check_kubernetes_test.go b/check_kubernetes_test.go index 40e65a6..a14a399 100644 --- a/check_kubernetes_test.go +++ b/check_kubernetes_test.go @@ -462,6 +462,28 @@ func TestNode(t *testing.T) { result: nrpe.UNKNOWN, message: "unknown", }, + { + node: corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "foo10"}, + Status: corev1.NodeStatus{Conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + {Type: corev1.NodeOutOfDisk, Status: corev1.ConditionFalse}}}, + }, + name: "Ready", + result: nrpe.OK, + message: "ready", + }, + { + node: corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "foo11"}, + Status: corev1.NodeStatus{Conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + {Type: corev1.NodeOutOfDisk, Status: corev1.ConditionTrue}}}, + }, + name: "OutOfDisk", + result: nrpe.WARNING, + message: "disk", + }, } for _, test := range tests {