From 897a661edeb39c2d55c51671c5d9652c4b2ebeb0 Mon Sep 17 00:00:00 2001 From: Mahesh Date: Fri, 10 Nov 2023 20:12:37 +0530 Subject: [PATCH] Fixed Readme, healthcheck scripts and added example folder with automation script --- README.md | 91 +++++++++++++++++++ apps.kruise.io/CloneSet/health.lua | 16 ++-- .../CloneSet/testdata/degraded.yaml | 6 +- apps.kruise.io/CloneSet/testdata/healthy.yaml | 6 +- apps.kruise.io/DaemonSet/health.lua | 18 ++-- .../DaemonSet/testdata/degraded.yaml | 8 +- .../DaemonSet/testdata/healthy.yaml | 4 + apps.kruise.io/StatefulSet/health.lua | 16 ++-- .../StatefulSet/testdata/degraded.yaml | 8 ++ .../StatefulSet/testdata/healthy.yaml | 8 ++ example/argocd.yaml | 20 ++++ .../cloneset/guestbook-clone.yaml | 27 ++++++ example/update-argocd-configmap.sh | 61 +++++++++++++ rollouts.kruise.io/Rollout/health.lua | 16 ++-- rollouts.kruise.io/Rollout/health_test.yaml | 4 + .../Rollout/testdata/no_workload_found.yaml | 29 ++++++ 16 files changed, 297 insertions(+), 41 deletions(-) create mode 100644 example/argocd.yaml create mode 100644 example/openkruise_workloads/cloneset/guestbook-clone.yaml create mode 100644 example/update-argocd-configmap.sh create mode 100644 rollouts.kruise.io/Rollout/testdata/no_workload_found.yaml diff --git a/README.md b/README.md index 0df842b..f83fbb6 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,97 @@ ArgoCD, in its default configuration, would consider this application "healthy." ## Our Solution To address this challenge, we've developed a solution using Lua scripts, which can detect and report the health status of custom workload resources. These Lua scripts, along with corresponding tests, are intended to be contributed back to the ArgoCD project. By doing so, we aim to empower ArgoCD with the ability to recognize and display the health status of advanced workload resources accurately. +## How to use the integration scripts with Argo-cd + +### Requirements : + +1. Kubernetes Cluster or Local minikube installed. 
+ +2. Argo-cd installed inside your cluster or local machine. + +3. Kruise CRDs installed in your cluster or local machine. + +This README will guide you through the process of integrating Lua scripts with ArgoCD for managing workloads. Lua scripts can help ArgoCD monitor the health status of workloads effectively. In this example, we'll use the following CloneSet: + +### Step 1: Install Openkruise as per the instructions given [here](https://openkruise.io/docs/installation/) + +### Step 2: Setup the Argo-cd Pipeline or follow [this](https://openkruise.io/docs/best-practices/gitops-with-kruise/#tekton-pipeline--argo-cd) guide + +1. Change to the example folder with ```cd example/``` and run ```kubectl apply -f argocd.yaml```. + +2. You will have an argocd pipeline set up and ready to apply and test artifacts. + +3. Change the image version or replicas count inside openkruise_workloads/cloneset/guestbook-clone.yaml. + +4. Now you will see the changes reflected inside your Kubernetes cluster. 
+ +### Step 3: Argo-cd CloneSet Health Check + +### Manual way + +Configure CloneSet Argo-cd [Custom CRD Health Checks](https://argo-cd.readthedocs.io/en/stable/operator-manual/health/#custom-health-checks). With this configuration, argo-cd is able to perform a health check of the CloneSet, such as whether the CloneSet is published and whether the Pods are ready, as follows: + +``` +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: argocd-cm + app.kubernetes.io/part-of: argocd + name: argocd-cm + namespace: argocd +data: + resource.customizations.health.apps.kruise.io_CloneSet: | + hs={ status = "Progressing", message = "Waiting for initialization" } + + if obj.status ~= nil then + + if obj.metadata.generation == obj.status.observedGeneration then + + if obj.spec.updateStrategy then + if obj.spec.updateStrategy.paused == true then + hs.status = "Suspended" + hs.message = "Cloneset is paused" + return hs + elseif obj.spec.updateStrategy.partition ~= 0 then + if obj.status.updatedReplicas >= obj.status.expectedUpdatedReplicas then + hs.status = "Suspended" + hs.message = "Cloneset needs manual intervention" + return hs + end + end + + elseif obj.status.updatedAvailableReplicas == obj.status.replicas then + hs.status = "Healthy" + hs.message = "All Cloneset workloads are ready and updated" + return hs + + else + if obj.status.updatedAvailableReplicas ~= obj.status.replicas then + hs.status = "Degraded" + hs.message = "Some replicas are not ready or available" + return hs + end + end + end + end + +return hs +``` + +### Automated way + +1. Change the directory to the example folder by doing ```cd example/``` + +2. Now run the automation script update-argocd-configmap.sh by doing +```bash update-argocd-configmap.sh``` + +3. This will edit the lua script and will add the necessary configmap configurations in order to check the CloneSet workload health and display it. 
+ +### Step 4: You can view the health conditions of workload with argo-cd UI or CLI + +![Preview](https://openkruise.io/assets/images/argo_sync_healthy-47754891eaf67731ab458189bd61ce7b.png) + ## How to Contribute We encourage you to be a part of this effort to enhance ArgoCD's capabilities. Here are the steps to get involved: diff --git a/apps.kruise.io/CloneSet/health.lua b/apps.kruise.io/CloneSet/health.lua index ce5c05b..197ab75 100644 --- a/apps.kruise.io/CloneSet/health.lua +++ b/apps.kruise.io/CloneSet/health.lua @@ -4,17 +4,15 @@ if obj.status ~= nil then if obj.metadata.generation == obj.status.observedGeneration then - if obj.spec.updateStrategy then - if obj.spec.updateStrategy.paused == true then + if obj.spec.updateStrategy.paused == true or not obj.status.updatedAvailableReplicas then + hs.status = "Suspended" + hs.message = "Cloneset is paused" + return hs + elseif obj.spec.updateStrategy.partition ~= 0 and obj.metadata.generation > 1 then + if obj.status.updatedReplicas >= obj.status.expectedUpdatedReplicas then hs.status = "Suspended" - hs.message = "Cloneset is paused" + hs.message = "Cloneset needs manual intervention" return hs - elseif obj.spec.updateStrategy.partition ~= 0 then - if obj.status.updatedReplicas >= obj.status.expectedUpdatedReplicas then - hs.status = "Suspended" - hs.message = "Cloneset needs manual intervention" - return hs - end end elseif obj.status.updatedAvailableReplicas == obj.status.replicas then diff --git a/apps.kruise.io/CloneSet/testdata/degraded.yaml b/apps.kruise.io/CloneSet/testdata/degraded.yaml index d7da998..36e9a0d 100644 --- a/apps.kruise.io/CloneSet/testdata/degraded.yaml +++ b/apps.kruise.io/CloneSet/testdata/degraded.yaml @@ -3,7 +3,7 @@ kind: CloneSet metadata: name: cloneset-test namespace: kruise - generation: 5 + generation: 1 labels: app: sample spec: @@ -19,9 +19,11 @@ spec: containers: - name: nginx image: nginx:alpine + updateStrategy: + paused: false status: - observedGeneration: 5 + 
observedGeneration: 1 replicas: 2 updatedReadyReplicas: 1 updatedAvailableReplicas: 1 diff --git a/apps.kruise.io/CloneSet/testdata/healthy.yaml b/apps.kruise.io/CloneSet/testdata/healthy.yaml index f513b9f..8a19353 100644 --- a/apps.kruise.io/CloneSet/testdata/healthy.yaml +++ b/apps.kruise.io/CloneSet/testdata/healthy.yaml @@ -3,7 +3,7 @@ kind: CloneSet metadata: name: cloneset-test namespace: kruise - generation: 2 + generation: 1 labels: app: sample spec: @@ -19,10 +19,12 @@ spec: containers: - name: nginx image: nginx:alpine + updateStrategy: + paused: false status: - observedGeneration: 2 + observedGeneration: 1 replicas: 2 updatedReadyReplicas: 2 updatedAvailableReplicas: 2 diff --git a/apps.kruise.io/DaemonSet/health.lua b/apps.kruise.io/DaemonSet/health.lua index 03edec8..525cccd 100644 --- a/apps.kruise.io/DaemonSet/health.lua +++ b/apps.kruise.io/DaemonSet/health.lua @@ -4,17 +4,15 @@ if obj.status ~= nil then if obj.metadata.generation == obj.status.observedGeneration then - if obj.spec.updateStrategy then - if obj.spec.updateStrategy.rollingUpdate.paused == true then + if obj.spec.updateStrategy.rollingUpdate.paused == true or not obj.status.updatedNumberScheduled then + hs.status = "Suspended" + hs.message = "Daemonset is paused" + return hs + elseif obj.spec.updateStrategy.rollingUpdate.partition ~= 0 and obj.metadata.generation > 1 then + if obj.status.updatedNumberScheduled >= (obj.status.desiredNumberScheduled - obj.spec.updateStrategy.rollingUpdate.partition) then hs.status = "Suspended" - hs.message = "Daemonset is paused" + hs.message = "Daemonset needs manual intervention" return hs - elseif obj.spec.updateStrategy.rollingUpdate.partition ~= 0 then - if obj.status.updatedNumberScheduled > (obj.status.desiredNumberScheduled - obj.spec.updateStrategy.rollingUpdate.partition) then - hs.status = "Suspended" - hs.message = "Daemonset needs manual intervention" - return hs - end end elseif (obj.status.updatedNumberScheduled == 
obj.status.desiredNumberScheduled) and (obj.status.numberAvailable == obj.status.desiredNumberScheduled) then @@ -23,7 +21,7 @@ if obj.status ~= nil then return hs else - if (obj.status.updatedNumberScheduled ~= obj.status.desiredNumberScheduled) and (obj.status.numberAvailable ~= obj.status.desiredNumberScheduled) then + if (obj.status.updatedNumberScheduled == obj.status.desiredNumberScheduled) and obj.status.numberUnavailable > 0 then hs.status = "Degraded" hs.message = "Some pods are not ready or available" return hs diff --git a/apps.kruise.io/DaemonSet/testdata/degraded.yaml b/apps.kruise.io/DaemonSet/testdata/degraded.yaml index ac2ba3d..ed8cbc0 100644 --- a/apps.kruise.io/DaemonSet/testdata/degraded.yaml +++ b/apps.kruise.io/DaemonSet/testdata/degraded.yaml @@ -18,13 +18,17 @@ spec: containers: - name: nginx image: nginx:alpine + updateStrategy: + rollingUpdate: + partition: 0 + paused: false status: currentNumberScheduled: 1 daemonSetHash: 5dffcdfcd7 desiredNumberScheduled: 1 - numberAvailable: 0 + numberUnavailable: 1 numberMisscheduled: 0 numberReady: 0 observedGeneration: 1 - updatedNumberScheduled: 0 + updatedNumberScheduled: 1 diff --git a/apps.kruise.io/DaemonSet/testdata/healthy.yaml b/apps.kruise.io/DaemonSet/testdata/healthy.yaml index 3d818bc..6224ebf 100644 --- a/apps.kruise.io/DaemonSet/testdata/healthy.yaml +++ b/apps.kruise.io/DaemonSet/testdata/healthy.yaml @@ -18,6 +18,10 @@ spec: containers: - name: nginx image: nginx:alpine + updateStrategy: + rollingUpdate: + partition: 0 + paused: false status: currentNumberScheduled: 1 diff --git a/apps.kruise.io/StatefulSet/health.lua b/apps.kruise.io/StatefulSet/health.lua index 4abd2cb..ddd4660 100644 --- a/apps.kruise.io/StatefulSet/health.lua +++ b/apps.kruise.io/StatefulSet/health.lua @@ -4,17 +4,15 @@ if obj.status ~= nil then if obj.metadata.generation == obj.status.observedGeneration then - if obj.spec.updateStrategy then - if obj.spec.updateStrategy.rollingUpdate.paused == true then + if 
obj.spec.updateStrategy.rollingUpdate.paused == true or not obj.status.updatedAvailableReplicas then + hs.status = "Suspended" + hs.message = "Statefulset is paused" + return hs + elseif obj.spec.updateStrategy.rollingUpdate.partition ~= 0 and obj.metadata.generation > 1 then + if obj.status.updatedReplicas >= (obj.status.replicas - obj.spec.updateStrategy.rollingUpdate.partition) then hs.status = "Suspended" - hs.message = "Statefulset is paused" + hs.message = "Statefulset needs manual intervention" return hs - elseif obj.spec.updateStrategy.rollingUpdate.partition ~= 0 then - if obj.status.updatedReplicas > (obj.status.replicas - obj.spec.updateStrategy.rollingUpdate.partition) then - hs.status = "Suspended" - hs.message = "Statefulset needs manual intervention" - return hs - end end elseif obj.status.updatedAvailableReplicas == obj.status.replicas then diff --git a/apps.kruise.io/StatefulSet/testdata/degraded.yaml b/apps.kruise.io/StatefulSet/testdata/degraded.yaml index 5bd46a2..88e5891 100644 --- a/apps.kruise.io/StatefulSet/testdata/degraded.yaml +++ b/apps.kruise.io/StatefulSet/testdata/degraded.yaml @@ -19,6 +19,14 @@ spec: containers: - name: nginx image: nginx:alpine + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + minReadySeconds: 0 + paused: false + partition: 0 + podUpdatePolicy: ReCreate + type: RollingUpdate status: observedGeneration: 5 diff --git a/apps.kruise.io/StatefulSet/testdata/healthy.yaml b/apps.kruise.io/StatefulSet/testdata/healthy.yaml index 683a44c..793de25 100644 --- a/apps.kruise.io/StatefulSet/testdata/healthy.yaml +++ b/apps.kruise.io/StatefulSet/testdata/healthy.yaml @@ -19,6 +19,14 @@ spec: containers: - name: nginx image: nginx:alpine + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + minReadySeconds: 0 + paused: false + partition: 0 + podUpdatePolicy: ReCreate + type: RollingUpdate status: observedGeneration: 2 diff --git a/example/argocd.yaml b/example/argocd.yaml new file mode 100644 index 0000000..79218f2 --- 
/dev/null +++ b/example/argocd.yaml @@ -0,0 +1,20 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: openkruise-cloneset-test + namespace: argocd +spec: + project: default + source: + repoURL: 'https://github.com/maheshkasabe/kruise-argo/' + path: example/openkruise_workloads/cloneset/ + targetRevision: HEAD + destination: + server: 'https://kubernetes.default.svc' + namespace: default + syncPolicy: + syncOptions: + - CreateNamespace=true # to create the namaspace if not exsists + automated: + prune: true + selfHeal: true \ No newline at end of file diff --git a/example/openkruise_workloads/cloneset/guestbook-clone.yaml b/example/openkruise_workloads/cloneset/guestbook-clone.yaml new file mode 100644 index 0000000..1f1371c --- /dev/null +++ b/example/openkruise_workloads/cloneset/guestbook-clone.yaml @@ -0,0 +1,27 @@ +apiVersion: apps.kruise.io/v1alpha1 +kind: CloneSet +metadata: + name: guestbook-clone + labels: + app.kubernetes.io/name: guestbook-clone +spec: + replicas: 5 + selector: + matchLabels: + app.kubernetes.io/name: guestbook-clone + template: + metadata: + labels: + app.kubernetes.io/name: guestbook-clone + spec: + containers: + - name: guestbook + image: openkruise/guestbook:v2 + imagePullPolicy: Always + ports: + - name: http-server + containerPort: 3000 + updateStrategy: + type: InPlaceIfPossible + paused: true + maxUnavailable: 3 \ No newline at end of file diff --git a/example/update-argocd-configmap.sh b/example/update-argocd-configmap.sh new file mode 100644 index 0000000..72d05dd --- /dev/null +++ b/example/update-argocd-configmap.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +# Set the target ConfigMap name and namespace +CONFIGMAP_NAME="argocd-cm" +NAMESPACE="argocd" + +# Define the new data to be added +NEW_DATA=$(cat <