From 7b40e3a1dfd76ce1988714316287a0938f2f861e Mon Sep 17 00:00:00 2001 From: Karthik Satchitanand Date: Tue, 29 Sep 2020 18:03:23 +0530 Subject: [PATCH] [Cherry-Pick for v1.8.2] (#157) * refactor(events): removing unnecessary event (#139) Signed-off-by: shubhamchaudhary * chore(ipfilter): Adding ipfilter in network chaos for containerd/crio (#140) Signed-off-by: shubhamchaudhary * (fix)containerKill: Fix the container kill experiment for containerd runtime (#142) Signed-off-by: Udit Gaurav * chore(log-mgmt): Refactoring code for better log/error mgmt (#141) Signed-off-by: shubhamchaudhary * chore(actions): Add GitHub action custom test to test a perticular PR (#143) Signed-off-by: Udit Gaurav * Fix typo (#147) Signed-off-by: Moti Asayag * Refactor/Modify travis and makefile to add linting and formating code (#136) Signed-off-by: Udit Gaurav * (chore)action: Fix the actions for merge (#149) Signed-off-by: Udit Gaurav * (chore)dockerfile: Optimize Dockerfile (#150) Signed-off-by: Udit Gaurav * chore(abort): Adding support for abortion in all experiments (#145) Signed-off-by: shubhamchaudhary * chore(probe): updating probes status in abort case (#154) Signed-off-by: shubhamchaudhary * fix(parelle;-execution): Adding parellel execution in pod cpu/memory-hog experiments (#152) Signed-off-by: shubhamchaudhary Co-authored-by: Shubham Chaudhary Co-authored-by: UDIT GAURAV <35391335+uditgaurav@users.noreply.github.com> Co-authored-by: Moti Asayag --- .github/workflows/guide.md | 141 ++++++ .github/workflows/main.yml | 407 ++++++++++++++++++ .travis.yml | 20 +- Makefile | 58 ++- README.md | 8 +- build/litmus-go/Dockerfile | 23 +- .../litmus/container-kill/helper/crictl.go | 16 +- .../container-kill/lib/container-kill.go | 20 +- chaoslib/litmus/disk-fill/lib/disk-fill.go | 30 +- .../lib/kubelet-service-kill.go | 12 +- chaoslib/litmus/network-chaos/helper/netem.go | 97 ++++- .../litmus/network-chaos/lib/network-chaos.go | 50 ++- .../litmus/node-cpu-hog/lib/node-cpu-hog.go | 16 +- chaoslib/litmus/node-drain/lib/node-drain.go | 67 ++- .../node-io-stress/lib/node-io-stress.go | 10 +- .../node-memory-hog/lib/node-memory-hog.go | 16 +- chaoslib/litmus/node-taint/lib/node-taint.go | 68 ++- .../pod-autoscaler/lib/pod-autoscaler.go | 23 +- .../litmus/pod-cpu-hog/lib/pod-cpu-hog.go | 235 +++++++--- chaoslib/litmus/pod-delete/lib/pod-delete.go | 11 +- .../pod-memory-hog/lib/pod-memory-hog.go | 263 ++++++++--- .../powerfulseal/pod-delete/lib/pod-delete.go | 6 +- .../container-kill/lib/container-kill.go | 14 +- chaoslib/pumba/cpu-chaos/lib/cpu-chaos.go | 14 +- .../pumba/memory-chaos/lib/memory-chaos.go | 14 +- .../pumba/network-chaos/lib/network-chaos.go | 9 +- .../pumba/pod-io-stress/lib/pod-io-stress.go | 14 +- contribute/developer-guide/README.md | 8 +- .../cassandra/pod-delete/pod-delete.go | 49 ++- .../generic/container-kill/container-kill.go | 29 +- experiments/generic/disk-fill/disk-fill.go | 25 +- .../kubelet-service-kill.go | 25 +- .../network-latency/network-latency.go | 1 - .../generic/node-cpu-hog/node-cpu-hog.go | 30 +- experiments/generic/node-drain/node-drain.go | 25 +- .../generic/node-io-stress/node-io-stress.go | 30 +- .../node-memory-hog/node-memory-hog.go | 26 +- experiments/generic/node-taint/node-taint.go | 25 +- .../generic/pod-autoscaler/pod-autoscaler.go | 16 +- .../generic/pod-cpu-hog/pod-cpu-hog.go | 29 +- experiments/generic/pod-cpu-hog/test/test.yml | 6 + experiments/generic/pod-delete/pod-delete.go | 30 +- .../generic/pod-io-stress/pod-io-stress.go | 26 +- 
.../generic/pod-memory-hog/pod-memory-hog.go | 29 +- .../generic/pod-memory-hog/test/test.yml | 6 + .../pod-network-corruption.go | 29 +- .../pod-network-duplication.go | 29 +- .../pod-network-latency.go | 28 +- .../pod-network-loss/pod-network-loss.go | 29 +- pkg/cassandra/liveness.go | 65 +-- pkg/cassandra/node-tools.go | 16 +- pkg/clients/clientset.go | 4 +- .../pod-cpu-hog/environment/environment.go | 2 + pkg/generic/pod-cpu-hog/types/types.go | 2 + .../pod-memory-hog/environment/environment.go | 2 + pkg/generic/pod-memory-hog/types/types.go | 2 + pkg/probe/cmdprobe.go | 2 +- pkg/result/chaosresult.go | 2 +- pkg/status/application.go | 6 +- pkg/status/nodes.go | 4 +- pkg/utils/common/common.go | 43 +- pkg/utils/exec/exec.go | 2 +- 62 files changed, 1723 insertions(+), 621 deletions(-) create mode 100644 .github/workflows/guide.md create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/guide.md b/.github/workflows/guide.md new file mode 100644 index 000000000..8ac605756 --- /dev/null +++ b/.github/workflows/guide.md @@ -0,0 +1,141 @@ +# Run E2E tests using GitHub Chaos Actions + +- When you commit code to your repository, you can continuously build and test the code to make sure that the commit doesn't introduce errors. The error could be in the form of some security issue, functional issue, or performance issue which can be tested using different custom tests, linters, or by pulling actions. This brings the need of having *Chaos Actions* which will perform a chaos test on the application over a particular commit which in-turn helps to track the performance of the application on a commit level. This can be done by commenting on the Pull Request. + +## Through comments on PR + +- We can run tests for any desired experiment or set of experiments by just commenting on the Pull Request. The format of comment will be: + +```bash + +/run-e2e- + +``` + +_Experiments Available for custom bot:_ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Resource chaos | Network Chaos | IO Chaos | Others |
+|----------------|---------------|----------|--------|
+| pod-cpu-hog | pod-network-latency | node-io-stress | pod-delete |
+| pod-memory-hog | pod-network-loss | | container-kill |
+| node-cpu-hog | pod-network-corruption | | pod-autoscaler |
+| node-memory-hog | pod-network-duplication | | |
+
+### Group Tests
+
+| Command | Description |
+|---------|-------------|
+| `/run-e2e-all` | Runs all available tests. This includes all resource chaos, network chaos, IO chaos, and other tests. The result comment is updated as each test passes. |
+| `/run-e2e-network-chaos` | Runs all network chaos tests: pod-network-corruption, pod-network-duplication, pod-network-loss, and pod-network-latency. |
+| `/run-e2e-resource-chaos` | Runs all resource chaos tests: pod-level CPU and memory chaos and node-level CPU and memory chaos. |
+| `/run-e2e-io-chaos` | Runs all IO chaos tests. Currently this only includes node-io-stress. |
+
+### Individual Tests
+
+| Command | Description |
+|---------|-------------|
+| `/run-e2e-pod-delete` | Runs the pod-delete chaos test using the GitHub chaos action, which deletes (fails) the application pod |
+| `/run-e2e-container-kill` | Runs the container-kill experiment using the GitHub chaos action, which kills containers of the application pod |
+| `/run-e2e-pod-cpu-hog` | Runs the pod-level CPU chaos experiment using the GitHub chaos action, which consumes CPU resources on the application container |
+| `/run-e2e-pod-memory-hog` | Runs the pod-level memory chaos test, which consumes memory resources on the application container |
+| `/run-e2e-node-cpu-hog` | Runs the node-level CPU chaos test, which exhausts CPU resources on the Kubernetes node |
+| `/run-e2e-node-memory-hog` | Runs the node-level memory chaos test, which exhausts memory resources on the Kubernetes node |
+| `/run-e2e-node-io-stress` | Runs the node-level IO chaos test, which applies IO stress on the Kubernetes node |
+| `/run-e2e-pod-network-corruption` | Runs the pod-network-corruption test, which injects network packet corruption into the application pod |
+| `/run-e2e-pod-network-latency` | Runs the pod-network-latency test, which injects network packet latency into the application pod |
+| `/run-e2e-pod-network-loss` | Runs the pod-network-loss test, which injects network packet loss into the application pod |
+| `/run-e2e-pod-network-duplication` | Runs the pod-network-duplication test, which injects network packet duplication into the application pod |
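+
+For example, a reviewer can trigger either a single experiment or a whole category by commenting on the pull request. The commands below are taken from the tables above and are matched by the `startsWith` conditions in `main.yml`; this is an illustrative pair of comments, and any command listed above works the same way:
+
+```bash
+# Trigger one experiment on this PR
+/run-e2e-pod-delete
+
+# Trigger an entire category of tests
+/run-e2e-network-chaos
+```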
+ + +***Note:*** *All the tests are performed on a KinD cluster with containerd runtime.* + +## Merge a Pull Request + +- For auto merging, we need to comment `/merge` in the PR which will add a label `merge` in the PR and then finally merge the PR according to the ENVs provided. + +_Minimum Number of Approvals:_ + +- The action will automatically check if the required number of review approvals has been reached. If the number is not reached, it will not merge the PR. + +- It will work according to the role of the commenter and branch protection Rule on the repository. diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 000000000..29c06ecf8 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,407 @@ +name: LitmusGo-CI +on: + issue_comment: + types: [created] + +jobs: + tests: + if: contains(github.event.comment.html_url, '/pull/') && startsWith(github.event.comment.body, '/run-e2e') + runs-on: ubuntu-latest + steps: + + - name: Notification for e2e Start + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + **Test Status:** The e2e test has been started please wait for the results ... + **** + | Experiment | Result | Runtime | + |------------|--------|---------| + + #Using the last commit id of pull request + - uses: octokit/request-action@v2.x + id: get_PR_commits + with: + route: GET /repos/:repo/pulls/:pull_number/commits + repo: ${{ github.repository }} + pull_number: ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: set commit to output + id: getcommit + run: | + prsha=$(echo $response | jq '.[-1].sha' | tr -d '"') + echo "::set-output name=sha::$prsha" + env: + response: ${{ steps.get_PR_commits.outputs.data }} + + - uses: actions/checkout@v2 + with: + ref: ${{steps.getcommit.outputs.sha}} + + - name: Generating Go binary and Building docker image + run: | + make build + + #Install and configure a kind cluster + - name: Installing Prerequisites (KinD Cluster) + uses: engineerd/setup-kind@v0.4.0 + with: + version: "v0.7.0" + + - name: Configuring and testing the Installation + run: | + kubectl cluster-info --context kind-kind + kind get kubeconfig --internal >$HOME/.kube/config + kubectl get nodes + + - name: Load image on the nodes of the cluster + run: | + kind load docker-image --name=kind litmuschaos/go-runner:ci + + - name: Deploy a sample application for chaos injection + run: | + kubectl apply -f https://raw.githubusercontent.com/mayadata-io/chaos-ci-lib/master/app/nginx.yml + sleep 30 + + - name: Setting up kubeconfig ENV for Github Chaos Action + run: echo ::set-env name=KUBE_CONFIG_DATA::$(base64 -w 0 ~/.kube/config) + + - name: Running Litmus pod delete chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-pod-delete') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-delete + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + LITMUS_CLEANUP: true + + - name: Update pod delete result + if: startsWith(github.event.comment.body, '/run-e2e-pod-delete') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod Delete | Pass | containerd | + + - name: Running container kill chaos experiment + if: startsWith(github.event.comment.body, 
'/run-e2e-container-kill') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: container-kill + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + CONTAINER_RUNTIME: containerd + LITMUS_CLEANUP: true + + - name: Update container-kill result + if: startsWith(github.event.comment.body, '/run-e2e-container-kill') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Container Kill | Pass | containerd | + + - name: Running node-cpu-hog chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-node-cpu-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: node-cpu-hog + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + LITMUS_CLEANUP: true + + - name: Update node-cpu-hog result + if: startsWith(github.event.comment.body, '/run-e2e-node-cpu-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Node CPU Hog | Pass | containerd | + + - name: Running node-memory-hog chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-node-memory-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: node-memory-hog + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + LITMUS_CLEANUP: true + + - name: Update node-memory-hog result + if: startsWith(github.event.comment.body, '/run-e2e-node-memory-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Node MEMORY Hog | Pass | containerd | + + - name: Running pod-cpu-hog chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-pod-cpu-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-cpu-hog + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TARGET_CONTAINER: nginx + TOTAL_CHAOS_DURATION: 60 + CPU_CORES: 1 + LITMUS_CLEANUP: true + + - name: Update pod-cpu-hog result + if: startsWith(github.event.comment.body, '/run-e2e-pod-cpu-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod CPU Hog | Pass | containerd | + + - name: Running pod-memory-hog chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-pod-memory-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || 
startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-cpu-hog + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TARGET_CONTAINER: nginx + TOTAL_CHAOS_DURATION: 60 + MEMORY_CONSUMPTION: 500 + LITMUS_CLEANUP: true + + - name: Update pod-memory-hog result + if: startsWith(github.event.comment.body, '/run-e2e-pod-memory-hog') || startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod Memory Hog | Pass | containerd | + + - name: Running pod network corruption chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-pod-network-corruption') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-network-corruption + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TARGET_CONTAINER: nginx + TOTAL_CHAOS_DURATION: 60 + NETWORK_INTERFACE: eth0 + CONTAINER_RUNTIME: containerd + LITMUS_CLEANUP: true + + - name: Update pod-network-corruption result + if: startsWith(github.event.comment.body, '/run-e2e-pod-network-corruption') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod Network Corruption | Pass | containerd | + + - name: Running pod network duplication chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-pod-network-duplication') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-network-duplication + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TARGET_CONTAINER: nginx + TOTAL_CHAOS_DURATION: 60 + NETWORK_INTERFACE: eth0 + CONTAINER_RUNTIME: containerd + LITMUS_CLEANUP: true + + - name: Update pod-network-duplication result + if: startsWith(github.event.comment.body, '/run-e2e-pod-network-duplication') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod Network Duplication | Pass | containerd | + + - name: Running pod-network-latency chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-pod-network-latency') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-network-latency + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TARGET_CONTAINER: nginx + TOTAL_CHAOS_DURATION: 60 + NETWORK_INTERFACE: eth0 + NETWORK_LATENCY: 60000 + CONTAINER_RUNTIME: containerd + LITMUS_CLEANUP: true + + - name: Update pod-network-latency result + if: 
startsWith(github.event.comment.body, '/run-e2e-pod-network-latency') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod Network Latency | Pass | containerd | + + - name: Running pod-network-loss chaos experiment + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-network-loss + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TARGET_CONTAINER: nginx + TOTAL_CHAOS_DURATION: 60 + NETWORK_INTERFACE: eth0 + NETWORK_PACKET_LOSS_PERCENTAGE: 100 + CONTAINER_RUNTIME: containerd + LITMUS_CLEANUP: true + + - name: Update pod-network-loss result + if: startsWith(github.event.comment.body, '/run-e2e-pod-network-loss') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod Network Loss | Pass | containerd | + + - name: Running pod autoscaler chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-pod-autoscaler') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: pod-autoscaler + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TOTAL_CHAOS_DURATION: 60 + LITMUS_CLEANUP: true + + - name: Update pod-autoscaler result + if: startsWith(github.event.comment.body, '/run-e2e-pod-autoscaler') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Pod Autoscaler | Pass | containerd | + + - name: Running node-io-stress chaos experiment + if: startsWith(github.event.comment.body, '/run-e2e-node-io-stress') || startsWith(github.event.comment.body, '/run-e2e-io-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: mayadata-io/github-chaos-actions@v0.2.0 + env: + INSTALL_LITMUS: true + EXPERIMENT_NAME: node-io-stress + EXPERIMENT_IMAGE: litmuschaos/go-runner + EXPERIMENT_IMAGE_TAG: ci + IMAGE_PULL_POLICY: IfNotPresent + TOTAL_CHAOS_DURATION: 120 + FILESYSTEM_UTILIZATION_PERCENTAGE: 10 + LITMUS_CLEANUP: true + + - name: Update node-io-stress result + if: startsWith(github.event.comment.body, '/run-e2e-node-io-stress') || startsWith(github.event.comment.body, '/run-e2e-io-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + | Node IO Stress | Pass | containerd | + + - name: Check the test run + if: | + startsWith(github.event.comment.body, '/run-e2e-pod-delete') || startsWith(github.event.comment.body, '/run-e2e-container-kill') || + startsWith(github.event.comment.body, '/run-e2e-node-cpu-hog') || startsWith(github.event.comment.body, '/run-e2e-node-memory-hog') || + startsWith(github.event.comment.body, '/run-e2e-pod-cpu-hog') || startsWith(github.event.comment.body, '/run-e2e-pod-memory-hog') || + startsWith(github.event.comment.body, '/run-e2e-pod-network-corruption') || startsWith(github.event.comment.body, '/run-e2e-pod-network-loss') || + startsWith(github.event.comment.body, 
'/run-e2e-pod-network-latency') || startsWith(github.event.comment.body, '/run-e2e-pod-network-duplication') || + startsWith(github.event.comment.body, '/run-e2e-pod-autoscaler') || startsWith(github.event.comment.body, '/run-e2e-node-io-stress') || + startsWith(github.event.comment.body, '/run-e2e-resource-chaos') || startsWith(github.event.comment.body, '/run-e2e-network-chaos') || + startsWith(github.event.comment.body, '/run-e2e-io-chaos') || startsWith(github.event.comment.body, '/run-e2e-all') + run: | + echo ::set-env name=TEST_RUN::true + + - name: Check for all the jobs are succeeded + if: ${{ success() && env.TEST_RUN == 'true' }} + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + **** + **Test Result:** All tests are passed + **Run ID:** [${{ env.RUN_ID }}](https://github.com/litmuschaos/litmus-go/actions/runs/${{ env.RUN_ID }}) + reactions: hooray + env: + RUN_ID: ${{ github.run_id }} + + - name: Check for any job failed + if: ${{ failure() }} + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + **** + **Test Failed:** Some tests are failed please check + **Run ID:** [${{ env.RUN_ID }}](https://github.com/litmuschaos/litmus-go/actions/runs/${{ env.RUN_ID }}) + reactions: confused + env: + RUN_ID: ${{ github.run_id }} + + - name: Deleting KinD cluster + if: ${{ always() }} + run: kind delete cluster + + - name: Check if any test ran or not + if: env.TEST_RUN != 'true' + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: "${{ github.event.comment.id }}" + body: | + **** + **Test Result:** No test found + **Run ID:** [${{ env.RUN_ID }}](https://github.com/litmuschaoslitmus-go/actions/runs/${{ env.RUN_ID }}) + reactions: eyes + env: + RUN_ID: ${{ github.run_id }} + + # This job will merge an equipped PR in two steps: + # Firstly it will add a merge label on the target PR and then it will merge the PR according to the envs provided. + merge: + if: contains(github.event.comment.html_url, '/pull/') && startsWith(github.event.comment.body, '/merge') + runs-on: ubuntu-latest + steps: + - name: Add a merge label + uses: actions-ecosystem/action-add-labels@v1 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + labels: merge + + # The action will automatically check if the required number of review approvals has been reached. 
+ - name: automerge + uses: "pascalgn/automerge-action@f81beb99aef41bb55ad072857d43073fba833a98" + env: + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + MERGE_LABELS: "merge,!WIP,!DO NOT MERGE" + MERGE_METHOD: "squash" + MERGE_FORKS: "true" + MERGE_RETRIES: "6" + MERGE_RETRY_SLEEP: "10000" + UPDATE_LABELS: "" + UPDATE_METHOD: "merge" + MERGE_DELETE_BRANCH: true diff --git a/.travis.yml b/.travis.yml index d72604190..4f14c5be4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,23 +1,33 @@ sudo: required -dist: xenial +os: linux +dist: bionic services: - docker language: go go: - - 1.13.1 + - 1.14.2 addons: apt: update: true +before_script: + - sudo apt-get update && sudo apt-get install golint + - export VERSION=$(curl --silent "https://api.github.com/repos/aquasecurity/trivy/releases/latest" | grep '"tag_name":' | sed -E 's/.*"v([^"]+)".*/\1/') + - sudo apt-get install -y rpm + - wget https://github.com/aquasecurity/trivy/releases/download/v${VERSION}/trivy_${VERSION}_Linux-64bit.tar.gz + - tar zxvf trivy_${VERSION}_Linux-64bit.tar.gz + script: # Installing and configuring dependencies - make deps - # Building go binaries - - make go-build + # Includes formatting, linting and check unused packages + - make gotasks # Build - make build + # Running trivy check + - make trivy-check after_success: - - make push \ No newline at end of file + - make push diff --git a/Makefile b/Makefile index ce328d1a6..97c035cf7 100644 --- a/Makefile +++ b/Makefile @@ -7,9 +7,10 @@ # IS_DOCKER_INSTALLED = $(shell which docker >> /dev/null 2>&1; echo $$?) -.PHONY: all -all: deps go-build build push +PACKAGES = $(shell go list ./... | grep -v '/vendor/') +.PHONY: all +all: deps gotasks build push trivy-check .PHONY: help help: @echo "" @@ -31,29 +32,64 @@ _build_check_docker: && exit 1; \ fi; -PHONY: go-build -go-build: experiment-go-binary +.PHONY: gotasks +gotasks: format lint unused-package-check -experiment-go-binary: +.PHONY: format +format: @echo "------------------" - @echo "--> Build experiment go binary" + @echo "--> Running go fmt" @echo "------------------" - @sh build/generate_go_binary + @go fmt $(PACKAGES) + +.PHONY: lint +lint: + @echo "------------------" + @echo "--> Running golint" + @echo "------------------" + @go get -u golang.org/x/lint/golint + @golint $(PACKAGES) + @echo "------------------" + @echo "--> Running go vet" + @echo "------------------" + @go vet $(PACKAGES) + +.PHONY: unused-package-check +unused-package-check: + @echo "------------------" + @echo "--> Check unused packages for the chaos-operator" + @echo "------------------" + @tidy=$$(go mod tidy); \ + if [ -n "$${tidy}" ]; then \ + echo "go mod tidy checking failed!"; echo "$${tidy}"; echo; \ + fi + .PHONY: build -build: litmus-go-build +build: -litmus-go-build: + @echo "------------------------------" + @echo "--> Build experiment go binary" + @echo "------------------------------" + @sh build/generate_go_binary @echo "------------------" @echo "--> Build go-runner image" @echo "------------------" sudo docker build . 
-f build/litmus-go/Dockerfile -t litmuschaos/go-runner:ci .PHONY: push -push: litmus-go-push +push: -litmus-go-push: @echo "------------------" @echo "--> go-runner image" @echo "------------------" REPONAME="litmuschaos" IMGNAME="go-runner" IMGTAG="ci" ./build/push + +.PHONY: trivy-check +trivy-check: + + @echo "------------------------" + @echo "---> Running Trivy Check" + @echo "------------------------" + @./trivy --exit-code 0 --severity HIGH --no-progress litmuschaos/go-runner:ci + @./trivy --exit-code 0 --severity CRITICAL --no-progress litmuschaos/go-runner:ci diff --git a/README.md b/README.md index 666621390..45c202372 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,13 @@ -# litmus-go: +# LitmusGo: -This repo consists of Litmus Chaos Experiments written in golang. The examples in this repo are good indicators +- This repo consists of Litmus Chaos Experiments written in golang. The examples in this repo are good indicators of how to construct the experiments in golang: complete with steady state checks, chaosresult generation, chaos injection etc.., post chaos checks, create events and reports for observability and configure sinks for these. +## Run E2E on a Pull Request + +- We can run a certain number of custom tests on a PR using GitHub chaos actions read about [custom bot](https://github.com/litmuschaos/litmus-go/blob/master/.github/workflows/guide.md) to know more. + **NOTE** - This repo can be viewed as an extension to the [litmuschaos/litmus](https://github.com/litmuschaos/litmus) repo diff --git a/build/litmus-go/Dockerfile b/build/litmus-go/Dockerfile index 5441e24b3..6ad64472d 100644 --- a/build/litmus-go/Dockerfile +++ b/build/litmus-go/Dockerfile @@ -1,29 +1,12 @@ -FROM ubuntu:18.04 as builder - -# intall gcc and supporting packages -RUN apt-get update && apt-get install -yq make gcc - -WORKDIR /code - -# download stress-ng sources -ARG STRESS_NG_VERSION -ENV STRESS_NG_VERSION ${STRESS_NG_VERSION:-0.10.10} -ADD https://github.com/ColinIanKing/stress-ng/archive/V${STRESS_NG_VERSION}.tar.gz . 
-RUN tar -xf V${STRESS_NG_VERSION}.tar.gz && mv stress-ng-${STRESS_NG_VERSION} stress-ng - -# make static version -WORKDIR /code/stress-ng -RUN STATIC=1 make - FROM ubuntu:bionic LABEL maintainer="LitmusChaos" #Installing necessary ubuntu packages -RUN apt-get update && apt-get install -y curl bash systemd iproute2 +RUN apt-get update && apt-get install -y curl bash systemd iproute2 stress-ng #Installing Kubectl -ENV KUBE_LATEST_VERSION="v1.18.0" +ENV KUBE_LATEST_VERSION="v1.19.0" RUN curl -L https://storage.googleapis.com/kubernetes-release/release/${KUBE_LATEST_VERSION}/bin/linux/amd64/kubectl -o /usr/local/bin/kubectl && \ chmod +x /usr/local/bin/kubectl @@ -35,8 +18,6 @@ RUN curl -L https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.16 ENV PUMBA_VERSION="0.6.5" RUN curl -L https://github.com/alexei-led/pumba/releases/download/${PUMBA_VERSION}/pumba_linux_amd64 --output /usr/local/bin/pumba && chmod +x /usr/local/bin/pumba -COPY --from=builder /code/stress-ng/stress-ng / - #Copying Necessary Files COPY ./build/_output ./litmus/experiments diff --git a/chaoslib/litmus/container-kill/helper/crictl.go b/chaoslib/litmus/container-kill/helper/crictl.go index 03f981c1e..184e16e10 100644 --- a/chaoslib/litmus/container-kill/helper/crictl.go +++ b/chaoslib/litmus/container-kill/helper/crictl.go @@ -94,20 +94,20 @@ func KillContainer(experimentsDetails *experimentTypes.ExperimentDetails, client //Waiting for the chaos interval after chaos injection if experimentsDetails.ChaosInterval != 0 { - log.Infof("[Wait]: Wait for the chaos interval %vs", strconv.Itoa(experimentsDetails.ChaosInterval)) + log.Infof("[Wait]: Wait for the chaos interval %vs", experimentsDetails.ChaosInterval) waitForChaosInterval(experimentsDetails) } //Check the status of restarted container err = CheckContainerStatus(experimentsDetails, clients, experimentsDetails.TargetPod) if err != nil { - return errors.Errorf("Application container is not running, %v", err) + return errors.Errorf("Application container is not in running state, %v", err) } // It will verify that the restart count of container should increase after chaos injection err = VerifyRestartCount(experimentsDetails, experimentsDetails.TargetPod, clients, restartCountBefore) if err != nil { - return errors.Errorf("Target container is not restarted , err: %v", err) + return err } // generating the total duration of the experiment run @@ -120,7 +120,7 @@ func KillContainer(experimentsDetails *experimentTypes.ExperimentDetails, client } } - log.Infof("[Completion]: %v chaos is done", experimentsDetails.ExperimentName) + log.Infof("[Completion]: %v chaos has been completed", experimentsDetails.ExperimentName) return nil } @@ -140,7 +140,7 @@ func GetPodID(experimentsDetails *experimentTypes.ExperimentDetails) (string, er } - return "", fmt.Errorf("The application pod is unavailable") + return "", fmt.Errorf("%v pod is unavailable", experimentsDetails.TargetPod) } //GetContainerID derive the container id of the application container @@ -158,7 +158,7 @@ func GetContainerID(experimentsDetails *experimentTypes.ExperimentDetails, podID } - return "", fmt.Errorf("The application container is unavailable") + return "", fmt.Errorf("%v container is unavailable", experimentsDetails.TargetContainer) } @@ -178,7 +178,7 @@ func CheckContainerStatus(experimentsDetails *experimentTypes.ExperimentDetails, Try(func(attempt uint) error { pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{}) if err != nil { - return 
errors.Errorf("Unable to list the pod, err: %v", err) + return errors.Errorf("Unable to list the pod, due to %v", err) } for _, container := range pod.Status.ContainerStatuses { if container.Ready != true { @@ -252,7 +252,7 @@ func VerifyRestartCount(experimentsDetails *experimentTypes.ExperimentDetails, p Try(func(attempt uint) error { pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(podName, v1.GetOptions{}) if err != nil { - return errors.Errorf("Unable to get the application pod, err: %v", err) + return errors.Errorf("Unable to get the application pod, due to %v", err) } for _, container := range pod.Status.ContainerStatuses { if container.Name == experimentsDetails.TargetContainer { diff --git a/chaoslib/litmus/container-kill/lib/container-kill.go b/chaoslib/litmus/container-kill/lib/container-kill.go index aebd55aa9..f08b971bf 100644 --- a/chaoslib/litmus/container-kill/lib/container-kill.go +++ b/chaoslib/litmus/container-kill/lib/container-kill.go @@ -22,12 +22,12 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, // if the target pod is not defined it will derive the random target pod list using pod affected percentage targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -43,7 +43,7 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, if experimentsDetails.TargetContainer == "" { experimentsDetails.TargetContainer, err = GetTargetContainer(experimentsDetails, targetPodList.Items[0].Name, clients) if err != nil { - return errors.Errorf("Unable to get the target container name due to, err: %v", err) + return errors.Errorf("Unable to get the target container name, err: %v", err) } } @@ -53,7 +53,7 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("Unable to get annotations, err: %v", err) } // creating the helper pod to perform container kill chaos @@ -69,7 +69,7 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, log.Info("[Status]: Checking the status of the helper pods") err = status.CheckApplicationStatus(experimentsDetails.ChaosNamespace, "app="+experimentsDetails.ExperimentName+"-helper", experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - return errors.Errorf("helper pods is not in running state, err: %v", err) + return errors.Errorf("helper pods are not in running state, err: %v", err) } // Wait till the completion of the helper pod @@ -77,11 +77,11 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, log.Info("[Wait]: waiting till the 
completion of the helper pod") podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "app="+experimentsDetails.ExperimentName+"-helper", clients, experimentsDetails.ChaosDuration+experimentsDetails.ChaosInterval+60, experimentsDetails.ExperimentName) if err != nil || podStatus == "Failed" { - return errors.Errorf("helper pod failed due to, err: %v", err) + return errors.Errorf("helper pod failed, err: %v", err) } //Deleting all the helper pod for container-kill chaos - log.Info("[Cleanup]: Deleting all the helper pod") + log.Info("[Cleanup]: Deleting all the helper pods") err = common.DeleteAllPod("app="+experimentsDetails.ExperimentName+"-helper", experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients) if err != nil { return errors.Errorf("Unable to delete the helper pod, err: %v", err) @@ -89,7 +89,7 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -120,7 +120,7 @@ func GetServiceAccount(experimentsDetails *experimentTypes.ExperimentDetails, cl func GetTargetContainer(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets) (string, error) { pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{}) if err != nil { - return "", errors.Wrapf(err, "Fail to get the application pod status, due to:%v", err) + return "", err } return pod.Spec.Containers[0].Name, nil @@ -173,7 +173,7 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie Image: experimentsDetails.LIBImage, ImagePullPolicy: apiv1.PullAlways, Command: []string{ - "bin/bash", + "/bin/bash", }, Args: []string{ "-c", diff --git a/chaoslib/litmus/disk-fill/lib/disk-fill.go b/chaoslib/litmus/disk-fill/lib/disk-fill.go index 0e01b64e5..51a67bc7a 100644 --- a/chaoslib/litmus/disk-fill/lib/disk-fill.go +++ b/chaoslib/litmus/disk-fill/lib/disk-fill.go @@ -35,14 +35,14 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // generating the chaos inject event in the chaosengine @@ -65,7 +65,7 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie log.Info("[Status]: Checking the status of the helper pods") err = status.CheckApplicationStatus(experimentsDetails.ChaosNamespace, "app="+experimentsDetails.ExperimentName+"-helper", experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - return errors.Errorf("helper 
pods is not in running state, err: %v", err) + return errors.Errorf("helper pods are not in running state, err: %v", err) } for _, pod := range targetPodList.Items { @@ -74,7 +74,7 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie if experimentsDetails.TargetContainer == "" { experimentsDetails.TargetContainer, err = GetTargetContainer(experimentsDetails, pod.Name, clients) if err != nil { - return errors.Errorf("Unable to get the target container name due to, err: %v", err) + return errors.Errorf("Unable to get the target container name, err: %v", err) } } @@ -111,13 +111,13 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie exec.SetExecCommandAttributes(&execCommandDetails, podName, "disk-fill", experimentsDetails.ChaosNamespace) ephemeralStorageDetails, err := exec.Exec(&execCommandDetails, clients, strings.Fields(command)) if err != nil { - return errors.Errorf("Unable to get ephemeral storage details due to err: %v", err) + return errors.Errorf("Unable to get ephemeral storage details, err: %v", err) } // filtering out the used ephemeral storage from the output of du command usedEphemeralStorageSize, err := FilterUsedEphemeralStorage(ephemeralStorageDetails) if err != nil { - return errors.Errorf("Unable to filter used ephemeral storage size due to err: %v", err) + return errors.Errorf("Unable to filter used ephemeral storage size, err: %v", err) } log.Infof("used ephemeral storage space: %v", strconv.Itoa(usedEphemeralStorageSize)) @@ -131,7 +131,7 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie command := "dd if=/dev/urandom of=/diskfill/" + containerID + "/diskfill bs=4K count=" + strconv.Itoa(sizeTobeFilled/4) _, err = exec.Exec(&execCommandDetails, clients, strings.Fields(command)) if err != nil { - return errors.Errorf("Unable to to create the files to fill the ephemeral storage due to err: %v", err) + return errors.Errorf("Unable to fill the ephemeral storage, err: %v", err) } } else { log.Warn("No required free space found!, It's Housefull") @@ -139,7 +139,7 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie } // waiting for the chaos duration - log.Infof("[Wait]: Waiting for the %vs after injecting chaos", strconv.Itoa(experimentsDetails.ChaosDuration)) + log.Infof("[Wait]: Waiting for the %vs after injecting chaos", experimentsDetails.ChaosDuration) common.WaitForDuration(experimentsDetails.ChaosDuration) for _, pod := range targetPodList.Items { @@ -161,7 +161,7 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie exec.SetExecCommandAttributes(&execCommandDetails, podName, "disk-fill", experimentsDetails.ChaosNamespace) err = Remedy(experimentsDetails, clients, containerID, pod.Name, &execCommandDetails) if err != nil { - return errors.Errorf("Unable to perform remedy operation due to err: %v", err) + return errors.Errorf("Unable to perform remedy operation due to %v", err) } } @@ -169,12 +169,12 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie log.Info("[Cleanup]: Deleting all the helper pod") err = common.DeleteAllPod("app="+experimentsDetails.ExperimentName+"-helper", experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients) if err != nil { - return errors.Errorf("Unable to delete the helper pod, err: %v", err) + return errors.Errorf("Unable to delete the helper pod, %v", err) } //Waiting for the ramp time after chaos injection if 
experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -185,7 +185,7 @@ func PrepareDiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clie func GetTargetContainer(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets) (string, error) { pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{}) if err != nil { - return "", errors.Wrapf(err, "Fail to get the application pod status, due to:%v", err) + return "", err } return pod.Spec.Containers[0].Name, nil @@ -340,7 +340,7 @@ func Remedy(experimentsDetails *experimentTypes.ExperimentDetails, clients clien command := "rm -rf /diskfill/" + containerID + "/diskfill" _, err = exec.Exec(execCommandDetails, clients, strings.Fields(command)) if err != nil { - return errors.Errorf("Unable to delete files to clean ephemeral storage due to err: %v", err) + return errors.Errorf("Unable to delete files to reset ephemeral storage usage due to err: %v", err) } } return nil @@ -352,7 +352,7 @@ func GetHelperPodName(targetPod apiv1.Pod, clients clients.ClientSets, namespace podList, err := clients.KubeClient.CoreV1().Pods(namespace).List(v1.ListOptions{LabelSelector: labels}) if err != nil || len(podList.Items) == 0 { - return "", errors.Errorf("Unable to list the helper pods due to, err: %v", err) + return "", errors.Errorf("Unable to list the helper pods, %v", err) } for _, pod := range podList.Items { @@ -360,5 +360,5 @@ func GetHelperPodName(targetPod apiv1.Pod, clients clients.ClientSets, namespace return pod.Name, nil } } - return "", errors.Errorf("No helper pod is running on %v node", targetPod.Spec.NodeName) + return "", errors.Errorf("No helper pod is available on %v node", targetPod.Spec.NodeName) } diff --git a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go index 50bf664e0..82c1e57ef 100644 --- a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go +++ b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go @@ -24,7 +24,7 @@ func PrepareKubeletKill(experimentsDetails *experimentTypes.ExperimentDetails, c //Select node for kubelet-service-kill appNodeName, err := common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, clients) if err != nil { - return errors.Errorf("Unable to get the application nodename due to, err: %v", err) + return errors.Errorf("Unable to get the application nodename, err: %v", err) } experimentsDetails.AppNode = appNodeName @@ -38,7 +38,7 @@ func PrepareKubeletKill(experimentsDetails *experimentTypes.ExperimentDetails, c //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -51,7 +51,7 @@ func PrepareKubeletKill(experimentsDetails *experimentTypes.ExperimentDetails, c // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, 
clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // Creating the helper pod to perform node memory hog @@ -75,11 +75,11 @@ func PrepareKubeletKill(experimentsDetails *experimentTypes.ExperimentDetails, c } // Wait till the completion of helper pod - log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration+30)) + log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", experimentsDetails.ChaosDuration+30) podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name="+experimentsDetails.ExperimentName+"-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, experimentsDetails.ExperimentName) if err != nil || podStatus == "Failed" { - return errors.Errorf("helper pod failed due to, err: %v", err) + return errors.Errorf("helper pod failed, err: %v", err) } // Checking the status of application node @@ -98,7 +98,7 @@ func PrepareKubeletKill(experimentsDetails *experimentTypes.ExperimentDetails, c //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil diff --git a/chaoslib/litmus/network-chaos/helper/netem.go b/chaoslib/litmus/network-chaos/helper/netem.go index d383cfdc4..2c25d2c02 100644 --- a/chaoslib/litmus/network-chaos/helper/netem.go +++ b/chaoslib/litmus/network-chaos/helper/netem.go @@ -36,7 +36,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed for the runner pod @@ -54,7 +54,7 @@ func main() { err := PreparePodNetworkChaos(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails) if err != nil { - log.Fatalf("helper pod failed due to err: %v", err) + log.Fatalf("helper pod failed, err: %v", err) } } @@ -83,7 +83,7 @@ func PreparePodNetworkChaos(experimentsDetails *experimentTypes.ExperimentDetail return err } - log.Infof("[Chaos]: Waiting for %vs", strconv.Itoa(experimentsDetails.ChaosDuration)) + log.Infof("[Chaos]: Waiting for %vs", experimentsDetails.ChaosDuration) // signChan channel is used to transmit signal notifications. 
signChan := make(chan os.Signal, 1) @@ -137,7 +137,7 @@ func GetPID(experimentDetails *experimentTypes.ExperimentDetails, clients client pod, err := clients.KubeClient.CoreV1().Pods(experimentDetails.AppNS).Get(experimentDetails.TargetPod, v1.GetOptions{}) if err != nil { - return 0, errors.Errorf("unable to get the pod") + return 0, err } var containerID string @@ -156,17 +156,17 @@ func GetPID(experimentDetails *experimentTypes.ExperimentDetails, clients client // deriving pid from the inspect out of target container out, err := exec.Command("crictl", "inspect", containerID).CombinedOutput() if err != nil { - log.Error(fmt.Sprintf("[cri] Failed to run crictl: %s", string(out))) + log.Error(fmt.Sprintf("[cri]: Failed to run crictl: %s", string(out))) return 0, err } // parsing data from the json output of inspect command PID, err := parsePIDFromJSON(out, experimentDetails.ContainerRuntime) if err != nil { - log.Error(fmt.Sprintf("[cri] Failed to parse json from crictl output: %s", string(out))) + log.Error(fmt.Sprintf("[cri]: Failed to parse json from crictl output: %s", string(out))) return 0, err } - log.Info(fmt.Sprintf("[cri] Container ID=%s has process PID=%d", containerID, PID)) + log.Info(fmt.Sprintf("[cri]: Container ID=%s has process PID=%d", containerID, PID)) return PID, nil @@ -202,15 +202,15 @@ func parsePIDFromJSON(j []byte, runtime string) (int, error) { } else if runtime == "crio" { var resp InfoDetails if err := json.Unmarshal(j, &resp); err != nil { - return 0, errors.Errorf("[cri] Could not find pid field in json: %s", string(j)) + return 0, errors.Errorf("[cri]: Could not find pid field in json: %s", string(j)) } pid = resp.PID } else { - return 0, errors.Errorf("no supported container runtime, runtime: %v", runtime) + return 0, errors.Errorf("[cri]: No supported container runtime, runtime: %v", runtime) } if pid == 0 { - return 0, errors.Errorf("[cri] no running target container found, pid: %v", string(pid)) + return 0, errors.Errorf("[cri]: No running target container found, pid: %v", string(pid)) } return pid, nil @@ -221,14 +221,74 @@ func parsePIDFromJSON(j []byte, runtime string) (int, error) { // and execute the netem command inside it. 
func InjectChaos(experimentDetails *experimentTypes.ExperimentDetails, pid int) error { - tc := fmt.Sprintf("nsenter -t %d -n tc qdisc add dev %s root netem ", pid, experimentDetails.NetworkInterface) - tc = tc + os.Getenv("NETEM_COMMAND") - cmd := exec.Command("/bin/bash", "-c", tc) - out, err := cmd.CombinedOutput() - log.Info(cmd.String()) - if err != nil { - log.Error(string(out)) - return err + netemCommands := os.Getenv("NETEM_COMMAND") + targetIPs := os.Getenv("TARGET_IPs") + + if targetIPs == "" { + tc := fmt.Sprintf("nsenter -t %d -n tc qdisc add dev %s root netem %v", pid, experimentDetails.NetworkInterface, netemCommands) + cmd := exec.Command("/bin/bash", "-c", tc) + out, err := cmd.CombinedOutput() + log.Info(cmd.String()) + if err != nil { + log.Error(string(out)) + return err + } + } else { + + ips := strings.Split(targetIPs, ",") + var uniqueIps []string + + // removing duplicates ips from the list, if any + for i := range ips { + isPresent := false + for j := range uniqueIps { + if ips[i] == uniqueIps[j] { + isPresent = true + } + } + if !isPresent { + uniqueIps = append(uniqueIps, ips[i]) + } + + } + + // Create a priority-based queue + // This instantly creates classes 1:1, 1:2, 1:3 + priority := fmt.Sprintf("nsenter -t %v -n tc qdisc add dev %v root handle 1: prio", pid, experimentDetails.NetworkInterface) + cmd := exec.Command("/bin/bash", "-c", priority) + out, err := cmd.CombinedOutput() + log.Info(cmd.String()) + if err != nil { + log.Error(string(out)) + return err + } + + // Add queueing discipline for 1:3 class. + // No traffic is going through 1:3 yet + traffic := fmt.Sprintf("nsenter -t %v -n tc qdisc add dev %v parent 1:3 netem %v", pid, experimentDetails.NetworkInterface, netemCommands) + cmd = exec.Command("/bin/bash", "-c", traffic) + out, err = cmd.CombinedOutput() + log.Info(cmd.String()) + if err != nil { + log.Error(string(out)) + return err + } + + for _, ip := range uniqueIps { + + // redirect traffic to specific IP through band 3 + // It allows ipv4 addresses only + if !strings.Contains(ip, ":") { + tc := fmt.Sprintf("nsenter -t %v -n tc filter add dev %v protocol ip parent 1:0 prio 3 u32 match ip dst %v flowid 1:3", pid, experimentDetails.NetworkInterface, ip) + cmd = exec.Command("/bin/bash", "-c", tc) + out, err = cmd.CombinedOutput() + log.Info(cmd.String()) + if err != nil { + log.Error(string(out)) + return err + } + } + } } return nil } @@ -263,6 +323,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.ChaosPodName = Getenv("POD_NAME", "") experimentDetails.ContainerRuntime = Getenv("CONTAINER_RUNTIME", "") experimentDetails.NetworkInterface = Getenv("NETWORK_INTERFACE", "eth0") + experimentDetails.TargetIPs = Getenv("TARGET_IPs", "") } // Getenv fetch the env and set the default value, if any diff --git a/chaoslib/litmus/network-chaos/lib/network-chaos.go b/chaoslib/litmus/network-chaos/lib/network-chaos.go index e1fce2d69..03699c091 100644 --- a/chaoslib/litmus/network-chaos/lib/network-chaos.go +++ b/chaoslib/litmus/network-chaos/lib/network-chaos.go @@ -1,7 +1,9 @@ package lib import ( + "net" "strconv" + "strings" clients "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/network-chaos/types" @@ -23,12 +25,12 @@ func PrepareAndInjectChaos(experimentsDetails *experimentTypes.ExperimentDetails // if the target pod is not defined it will derive the random target pod list using pod affected percentage targetPodList, err := 
common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -44,14 +46,14 @@ func PrepareAndInjectChaos(experimentsDetails *experimentTypes.ExperimentDetails if experimentsDetails.TargetContainer == "" { experimentsDetails.TargetContainer, err = GetTargetContainer(experimentsDetails, targetPodList.Items[0].Name, clients) if err != nil { - return errors.Errorf("Unable to get the target container name due to, err: %v", err) + return errors.Errorf("Unable to get the target container name, err: %v", err) } } // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // creating the helper pod to perform network chaos @@ -67,7 +69,7 @@ func PrepareAndInjectChaos(experimentsDetails *experimentTypes.ExperimentDetails log.Info("[Status]: Checking the status of the helper pods") err = status.CheckApplicationStatus(experimentsDetails.ChaosNamespace, "app="+experimentsDetails.ExperimentName+"-helper", experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - return errors.Errorf("helper pods is not in running state, err: %v", err) + return errors.Errorf("helper pods are not in running state, err: %v", err) } // Wait till the completion of the helper pod @@ -82,7 +84,7 @@ func PrepareAndInjectChaos(experimentsDetails *experimentTypes.ExperimentDetails log.Info("[Cleanup]: Deleting all the helper pod") err = common.DeleteAllPod("app="+experimentsDetails.ExperimentName+"-helper", experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients) if err != nil { - return errors.Errorf("Unable to delete the helper pod, err: %v", err) + return errors.Errorf("Unable to delete the helper pods, err: %v", err) } return nil @@ -103,7 +105,7 @@ func GetServiceAccount(experimentsDetails *experimentTypes.ExperimentDetails, cl func GetTargetContainer(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets) (string, error) { pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{}) if err != nil { - return "", errors.Wrapf(err, "Fail to get the application pod status, due to:%v", err) + return "", err } return pod.Spec.Containers[0].Name, nil @@ -210,6 +212,7 @@ func GetPodEnv(experimentsDetails *experimentTypes.ExperimentDetails, podName, a "NETEM_COMMAND": args, "NETWORK_INTERFACE": experimentsDetails.NetworkInterface, "EXPERIMENT_NAME": experimentsDetails.ExperimentName, + "TARGET_IPs": GetTargetIpsArgs(experimentsDetails.TargetIPs, experimentsDetails.TargetHosts), } for key, value := range ENVList { var perEnv apiv1.EnvVar @@ -237,3 +240,36 @@ func GetValueFromDownwardAPI(apiVersion string, fieldPath string) 
apiv1.EnvVarSo } return downwardENV } + +// GetTargetIpsArgs return the comma separated target ips +// It fetch the ips from the target ips (if defined by users) +// it append the ips from the host, if target host is provided +func GetTargetIpsArgs(targetIPs, targetHosts string) string { + + ipsFromHost := GetIpsForTargetHosts(targetHosts) + if ipsFromHost != "" { + targetIPs = targetIPs + "," + ipsFromHost + } + return targetIPs +} + +// GetIpsForTargetHosts resolves IP addresses for comma-separated list of target hosts and returns comma-separated ips +func GetIpsForTargetHosts(targetHosts string) string { + if targetHosts == "" { + return "" + } + hosts := strings.Split(targetHosts, ",") + var commaSeparatedIPs []string + for i := range hosts { + ips, err := net.LookupIP(hosts[i]) + if err != nil { + log.Infof("Unknown host") + } else { + for j := range ips { + log.Infof("IP address: %v", ips[j]) + commaSeparatedIPs = append(commaSeparatedIPs, ips[j].String()) + } + } + } + return strings.Join(commaSeparatedIPs, ",") +} diff --git a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go index e09262586..043647f24 100644 --- a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go +++ b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go @@ -25,7 +25,7 @@ func PrepareNodeCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cl //Select node for kubelet-service-kill appNodeName, err := common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, clients) if err != nil { - return errors.Errorf("Unable to get the application nodename due to, err: %v", err) + return errors.Errorf("Unable to get the application nodename, err: %v", err) } experimentsDetails.AppNode = appNodeName @@ -48,7 +48,7 @@ func PrepareNodeCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cl //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -61,7 +61,7 @@ func PrepareNodeCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cl // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // Creating the helper pod to perform node cpu hog @@ -78,7 +78,7 @@ func PrepareNodeCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cl } // Wait till the completion of helper pod - log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration+30)) + log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", experimentsDetails.ChaosDuration+30) podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name="+experimentsDetails.ExperimentName+"-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, experimentsDetails.ExperimentName) if err != nil || podStatus == "Failed" { @@ -101,17 +101,17 @@ func PrepareNodeCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cl //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - 
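The netem helper above resolves user-supplied target hosts into IPs before they are handed to the helper pod as `TARGET_IPs`. A minimal standalone sketch of that resolution step, assuming the same comma-separated input format (the function name and the skip-on-failure behaviour here are illustrative, not the patch's exact code):

```go
package main

import (
	"fmt"
	"net"
	"strings"
)

// resolveTargetHosts resolves a comma-separated list of hostnames and returns
// the resolved addresses as a single comma-separated string. Hosts that fail
// to resolve are skipped, mirroring the lenient behaviour of the helper above.
func resolveTargetHosts(targetHosts string) string {
	if targetHosts == "" {
		return ""
	}
	var ips []string
	for _, host := range strings.Split(targetHosts, ",") {
		addrs, err := net.LookupIP(strings.TrimSpace(host))
		if err != nil {
			// unresolvable host: skipped here, logged in the real helper
			continue
		}
		for _, addr := range addrs {
			ips = append(ips, addr.String())
		}
	}
	return strings.Join(ips, ",")
}

func main() {
	fmt.Println(resolveTargetHosts("localhost"))
}
```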
log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil } -//SetCPUCapacity will fetch the node cpu capacity +//SetCPUCapacity fetch the node cpu capacity func SetCPUCapacity(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) error { node, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.AppNode, v1.GetOptions{}) if err != nil { - return errors.Wrapf(err, "Fail to get the application node, due to %v", err) + return err } cpuCapacity, _ := node.Status.Capacity.Cpu().AsInt64() @@ -144,7 +144,7 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie Image: experimentsDetails.LIBImage, ImagePullPolicy: apiv1.PullAlways, Command: []string{ - "/stress-ng", + "stress-ng", }, Args: []string{ "--cpu", diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index ef56380dd..35d098e36 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -3,14 +3,17 @@ package lib import ( "bytes" "fmt" + "os" "os/exec" - "strconv" + "os/signal" + "syscall" "time" clients "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/node-drain/types" "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" @@ -26,7 +29,7 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -34,7 +37,7 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli //Select node for kubelet-service-kill appNodeName, err := common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, clients) if err != nil { - return errors.Errorf("Unable to get the application nodename due to, err: %v", err) + return errors.Errorf("Unable to get the application nodename, err: %v", err) } experimentsDetails.AppNode = appNodeName @@ -56,7 +59,7 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli log.Info("[Status]: Verify the status of AUT after reschedule") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - return errors.Errorf("Application status check failed due to, err: %v", err) + return errors.Errorf("Application status check failed, err: %v", err) } // Verify the status of Auxiliary Applications after reschedule @@ -64,13 +67,51 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli log.Info("[Status]: Verify that the Auxiliary Applications are running") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, 
experimentsDetails.Delay, clients) if err != nil { - return errors.Errorf("Auxiliary Application status check failed due to %v", err) + return errors.Errorf("Auxiliary Applications status check failed, err: %v", err) } } - // Wait for Chaos Duration - log.Infof("[Wait]: Waiting for the %vs chaos duration", strconv.Itoa(experimentsDetails.ChaosDuration)) - common.WaitForDuration(experimentsDetails.ChaosDuration) + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second + + log.Infof("[Chaos]: Waiting for %vs", experimentsDetails.ChaosDuration) + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) + +loop: + for { + endTime = time.After(timeDelay) + select { + case <-signChan: + log.Info("[Chaos]: Killing process started because of terminated signal received") + // updating the chaosresult after stopped + failStep := "Node Drain injection stopped!" + types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + + // generating summary event in chaosengine + msg := experimentsDetails.ExperimentName + " experiment has been aborted" + types.SetEngineEventAttributes(eventsDetails, types.Summary, msg, "Warning", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + + // generating summary event in chaosresult + types.SetResultEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", resultDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") + + if err = UncordonNode(experimentsDetails, clients); err != nil { + log.Errorf("unable to uncordon node, err :%v", err) + + } + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop + } + } // Uncordon the application node err = UncordonNode(experimentsDetails, clients) @@ -80,7 +121,7 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -106,10 +147,10 @@ func DrainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients cl Try(func(attempt uint) error { nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.AppNode, v1.GetOptions{}) if err != nil { - return errors.Errorf("Unable to get the %v node, err: %v", experimentsDetails.AppNode, err) + return err } if !nodeSpec.Spec.Unschedulable { - return errors.Errorf("Unable to drain %v node", experimentsDetails.AppNode) + return errors.Errorf("%v node is not in unschedulable state", experimentsDetails.AppNode) } return nil }) @@ -137,10 +178,10 @@ func UncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients Try(func(attempt uint) error { nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.AppNode, v1.GetOptions{}) if err != nil { - return errors.Errorf("Unable to get the %v node, err: %v", experimentsDetails.AppNode, err) + return err } if 
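The node-drain hunks above replace a plain wait with a signal-aware loop so the node is uncordoned even when the experiment is aborted; the same pattern recurs in node-taint and the pod stress experiments later in this patch. A simplified, standalone sketch of that abort flow, with the cleanup step and duration as placeholders:

```go
package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"
	"time"
)

// runWithAbort waits out the chaos duration but runs cleanup and exits early
// if an interrupt or termination signal arrives, so the node does not stay
// cordoned (or tainted) after an aborted run.
func runWithAbort(duration time.Duration, cleanup func() error) {
	signChan := make(chan os.Signal, 1)
	// SIGKILL cannot be trapped by a process, so only SIGINT/SIGTERM are registered here.
	signal.Notify(signChan, os.Interrupt, syscall.SIGTERM)

	select {
	case <-signChan:
		log.Println("[Chaos]: abort signal received, reverting chaos before exit")
		if err := cleanup(); err != nil {
			log.Printf("cleanup failed, err: %v", err)
		}
		os.Exit(1)
	case <-time.After(duration):
		log.Println("[Chaos]: chaos duration is over")
	}
}

func main() {
	runWithAbort(2*time.Second, func() error {
		// placeholder: uncordon the node / remove the taint / kill the stress process
		return nil
	})
}
```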
nodeSpec.Spec.Unschedulable { - return errors.Errorf("Unable to uncordon %v node", experimentsDetails.AppNode) + return errors.Errorf("%v node is in unschedulable state", experimentsDetails.AppNode) } return nil }) diff --git a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go index 18b18f8d4..638b3d595 100644 --- a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go +++ b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go @@ -24,7 +24,7 @@ func PrepareNodeIOStress(experimentsDetails *experimentTypes.ExperimentDetails, //Select node for node-io-stress appNodeName, err := common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, clients) if err != nil { - return errors.Errorf("Unable to get the application nodename due to, err: %v", err) + return errors.Errorf("Unable to get the application nodename, err: %v", err) } experimentsDetails.AppNode = appNodeName @@ -40,7 +40,7 @@ func PrepareNodeIOStress(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -53,7 +53,7 @@ func PrepareNodeIOStress(experimentsDetails *experimentTypes.ExperimentDetails, // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // Creating the helper pod to perform node io stress @@ -70,7 +70,7 @@ func PrepareNodeIOStress(experimentsDetails *experimentTypes.ExperimentDetails, } // Wait till the completion of helper pod - log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration+30)) + log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", experimentsDetails.ChaosDuration+30) podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name="+experimentsDetails.ExperimentName+"-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, experimentsDetails.ExperimentName) if err != nil || podStatus == "Failed" { @@ -93,7 +93,7 @@ func PrepareNodeIOStress(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil diff --git a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go index eab851498..319905c20 100644 --- a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go +++ b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go @@ -23,7 +23,7 @@ func PrepareNodeMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, //Select node for kubelet-service-kill appNodeName, err := common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, clients) if err != nil { - 
return errors.Errorf("Unable to get the application nodename due to, err: %v", err) + return errors.Errorf("Unable to get the application nodename, err: %v", err) } experimentsDetails.AppNode = appNodeName @@ -38,7 +38,7 @@ func PrepareNodeMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -60,7 +60,7 @@ func PrepareNodeMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // Creating the helper pod to perform node memory hog @@ -77,7 +77,7 @@ func PrepareNodeMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, } // Wait till the completion of helper pod - log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration+30)) + log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", experimentsDetails.ChaosDuration+30) podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name="+experimentsDetails.ExperimentName+"-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, experimentsDetails.ExperimentName) if err != nil || podStatus == "Failed" { @@ -100,7 +100,7 @@ func PrepareNodeMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -111,14 +111,14 @@ func GetNodeMemoryDetails(appNodeName string, clients clients.ClientSets) (int, nodeDetails, err := clients.KubeClient.CoreV1().Nodes().Get(appNodeName, v1.GetOptions{}) if err != nil { - return 0, 0, errors.Errorf("Fail to get nodesDetails, due to %v", err) + return 0, 0, err } memoryCapacity := int(nodeDetails.Status.Capacity.Memory().Value()) memoryAllocatable := int(nodeDetails.Status.Allocatable.Memory().Value()) if memoryCapacity == 0 || memoryAllocatable == 0 { - return memoryCapacity, memoryAllocatable, errors.Errorf("Fail to get memory details of the application node") + return memoryCapacity, memoryAllocatable, errors.Errorf("Failed to get memory details of the application node") } return memoryCapacity, memoryAllocatable, nil @@ -164,7 +164,7 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie Image: experimentsDetails.LIBImage, ImagePullPolicy: apiv1.PullAlways, Command: []string{ - "/stress-ng", + "stress-ng", }, Args: []string{ "--vm", diff --git a/chaoslib/litmus/node-taint/lib/node-taint.go b/chaoslib/litmus/node-taint/lib/node-taint.go index 7523c3e05..82457e105 100644 --- a/chaoslib/litmus/node-taint/lib/node-taint.go +++ b/chaoslib/litmus/node-taint/lib/node-taint.go @@ -2,13 +2,17 
@@ package lib import ( "fmt" - "strconv" + "os" + "os/signal" "strings" + "syscall" + "time" clients "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/node-taint/types" "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" @@ -24,7 +28,7 @@ func PrepareNodeTaint(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -32,7 +36,7 @@ func PrepareNodeTaint(experimentsDetails *experimentTypes.ExperimentDetails, cli //Select node for kubelet-service-kill appNodeName, err := common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, clients) if err != nil { - return errors.Errorf("Unable to get the application nodename due to, err: %v", err) + return errors.Errorf("Unable to get the application nodename, err: %v", err) } experimentsDetails.AppNode = appNodeName @@ -62,13 +66,51 @@ func PrepareNodeTaint(experimentsDetails *experimentTypes.ExperimentDetails, cli log.Info("[Status]: Verify that the Auxiliary Applications are running") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - return errors.Errorf("Auxiliary Application status check failed due to %v", err) + return errors.Errorf("Auxiliary Applications status check failed, err: %v", err) } } - // Wait for Chaos Duration - log.Infof("[Wait]: Waiting for the %vs chaos duration", strconv.Itoa(experimentsDetails.ChaosDuration)) - common.WaitForDuration(experimentsDetails.ChaosDuration) + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second + + log.Infof("[Chaos]: Waiting for %vs", experimentsDetails.ChaosDuration) + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) + +loop: + for { + endTime = time.After(timeDelay) + select { + case <-signChan: + log.Info("[Chaos]: Killing process started because of terminated signal received") + // updating the chaosresult after stopped + failStep := "Node Taint injection stopped!" 
+ types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + + // generating summary event in chaosengine + msg := experimentsDetails.ExperimentName + " experiment has been aborted" + types.SetEngineEventAttributes(eventsDetails, types.Summary, msg, "Warning", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + + // generating summary event in chaosresult + types.SetResultEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", resultDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") + + if err = RemoveTaintFromNode(experimentsDetails, clients); err != nil { + log.Errorf("unable to remove taint from the node, err :%v", err) + + } + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop + } + } // remove taint from the application node err = RemoveTaintFromNode(experimentsDetails, clients) @@ -78,7 +120,7 @@ func PrepareNodeTaint(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -93,7 +135,7 @@ func TaintNode(experimentsDetails *experimentTypes.ExperimentDetails, clients cl // get the node details node, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.AppNode, v1.GetOptions{}) if err != nil || node == nil { - return errors.Errorf("failed to get %v node, due to err: %v", experimentsDetails.AppNode, err) + return errors.Errorf("failed to get %v node, err: %v", experimentsDetails.AppNode, err) } // check if the taint already exists @@ -114,11 +156,11 @@ func TaintNode(experimentsDetails *experimentTypes.ExperimentDetails, clients cl updatedNodeWithTaint, err := clients.KubeClient.CoreV1().Nodes().Update(node) if err != nil || updatedNodeWithTaint == nil { - return fmt.Errorf("failed to update %v node after adding taints, due to err: %v", experimentsDetails.AppNode, err) + return fmt.Errorf("failed to update %v node after adding taints, err: %v", experimentsDetails.AppNode, err) } } - log.Infof("Successfully added taint on node %v", experimentsDetails.AppNode) + log.Infof("Successfully added taint in %v node", experimentsDetails.AppNode) return nil } @@ -132,7 +174,7 @@ func RemoveTaintFromNode(experimentsDetails *experimentTypes.ExperimentDetails, // get the node details node, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.AppNode, v1.GetOptions{}) if err != nil || node == nil { - return errors.Errorf("failed to get %v node, due to err: %v", experimentsDetails.AppNode, err) + return errors.Errorf("failed to get %v node, err: %v", experimentsDetails.AppNode, err) } // check if the taint already exists @@ -155,7 +197,7 @@ func RemoveTaintFromNode(experimentsDetails *experimentTypes.ExperimentDetails, node.Spec.Taints = Newtaints updatedNodeWithTaint, err := clients.KubeClient.CoreV1().Nodes().Update(node) if err != nil || updatedNodeWithTaint == nil { - return fmt.Errorf("failed to update %v node after removing taints, due to err: %v", experimentsDetails.AppNode, err) + return fmt.Errorf("failed to update %v node after 
removing taints, err: %v", experimentsDetails.AppNode, err) } } diff --git a/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go b/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go index 7641fb917..085ed941c 100644 --- a/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go +++ b/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go @@ -1,7 +1,6 @@ package lib import ( - "strconv" "time" clients "github.com/litmuschaos/litmus-go/pkg/clients" @@ -28,23 +27,23 @@ func PreparePodAutoscaler(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } err = PodAutoscalerChaos(experimentsDetails, clients, replicaCount, appName, resultDetails, eventsDetails, chaosDetails) if err != nil { - return errors.Errorf("Unable to perform autoscaling, due to %v", err) + return errors.Errorf("Unable to perform autoscaling, err: %v", err) } err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName) if err != nil { - return errors.Errorf("Unable to recover the auto scaling, due to %v", err) + return errors.Errorf("Unable to recover the auto scaling, err: %v", err) } //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -58,7 +57,7 @@ func GetApplicationDetails(experimentsDetails *experimentTypes.ExperimentDetails // Get Deployment replica count applicationList, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.AppNS).List(metav1.ListOptions{LabelSelector: experimentsDetails.AppLabel}) if err != nil || len(applicationList.Items) == 0 { - return "", 0, errors.Errorf("Unable to get application, err: %v", err) + return "", 0, errors.Errorf("Unable to list the application, err: %v", err) } for _, app := range applicationList.Items { appReplica = int(*app.Spec.Replicas) @@ -81,7 +80,7 @@ func PodAutoscalerChaos(experimentsDetails *experimentTypes.ExperimentDetails, c // RetryOnConflict uses exponential backoff to avoid exhausting the apiserver appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{}) if err != nil { - return errors.Errorf("Failed to get latest version of Application Deployment: %v", err) + return errors.Errorf("Failed to get latest version of Application Deployment, err: %v", err) } // modifying the replica count appUnderTest.Spec.Replicas = int32Ptr(replicas) @@ -89,7 +88,7 @@ func PodAutoscalerChaos(experimentsDetails *experimentTypes.ExperimentDetails, c return updateErr }) if retryErr != nil { - return errors.Errorf("Unable to scale the application, due to: %v", retryErr) + return errors.Errorf("Unable to scale the application, err: %v", retryErr) } log.Info("Application Started Scaling") @@ -125,7 +124,7 @@ func ApplicationPodStatusCheck(experimentsDetails *experimentTypes.ExperimentDet if isFailed { err = AutoscalerRecovery(experimentsDetails, clients, replicaCount, appName) if err != nil { - return errors.Errorf("Unable to perform autoscaling, due to %v", err) + return 
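The node-taint hunks above only show the error handling around the node Update calls; how the taint itself is built is not part of this diff. Purely as an illustration, a taint spec in kubectl's key=value:Effect form could be parsed into a corev1.Taint as below (the spec format and example key are assumptions, not taken from the patch):

```go
package chaosutil

import (
	"fmt"
	"strings"

	corev1 "k8s.io/api/core/v1"
)

// parseTaint converts a "key=value:Effect" spec, e.g. "node.litmus/chaos=true:NoExecute",
// into a corev1.Taint that could be appended to node.Spec.Taints before updating the node.
func parseTaint(spec string) (corev1.Taint, error) {
	parts := strings.Split(spec, ":")
	if len(parts) != 2 {
		return corev1.Taint{}, fmt.Errorf("invalid taint spec %q, expected key=value:Effect", spec)
	}
	kv := strings.SplitN(parts[0], "=", 2)
	taint := corev1.Taint{
		Key:    kv[0],
		Effect: corev1.TaintEffect(parts[1]),
	}
	if len(kv) == 2 {
		taint.Value = kv[1]
	}
	return taint, nil
}
```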
errors.Errorf("Unable to perform autoscaling, err: %v", err) } return errors.Errorf("Failed to scale the appplication, err: %v", err) } else if err != nil { @@ -150,7 +149,7 @@ func AutoscalerRecovery(experimentsDetails *experimentTypes.ExperimentDetails, c // RetryOnConflict uses exponential backoff to avoid exhausting the apiserver appUnderTest, err := applicationClient.Get(appName, metav1.GetOptions{}) if err != nil { - return errors.Errorf("Failed to get latest version of Application Deployment: %v", err) + return errors.Errorf("Failed to get latest version of Application Deployment, err: %v", err) } appUnderTest.Spec.Replicas = int32Ptr(int32(replicaCount)) // modify replica count @@ -158,7 +157,7 @@ func AutoscalerRecovery(experimentsDetails *experimentTypes.ExperimentDetails, c return updateErr }) if retryErr != nil { - return errors.Errorf("Unable to scale the, due to: %v", retryErr) + return errors.Errorf("Unable to scale the, err: %v", retryErr) } log.Info("[Info]: Application pod started rolling back") @@ -172,7 +171,7 @@ func AutoscalerRecovery(experimentsDetails *experimentTypes.ExperimentDetails, c } if int(applicationDeploy.Status.AvailableReplicas) != experimentsDetails.Replicas { log.Infof("Application Pod Available Count is: %v", applicationDeploy.Status.AvailableReplicas) - return errors.Errorf("Unable to roll back to older replica count due to, err: %v", err) + return errors.Errorf("Unable to roll back to older replica count, err: %v", err) } return nil }) diff --git a/chaoslib/litmus/pod-cpu-hog/lib/pod-cpu-hog.go b/chaoslib/litmus/pod-cpu-hog/lib/pod-cpu-hog.go index 5429ca9c0..7b82d1889 100644 --- a/chaoslib/litmus/pod-cpu-hog/lib/pod-cpu-hog.go +++ b/chaoslib/litmus/pod-cpu-hog/lib/pod-cpu-hog.go @@ -3,7 +3,6 @@ package lib import ( "os" "os/signal" - "strconv" "syscall" "time" @@ -17,6 +16,8 @@ import ( litmusexec "github.com/litmuschaos/litmus-go/pkg/utils/exec" "github.com/pkg/errors" "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog" ) @@ -30,7 +31,7 @@ func StressCPU(containerName, podName, namespace, cpuHogCmd string, clients clie litmusexec.SetExecCommandAttributes(&execCommandDetails, podName, containerName, namespace) _, err := litmusexec.Exec(&execCommandDetails, clients, command) if err != nil { - return errors.Errorf("Unable to run stress command inside target container, due to err: %v", err) + return errors.Errorf("Unable to run stress command inside target container, err: %v", err) } return nil } @@ -38,79 +39,159 @@ func StressCPU(containerName, podName, namespace, cpuHogCmd string, clients clie //ExperimentCPU function orchestrates the experiment by calling the StressCPU function for every core, of every container, of every pod that is targeted func ExperimentCPU(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - var endTime <-chan time.Time - timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second - // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) 
+ return errors.Errorf("Unable to get the target pod list, err: %v", err) } - for _, pod := range targetPodList.Items { + //Get the target container name of the application pod + if experimentsDetails.TargetContainer == "" { + experimentsDetails.TargetContainer, err = GetTargetContainer(experimentsDetails, targetPodList.Items[0].Name, clients) + if err != nil { + return errors.Errorf("Unable to get the target container name, err: %v", err) + } + } - for _, container := range pod.Status.ContainerStatuses { - if container.Ready != true { - return errors.Errorf("containers are not yet in running state") - } - log.InfoWithValues("The running status of container to stress is as follows", logrus.Fields{ - "container": container.Name, "Pod": pod.Name, "Status": pod.Status.Phase}) + if experimentsDetails.Sequence == "serial" { + if err = InjectChaosInSerialMode(experimentsDetails, targetPodList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + return err + } + } else { + if err = InjectChaosInParallelMode(experimentsDetails, targetPodList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + return err + } + } - log.Infof("[Chaos]:Stressing: %v cores", strconv.Itoa(experimentsDetails.CPUcores)) + return nil +} - for i := 0; i < experimentsDetails.CPUcores; i++ { +// InjectChaosInSerialMode stressed the cpu of all target application serially (one by one) +func InjectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second - if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" - types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") - } + for _, pod := range targetPodList.Items { - go StressCPU(container.Name, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosInjectCmd, clients) - - log.Infof("[Chaos]:Waiting for: %vs", strconv.Itoa(experimentsDetails.ChaosDuration)) - - // signChan channel is used to transmit signal notifications. - signChan := make(chan os.Signal, 1) - // Catch and relay certain signal(s) to signChan channel. - signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) - loop: - for { - endTime = time.After(timeDelay) - select { - case <-signChan: - log.Info("[Chaos]: Killing process started because of terminated signal received") - err = KillStressCPU(container.Name, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients) - if err != nil { - klog.V(0).Infof("Error in Kill stress after") - return err - } - // updating the chaosresult after stopped - failStep := "CPU hog Chaos injection stopped!" 
- types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) - result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") - - // generating summary event in chaosengine - msg := experimentsDetails.ExperimentName + " experiment has been aborted" - types.SetEngineEventAttributes(eventsDetails, types.Summary, msg, "Warning", chaosDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") - - // generating summary event in chaosresult - types.SetResultEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", resultDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") - os.Exit(1) - case <-endTime: - log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) - break loop - } - } - if err = KillStressCPU(container.Name, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients); err != nil { + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } + + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "container": experimentsDetails.TargetContainer, + "Pod": pod.Name, + "CPU CORE": experimentsDetails.CPUcores, + }) + go StressCPU(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosInjectCmd, clients) + + log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) + loop: + for { + endTime = time.After(timeDelay) + select { + case <-signChan: + log.Info("[Chaos]: Killing process started because of terminated signal received") + err := KillStressCPUSerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients) + if err != nil { + klog.V(0).Infof("Error in Kill stress after abortion") return err } + // updating the chaosresult after stopped + failStep := "CPU hog Chaos injection stopped!" 
+ types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + + // generating summary event in chaosengine + msg := experimentsDetails.ExperimentName + " experiment has been aborted" + types.SetEngineEventAttributes(eventsDetails, types.Summary, msg, "Warning", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + + // generating summary event in chaosresult + types.SetResultEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", resultDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop } } + if err := KillStressCPUSerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients); err != nil { + return err + } + } + return nil +} + +// InjectChaosInParallelMode stressed the cpu of all target application in parallel mode (all at once) +func InjectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second + + for _, pod := range targetPodList.Items { + + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } + + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "container": experimentsDetails.TargetContainer, + "Pod": pod.Name, + "CPU CORE": experimentsDetails.CPUcores, + }) + + go StressCPU(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosInjectCmd, clients) + } + log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) +loop: + for { + endTime = time.After(timeDelay) + select { + case <-signChan: + log.Info("[Chaos]: Killing process started because of terminated signal received") + err := KillStressCPUParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients) + if err != nil { + klog.V(0).Infof("Error in Kill stress after abortion") + return err + } + // updating the chaosresult after stopped + failStep := "CPU hog Chaos injection stopped!" 
+ types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + + // generating summary event in chaosengine + msg := experimentsDetails.ExperimentName + " experiment has been aborted" + types.SetEngineEventAttributes(eventsDetails, types.Summary, msg, "Warning", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + + // generating summary event in chaosresult + types.SetResultEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", resultDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop + } + } + if err := KillStressCPUParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients); err != nil { + return err } return nil @@ -121,7 +202,7 @@ func PrepareCPUstress(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } //Starting the CPU stress experiment @@ -131,14 +212,26 @@ func PrepareCPUstress(experimentsDetails *experimentTypes.ExperimentDetails, cli } //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil } -// KillStressCPU function to kill the experiment. 
Triggered by either timeout of chaos duration or termination of the experiment -func KillStressCPU(containerName, podName, namespace, cpuFreeCmd string, clients clients.ClientSets) error { +//GetTargetContainer will fetch the container name from application pod +// It will return the first container name from the application pod +func GetTargetContainer(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets) (string, error) { + pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{}) + if err != nil { + return "", err + } + + return pod.Spec.Containers[0].Name, nil +} + +// KillStressCPUSerial function to kill a stress process running inside target container +// Triggered by either timeout of chaos duration or termination of the experiment +func KillStressCPUSerial(containerName, podName, namespace, cpuFreeCmd string, clients clients.ClientSets) error { // It will contains all the pod & container details required for exec command execCommandDetails := litmusexec.PodDetails{} @@ -147,7 +240,21 @@ func KillStressCPU(containerName, podName, namespace, cpuFreeCmd string, clients litmusexec.SetExecCommandAttributes(&execCommandDetails, podName, containerName, namespace) _, err := litmusexec.Exec(&execCommandDetails, clients, command) if err != nil { - return errors.Errorf("Unable to kill the stress process, due to err: %v", err) + return errors.Errorf("Unable to kill the stress process in %v pod, err: %v", podName, err) + } + + return nil +} + +// KillStressCPUParallel function to kill all the stress process running inside target container +// Triggered by either timeout of chaos duration or termination of the experiment +func KillStressCPUParallel(containerName string, targetPodList corev1.PodList, namespace, cpuFreeCmd string, clients clients.ClientSets) error { + + for _, pod := range targetPodList.Items { + + if err := KillStressCPUSerial(containerName, pod.Name, namespace, cpuFreeCmd, clients); err != nil { + return err + } } return nil } diff --git a/chaoslib/litmus/pod-delete/lib/pod-delete.go b/chaoslib/litmus/pod-delete/lib/pod-delete.go index 9e1af1d65..4ce3f0277 100644 --- a/chaoslib/litmus/pod-delete/lib/pod-delete.go +++ b/chaoslib/litmus/pod-delete/lib/pod-delete.go @@ -1,7 +1,6 @@ package lib import ( - "strconv" "time" clients "github.com/litmuschaos/litmus-go/pkg/clients" @@ -27,18 +26,18 @@ func PreparePodDelete(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } err = PodDeleteChaos(experimentsDetails, clients, eventsDetails, chaosDetails, resultDetails) if err != nil { - return errors.Errorf("Unable to delete the application pods, due to %v", err) + return errors.Errorf("Unable to delete the application pods, err: %v", err) } //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -69,7 +68,7 @@ func 
PodDeleteChaos(experimentsDetails *experimentTypes.ExperimentDetails, clien // if the target pod is not defined it will derive the random target pod list using pod affected percentage targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } if experimentsDetails.EngineName != "" { @@ -96,7 +95,7 @@ func PodDeleteChaos(experimentsDetails *experimentTypes.ExperimentDetails, clien //Waiting for the chaos interval after chaos injection if experimentsDetails.ChaosInterval != 0 { - log.Infof("[Wait]: Wait for the chaos interval %vs", strconv.Itoa(experimentsDetails.ChaosInterval)) + log.Infof("[Wait]: Wait for the chaos interval %vs", experimentsDetails.ChaosInterval) common.WaitForDuration(experimentsDetails.ChaosInterval) } diff --git a/chaoslib/litmus/pod-memory-hog/lib/pod-memory-hog.go b/chaoslib/litmus/pod-memory-hog/lib/pod-memory-hog.go index e71a48fb7..5d7ab8910 100644 --- a/chaoslib/litmus/pod-memory-hog/lib/pod-memory-hog.go +++ b/chaoslib/litmus/pod-memory-hog/lib/pod-memory-hog.go @@ -19,6 +19,8 @@ import ( litmusexec "github.com/litmuschaos/litmus-go/pkg/utils/exec" "github.com/pkg/errors" "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog" ) @@ -47,87 +49,187 @@ func StressMemory(MemoryConsumption, containerName, podName, namespace string, c //ExperimentMemory function orchestrates the experiment by calling the StressMemory function, of every container, of every pod that is targeted func ExperimentMemory(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - var endTime <-chan time.Time - timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second - // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } + //Get the target container name of the application pod + if experimentsDetails.TargetContainer == "" { + experimentsDetails.TargetContainer, err = GetTargetContainer(experimentsDetails, targetPodList.Items[0].Name, clients) + if err != nil { + return errors.Errorf("Unable to get the target container name, err: %v", err) + } + } + + if experimentsDetails.Sequence == "serial" { + if err = InjectChaosInSerialMode(experimentsDetails, targetPodList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + return err + } + } else { + if err = InjectChaosInParallelMode(experimentsDetails, targetPodList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + return err + } + } + + return nil +} + +// InjectChaosInSerialMode stressed the memory of all target application serially (one by one) +func InjectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients 
clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + // creating err channel to recieve the error from the go routine + stressErr := make(chan error) + + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second + for _, pod := range targetPodList.Items { - for _, container := range pod.Status.ContainerStatuses { - if container.Ready != true { - return errors.Errorf("containers are not yet in running state") - } - log.InfoWithValues("The running status of container to stress is as follows", logrus.Fields{ - "container": container.Name, "Pod": pod.Name, "Status": pod.Status.Phase}) + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } + + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "container": experimentsDetails.TargetContainer, + "Pod": pod.Name, + "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, + }) + + go StressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, clients, stressErr) - log.Infof("[Chaos]:Stressing: %v Megabytes", strconv.Itoa(experimentsDetails.MemoryConsumption)) + log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) + + loop: + for { + endTime = time.After(timeDelay) + select { + case err := <-stressErr: + // skipping the execution, if recieved any error other than 137, while executing stress command and marked result as fail + // it will ignore the error code 137(oom kill), it will skip further execution and marked the result as pass + // oom kill occurs if memory to be stressed exceed than the resource limit for the target container + if err != nil { + if strings.Contains(err.Error(), "137") { + return nil + } + return err + } + case <-signChan: + log.Info("[Chaos]: Killing process started because of terminated signal received") + err = KillStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients) + if err != nil { + klog.V(0).Infof("Error in Kill stress after abortion") + return err + } + // updating the chaosresult after stopped + failStep := "Memory hog Chaos injection stopped!" 
+ types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") - if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" - types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + // generating summary event in chaosengine + msg := experimentsDetails.ExperimentName + " experiment has been aborted" + types.SetEngineEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", chaosDetails) events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + + // generating summary event in chaosresult + types.SetResultEventAttributes(eventsDetails, types.Summary, msg, "Warning", resultDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop } - // creating err channel to recieve the error from the go routine - stressErr := make(chan error) - go StressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), container.Name, pod.Name, experimentsDetails.AppNS, clients, stressErr) - - log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) - - // signChan channel is used to transmit signal notifications. - signChan := make(chan os.Signal, 1) - // Catch and relay certain signal(s) to signChan channel. - signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) - loop: - for { - endTime = time.After(timeDelay) - select { - case err := <-stressErr: - // skipping the execution, if recieved any error other than 137, while executing stress command and marked result as fail - // it will ignore the error code 137(oom kill), it will skip further execution and marked the result as pass - // oom kill occurs if memory to be stressed exceed than the resource limit for the target container - if err != nil { - if strings.Contains(err.Error(), "137") { - return nil - } - return err - } - case <-signChan: - log.Info("[Chaos]: Killing process started because of terminated signal received") - err = KillStressMemory(container.Name, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients) - if err != nil { - klog.V(0).Infof("Error in Kill stress after") - return err - } - // updating the chaosresult after stopped - failStep := "Memory hog Chaos injection stopped!" 
- types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) - result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") - - // generating summary event in chaosengine - msg := experimentsDetails.ExperimentName + " experiment has been aborted" - types.SetEngineEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", chaosDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") - - // generating summary event in chaosresult - types.SetResultEventAttributes(eventsDetails, types.Summary, msg, "Warning", resultDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") - os.Exit(1) - case <-endTime: - log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) - break loop + } + if err = KillStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients); err != nil { + return err + } + } + return nil +} + +// InjectChaosInParallelMode stressed the memory of all target application in parallel mode (all at once) +func InjectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + // creating err channel to recieve the error from the go routine + stressErr := make(chan error) + + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second + + for _, pod := range targetPodList.Items { + + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } + + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "container": experimentsDetails.TargetContainer, + "Pod": pod.Name, + "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, + }) + + go StressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, clients, stressErr) + } + + log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. 
+ signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) +loop: + for { + endTime = time.After(timeDelay) + select { + case err := <-stressErr: + // skipping the execution, if recieved any error other than 137, while executing stress command and marked result as fail + // it will ignore the error code 137(oom kill), it will skip further execution and marked the result as pass + // oom kill occurs if memory to be stressed exceed than the resource limit for the target container + if err != nil { + if strings.Contains(err.Error(), "137") { + return nil } + return err } - if err = KillStressMemory(container.Name, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients); err != nil { + case <-signChan: + log.Info("[Chaos]: Killing process started because of terminated signal received") + err = KillStressMemoryParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients) + if err != nil { + klog.V(0).Infof("Error in Kill stress after abortion") return err } + // updating the chaosresult after stopped + failStep := "Memory hog Chaos injection stopped!" + types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + + // generating summary event in chaosengine + msg := experimentsDetails.ExperimentName + " experiment has been aborted" + types.SetEngineEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + + // generating summary event in chaosresult + types.SetResultEventAttributes(eventsDetails, types.Summary, msg, "Warning", resultDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + break loop } } + if err = KillStressMemoryParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients); err != nil { + return err + } return nil } @@ -137,7 +239,7 @@ func PrepareMemoryStress(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } //Starting the Memory stress experiment @@ -147,14 +249,26 @@ func PrepareMemoryStress(experimentsDetails *experimentTypes.ExperimentDetails, } //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil } -//KillStressMemory function to kill the experiment. 
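Both memory-hog modes above push errors from the stress goroutine through a channel and tolerate exit code 137, since an OOM kill of the stress process is an expected outcome when the target container has a memory limit. A stripped-down sketch of that select, with the actual exec-based stress command replaced by a placeholder:

```go
package main

import (
	"errors"
	"log"
	"strings"
	"time"
)

// watchStress waits for the chaos duration while draining errors from the
// stress goroutine; exit code 137 (OOM kill) is treated as success, any
// other error aborts the experiment.
func watchStress(duration time.Duration, stress func() error) error {
	stressErr := make(chan error, 1)
	go func() { stressErr <- stress() }()

	select {
	case err := <-stressErr:
		if err != nil {
			if strings.Contains(err.Error(), "137") {
				log.Println("stress process was OOM-killed (exit 137), treating as expected")
				return nil
			}
			return err
		}
		return nil
	case <-time.After(duration):
		log.Println("[Chaos]: Time is up for the experiment")
		return nil
	}
}

func main() {
	_ = watchStress(time.Second, func() error {
		// placeholder for the exec-based stress command
		return errors.New("command terminated with exit code 137")
	})
}
```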
Triggered by either timeout of chaos duration or termination of the experiment -func KillStressMemory(containerName, podName, namespace, memFreeCmd string, clients clients.ClientSets) error { +//GetTargetContainer will fetch the container name from application pod +// It will return the first container name from the application pod +func GetTargetContainer(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets) (string, error) { + pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{}) + if err != nil { + return "", err + } + + return pod.Spec.Containers[0].Name, nil +} + +// KillStressMemorySerial function to kill a stress process running inside target container +// Triggered by either timeout of chaos duration or termination of the experiment +func KillStressMemorySerial(containerName, podName, namespace, memFreeCmd string, clients clients.ClientSets) error { // It will contains all the pod & container details required for exec command execCommandDetails := litmusexec.PodDetails{} @@ -163,7 +277,20 @@ func KillStressMemory(containerName, podName, namespace, memFreeCmd string, clie litmusexec.SetExecCommandAttributes(&execCommandDetails, podName, containerName, namespace) _, err := litmusexec.Exec(&execCommandDetails, clients, command) if err != nil { - return errors.Errorf("Unable to kill stress process inside target container, due to err: %v", err) + return errors.Errorf("Unable to kill stress process inside target container, err: %v", err) + } + return nil +} + +// KillStressMemoryParallel function to kill all the stress process running inside target container +// Triggered by either timeout of chaos duration or termination of the experiment +func KillStressMemoryParallel(containerName string, targetPodList corev1.PodList, namespace, memFreeCmd string, clients clients.ClientSets) error { + + for _, pod := range targetPodList.Items { + + if err := KillStressMemorySerial(containerName, pod.Name, namespace, memFreeCmd, clients); err != nil { + return err + } } return nil } diff --git a/chaoslib/powerfulseal/pod-delete/lib/pod-delete.go b/chaoslib/powerfulseal/pod-delete/lib/pod-delete.go index b7db2e69b..a7702270b 100644 --- a/chaoslib/powerfulseal/pod-delete/lib/pod-delete.go +++ b/chaoslib/powerfulseal/pod-delete/lib/pod-delete.go @@ -23,7 +23,7 @@ func PreparePodDelete(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -65,7 +65,7 @@ func PreparePodDelete(experimentsDetails *experimentTypes.ExperimentDetails, cli } // Wait for Chaos Duration - log.Infof("[Wait]: Waiting for the %vs chaos duration", strconv.Itoa(experimentsDetails.ChaosDuration)) + log.Infof("[Wait]: Waiting for the %vs chaos duration", experimentsDetails.ChaosDuration) common.WaitForDuration(experimentsDetails.ChaosDuration) //Deleting the powerfulseal deployment @@ -84,7 +84,7 @@ func PreparePodDelete(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) 
+ log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil diff --git a/chaoslib/pumba/container-kill/lib/container-kill.go b/chaoslib/pumba/container-kill/lib/container-kill.go index e7da48294..6580f7571 100644 --- a/chaoslib/pumba/container-kill/lib/container-kill.go +++ b/chaoslib/pumba/container-kill/lib/container-kill.go @@ -24,12 +24,12 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, // if the target pod is not defined it will derive the random target pod list using pod affected percentage targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -37,14 +37,14 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, if experimentsDetails.TargetContainer == "" { experimentsDetails.TargetContainer, err = GetTargetContainer(experimentsDetails, targetPodList.Items[0].Name, clients) if err != nil { - return errors.Errorf("Unable to get the target container name due to, err: %v", err) + return errors.Errorf("Unable to get the target container name, err: %v", err) } } // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } if experimentsDetails.EngineName != "" { @@ -99,7 +99,7 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -110,7 +110,7 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails, func GetTargetContainer(experimentsDetails *experimentTypes.ExperimentDetails, appName string, clients clients.ClientSets) (string, error) { pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(appName, v1.GetOptions{}) if err != nil { - return "", errors.Wrapf(err, "Fail to get the application pod status, due to:%v", err) + return "", err } return pod.Spec.Containers[0].Name, nil @@ -142,7 +142,7 @@ func VerifyRestartCount(experimentsDetails *experimentTypes.ExperimentDetails, p for index := range podList.Items { pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(podList.Items[index].Name, v1.GetOptions{}) if err != nil { - return errors.Errorf("Unable to get the application pod, err: %v", err) + return err } for _, container := range 
pod.Status.ContainerStatuses { if container.Name == experimentsDetails.TargetContainer { diff --git a/chaoslib/pumba/cpu-chaos/lib/cpu-chaos.go b/chaoslib/pumba/cpu-chaos/lib/cpu-chaos.go index 9a6aa45dd..ad477de92 100644 --- a/chaoslib/pumba/cpu-chaos/lib/cpu-chaos.go +++ b/chaoslib/pumba/cpu-chaos/lib/cpu-chaos.go @@ -23,12 +23,12 @@ func PreparePodCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cli targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -41,7 +41,7 @@ func PreparePodCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cli // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // creating the helper pod to perform cpu chaos @@ -84,7 +84,7 @@ func PreparePodCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cli //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -114,7 +114,7 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie RestartPolicy: apiv1.RestartPolicyNever, NodeName: appNodeName, Volumes: []apiv1.Volume{ - apiv1.Volume{ + { Name: "dockersocket", VolumeSource: apiv1.VolumeSource{ HostPath: &apiv1.HostPathVolumeSource{ @@ -124,12 +124,12 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie }, }, Containers: []apiv1.Container{ - apiv1.Container{ + { Name: "pumba-stress", Image: experimentsDetails.LIBImage, Args: GetContainerArguments(experimentsDetails, appName), VolumeMounts: []apiv1.VolumeMount{ - apiv1.VolumeMount{ + { Name: "dockersocket", MountPath: "/var/run/docker.sock", }, diff --git a/chaoslib/pumba/memory-chaos/lib/memory-chaos.go b/chaoslib/pumba/memory-chaos/lib/memory-chaos.go index 5b64427f4..e58dffca7 100644 --- a/chaoslib/pumba/memory-chaos/lib/memory-chaos.go +++ b/chaoslib/pumba/memory-chaos/lib/memory-chaos.go @@ -23,12 +23,12 @@ func PreparePodMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } //Waiting for the ramp time before chaos injection if 
experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -41,7 +41,7 @@ func PreparePodMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // creating the helper pod to perform memory chaos @@ -84,7 +84,7 @@ func PreparePodMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails, //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -114,7 +114,7 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie RestartPolicy: apiv1.RestartPolicyNever, NodeName: appNodeName, Volumes: []apiv1.Volume{ - apiv1.Volume{ + { Name: "dockersocket", VolumeSource: apiv1.VolumeSource{ HostPath: &apiv1.HostPathVolumeSource{ @@ -124,12 +124,12 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie }, }, Containers: []apiv1.Container{ - apiv1.Container{ + { Name: "pumba-stress", Image: experimentsDetails.LIBImage, Args: GetContainerArguments(experimentsDetails, appName), VolumeMounts: []apiv1.VolumeMount{ - apiv1.VolumeMount{ + { Name: "dockersocket", MountPath: "/var/run/docker.sock", }, diff --git a/chaoslib/pumba/network-chaos/lib/network-chaos.go b/chaoslib/pumba/network-chaos/lib/network-chaos.go index 976caeecd..f2b1f7215 100644 --- a/chaoslib/pumba/network-chaos/lib/network-chaos.go +++ b/chaoslib/pumba/network-chaos/lib/network-chaos.go @@ -2,7 +2,6 @@ package lib import ( "net" - "strconv" "strings" clients "github.com/litmuschaos/litmus-go/pkg/clients" @@ -27,19 +26,19 @@ func PrepareAndInjectChaos(experimentsDetails *experimentTypes.ExperimentDetails // if the target pod is not defined it will derive the random target pod list using pod affected percentage targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to 
get annotations, err: %v", err) } if experimentsDetails.EngineName != "" { @@ -91,7 +90,7 @@ func PrepareAndInjectChaos(experimentsDetails *experimentTypes.ExperimentDetails //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil diff --git a/chaoslib/pumba/pod-io-stress/lib/pod-io-stress.go b/chaoslib/pumba/pod-io-stress/lib/pod-io-stress.go index 51975d88a..3d95d9f1d 100644 --- a/chaoslib/pumba/pod-io-stress/lib/pod-io-stress.go +++ b/chaoslib/pumba/pod-io-stress/lib/pod-io-stress.go @@ -23,12 +23,12 @@ func PreparePodIOStress(experimentsDetails *experimentTypes.ExperimentDetails, c targetPodList, err := common.GetPodList(experimentsDetails.AppNS, experimentsDetails.TargetPod, experimentsDetails.AppLabel, experimentsDetails.PodsAffectedPerc, clients) if err != nil { - return errors.Errorf("Unable to get the target pod list due to, err: %v", err) + return errors.Errorf("Unable to get the target pod list, err: %v", err) } //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } @@ -41,7 +41,7 @@ func PreparePodIOStress(experimentsDetails *experimentTypes.ExperimentDetails, c // Get Chaos Pod Annotation experimentsDetails.Annotations, err = common.GetChaosPodAnnotation(experimentsDetails.ChaosPodName, experimentsDetails.ChaosNamespace, clients) if err != nil { - return errors.Errorf("unable to get annotation, due to %v", err) + return errors.Errorf("unable to get annotations, err: %v", err) } // creating the helper pod to perform network chaos @@ -85,7 +85,7 @@ func PreparePodIOStress(experimentsDetails *experimentTypes.ExperimentDetails, c //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { - log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", strconv.Itoa(experimentsDetails.RampTime)) + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) common.WaitForDuration(experimentsDetails.RampTime) } return nil @@ -115,7 +115,7 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie RestartPolicy: apiv1.RestartPolicyNever, NodeName: appNodeName, Volumes: []apiv1.Volume{ - apiv1.Volume{ + { Name: "dockersocket", VolumeSource: apiv1.VolumeSource{ HostPath: &apiv1.HostPathVolumeSource{ @@ -125,12 +125,12 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie }, }, Containers: []apiv1.Container{ - apiv1.Container{ + { Name: "pumba-stress", Image: experimentsDetails.LIBImage, Args: GetContainerArguments(experimentsDetails, appName), VolumeMounts: []apiv1.VolumeMount{ - apiv1.VolumeMount{ + { Name: "dockersocket", MountPath: "/var/run/docker.sock", }, diff --git a/contribute/developer-guide/README.md b/contribute/developer-guide/README.md index f3b704b34..76621324b 100644 --- a/contribute/developer-guide/README.md +++ b/contribute/developer-guide/README.md @@ -1,6 +1,6 @@ ## Steps to Bootstrap a Chaos Experiment -The artefacts 
associated with a chaos-experiment are summarized below: +The artifacts associated with a chaos-experiment are summarized below: - Submitted in the litmuschaos/litmus-go repository, under the experiments/*chaos-category*/*experiment-name* folder @@ -19,7 +19,7 @@ The artefacts associated with a chaos-experiment are summarized below: Example: [pod delete experiment in chaos-charts](https://github.com/litmuschaos/chaos-charts/tree/master/charts/generic/pod-delete) -The *generate_experiment.go* script is a simple way to bootstrap your experiment, and helps create the aforementioned artefacts in the +The *generate_experiment.go* script is a simple way to bootstrap your experiment, and helps create the aforementioned artifacts in the appropriate directory (i.e., as per the chaos-category) based on an attributes file provided as input by the chart-developer. The scaffolded files consist of placeholders which can then be filled as desired. @@ -88,7 +88,7 @@ scaffolded files consist of placeholders which can then be filled as desired. ``` -- Run the following command to generate the necessary artefacts for submitting the `sample-category` chaos chart with +- Run the following command to generate the necessary artifacts for submitting the `sample-category` chaos chart with `pod-delete` experiment. ``` @@ -97,7 +97,7 @@ scaffolded files consist of placeholders which can then be filled as desired. **Note**: In the `-generateType` attribute, select the appropriate type of manifests to be generated, where, - `chart`: Just the chaos-chart metadata, i.e., chartserviceversion yaml - - `experiment`: Chaos experiment artefacts belonging to a an existing OR new chart. + - `experiment`: Chaos experiment artifacts belonging to an existing OR new chart. View the generated files in `/experiments/chaos-category` folder.
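The parallel-mode injection added earlier in this patch (pod-memory-hog chaoslib) waits on three channels at once: the error channel fed by the stress goroutines, an OS-signal channel for aborts, and a timer for the chaos duration; exit code 137 (OOM kill) is tolerated, an abort kills the stress processes and records the result early, and the timer ends the chaos window. A minimal, self-contained sketch of that select pattern follows; it is simplified to a single select, and the helper name `runStress` and the durations are hypothetical, not the actual litmus-go code.

```go
package main

import (
	"errors"
	"fmt"
	"os"
	"os/signal"
	"strings"
	"syscall"
	"time"
)

// runStress stands in for the experiment's stress goroutine (hypothetical helper).
// It reports how the stress command ended on the error channel.
func runStress(errCh chan<- error) {
	time.Sleep(2 * time.Second)
	errCh <- errors.New("command terminated with exit code 137") // simulate an OOM kill
}

func main() {
	chaosDuration := 5 * time.Second

	stressErr := make(chan error)
	go runStress(stressErr)

	// Relay SIGINT/SIGTERM so stress processes can be cleaned up on abort.
	signChan := make(chan os.Signal, 1)
	signal.Notify(signChan, os.Interrupt, syscall.SIGTERM)

	endTime := time.After(chaosDuration)

	select {
	case err := <-stressErr:
		// Exit code 137 (OOM kill) is treated as an expected outcome; anything else fails.
		if err != nil && !strings.Contains(err.Error(), "137") {
			fmt.Println("stress failed:", err)
			os.Exit(1)
		}
		fmt.Println("stress ended with OOM kill, treated as pass")
	case <-signChan:
		fmt.Println("abort signal received, cleaning up stress processes")
		// kill-stress / chaos-revert and result-update steps would run here
		os.Exit(1)
	case <-endTime:
		fmt.Println("chaos duration over")
	}
}
```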
diff --git a/experiments/cassandra/pod-delete/pod-delete.go b/experiments/cassandra/pod-delete/pod-delete.go index 04003ea77..6572fbd57 100644 --- a/experiments/cassandra/pod-delete/pod-delete.go +++ b/experiments/cassandra/pod-delete/pod-delete.go @@ -34,7 +34,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -51,7 +51,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ChaoslibDetail.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-delete experiment (SOT)" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") @@ -61,6 +61,11 @@ func main() { // Set the chaos result uid result.SetResultUID(&resultDetails, clients, &chaosDetails) + // generating the event in chaosresult to marked the verdict as awaited + msg := "experiment: " + experimentsDetails.ChaoslibDetail.ExperimentName + ", Result: Awaited" + types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + //DISPLAY THE APP INFORMATION log.InfoWithValues("The application informations are as follows", logrus.Fields{ "Namespace": experimentsDetails.ChaoslibDetail.AppNS, @@ -75,7 +80,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.ChaoslibDetail.AppNS, experimentsDetails.ChaoslibDetail.AppLabel, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") @@ -90,16 +95,16 @@ func main() { log.Info("[Status]: Checking the load distribution on the ring (pre-chaos)") err = cassandra.NodeToolStatusCheck(&experimentsDetails, clients) if err != nil { - log.Fatalf("[Status]: Chaos node tool status check is failed due to %v\n", err) + log.Fatalf("[Status]: Chaos node tool status check is failed, err: %v", err) } // Cassandra liveness check if experimentsDetails.CassandraLivenessCheck == "enabled" { ResourceVersionBefore, err = cassandra.LivenessCheck(&experimentsDetails, clients) if err != nil { - log.Fatalf("[Liveness]: Cassandra liveness check failed, due to %v\n", err) + log.Fatalf("[Liveness]: Cassandra liveness check failed, err: %v", err) } - log.Info("[Confirmation]: The cassandra application liveness pod deployed successfully") + log.Info("[Confirmation]: The cassandra application liveness pod created successfully") } else { log.Warn("[Liveness]: Cassandra Liveness check skipped as it was not enabled") } @@ -108,8 +113,8 @@ func main() { if experimentsDetails.ChaoslibDetail.ChaosLib == 
"litmus" { err = litmusLIB.PreparePodDelete(experimentsDetails.ChaoslibDetail, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the litmus lib for cassandra-pod-delete" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") return @@ -118,7 +123,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for cassandra-pod-delete" + failStep := "no match found for specified lib" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") return @@ -128,7 +133,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.ChaoslibDetail.AppNS, experimentsDetails.ChaoslibDetail.AppLabel, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") @@ -143,7 +148,7 @@ func main() { log.Info("[Status]: Checking the load distribution on the ring (post-chaos)") err = cassandra.NodeToolStatusCheck(&experimentsDetails, clients) if err != nil { - log.Fatalf("[Status]: Chaos node tool status check is failed due to %v\n", err) + log.Fatalf("[Status]: Chaos node tool status check is failed, err: %v", err) } // Cassandra statefulset liveness check (post-chaos) @@ -152,26 +157,36 @@ func main() { if experimentsDetails.CassandraLivenessCheck == "enabled" { err = status.CheckApplicationStatus(experimentsDetails.ChaoslibDetail.AppNS, "name=cassandra-liveness-deploy-"+experimentsDetails.RunID, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients) if err != nil { - log.Fatalf("Liveness status check failed due to %v\n", err) + log.Fatalf("Liveness status check failed, err: %v", err) } err = cassandra.LivenessCleanup(&experimentsDetails, clients, ResourceVersionBefore) if err != nil { - log.Fatalf("Liveness cleanup failed due to %v\n", err) + log.Fatalf("Liveness cleanup failed, err: %v", err) } } //Updating the chaosResult in the end of experiment log.Info("[The End]: Updating the chaos result of cassandra pod delete experiment (EOT)") err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) + } + + // generating the event in chaosresult to marked the verdict as pass/fail + msg = "experiment: " + experimentsDetails.ChaoslibDetail.ExperimentName + ", Result: " + resultDetails.Verdict + reason := types.PassVerdict + eventType := "Normal" + if resultDetails.Verdict != "Pass" { + reason = types.FailVerdict + eventType = "Warning" } + + types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails) + events.GenerateEvents(&eventsDetails, 
clients, &chaosDetails, "ChaosResult") + if experimentsDetails.ChaoslibDetail.EngineName != "" { msg := experimentsDetails.ChaoslibDetail.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed" types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") } - msg := experimentsDetails.ChaoslibDetail.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed" - types.SetResultEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &resultDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") } diff --git a/experiments/generic/container-kill/container-kill.go b/experiments/generic/container-kill/container-kill.go index fd631c159..04af2392e 100644 --- a/experiments/generic/container-kill/container-kill.go +++ b/experiments/generic/container-kill/container-kill.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" ) @@ -35,7 +36,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -55,7 +56,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of container-kill experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -76,11 +77,15 @@ func main() { "Ramp Time": experimentsDetails.RampTime, }) + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + //PRE-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -94,9 +99,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while 
adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -115,16 +119,16 @@ func main() { if experimentsDetails.ChaosLib == "litmus" && (experimentsDetails.ContainerRuntime == "containerd" || experimentsDetails.ContainerRuntime == "crio") { err = litmusLIB.PrepareContainerKill(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - failStep := "Executing litmus lib for the container-kill" + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) - log.Fatalf("Chaos injection failed due to %v\n", err) + log.Fatalf("Chaos injection failed, err: %v", err) } } else if experimentsDetails.ChaosLib == "litmus" && experimentsDetails.ContainerRuntime == "docker" { err = pumbaLIB.PrepareContainerKill(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - failStep := "Executing pumba lib for the container-kill" + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) - log.Fatalf("Chaos injection failed due to %v\n", err) + log.Fatalf("Chaos injection failed, err: %v", err) } } else { failStep := "lib and container-runtime combination not supported!" @@ -139,7 +143,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -153,7 +157,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -173,7 +177,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail @@ -187,6 +191,7 @@ func main() { types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + if experimentsDetails.EngineName != "" { msg := experimentsDetails.ExperimentName + " experiment has been " + resultDetails.Verdict + "ed" types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) diff --git a/experiments/generic/disk-fill/disk-fill.go b/experiments/generic/disk-fill/disk-fill.go index 23c50d393..f405d3f0a 100644 --- a/experiments/generic/disk-fill/disk-fill.go +++ 
b/experiments/generic/disk-fill/disk-fill.go @@ -34,7 +34,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -54,7 +54,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of disk-fill experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -79,7 +79,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -90,7 +90,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -105,9 +105,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -126,8 +125,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareDiskFill(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the litmus lib for disk-fill" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,7 +134,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for disk-fill" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -144,7 +143,7 @@ func main() { log.Info("[Status]: Verify that the AUT 
(Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -155,7 +154,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -170,7 +169,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -190,7 +189,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result err: %v\n", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/kubelet-service-kill/kubelet-service-kill.go b/experiments/generic/kubelet-service-kill/kubelet-service-kill.go index d620868eb..b0c52bb9a 100644 --- a/experiments/generic/kubelet-service-kill/kubelet-service-kill.go +++ b/experiments/generic/kubelet-service-kill/kubelet-service-kill.go @@ -34,7 +34,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -54,7 +54,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of kubelet-service-kill experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -79,7 +79,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status 
check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -90,7 +90,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -105,9 +105,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -126,8 +125,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareKubeletKill(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the litmus lib for kubelet-service-kill" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,7 +134,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for kubelet-service-kill" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -144,7 +143,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -155,7 +154,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -170,7 +169,7 @@ func main() { if 
len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -190,7 +189,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/network-latency/network-latency.go b/experiments/generic/network-latency/network-latency.go index f63b56f31..73ca39106 100644 --- a/experiments/generic/network-latency/network-latency.go +++ b/experiments/generic/network-latency/network-latency.go @@ -93,7 +93,6 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { log.Errorf("Probe failed, due to err: %v", err) failStep := "Failed while adding probe" diff --git a/experiments/generic/node-cpu-hog/node-cpu-hog.go b/experiments/generic/node-cpu-hog/node-cpu-hog.go index 76208d634..097787e77 100644 --- a/experiments/generic/node-cpu-hog/node-cpu-hog.go +++ b/experiments/generic/node-cpu-hog/node-cpu-hog.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -35,7 +36,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -55,7 +56,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-cpu-hog experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -77,11 +78,15 @@ func main() { "Ramp Time": experimentsDetails.RampTime, }) + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + //PRE-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = 
status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -92,7 +97,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -107,9 +112,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -128,8 +132,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareNodeCPUHog(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: CPU hog failed due to %v\n", err) - failStep := "CPU hog Chaos injection failed" + log.Errorf("[Error]: CPU hog failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -137,7 +141,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for node-cpu-hog" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -146,7 +150,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -157,7 +161,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are 
running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -172,7 +176,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -192,7 +196,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/node-drain/node-drain.go b/experiments/generic/node-drain/node-drain.go index 97315a3db..8daf82988 100644 --- a/experiments/generic/node-drain/node-drain.go +++ b/experiments/generic/node-drain/node-drain.go @@ -34,7 +34,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -54,7 +54,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of node-drain experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -80,7 +80,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -91,7 +91,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -106,9 +106,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, 
"ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -127,8 +126,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareNodeDrain(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the litmus lib for node-drain" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -136,7 +135,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for node-drain" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -145,7 +144,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -156,7 +155,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -171,7 +170,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -191,7 +190,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/node-io-stress/node-io-stress.go b/experiments/generic/node-io-stress/node-io-stress.go index 0bb41d874..57453ab18 100644 --- a/experiments/generic/node-io-stress/node-io-stress.go +++ 
b/experiments/generic/node-io-stress/node-io-stress.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -35,7 +36,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -55,7 +56,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of node-io-stress experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -80,11 +81,15 @@ func main() { "FilesystemUtilizationBytes": experimentsDetails.FilesystemUtilizationBytes, }) + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + //PRE-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -94,7 +99,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -109,9 +114,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -130,8 +134,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = 
litmusLIB.PrepareNodeIOStress(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: node io stress failed due to %v\n", err) - failStep := "node io stress chaos injection failed" + log.Errorf("[Error]: node io stress failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -139,7 +143,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for node-io-stress" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -148,7 +152,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -159,7 +163,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -174,7 +178,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -194,7 +198,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/node-memory-hog/node-memory-hog.go b/experiments/generic/node-memory-hog/node-memory-hog.go index 4e8af4239..2fca5649a 100644 --- a/experiments/generic/node-memory-hog/node-memory-hog.go +++ b/experiments/generic/node-memory-hog/node-memory-hog.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -35,7 +36,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if 
err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -55,7 +56,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of node-memory-hog experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -78,11 +79,15 @@ func main() { "Memory Percentage": experimentsDetails.MemoryPercentage, }) + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + //PRE-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -96,9 +101,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -117,8 +121,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareNodeMemoryHog(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: node memory hog failed due to %v\n", err) - failStep := "node memory hog chaos injection failed" + log.Errorf("[Error]: node memory hog failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -126,7 +130,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for node-memory-hog" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,7 +139,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, 
experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -149,7 +153,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -169,7 +173,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/node-taint/node-taint.go b/experiments/generic/node-taint/node-taint.go index 49a18e2a0..9372c1d55 100644 --- a/experiments/generic/node-taint/node-taint.go +++ b/experiments/generic/node-taint/node-taint.go @@ -34,7 +34,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -54,7 +54,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of node-taint experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -79,7 +79,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -90,7 +90,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, 
&resultDetails, failStep, clients, &eventsDetails) return @@ -105,9 +105,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -126,8 +125,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareNodeTaint(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the litmus lib for node-taint" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,7 +134,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for node-taint" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -144,7 +143,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -155,7 +154,7 @@ func main() { log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Auxiliary Application status check failed due to %v", err) + log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "Verify that the Auxiliary Applications are running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -170,7 +169,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -190,7 +189,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked 
the verdict as pass/fail diff --git a/experiments/generic/pod-autoscaler/pod-autoscaler.go b/experiments/generic/pod-autoscaler/pod-autoscaler.go index 87f32c93b..a4430d940 100644 --- a/experiments/generic/pod-autoscaler/pod-autoscaler.go +++ b/experiments/generic/pod-autoscaler/pod-autoscaler.go @@ -33,7 +33,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -50,7 +50,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-delete experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -75,7 +75,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") @@ -90,8 +90,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PreparePodAutoscaler(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the litmus lib for pod-autoscaler" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") return @@ -100,7 +100,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for pod-autoscaler" + failStep := "no match found for specified lib" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") return @@ -110,7 +110,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") @@ -125,7 +125,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = 
result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-cpu-hog/pod-cpu-hog.go b/experiments/generic/pod-cpu-hog/pod-cpu-hog.go index 385077887..85171247f 100644 --- a/experiments/generic/pod-cpu-hog/pod-cpu-hog.go +++ b/experiments/generic/pod-cpu-hog/pod-cpu-hog.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -36,7 +37,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -56,7 +57,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-cpu-hog experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -82,7 +83,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -96,9 +97,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -117,18 +117,21 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareCPUstress(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: CPU hog failed due to %v\n", err) - failStep := "CPU hog Chaos injection failed" + log.Errorf("[Error]: CPU hog failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } log.Info("[Confirmation]: CPU of the application pod has been stressed successfully") resultDetails.Verdict = "Pass" } else if experimentsDetails.ChaosLib == "pumba" { + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the 
entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) err = pumbaLIB.PreparePodCPUHog(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: CPU hog failed due to %v\n", err) - failStep := "CPU hog Chaos injection failed" + log.Errorf("[Error]: CPU hog failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -137,7 +140,7 @@ func main() { } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for pod-cpu-hog" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -146,7 +149,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -160,7 +163,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -180,7 +183,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-cpu-hog/test/test.yml b/experiments/generic/pod-cpu-hog/test/test.yml index aad3a6ec8..51e922276 100644 --- a/experiments/generic/pod-cpu-hog/test/test.yml +++ b/experiments/generic/pod-cpu-hog/test/test.yml @@ -49,6 +49,12 @@ spec: - name: TARGET_POD value: '' + - name: TARGET_CONTAINER + value: '' + + - name: SEQUENCE + value: 'parallel' + - name: CHAOS_NAMESPACE value: 'default' diff --git a/experiments/generic/pod-delete/pod-delete.go b/experiments/generic/pod-delete/pod-delete.go index f03011f79..4ab020134 100644 --- a/experiments/generic/pod-delete/pod-delete.go +++ b/experiments/generic/pod-delete/pod-delete.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" ) @@ -35,7 +36,7 
@@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -55,7 +56,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-delete experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -76,11 +77,15 @@ func main() { "Ramp Time": experimentsDetails.RampTime, }) + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + //PRE-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -94,9 +99,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -115,8 +119,8 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PreparePodDelete(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the litmus lib for pod-delete" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -125,8 +129,8 @@ func main() { } else if experimentsDetails.ChaosLib == "powerfulseal" { err = powerfulseal.PreparePodDelete(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the powerfulseal lib for pod-delete" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return 
} @@ -134,7 +138,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for pod-delete" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -143,7 +147,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -157,7 +161,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -177,7 +181,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-io-stress/pod-io-stress.go b/experiments/generic/pod-io-stress/pod-io-stress.go index bc46bd68a..f90f7b1ea 100644 --- a/experiments/generic/pod-io-stress/pod-io-stress.go +++ b/experiments/generic/pod-io-stress/pod-io-stress.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -35,7 +36,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -55,7 +56,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of " + experimentsDetails.ExperimentName + " experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -79,11 +80,15 @@ func main() { "NumberOfWorkers": experimentsDetails.NumberOfWorkers, }) + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed 
to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + //PRE-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -97,9 +102,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -118,8 +122,8 @@ func main() { if experimentsDetails.ChaosLib == "pumba" { err = pumbaLIB.PreparePodIOStress(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: pod io stress chaos failed due to %v\n", err) - failStep := "pod io stress chaos injection failed" + log.Errorf("[Error]: pod io stress chaos failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -127,7 +131,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the pumba lib for pod-io-stress" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -136,7 +140,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -150,7 +154,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -170,7 +174,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, 
clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-memory-hog/pod-memory-hog.go b/experiments/generic/pod-memory-hog/pod-memory-hog.go index 30220d4fd..828980285 100644 --- a/experiments/generic/pod-memory-hog/pod-memory-hog.go +++ b/experiments/generic/pod-memory-hog/pod-memory-hog.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -36,7 +37,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -56,7 +57,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-memory-hog experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -83,7 +84,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -97,9 +98,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -118,18 +118,21 @@ func main() { if experimentsDetails.ChaosLib == "litmus" { err = litmusLIB.PrepareMemoryStress(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: pod memory hog failed due to %v\n", err) - failStep := "pod memory hog chaos injection failed" + log.Errorf("[Error]: pod memory hog failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } log.Info("[Confirmation]: Memory of the application pod has been stressed successfully") resultDetails.Verdict = "Pass" } else if experimentsDetails.ChaosLib == "pumba" { + // Calling AbortWatcher go routine, it will continuously watch for the 
abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) err = pumbaLIB.PreparePodMemoryHog(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("[Error]: Memory hog failed due to %v\n", err) - failStep := "Memory hog Chaos injection failed" + log.Errorf("[Error]: Memory hog failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -138,7 +141,7 @@ func main() { } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the litmus lib for pod-memory-hog" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -147,7 +150,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -161,7 +164,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -181,7 +184,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-memory-hog/test/test.yml b/experiments/generic/pod-memory-hog/test/test.yml index 62a869d01..ac07e96c2 100644 --- a/experiments/generic/pod-memory-hog/test/test.yml +++ b/experiments/generic/pod-memory-hog/test/test.yml @@ -49,6 +49,12 @@ spec: - name: TARGET_POD value: '' + - name: TARGET_CONTAINER + value: '' + + - name: SEQUENCE + value: 'parallel' + - name: CHAOS_NAMESPACE value: 'default' diff --git a/experiments/generic/pod-network-corruption/pod-network-corruption.go b/experiments/generic/pod-network-corruption/pod-network-corruption.go index ca1de885b..4d0719086 100644 --- a/experiments/generic/pod-network-corruption/pod-network-corruption.go +++ b/experiments/generic/pod-network-corruption/pod-network-corruption.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" 
"github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -36,7 +37,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -56,7 +57,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-network-corruption experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -81,7 +82,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -95,9 +96,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -114,10 +114,13 @@ func main() { // Including the pumba lib for pod-network-corruption if experimentsDetails.ChaosLib == "litmus" && experimentsDetails.ContainerRuntime == "docker" { + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) err = pumbaLIB.PodNetworkCorruptionChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-corruption" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -126,8 +129,8 @@ func main() { } else if experimentsDetails.ChaosLib == "litmus" && (experimentsDetails.ContainerRuntime == "containerd" || experimentsDetails.ContainerRuntime == "crio") { err = litmusLIB.PodNetworkCorruptionChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - 
log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-corruption" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,7 +138,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the pumba lib for pod-network-corruption" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -144,7 +147,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -158,7 +161,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -178,7 +181,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-network-duplication/pod-network-duplication.go b/experiments/generic/pod-network-duplication/pod-network-duplication.go index 37f6b694f..0608d89ea 100644 --- a/experiments/generic/pod-network-duplication/pod-network-duplication.go +++ b/experiments/generic/pod-network-duplication/pod-network-duplication.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -36,7 +37,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -56,7 +57,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-network-duplication 
experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -81,7 +82,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -95,9 +96,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -114,10 +114,13 @@ func main() { // Including the pumba lib for pod-network-duplication if experimentsDetails.ChaosLib == "litmus" && experimentsDetails.ContainerRuntime == "docker" { + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) err = pumbaLIB.PodNetworkDuplicationChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-duplication" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -126,8 +129,8 @@ func main() { } else if experimentsDetails.ChaosLib == "litmus" && (experimentsDetails.ContainerRuntime == "containerd" || experimentsDetails.ContainerRuntime == "crio") { err = litmusLIB.PodNetworkDuplicationChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-duplication" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,7 +138,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the pumba lib for pod-network-duplication" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -144,7 +147,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, 
experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -158,7 +161,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -178,7 +181,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-network-latency/pod-network-latency.go b/experiments/generic/pod-network-latency/pod-network-latency.go index 139a13a73..475cc85e1 100644 --- a/experiments/generic/pod-network-latency/pod-network-latency.go +++ b/experiments/generic/pod-network-latency/pod-network-latency.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -36,7 +37,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -56,7 +57,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-network-latency experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -81,7 +82,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -95,7 +96,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) if err != nil { - 
log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -113,10 +114,13 @@ func main() { // Including the pumba lib for pod-network-latency if experimentsDetails.ChaosLib == "litmus" && experimentsDetails.ContainerRuntime == "docker" { + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) err = pumbaLIB.PodNetworkLatencyChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-latency" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -125,8 +129,8 @@ func main() { } else if experimentsDetails.ChaosLib == "litmus" && (experimentsDetails.ContainerRuntime == "containerd" || experimentsDetails.ContainerRuntime == "crio") { err = litmusLIB.PodNetworkLatencyChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-latency" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -134,7 +138,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the pumba lib for pod-network-latency" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -143,7 +147,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -157,7 +161,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -177,7 +181,7 @@ func main() { log.Infof("[The End]: Updating the chaos result 
of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/experiments/generic/pod-network-loss/pod-network-loss.go b/experiments/generic/pod-network-loss/pod-network-loss.go index a0d9f72f5..74f9bd253 100644 --- a/experiments/generic/pod-network-loss/pod-network-loss.go +++ b/experiments/generic/pod-network-loss/pod-network-loss.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/result" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" "k8s.io/klog" ) @@ -36,7 +37,7 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { - log.Fatalf("Unable to Get the kubeconfig due to %v", err) + log.Fatalf("Unable to Get the kubeconfig, err: %v", err) } //Fetching all the ENV passed from the runner pod @@ -56,7 +57,7 @@ func main() { log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT") if err != nil { - log.Errorf("Unable to Create the Chaos Result due to %v", err) + log.Errorf("Unable to Create the Chaos Result, err: %v", err) failStep := "Updating the chaos result of pod-network-loss experiment (SOT)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -81,7 +82,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - log.Errorf("Application status check failed due to %v\n", err) + log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -95,9 +96,8 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") if err != nil { - log.Errorf("Probe failed, due to err: %v", err) + log.Errorf("Probe failed, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) @@ -114,10 +114,13 @@ func main() { // Including the pumba lib for pod-network-loss if experimentsDetails.ChaosLib == "litmus" && experimentsDetails.ContainerRuntime == "docker" { + // Calling AbortWatcher go routine, it will continuously watch for the abort signal for the entire chaos duration and generate the required events and result + // It is being invoked here, as opposed to within the chaoslib, as these experiments do not need additional recovery/chaos revert steps like in case of network experiments + go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) err = 
pumbaLIB.PodNetworkLossChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-loss" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -126,8 +129,8 @@ func main() { } else if experimentsDetails.ChaosLib == "litmus" && (experimentsDetails.ContainerRuntime == "containerd" || experimentsDetails.ContainerRuntime == "crio") { err = litmusLIB.PodNetworkLossChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails) if err != nil { - log.Errorf("Chaos injection failed due to %v\n", err) - failStep := "Including the pumba lib for pod-network-loss" + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "failed in chaos injection phase" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,7 +138,7 @@ func main() { resultDetails.Verdict = "Pass" } else { log.Error("[Invalid]: Please Provide the correct LIB") - failStep := "Including the pumba lib for pod-network-loss" + failStep := "no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -144,7 +147,7 @@ func main() { log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) if err != nil { - klog.V(0).Infof("Application status check failed due to %v\n", err) + klog.V(0).Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return @@ -158,7 +161,7 @@ func main() { if len(resultDetails.ProbeDetails) != 0 { err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails) if err != nil { - log.Errorf("Unable to Add the probes, due to err: %v", err) + log.Errorf("Unable to Add the probes, err: %v", err) failStep := "Failed while adding probe" msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) @@ -178,7 +181,7 @@ func main() { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") if err != nil { - log.Fatalf("Unable to Update the Chaos Result due to %v\n", err) + log.Fatalf("Unable to Update the Chaos Result, err: %v", err) } // generating the event in chaosresult to marked the verdict as pass/fail diff --git a/pkg/cassandra/liveness.go b/pkg/cassandra/liveness.go index c09b4a017..b4e8b4066 100644 --- a/pkg/cassandra/liveness.go +++ b/pkg/cassandra/liveness.go @@ -51,7 +51,7 @@ func LivenessCheck(experimentsDetails *experimentTypes.ExperimentDetails, client // Record cassandra liveness pod resource version ResourceVersionBefore, err := GetLivenessPodResourceVersion(experimentsDetails, clients) if err != nil { - return ResourceVersionBefore, errors.Errorf("Fail to get the pod resource version, due to %v", err) + return ResourceVersionBefore, errors.Errorf("Failed to get the pod resource 
version, err: %v", err) } return ResourceVersionBefore, nil @@ -67,13 +67,13 @@ func LivenessCleanup(experimentsDetails *experimentTypes.ExperimentDetails, clie log.Info("[CleanUP]: Getting ClusterIP of liveness service") ClusterIP, err := GetServiceClusterIP(experimentsDetails, clients) if err != nil { - return errors.Errorf("Fail to get the ClusterIP of liveness service, due to %v", err) + return errors.Errorf("Failed to get the ClusterIP of liveness service, err: %v", err) } // Record cassandra liveness pod resource version after chaos ResourceVersionAfter, err := GetLivenessPodResourceVersion(experimentsDetails, clients) if err != nil { - return errors.Errorf("Fail to get the pod resource version") + return errors.Errorf("Failed to get the pod resource version, err: %v", err) } err = ResourceVersionCheck(ResourceVersionBefore, ResourceVersionAfter) @@ -83,22 +83,22 @@ func LivenessCleanup(experimentsDetails *experimentTypes.ExperimentDetails, clie err = WaitTillCycleComplete(experimentsDetails, ClusterIP) if err != nil { - return errors.Errorf("cycle complete test failed, due to %v", err) + return errors.Errorf("cycle complete test failed, err: %v", err) } err = DeleteLivenessDeployment(experimentsDetails, clients) if err != nil { - return errors.Errorf("Liveness deployment deletion failed, due to %v", err) + return errors.Errorf("Liveness deployment deletion failed, err: %v", err) } - log.Info("Cassandra liveness deployment deleted successfully") + log.Info("[Cleanup]: Cassandra liveness deployment has been deleted successfully") err = DeleteLivenessService(experimentsDetails, clients) if err != nil { - return errors.Errorf("Liveness service deletion failed, due to %v", err) + return errors.Errorf("Liveness service deletion failed, err: %v", err) } - log.Info("Cassandra liveness service deleted successfully") + log.Info("[Cleanup]: Cassandra liveness service has been deleted successfully") return nil } @@ -108,7 +108,7 @@ func GetLivenessPodResourceVersion(experimentsDetails *experimentTypes.Experimen livenessPods, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaoslibDetail.AppNS).List(metav1.ListOptions{LabelSelector: "name=cassandra-liveness-deploy-" + experimentsDetails.RunID}) if err != nil || len(livenessPods.Items) == 0 { - return "", errors.Errorf("Unable to get the liveness pod, due to %v", err) + return "", errors.Errorf("Unable to get the liveness pod, err: %v", err) } ResourceVersion := livenessPods.Items[0].ResourceVersion @@ -120,8 +120,9 @@ func GetServiceClusterIP(experimentsDetails *experimentTypes.ExperimentDetails, service, err := clients.KubeClient.CoreV1().Services(experimentsDetails.ChaoslibDetail.AppNS).Get("cassandra-liveness-service-"+experimentsDetails.RunID, metav1.GetOptions{}) if err != nil { - return "", errors.Errorf("Fail to get the liveness service") + return "", err } + return service.Spec.ClusterIP, nil } @@ -131,7 +132,7 @@ func WaitTillCycleComplete(experimentsDetails *experimentTypes.ExperimentDetails port := strconv.Itoa(experimentsDetails.LivenessServicePort) URL := "http://" + ClusterIP + ":" + port - log.Infof("The URL to check the status of liveness pod cycle: %v", URL) + log.Infof("The URL to check the status of liveness pod cycle, url: %v", URL) err := retry. Times(uint(experimentsDetails.ChaoslibDetail.Timeout / experimentsDetails.ChaoslibDetail.Delay)). 
@@ -139,7 +140,7 @@ func WaitTillCycleComplete(experimentsDetails *experimentTypes.ExperimentDetails Try(func(attempt uint) error { response, err := http.Get(URL) if err != nil { - return errors.Errorf("The HTTP request failed with error %s\n", err) + return errors.Errorf("The HTTP request failed with error %s", err) } data, _ := ioutil.ReadAll(response.Body) if !strings.Contains(string(data), "CycleComplete") { @@ -157,7 +158,7 @@ func WaitTillCycleComplete(experimentsDetails *experimentTypes.ExperimentDetails func ResourceVersionCheck(ResourceVersionBefore, ResourceVersionAfter string) error { if ResourceVersionBefore != ResourceVersionAfter { - return errors.Errorf(" Resource Version Check failed!") + return errors.Errorf("Resource Version Check failed, Resource version remains same") } log.Info("The cassandra cluster is active") @@ -171,7 +172,7 @@ func DeleteLivenessDeployment(experimentsDetails *experimentTypes.ExperimentDeta if err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.ChaoslibDetail.AppNS).Delete("cassandra-liveness-deploy-"+experimentsDetails.RunID, &metav1.DeleteOptions{ PropagationPolicy: &deletePolicy, }); err != nil { - return errors.Errorf("Fail to delete liveness deployment, due to %v", err) + return err } err := retry. Times(uint(experimentsDetails.ChaoslibDetail.Timeout / experimentsDetails.ChaoslibDetail.Delay)). @@ -193,7 +194,7 @@ func DeleteLivenessService(experimentsDetails *experimentTypes.ExperimentDetails if err := clients.KubeClient.CoreV1().Services(experimentsDetails.ChaoslibDetail.AppNS).Delete("cassandra-liveness-service-"+experimentsDetails.RunID, &metav1.DeleteOptions{ PropagationPolicy: &deletePolicy, }); err != nil { - return errors.Errorf("Fail to delete liveness service, due to %v", err) + return errors.Errorf("Fail to delete liveness service, err: %v", err) } err := retry. Times(uint(experimentsDetails.ChaoslibDetail.Timeout / experimentsDetails.ChaoslibDetail.Delay)). 
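WaitTillCycleComplete above polls the liveness service until its response body contains `CycleComplete`. Here is a dependency-free sketch of the same polling loop using a plain `for` and `time.Sleep` in place of the litmus retry package; the parameter names are illustrative stand-ins for the Timeout and Delay settings used in the diff.

```go
package cassandra

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"
	"time"
)

// waitTillCycleComplete is an illustrative, dependency-free version of the
// polling performed by WaitTillCycleComplete above: hit the liveness service
// until the response body reports "CycleComplete" or the attempts run out.
func waitTillCycleComplete(clusterIP string, port, retries int, delay time.Duration) error {
	url := fmt.Sprintf("http://%s:%d", clusterIP, port)
	for attempt := 0; attempt < retries; attempt++ {
		resp, err := http.Get(url)
		if err == nil {
			data, _ := ioutil.ReadAll(resp.Body)
			resp.Body.Close()
			if strings.Contains(string(data), "CycleComplete") {
				return nil
			}
		}
		// Either the request failed or the cycle is still running; retry after the delay.
		time.Sleep(delay)
	}
	return fmt.Errorf("liveness cycle did not report CycleComplete within %d attempts", retries)
}
```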
@@ -238,7 +239,7 @@ func CreateLivenessPod(experimentsDetails *experimentTypes.ExperimentDetails, cl }, Spec: apiv1.PodSpec{ Volumes: []apiv1.Volume{ - apiv1.Volume{ + { Name: "status-volume", VolumeSource: apiv1.VolumeSource{ EmptyDir: &apiv1.EmptyDirVolumeSource{}, @@ -246,7 +247,7 @@ func CreateLivenessPod(experimentsDetails *experimentTypes.ExperimentDetails, cl }, }, Containers: []apiv1.Container{ - apiv1.Container{ + { Name: "liveness-business-logic", Image: experimentsDetails.CassandraLivenessImage, Command: []string{ @@ -257,41 +258,41 @@ func CreateLivenessPod(experimentsDetails *experimentTypes.ExperimentDetails, cl "bash cassandra-liveness-check.sh", }, Env: []apiv1.EnvVar{ - apiv1.EnvVar{ + { Name: "LIVENESS_PERIOD_SECONDS", Value: "10", }, - apiv1.EnvVar{ + { Name: "LIVENESS_TIMEOUT_SECONDS", Value: "10", }, - apiv1.EnvVar{ + { Name: "LIVENESS_RETRY_COUNT", Value: "10", }, - apiv1.EnvVar{ + { Name: "CASSANDRA_SVC_NAME", Value: experimentsDetails.CassandraServiceName, }, - apiv1.EnvVar{ + { Name: "REPLICATION_FACTOR", Value: experimentsDetails.KeySpaceReplicaFactor, }, - apiv1.EnvVar{ + { Name: "CASSANDRA_PORT", Value: strconv.Itoa(experimentsDetails.CassandraPort), }, }, Resources: apiv1.ResourceRequirements{}, VolumeMounts: []apiv1.VolumeMount{ - apiv1.VolumeMount{ + { Name: "status-volume", MountPath: "/var/tmp", }, }, ImagePullPolicy: apiv1.PullPolicy("Always"), }, - apiv1.Container{ + { Name: "webserver", Image: experimentsDetails.CassandraLivenessImage, Command: []string{ @@ -302,24 +303,24 @@ func CreateLivenessPod(experimentsDetails *experimentTypes.ExperimentDetails, cl "bash webserver.sh", }, Ports: []apiv1.ContainerPort{ - apiv1.ContainerPort{ + { HostPort: 0, ContainerPort: int32(experimentsDetails.LivenessServicePort), }, }, Env: []apiv1.EnvVar{ - apiv1.EnvVar{ + { Name: "INIT_WAIT_SECONDS", Value: "10", }, - apiv1.EnvVar{ + { Name: "LIVENESS_SVC_PORT", Value: strconv.Itoa(experimentsDetails.LivenessServicePort), }, }, Resources: apiv1.ResourceRequirements{}, VolumeMounts: []apiv1.VolumeMount{ - apiv1.VolumeMount{ + { Name: "status-volume", MountPath: "/var/tmp", }, @@ -337,7 +338,7 @@ func CreateLivenessPod(experimentsDetails *experimentTypes.ExperimentDetails, cl // Creating liveness deployment _, err := clients.KubeClient.AppsV1().Deployments(experimentsDetails.ChaoslibDetail.AppNS).Create(liveness) if err != nil { - return errors.Errorf("fail to create liveness deployment, due to %v", err) + return err } log.Info("Liveness Deployment Created successfully!") return nil @@ -360,7 +361,7 @@ func CreateLivenessService(experimentsDetails *experimentTypes.ExperimentDetails }, Spec: corev1.ServiceSpec{ Ports: []corev1.ServicePort{ - corev1.ServicePort{ + { Name: "liveness", Protocol: corev1.Protocol("TCP"), Port: int32(experimentsDetails.LivenessServicePort), @@ -376,7 +377,7 @@ func CreateLivenessService(experimentsDetails *experimentTypes.ExperimentDetails // Creating liveness service _, err := clients.KubeClient.CoreV1().Services(experimentsDetails.ChaoslibDetail.AppNS).Create(livenessSvc) if err != nil { - return errors.Errorf("fail to create liveness service, due to %v", err) + return err } log.Info("Liveness service created successfully!") diff --git a/pkg/cassandra/node-tools.go b/pkg/cassandra/node-tools.go index 5dde60532..fbcaedf94 100644 --- a/pkg/cassandra/node-tools.go +++ b/pkg/cassandra/node-tools.go @@ -22,23 +22,23 @@ func NodeToolStatusCheck(experimentsDetails *experimentTypes.ExperimentDetails, if err != nil { return err } - 
log.Infof("[NodeToolStatus]: The application pod name for checking load distribution: %v", targetPodName) + log.Infof("[NodeToolStatus]: Selecting %v pod for running `nodetool status` command", targetPodName) replicaCount, err = GetApplicationReplicaCount(experimentsDetails, clients) if err != nil { - return errors.Errorf("Unable to get app replica count, due to %v", err) + return errors.Errorf("Unable to get app replica count, err: %v", err) } log.Info("[Check]: Checking for the distribution of load on the ring") // Get the load percentage on the application pod loadPercentage, err := GetLoadDistribution(experimentsDetails, clients, targetPodName) if err != nil { - return errors.Errorf("Load distribution check failed, due to %v", err) + return errors.Errorf("Failed to get load percentage, err: %v", err) } // Check the load precentage if err = CheckLoadPercentage(loadPercentage, replicaCount); err != nil { - return errors.Errorf("Load percentage check failed, due to %v", err) + return errors.Errorf("Load percentage check failed, err: %v", err) } return nil @@ -48,7 +48,7 @@ func NodeToolStatusCheck(experimentsDetails *experimentTypes.ExperimentDetails, func GetApplicationPodName(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (string, error) { podList, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaoslibDetail.AppNS).List(metav1.ListOptions{LabelSelector: experimentsDetails.ChaoslibDetail.AppLabel}) if err != nil || len(podList.Items) == 0 { - return "", errors.Errorf("Fail to get the application pod in %v namespace", experimentsDetails.ChaoslibDetail.AppNS) + return "", errors.Errorf("Failed to get the application pod in %v namespace", experimentsDetails.ChaoslibDetail.AppNS) } return podList.Items[0].Name, nil @@ -58,7 +58,7 @@ func GetApplicationPodName(experimentsDetails *experimentTypes.ExperimentDetails func GetApplicationReplicaCount(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (int, error) { podList, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaoslibDetail.AppNS).List(metav1.ListOptions{LabelSelector: experimentsDetails.ChaoslibDetail.AppLabel}) if err != nil || len(podList.Items) == 0 { - return 0, errors.Errorf("Fail to get the application pod in %v namespace", experimentsDetails.ChaoslibDetail.AppNS) + return 0, errors.Errorf("Failed to get the application pod in %v namespace", experimentsDetails.ChaoslibDetail.AppNS) } return len(podList.Items), nil } @@ -69,7 +69,7 @@ func CheckLoadPercentage(loadPercentage []string, replicaCount int) error { // It will make sure that the replica have some load // It will fail if replica has 0% load if len(loadPercentage) != replicaCount { - return errors.Errorf("Fail to get the load on every replica") + return errors.Errorf("Failed to get the load on every replica") } for count := 0; count < len(loadPercentage); count++ { @@ -93,7 +93,7 @@ func GetLoadDistribution(experimentsDetails *experimentTypes.ExperimentDetails, litmusexec.SetExecCommandAttributes(&execCommandDetails, targetPod, "cassandra", experimentsDetails.ChaoslibDetail.AppNS) response, err := litmusexec.Exec(&execCommandDetails, clients, command) if err != nil { - return nil, errors.Errorf("Unable to get nodetool status details due to err: %v", err) + return nil, errors.Errorf("Unable to get nodetool status details, err: %v", err) } split := strings.Split(response, "\n") loadPercentage := split[:len(split)-1] diff --git a/pkg/clients/clientset.go b/pkg/clients/clientset.go 
index 3ff359269..d4af72289 100644 --- a/pkg/clients/clientset.go +++ b/pkg/clients/clientset.go @@ -67,7 +67,7 @@ func getKubeConfig() (*rest.Config, error) { func GenerateK8sClientSet(config *rest.Config) (*kubernetes.Clientset, error) { k8sClientSet, err := kubernetes.NewForConfig(config) if err != nil { - return nil, errors.Wrapf(err, "Unable to generate kubernetes clientSet %s: ", err) + return nil, errors.Wrapf(err, "Unable to generate kubernetes clientSet, err: %v: ", err) } return k8sClientSet, nil } @@ -76,7 +76,7 @@ func GenerateK8sClientSet(config *rest.Config) (*kubernetes.Clientset, error) { func GenerateLitmusClientSet(config *rest.Config) (*chaosClient.LitmuschaosV1alpha1Client, error) { litmusClientSet, err := chaosClient.NewForConfig(config) if err != nil { - return nil, errors.Wrapf(err, "Unable to create LitmusClientSet: %v", err) + return nil, errors.Wrapf(err, "Unable to create LitmusClientSet, err: %v", err) } return litmusClientSet, nil } diff --git a/pkg/generic/pod-cpu-hog/environment/environment.go b/pkg/generic/pod-cpu-hog/environment/environment.go index dfbb5c178..f797fecb0 100644 --- a/pkg/generic/pod-cpu-hog/environment/environment.go +++ b/pkg/generic/pod-cpu-hog/environment/environment.go @@ -32,6 +32,8 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.ChaosInjectCmd = Getenv("CHAOS_INJECT_COMMAND", "md5sum /dev/zero") experimentDetails.ChaosKillCmd = Getenv("CHAOS_KILL_COMMAND", "kill $(find /proc -name exe -lname '*/md5sum' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}' | head -n 1)") experimentDetails.LIBImage = Getenv("LIB_IMAGE", "gaiaadm/pumba") + experimentDetails.TargetContainer = Getenv("TARGET_CONTAINER", "") + experimentDetails.Sequence = Getenv("SEQUENCE", "parallel") } diff --git a/pkg/generic/pod-cpu-hog/types/types.go b/pkg/generic/pod-cpu-hog/types/types.go index 40a9adad8..b4df7d0a1 100644 --- a/pkg/generic/pod-cpu-hog/types/types.go +++ b/pkg/generic/pod-cpu-hog/types/types.go @@ -28,4 +28,6 @@ type ExperimentDetails struct { ChaosKillCmd string LIBImage string Annotations map[string]string + TargetContainer string + Sequence string } diff --git a/pkg/generic/pod-memory-hog/environment/environment.go b/pkg/generic/pod-memory-hog/environment/environment.go index 13df2b19e..008763fe1 100644 --- a/pkg/generic/pod-memory-hog/environment/environment.go +++ b/pkg/generic/pod-memory-hog/environment/environment.go @@ -31,6 +31,8 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.TargetPod = Getenv("TARGET_POD", "") experimentDetails.ChaosKillCmd = Getenv("CHAOS_KILL_COMMAND", "kill $(find /proc -name exe -lname '*/dd' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}' | head -n 1)") experimentDetails.LIBImage = Getenv("LIB_IMAGE", "gaiaadm/pumba") + experimentDetails.TargetContainer = Getenv("TARGET_CONTAINER", "") + experimentDetails.Sequence = Getenv("SEQUENCE", "parallel") } // Getenv fetch the env and set the default value, if any diff --git a/pkg/generic/pod-memory-hog/types/types.go b/pkg/generic/pod-memory-hog/types/types.go index 67b6ad5c9..1de3c75b6 100644 --- a/pkg/generic/pod-memory-hog/types/types.go +++ b/pkg/generic/pod-memory-hog/types/types.go @@ -27,4 +27,6 @@ type ExperimentDetails struct { ChaosKillCmd string LIBImage string Annotations map[string]string + TargetContainer string + Sequence string } diff --git a/pkg/probe/cmdprobe.go b/pkg/probe/cmdprobe.go index 4316a30b8..9f15ef383 100644 --- a/pkg/probe/cmdprobe.go +++ 
b/pkg/probe/cmdprobe.go @@ -205,7 +205,7 @@ func TriggerSourceCmdProbe(probe v1alpha1.CmdProbeAttributes, execCommandDetails // exec inside the external pod to get the o/p of given command output, err := litmusexec.Exec(&execCommandDetails, clients, command) if err != nil { - return errors.Errorf("Unable to get output of cmd command due to err: %v", err) + return errors.Errorf("Unable to get output of cmd command, err: %v", err) } // Trim the extra whitespaces from the output and match the actual output with the expected output if strings.TrimSpace(output) != probe.Inputs.ExpectedResult { diff --git a/pkg/result/chaosresult.go b/pkg/result/chaosresult.go index 4a2c2ec87..29eda948d 100644 --- a/pkg/result/chaosresult.go +++ b/pkg/result/chaosresult.go @@ -138,7 +138,7 @@ func PatchChaosResult(result *v1alpha1.ChaosResult, clients clients.ClientSets, if resultDetails.Verdict == "Pass" && len(resultDetails.ProbeDetails) != 0 { result.Status.ExperimentStatus.ProbeSuccessPercentage = "100" - } else if resultDetails.Verdict == "Fail" && len(resultDetails.ProbeDetails) != 0 { + } else if (resultDetails.Verdict == "Fail" || resultDetails.Verdict == "Stopped") && len(resultDetails.ProbeDetails) != 0 { probe.SetProbeVerdictAfterFailure(resultDetails) result.Status.ExperimentStatus.ProbeSuccessPercentage = strconv.Itoa((resultDetails.PassedProbeCount * 100) / len(resultDetails.ProbeDetails)) } diff --git a/pkg/status/application.go b/pkg/status/application.go index bf50ebcc8..4e169f99a 100644 --- a/pkg/status/application.go +++ b/pkg/status/application.go @@ -55,7 +55,7 @@ func CheckPodStatus(appNs string, appLabel string, timeout, delay int, clients c Try(func(attempt uint) error { podSpec, err := clients.KubeClient.CoreV1().Pods(appNs).List(metav1.ListOptions{LabelSelector: appLabel}) if err != nil || len(podSpec.Items) == 0 { - return errors.Errorf("Unable to get the pod, err: %v", err) + return errors.Errorf("Unable to list the pods, err: %v", err) } for _, pod := range podSpec.Items { if string(pod.Status.Phase) != "Running" { @@ -81,7 +81,7 @@ func CheckContainerStatus(appNs string, appLabel string, timeout, delay int, cli Try(func(attempt uint) error { podSpec, err := clients.KubeClient.CoreV1().Pods(appNs).List(metav1.ListOptions{LabelSelector: appLabel}) if err != nil || len(podSpec.Items) == 0 { - return errors.Errorf("Unable to get the pod, err: %v", err) + return errors.Errorf("Unable to list the pods, err: %v", err) } for _, pod := range podSpec.Items { for _, container := range pod.Status.ContainerStatuses { @@ -114,7 +114,7 @@ func WaitForCompletion(appNs string, appLabel string, clients clients.ClientSets Try(func(attempt uint) error { podSpec, err := clients.KubeClient.CoreV1().Pods(appNs).List(metav1.ListOptions{LabelSelector: appLabel}) if err != nil || len(podSpec.Items) == 0 { - return errors.Errorf("Unable to get the pod, err: %v", err) + return errors.Errorf("Unable to list the pods, err: %v", err) } // it will check for the status of helper pod, if it is Succeeded and target container is completed then it will marked it as completed and return // if it is still running then it will check for the target container, as we can have multiple container inside helper pod (istio) diff --git a/pkg/status/nodes.go b/pkg/status/nodes.go index 8ef5f8ac7..a625bb72e 100644 --- a/pkg/status/nodes.go +++ b/pkg/status/nodes.go @@ -20,7 +20,7 @@ func CheckNodeStatus(nodeName string, timeout, delay int, clients clients.Client Try(func(attempt uint) error { node, err := 
clients.KubeClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) if err != nil { - return errors.Errorf("Unable to get the node, err: %v", err) + return err } conditions := node.Status.Conditions isReady := false @@ -53,7 +53,7 @@ func CheckNodeNotReadyState(nodeName string, timeout, delay int, clients clients Try(func(attempt uint) error { node, err := clients.KubeClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) if err != nil { - return errors.Errorf("Unable to get the node, err: %v", err) + return err } conditions := node.Status.Conditions isReady := false diff --git a/pkg/utils/common/common.go b/pkg/utils/common/common.go index d5557f9e9..ea0ef8605 100644 --- a/pkg/utils/common/common.go +++ b/pkg/utils/common/common.go @@ -2,12 +2,18 @@ package common import ( "math/rand" + "os" + "os/signal" "strconv" + "syscall" "time" "github.com/litmuschaos/litmus-go/pkg/clients" + "github.com/litmuschaos/litmus-go/pkg/events" "github.com/litmuschaos/litmus-go/pkg/log" "github.com/litmuschaos/litmus-go/pkg/math" + "github.com/litmuschaos/litmus-go/pkg/result" + "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/retry" "github.com/pkg/errors" core_v1 "k8s.io/api/core/v1" @@ -101,7 +107,7 @@ func GetPodList(namespace, targetPod, appLabels string, podAffPerc int, clients realpods := core_v1.PodList{} podList, err := clients.KubeClient.CoreV1().Pods(namespace).List(v1.ListOptions{LabelSelector: appLabels}) if err != nil || len(podList.Items) == 0 { - return core_v1.PodList{}, errors.Wrapf(err, "Fail to list the application pod in %v namespace", namespace) + return core_v1.PodList{}, errors.Wrapf(err, "Failed to list the application pod in %v namespace", namespace) } isPodAvailable, err := CheckForAvailibiltyOfPod(namespace, targetPod, clients) @@ -140,7 +146,7 @@ func GetChaosPodAnnotation(podName, namespace string, clients clients.ClientSets pod, err := clients.KubeClient.CoreV1().Pods(namespace).Get(podName, v1.GetOptions{}) if err != nil { - return nil, errors.Errorf("fail to get the chaos pod annotation, due to %v", err) + return nil, err } return pod.Annotations, nil } @@ -149,7 +155,7 @@ func GetChaosPodAnnotation(podName, namespace string, clients clients.ClientSets func GetNodeName(namespace, labels string, clients clients.ClientSets) (string, error) { podList, err := clients.KubeClient.CoreV1().Pods(namespace).List(v1.ListOptions{LabelSelector: labels}) if err != nil || len(podList.Items) == 0 { - return "", errors.Wrapf(err, "Fail to get the application pod in %v namespace, due to err: %v", namespace, err) + return "", errors.Wrapf(err, "Failed to get the application pod in %v namespace, err: %v", namespace, err) } rand.Seed(time.Now().Unix()) @@ -158,3 +164,34 @@ func GetNodeName(namespace, labels string, clients clients.ClientSets) (string, return nodeName, nil } + +// AbortWatcher continuously watches for the abort signals +// it will update chaosresult w/ failed step and create an abort event, if it received an abort signal during chaos +func AbortWatcher(expname string, clients clients.ClientSets, resultDetails *types.ResultDetails, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails) { + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. 
+ signal.Notify(signChan, os.Interrupt, syscall.SIGTERM, syscall.SIGKILL) + + for { + select { + case <-signChan: + log.Info("[Chaos]: Chaos Experiment Abortion started because of terminated signal received") + // updating the chaosresult after stopped + failStep := "Chaos injection stopped!" + types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + + // generating summary event in chaosengine + msg := expname + " experiment has been aborted" + types.SetEngineEventAttributes(eventsDetails, types.Summary, msg, "Warning", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + + // generating summary event in chaosresult + types.SetResultEventAttributes(eventsDetails, types.StoppedVerdict, msg, "Warning", resultDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosResult") + os.Exit(1) + } + } +} diff --git a/pkg/utils/exec/exec.go b/pkg/utils/exec/exec.go index 0a314e6d8..bc3c4e07d 100644 --- a/pkg/utils/exec/exec.go +++ b/pkg/utils/exec/exec.go @@ -71,7 +71,7 @@ func Exec(commandDetails *PodDetails, clients clients.ClientSets, command []stri if strings.Contains(err.Error(), "137") { log.Warn("Chaos process OOM killed as the provided value exceeds resource limits") } else { - log.Infof("[Prepare]: Unable to run command inside container due to, err : %v", err.Error()) + log.Infof("[Prepare]: Unable to run command inside container, err : %v", err.Error()) } return "", err }
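The pod-cpu-hog and pod-memory-hog experiments above gain TARGET_CONTAINER and SEQUENCE settings, with SEQUENCE defaulting to `parallel`. The sketch below shows the read-with-default pattern and how a lib might branch on the value; the `getenv` helper and the serial/parallel loop are illustrative, not the repository's own Getenv implementation.

```go
package main

import (
	"fmt"
	"os"
)

// getenv is an illustrative stand-in for the Getenv helper used in the
// environment packages above: return the fallback when the variable is unset.
func getenv(key, fallback string) string {
	if value, ok := os.LookupEnv(key); ok {
		return value
	}
	return fallback
}

func main() {
	// Defaults mirror the new pod-cpu-hog / pod-memory-hog settings in the diff.
	sequence := getenv("SEQUENCE", "parallel")
	targetContainer := getenv("TARGET_CONTAINER", "")

	targets := []string{"pod-a", "pod-b", "pod-c"} // hypothetical target pods
	switch sequence {
	case "serial":
		// One target at a time: inject, wait, revert, then move on.
		for _, pod := range targets {
			fmt.Printf("injecting stress on %s (container %q)\n", pod, targetContainer)
		}
	default:
		// "parallel": stress all selected targets in the same chaos window.
		fmt.Printf("injecting stress on all of %v in parallel (container %q)\n", targets, targetContainer)
	}
}
```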
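PatchChaosResult above now computes the probe success percentage for a `Stopped` verdict as well as `Fail`. A minimal sketch of that calculation follows; the standalone function is illustrative, while the arithmetic matches the diff.

```go
package main

import (
	"fmt"
	"strconv"
)

// probeSuccessPercentage mirrors the branch in PatchChaosResult above: with
// probes defined, a Pass verdict reports 100, while Fail and (newly) Stopped
// derive the percentage from the passed-probe count.
func probeSuccessPercentage(verdict string, passedProbes, totalProbes int) string {
	if totalProbes == 0 {
		return "" // no probes defined; the field is left unset
	}
	if verdict == "Pass" {
		return "100"
	}
	return strconv.Itoa((passedProbes * 100) / totalProbes)
}

func main() {
	fmt.Println(probeSuccessPercentage("Stopped", 2, 3)) // prints 66
}
```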
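AbortWatcher above couples signal handling with the litmus chaosresult and event updates. The sketch below keeps only the signal-handling skeleton so the pattern is runnable on its own: trap the abort signal, run a revert step, and exit non-zero. The experiment name and revert callback are illustrative; note that SIGKILL, listed in the diff, cannot actually be trapped by a process, so only SIGINT and SIGTERM are registered here.

```go
package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"
	"time"
)

// abortWatcher mirrors the shape of AbortWatcher above without the
// chaosresult/event plumbing: block until an abort signal arrives, run the
// supplied revert step, and exit with a non-zero code.
func abortWatcher(experimentName string, revertChaos func()) {
	signChan := make(chan os.Signal, 1)
	signal.Notify(signChan, os.Interrupt, syscall.SIGTERM)
	<-signChan
	log.Printf("[Chaos]: %s aborted, reverting chaos before exit", experimentName)
	revertChaos()
	os.Exit(1)
}

func main() {
	// Invoked as a goroutine from the experiment, as the diff above does for
	// the pumba/docker path of the network experiments.
	go abortWatcher("pod-network-latency", func() {
		log.Println("revert: removing injected netem rules (illustrative)")
	})
	// Stand-in for the chaos duration.
	time.Sleep(30 * time.Second)
}
```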