From cf20ce5ceadb95f6ec296b87f9599963745cfae6 Mon Sep 17 00:00:00 2001 From: Udit Gaurav <35391335+uditgaurav@users.noreply.github.com> Date: Sat, 15 Jan 2022 16:53:20 +0530 Subject: [PATCH] Cherry-Pick for 2.5.0 (#480) * Changed the failstep message in experiment template of litmus-sdk to follow the new convention (#475) Signed-off-by: Akash Shrivastava * Chore(stress-chaos): Support stress-chaos experiment with custom experiment name (#474) Signed-off-by: udit * Minor changes in Azure Experiments (#476) * Removed AUT and app check from azure experiments Signed-off-by: Akash Shrivastava * Changed azure status functions to accept string values rather than experimentType struct Signed-off-by: Akash Shrivastava * Removed unused variables from test.yaml Signed-off-by: Akash Shrivastava * Added AUT and Auxillary app check Signed-off-by: Akash Shrivastava * Updated test.yaml Signed-off-by: Akash Shrivastava * Chore(network-chaos): Add jitter in pod-network-latency experiment (#478) Signed-off-by: uditgaurav Co-authored-by: Akash Shrivastava --- .github/workflows/run-e2e-on-pr-commits.yml | 2 +- build/Dockerfile | 2 +- .../network-chaos/lib/latency/latency.go | 2 +- .../stress-chaos/helper/stress-helper.go | 7 +-- .../litmus/stress-chaos/lib/stress-chaos.go | 1 + .../developer-guide/templates/experiment.tmpl | 22 ++++---- .../experiment/azure-disk-loss.go | 23 ++++---- .../azure/azure-disk-loss/test/test.yml | 35 ++++++------ .../experiment/azure-instance-stop.go | 17 +++--- experiments/azure/instance-stop/test/test.yml | 53 +++++++++++++++---- .../disk-loss/environment/environment.go | 11 ++-- pkg/azure/disk-loss/types/types.go | 4 +- .../instance-stop/environment/environment.go | 1 + pkg/azure/instance-stop/types/types.go | 1 + pkg/cloud/azure/disk/disk-status.go | 9 ++-- pkg/cloud/azure/instance/instance-status.go | 41 ++------------ .../network-chaos/environment/environment.go | 1 + pkg/generic/network-chaos/types/types.go | 1 + .../stress-chaos/environment/environment.go | 3 ++ pkg/generic/stress-chaos/types/types.go | 1 + 20 files changed, 121 insertions(+), 116 deletions(-) diff --git a/.github/workflows/run-e2e-on-pr-commits.yml b/.github/workflows/run-e2e-on-pr-commits.yml index e58f77f24..e0f1b461e 100644 --- a/.github/workflows/run-e2e-on-pr-commits.yml +++ b/.github/workflows/run-e2e-on-pr-commits.yml @@ -1,7 +1,7 @@ name: E2E on: pull_request: - branches: [master] + branches: [v2.5.x] types: [opened, synchronize, reopened] paths-ignore: - '**.md' diff --git a/build/Dockerfile b/build/Dockerfile index 76c614238..994bee1b8 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -24,7 +24,7 @@ RUN apk --update add \ # Packaging stage # Image source: https://github.com/litmuschaos/test-tools/blob/master/custom/hardened-alpine/experiment/Dockerfile # The base image is non-root (have litmus user) with default litmus directory. -FROM litmuschaos/experiment-alpine:2.4.0 +FROM litmuschaos/experiment-alpine:2.5.0 LABEL maintainer="LitmusChaos" diff --git a/chaoslib/litmus/network-chaos/lib/latency/latency.go b/chaoslib/litmus/network-chaos/lib/latency/latency.go index 83a84a309..c5482f1d9 100644 --- a/chaoslib/litmus/network-chaos/lib/latency/latency.go +++ b/chaoslib/litmus/network-chaos/lib/latency/latency.go @@ -12,6 +12,6 @@ import ( //PodNetworkLatencyChaos contains the steps to prepare and inject chaos func PodNetworkLatencyChaos(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - args := "delay " + strconv.Itoa(experimentsDetails.NetworkLatency) + "ms" + args := "delay " + strconv.Itoa(experimentsDetails.NetworkLatency) + "ms " + strconv.Itoa(experimentsDetails.Jitter) + "ms" return network_chaos.PrepareAndInjectChaos(experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails, args) } diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index 770ddc243..89fde479e 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -230,8 +230,8 @@ func prepareStressor(experimentDetails *experimentTypes.ExperimentDetails) []str strconv.Itoa(experimentDetails.ChaosDuration) + "s", } - switch experimentDetails.ExperimentName { - case "pod-cpu-hog": + switch experimentDetails.StressType { + case "pod-cpu-stress": log.InfoWithValues("[Info]: Details of Stressor:", logrus.Fields{ "CPU Core": experimentDetails.CPUcores, @@ -239,7 +239,7 @@ func prepareStressor(experimentDetails *experimentTypes.ExperimentDetails) []str }) stressArgs = append(stressArgs, "--cpu "+strconv.Itoa(experimentDetails.CPUcores)) - case "pod-memory-hog": + case "pod-memory-stress": log.InfoWithValues("[Info]: Details of Stressor:", logrus.Fields{ "Number of Workers": experimentDetails.NumberOfWorkers, @@ -515,6 +515,7 @@ func getENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.NumberOfWorkers, _ = strconv.Atoi(types.Getenv("NUMBER_OF_WORKERS", "")) experimentDetails.MemoryConsumption, _ = strconv.Atoi(types.Getenv("MEMORY_CONSUMPTION", "")) experimentDetails.VolumeMountPath = types.Getenv("VOLUME_MOUNT_PATH", "") + experimentDetails.StressType = types.Getenv("STRESS_TYPE", "") } // abortWatcher continuously watch for the abort signals diff --git a/chaoslib/litmus/stress-chaos/lib/stress-chaos.go b/chaoslib/litmus/stress-chaos/lib/stress-chaos.go index d8af8f245..9bceeac10 100644 --- a/chaoslib/litmus/stress-chaos/lib/stress-chaos.go +++ b/chaoslib/litmus/stress-chaos/lib/stress-chaos.go @@ -296,6 +296,7 @@ func getPodEnv(experimentsDetails *experimentTypes.ExperimentDetails, podName st SetEnv("NUMBER_OF_WORKERS", strconv.Itoa(experimentsDetails.NumberOfWorkers)). SetEnv("MEMORY_CONSUMPTION", strconv.Itoa(experimentsDetails.MemoryConsumption)). SetEnv("VOLUME_MOUNT_PATH", experimentsDetails.VolumeMountPath). + SetEnv("STRESS_TYPE", experimentsDetails.StressType). SetEnv("INSTANCE_ID", experimentsDetails.InstanceID). SetEnvFromDownwardAPI("v1", "metadata.name") diff --git a/contribute/developer-guide/templates/experiment.tmpl b/contribute/developer-guide/templates/experiment.tmpl index 99f88b5b0..152ec3a74 100644 --- a/contribute/developer-guide/templates/experiment.tmpl +++ b/contribute/developer-guide/templates/experiment.tmpl @@ -47,7 +47,7 @@ func Experiment(clients clients.ClientSets){ log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT");err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) - failStep := "Updating the chaos result of pod-delete experiment (SOT)" + failStep := "[pre-chaos]: Failed to update the chaos result of pod-delete experiment (SOT), err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -78,7 +78,7 @@ func Experiment(clients clients.ClientSets){ log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) - failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error() types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) @@ -90,7 +90,7 @@ func Experiment(clients clients.ClientSets){ log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients);err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) - failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" + failStep := "[pre-chaos]: Failed to verify that the Auxiliary Applications are in running state, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -106,7 +106,7 @@ func Experiment(clients clients.ClientSets){ if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails);err != nil { log.Errorf("Probe Failed, err: %v", err) - failStep := "Failed while running probes" + failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error() msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") @@ -129,15 +129,15 @@ func Experiment(clients clients.ClientSets){ switch experimentsDetails.ChaosLib { case "litmus": if err := litmusLIB.PrepareChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { - failStep := "failed in chaos injection phase" - result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) log.Errorf("Chaos injection failed, err: %v", err) + failStep := "[chaos]: Failed inside the chaoslib, err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } default: - failStep := "lib and container-runtime combination not supported!" + log.Error("[Invalid]: Please Provide the correct LIB") + failStep := "[chaos]: no match found for specified lib" result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) - log.Error("lib and container-runtime combination not supported, provide the correct value of lib & container-runtime") return } @@ -149,7 +149,7 @@ func Experiment(clients clients.ClientSets){ log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) - failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error() types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) @@ -161,7 +161,7 @@ func Experiment(clients clients.ClientSets){ log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients);err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) - failStep := "Verify that the Auxiliary Applications are running (post-chaos)" + failStep := "[post-chaos]: Failed to verify that the Auxiliary Applications are running, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -176,7 +176,7 @@ func Experiment(clients clients.ClientSets){ if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails);err != nil { log.Errorf("Probes Failed, err: %v", err) - failStep := "Failed while running probes" + failStep := "[post-chaos]: Failed while running probes, err: " + err.Error() msg := "AUT: Running, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") diff --git a/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go b/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go index 808341d2a..db033381a 100644 --- a/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go +++ b/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go @@ -69,6 +69,7 @@ func AzureDiskLoss(clients clients.ClientSets) { //DISPLAY THE APP INFORMATION log.InfoWithValues("The volume information is as follows", logrus.Fields{ + "Chaos Duration": experimentsDetails.ChaosDuration, "Disk Names": experimentsDetails.VirtualDiskNames, "Resource Group": experimentsDetails.ResourceGroup, "Sequence": experimentsDetails.Sequence, @@ -104,7 +105,7 @@ func AzureDiskLoss(clients clients.ClientSets) { // PRE-CHAOS VIRTUAL DISK STATUS CHECK log.Info("[Status]: Verify that the virtual disk are attached to VM instance(pre-chaos)") - if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails); err != nil { + if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Virtual disk status check failed, err: %v", err) failStep := "[pre-chaos]: Failed to verify that the virtual disk are attached to VM instance, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) @@ -118,7 +119,7 @@ func AzureDiskLoss(clients clients.ClientSets) { // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { - if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { + if err = probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error() msg := "AUT: Running, Probes: Unsuccessful" @@ -153,6 +154,15 @@ func AzureDiskLoss(clients clients.ClientSets) { log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName) resultDetails.Verdict = v1alpha1.ResultVerdictPassed + // POST-CHAOS VIRTUAL DISK STATUS CHECK + log.Info("[Status]: Verify that the virtual disk are attached to VM instance(post-chaos)") + if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil { + log.Errorf("Virtual disk status check failed, err: %v", err) + failStep := "[post-chaos]: Failed to verify that the virtual disk are attached to VM instance, err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + //POST-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") if err = status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { @@ -173,15 +183,6 @@ func AzureDiskLoss(clients clients.ClientSets) { } } - // POST-CHAOS VIRTUAL DISK STATUS CHECK - log.Info("[Status]: Verify that the virtual disk are attached to VM instance(post-chaos)") - if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails); err != nil { - log.Errorf("Virtual disk status check failed, err: %v", err) - failStep := "[post-chaos]: Failed to verify that the virtual disk are attached to VM instance, err: " + err.Error() - result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) - return - } - if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test msg := "AUT: Running" diff --git a/experiments/azure/azure-disk-loss/test/test.yml b/experiments/azure/azure-disk-loss/test/test.yml index ec2022207..06241148d 100644 --- a/experiments/azure/azure-disk-loss/test/test.yml +++ b/experiments/azure/azure-disk-loss/test/test.yml @@ -21,26 +21,21 @@ spec: - sleep - "3600" env: - # provide application namespace - name: APP_NAMESPACE - value: '' + value: 'default' - # provide application labels - name: APP_LABEL - value: '' - - # provide application kind + value: 'run=nginx' + - name: APP_KIND - value: '' + value: 'deployment' - name: TOTAL_CHAOS_DURATION - value: '' - - # provide auxiliary application details - namespace and labels of the applications - # sample input is - "ns1:app=percona,ns2:name=nginx" - - name: AUXILIARY_APPINFO - value: '' + value: '60' + - name: CHAOS_INTERVAL + value: '30' + ## Period to wait before injection of chaos in sec - name: RAMP_TIME value: '' @@ -48,15 +43,11 @@ spec: ## env var that describes the library used to execute the chaos ## default: litmus - name: LIB - value: '' + value: 'litmus' # provide the chaos namespace - name: CHAOS_NAMESPACE - value: '' - - # provide target container name - - name: TARGET_CONTAINER - value: '' + value: 'litmus' # provide the resouce group of the instance - name: RESOURCE_GROUP @@ -72,7 +63,7 @@ spec: # provide the sequence type for the run. Options: serial/parallel - name: SEQUENCE - value: '' + value: 'parallel' # provide the path to aks credentials mounted from secret - name: AZURE_AUTH_LOCATION @@ -88,4 +79,8 @@ spec: valueFrom: fieldRef: fieldPath: spec.serviceAccountName + + secrets: + - name: cloud-secret + mountPath: /tmp/ diff --git a/experiments/azure/instance-stop/experiment/azure-instance-stop.go b/experiments/azure/instance-stop/experiment/azure-instance-stop.go index c6ba451d2..e5006179f 100644 --- a/experiments/azure/instance-stop/experiment/azure-instance-stop.go +++ b/experiments/azure/instance-stop/experiment/azure-instance-stop.go @@ -10,12 +10,12 @@ import ( clients "github.com/litmuschaos/litmus-go/pkg/clients" azureCommon "github.com/litmuschaos/litmus-go/pkg/cloud/azure/common" azureStatus "github.com/litmuschaos/litmus-go/pkg/cloud/azure/instance" + "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/events" "github.com/litmuschaos/litmus-go/pkg/log" "github.com/litmuschaos/litmus-go/pkg/probe" "github.com/litmuschaos/litmus-go/pkg/result" - "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" @@ -64,16 +64,16 @@ func AzureInstanceStop(clients clients.ClientSets) { go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) //DISPLAY THE APP INFORMATION - log.InfoWithValues("The application information is as follows", logrus.Fields{ + log.InfoWithValues("The instance information is as follows", logrus.Fields{ "Chaos Duration": experimentsDetails.ChaosDuration, "Resource Group": experimentsDetails.ResourceGroup, "Instance Name": experimentsDetails.AzureInstanceName, + "Sequence": experimentsDetails.Sequence, }) //PRE-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") - err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients) - if err != nil { + if err = status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) @@ -83,8 +83,7 @@ func AzureInstanceStop(clients clients.ClientSets) { //PRE-CHAOS AUXILIARY APPLICATION STATUS CHECK if experimentsDetails.AuxiliaryAppInfo != "" { log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") - err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients) - if err != nil { + if err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) failStep := "[pre-chaos]: Failed to verify that the Auxiliary Applications are in running state, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) @@ -130,7 +129,7 @@ func AzureInstanceStop(clients clients.ClientSets) { } //Verify the azure target instance is running (pre-chaos) - if err := azureStatus.InstanceStatusCheckByName(&experimentsDetails); err != nil { + if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceName, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil { log.Errorf("failed to get the azure instance status, err: %v", err) failStep := "[pre-chaos]: Failed to verify the azure instance status, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) @@ -158,7 +157,7 @@ func AzureInstanceStop(clients clients.ClientSets) { resultDetails.Verdict = v1alpha1.ResultVerdictPassed //Verify the azure instance is running (post chaos) - if err = azureStatus.InstanceStatusCheckByName(&experimentsDetails); err != nil { + if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceName, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil { log.Errorf("failed to get the azure instance status, err: %v", err) failStep := "[pre-chaos]: Failed to update the azure instance status, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) @@ -168,7 +167,7 @@ func AzureInstanceStop(clients clients.ClientSets) { //POST-CHAOS APPLICATION STATUS CHECK log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") - if err = status.CheckApplicationStatus(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + if err = status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error() result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) diff --git a/experiments/azure/instance-stop/test/test.yml b/experiments/azure/instance-stop/test/test.yml index ef839e528..27961dcb1 100644 --- a/experiments/azure/instance-stop/test/test.yml +++ b/experiments/azure/instance-stop/test/test.yml @@ -21,28 +21,61 @@ spec: - sleep - "3600" env: - - name: LIB - value: 'litmus' - - name: AZURE_INSTANCE_NAME + - name: APP_NAMESPACE + value: 'default' + + - name: APP_LABEL + value: 'run=nginx' + + - name: APP_KIND + value: 'deployment' + + - name: RAMP_TIME value: '' + + - name: TOTAL_CHAOS_DURATION + value: '60' + + - name: CHAOS_INTERVAL + value: '30' + - name: LIB + value: 'litmus' + - name: CHAOS_NAMESPACE - value: 'default' + value: 'litmus' - - name: RESOURCE_GROUP + # provide the instance names (comma seperated if multiple) + - name: AZURE_INSTANCE_NAME value: '' - - - name: RAMP_TIME + + # provide the resouce group of the instance + - name: RESOURCE_GROUP value: '' + # whether the disk is attached to scale instance or not, accepted values are disable, enable + - name: SCALE_SET + value: 'disable' + + # provide the sequence type for the run. Options: serial/parallel - name: SEQUENCE value: 'parallel' - - name: CHAOS_INTERVAL - value: '30' + # provide the path to aks credentials mounted from secret + - name: AZURE_AUTH_LOCATION + value: '/tmp/azure.auth' - name: POD_NAME valueFrom: fieldRef: - fieldPath: metadata.name \ No newline at end of file + fieldPath: metadata.name + + - name: CHAOS_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName + + secrets: + - name: cloud-secret + mountPath: /tmp/ \ No newline at end of file diff --git a/pkg/azure/disk-loss/environment/environment.go b/pkg/azure/disk-loss/environment/environment.go index f1c0103ca..6ca36e298 100644 --- a/pkg/azure/disk-loss/environment/environment.go +++ b/pkg/azure/disk-loss/environment/environment.go @@ -14,23 +14,22 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.ExperimentName = types.Getenv("EXPERIMENT_NAME", "azure-disk-loss") experimentDetails.ChaosNamespace = types.Getenv("CHAOS_NAMESPACE", "litmus") experimentDetails.EngineName = types.Getenv("CHAOSENGINE", "") - experimentDetails.ChaosDuration, _ = strconv.Atoi(types.Getenv("TOTAL_CHAOS_DURATION", "30")) - experimentDetails.ChaosInterval, _ = strconv.Atoi(types.Getenv("CHAOS_INTERVAL", "30")) - experimentDetails.RampTime, _ = strconv.Atoi(types.Getenv("RAMP_TIME", "0")) - experimentDetails.ChaosLib = types.Getenv("LIB", "litmus") experimentDetails.AppNS = types.Getenv("APP_NAMESPACE", "") experimentDetails.AppLabel = types.Getenv("APP_LABEL", "") experimentDetails.AppKind = types.Getenv("APP_KIND", "") experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "") + experimentDetails.TargetContainer = types.Getenv("TARGET_CONTAINER", "") + experimentDetails.ChaosDuration, _ = strconv.Atoi(types.Getenv("TOTAL_CHAOS_DURATION", "30")) + experimentDetails.ChaosInterval, _ = strconv.Atoi(types.Getenv("CHAOS_INTERVAL", "30")) + experimentDetails.RampTime, _ = strconv.Atoi(types.Getenv("RAMP_TIME", "0")) + experimentDetails.ChaosLib = types.Getenv("LIB", "litmus") experimentDetails.ChaosUID = clientTypes.UID(types.Getenv("CHAOS_UID", "")) experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "") experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "") experimentDetails.Delay, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_DELAY", "2")) experimentDetails.Timeout, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_TIMEOUT", "180")) - experimentDetails.TargetContainer = types.Getenv("TARGET_CONTAINER", "") experimentDetails.ScaleSet = types.Getenv("SCALE_SET", "disable") experimentDetails.ResourceGroup = types.Getenv("RESOURCE_GROUP", "") experimentDetails.VirtualDiskNames = types.Getenv("VIRTUAL_DISK_NAMES", "") - experimentDetails.SubscriptionID = types.Getenv("SUBSCRIPTION_ID", "") experimentDetails.Sequence = types.Getenv("SEQUENCE", "parallel") } diff --git a/pkg/azure/disk-loss/types/types.go b/pkg/azure/disk-loss/types/types.go index fd4adecda..261dc727e 100644 --- a/pkg/azure/disk-loss/types/types.go +++ b/pkg/azure/disk-loss/types/types.go @@ -11,19 +11,19 @@ type ExperimentDetails struct { ChaosDuration int ChaosInterval int RampTime int - ChaosLib string AppNS string AppLabel string AppKind string AuxiliaryAppInfo string + ChaosLib string ChaosUID clientTypes.UID InstanceID string ChaosNamespace string ChaosPodName string + TargetContainer string Timeout int Delay int LIBImagePullPolicy string - TargetContainer string ScaleSet string ResourceGroup string SubscriptionID string diff --git a/pkg/azure/instance-stop/environment/environment.go b/pkg/azure/instance-stop/environment/environment.go index 951f3a500..20a6e87e9 100644 --- a/pkg/azure/instance-stop/environment/environment.go +++ b/pkg/azure/instance-stop/environment/environment.go @@ -18,6 +18,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.AppLabel = types.Getenv("APP_LABEL", "") experimentDetails.AppKind = types.Getenv("APP_KIND", "") experimentDetails.AuxiliaryAppInfo = types.Getenv("AUXILIARY_APPINFO", "") + experimentDetails.TargetContainer = types.Getenv("TARGET_CONTAINER", "") experimentDetails.ChaosDuration, _ = strconv.Atoi(types.Getenv("TOTAL_CHAOS_DURATION", "30")) experimentDetails.ChaosInterval, _ = strconv.Atoi(types.Getenv("CHAOS_INTERVAL", "30")) experimentDetails.RampTime, _ = strconv.Atoi(types.Getenv("RAMP_TIME", "0")) diff --git a/pkg/azure/instance-stop/types/types.go b/pkg/azure/instance-stop/types/types.go index f87c6f17e..7a3040fef 100644 --- a/pkg/azure/instance-stop/types/types.go +++ b/pkg/azure/instance-stop/types/types.go @@ -20,6 +20,7 @@ type ExperimentDetails struct { InstanceID string ChaosNamespace string ChaosPodName string + TargetContainer string Timeout int Delay int AzureInstanceName string diff --git a/pkg/cloud/azure/disk/disk-status.go b/pkg/cloud/azure/disk/disk-status.go index 99d26dff6..cea44ce2c 100644 --- a/pkg/cloud/azure/disk/disk-status.go +++ b/pkg/cloud/azure/disk/disk-status.go @@ -10,7 +10,6 @@ import ( "github.com/Azure/go-autorest/autorest/azure/auth" "github.com/litmuschaos/litmus-go/pkg/cloud/azure/common" - experimentTypes "github.com/litmuschaos/litmus-go/pkg/azure/disk-loss/types" "github.com/pkg/errors" ) @@ -80,10 +79,10 @@ func GetDiskStatus(subscriptionID, resourceGroup, diskName string) (compute.Disk } // CheckVirtualDiskWithInstance checks whether the given list of disk are attached to the provided VM instance -func CheckVirtualDiskWithInstance(experimentsDetails experimentTypes.ExperimentDetails) error { +func CheckVirtualDiskWithInstance(subscriptionID, virtualDiskNames, resourceGroup string) error { // Setup and authorize disk client - diskClient := compute.NewDisksClient(experimentsDetails.SubscriptionID) + diskClient := compute.NewDisksClient(subscriptionID) authorizer, err := auth.NewAuthorizerFromFile(azure.PublicCloud.ResourceManagerEndpoint) if err != nil { @@ -92,10 +91,10 @@ func CheckVirtualDiskWithInstance(experimentsDetails experimentTypes.ExperimentD diskClient.Authorizer = authorizer // Creating an array of the name of the attached disks - diskNameList := strings.Split(experimentsDetails.VirtualDiskNames, ",") + diskNameList := strings.Split(virtualDiskNames, ",") for _, diskName := range diskNameList { - disk, err := diskClient.Get(context.Background(), experimentsDetails.ResourceGroup, diskName) + disk, err := diskClient.Get(context.Background(), resourceGroup, diskName) if err != nil { return errors.Errorf("failed to get disk: %v, err: %v", diskName, err) } diff --git a/pkg/cloud/azure/instance/instance-status.go b/pkg/cloud/azure/instance/instance-status.go index 16a849342..b3039ddb1 100644 --- a/pkg/cloud/azure/instance/instance-status.go +++ b/pkg/cloud/azure/instance/instance-status.go @@ -2,15 +2,11 @@ package azure import ( "context" - "encoding/json" - "io/ioutil" - "os" "strings" "github.com/Azure/azure-sdk-for-go/profiles/latest/compute/mgmt/compute" "github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/azure/auth" - experimentTypes "github.com/litmuschaos/litmus-go/pkg/azure/instance-stop/types" "github.com/litmuschaos/litmus-go/pkg/cloud/azure/common" "github.com/litmuschaos/litmus-go/pkg/log" @@ -73,45 +69,18 @@ func GetAzureScaleSetInstanceStatus(subscriptionID, resourceGroup, virtualMachin return *(*instanceDetails.Statuses)[1].DisplayStatus, nil } -// SetupSubscriptionID fetch the subscription id from the auth file and export it in experiment struct variable -func SetupSubscriptionID(experimentsDetails *experimentTypes.ExperimentDetails) error { - - var err error - authFile, err := os.Open(os.Getenv("AZURE_AUTH_LOCATION")) - if err != nil { - return errors.Errorf("fail to open auth file, err: %v", err) - } - - authFileContent, err := ioutil.ReadAll(authFile) - if err != nil { - return errors.Errorf("fail to read auth file, err: %v", err) - } - - details := make(map[string]string) - if err := json.Unmarshal(authFileContent, &details); err != nil { - return errors.Errorf("fail to unmarshal file, err: %v", err) - } - - if id, contains := details["subscriptionId"]; contains { - experimentsDetails.SubscriptionID = id - } else { - return errors.Errorf("The auth file does not have a subscriptionId field") - } - return nil -} - // InstanceStatusCheckByName is used to check the instance status of all the instance under chaos -func InstanceStatusCheckByName(experimentsDetails *experimentTypes.ExperimentDetails) error { - instanceNameList := strings.Split(experimentsDetails.AzureInstanceName, ",") +func InstanceStatusCheckByName(azureInstanceNames, scaleSet, subscriptionID, resourceGroup string) error { + instanceNameList := strings.Split(azureInstanceNames, ",") if len(instanceNameList) == 0 { return errors.Errorf("no instance found to check the status") } log.Infof("[Info]: The instance under chaos(IUC) are: %v", instanceNameList) - switch experimentsDetails.ScaleSet { + switch scaleSet { case "enable": - return ScaleSetInstanceStatusCheck(instanceNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup) + return ScaleSetInstanceStatusCheck(instanceNameList, subscriptionID, resourceGroup) default: - return InstanceStatusCheck(instanceNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup) + return InstanceStatusCheck(instanceNameList, subscriptionID, resourceGroup) } } diff --git a/pkg/generic/network-chaos/environment/environment.go b/pkg/generic/network-chaos/environment/environment.go index 28044ea21..dcc3943f3 100644 --- a/pkg/generic/network-chaos/environment/environment.go +++ b/pkg/generic/network-chaos/environment/environment.go @@ -28,6 +28,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.NetworkLatency, _ = strconv.Atoi(types.Getenv("NETWORK_LATENCY", "60000")) experimentDetails.NetworkPacketLossPercentage, _ = strconv.Atoi(types.Getenv("NETWORK_PACKET_LOSS_PERCENTAGE", "100")) experimentDetails.NetworkPacketCorruptionPercentage, _ = strconv.Atoi(types.Getenv("NETWORK_PACKET_CORRUPTION_PERCENTAGE", "100")) + experimentDetails.Jitter, _ = strconv.Atoi(types.Getenv("JITTER", "0")) experimentDetails.NetworkInterface = types.Getenv("NETWORK_INTERFACE", "eth0") experimentDetails.TargetContainer = types.Getenv("TARGET_CONTAINER", "") experimentDetails.TCImage = types.Getenv("TC_IMAGE", "gaiadocker/iproute2") diff --git a/pkg/generic/network-chaos/types/types.go b/pkg/generic/network-chaos/types/types.go index 2a6a0fff7..c3e5a7d31 100644 --- a/pkg/generic/network-chaos/types/types.go +++ b/pkg/generic/network-chaos/types/types.go @@ -39,4 +39,5 @@ type ExperimentDetails struct { SocketPath string Sequence string TerminationGracePeriodSeconds int + Jitter int } diff --git a/pkg/generic/stress-chaos/environment/environment.go b/pkg/generic/stress-chaos/environment/environment.go index 56c5bd764..5165f4833 100644 --- a/pkg/generic/stress-chaos/environment/environment.go +++ b/pkg/generic/stress-chaos/environment/environment.go @@ -39,10 +39,12 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails, expName string switch expName { case "pod-cpu-hog": experimentDetails.CPUcores, _ = strconv.Atoi(types.Getenv("CPU_CORES", "1")) + experimentDetails.StressType = "pod-cpu-stress" case "pod-memory-hog": experimentDetails.MemoryConsumption, _ = strconv.Atoi(types.Getenv("MEMORY_CONSUMPTION", "500")) experimentDetails.NumberOfWorkers, _ = strconv.Atoi(types.Getenv("NUMBER_OF_WORKERS", "4")) + experimentDetails.StressType = "pod-memory-stress" case "pod-io-stress": experimentDetails.FilesystemUtilizationPercentage, _ = strconv.Atoi(types.Getenv("FILESYSTEM_UTILIZATION_PERCENTAGE", "")) @@ -50,5 +52,6 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails, expName string experimentDetails.NumberOfWorkers, _ = strconv.Atoi(types.Getenv("NUMBER_OF_WORKERS", "4")) experimentDetails.VolumeMountPath = types.Getenv("VOLUME_MOUNT_PATH", "") experimentDetails.CPUcores, _ = strconv.Atoi(types.Getenv("CPU_CORES", "0")) + experimentDetails.StressType = "pod-io-stress" } } diff --git a/pkg/generic/stress-chaos/types/types.go b/pkg/generic/stress-chaos/types/types.go index b0fd0f04f..6a65003cc 100644 --- a/pkg/generic/stress-chaos/types/types.go +++ b/pkg/generic/stress-chaos/types/types.go @@ -38,4 +38,5 @@ type ExperimentDetails struct { NumberOfWorkers int MemoryConsumption int VolumeMountPath string + StressType string }