diff --git a/chaoslib/litmus/container_kill/container-kill.go b/chaoslib/litmus/container_kill/container-kill.go
index 739198b2d..925a6bd51 100644
--- a/chaoslib/litmus/container_kill/container-kill.go
+++ b/chaoslib/litmus/container_kill/container-kill.go
@@ -85,7 +85,7 @@ func PrepareContainerKill(experimentsDetails *experimentTypes.ExperimentDetails,
 	// Wait till the completion of the helper pod
 	// set an upper limit for the waiting time
 	log.Info("[Wait]: waiting till the completion of the helper pod")
-	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=container-kill-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+experimentsDetails.ChaosInterval+60)
+	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=container-kill-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+experimentsDetails.ChaosInterval+60, "container-kill")
 	if err != nil || podStatus == "Failed" {
 		return errors.Errorf("helper pod failed due to, err: %v", err)
 	}
diff --git a/chaoslib/litmus/kubelet_service_kill/kubelet-service-kill.go b/chaoslib/litmus/kubelet_service_kill/kubelet-service-kill.go
index 469f7e93a..49ac1e7d6 100644
--- a/chaoslib/litmus/kubelet_service_kill/kubelet-service-kill.go
+++ b/chaoslib/litmus/kubelet_service_kill/kubelet-service-kill.go
@@ -72,7 +72,7 @@ func PrepareKubeletKill(experimentsDetails *experimentTypes.ExperimentDetails, c
 
 	// Wait till the completion of helper pod
 	log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration+30))
-	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=kubelet-service-kill-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30)
+	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=kubelet-service-kill-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, "kubelet-service-kill")
 	if err != nil || podStatus == "Failed" {
 		return errors.Errorf("helper pod failed due to, err: %v", err)
 	}
diff --git a/chaoslib/litmus/node_cpu_hog/node-cpu-hog.go b/chaoslib/litmus/node_cpu_hog/node-cpu-hog.go
index e0b6eb7a2..6af8ed616 100644
--- a/chaoslib/litmus/node_cpu_hog/node-cpu-hog.go
+++ b/chaoslib/litmus/node_cpu_hog/node-cpu-hog.go
@@ -72,7 +72,7 @@ func PrepareNodeCPUHog(experimentsDetails *experimentTypes.ExperimentDetails, cl
 
 	// Wait till the completion of helper pod
 	log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration+30))
-	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=node-cpu-hog-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30)
+	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=node-cpu-hog-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, "node-cpu-hog")
 	if err != nil || podStatus == "Failed" {
 		return errors.Errorf("helper pod failed due to, err: %v", err)
 	}
diff --git a/chaoslib/litmus/node_memory_hog/node-memory-hog.go b/chaoslib/litmus/node_memory_hog/node-memory-hog.go
index 587d151da..2cdfc869c 100644
--- a/chaoslib/litmus/node_memory_hog/node-memory-hog.go
+++ b/chaoslib/litmus/node_memory_hog/node-memory-hog.go
@@ -62,7 +62,7 @@ func PrepareNodeMemoryHog(experimentsDetails *experimentTypes.ExperimentDetails,
 
 	// Wait till the completion of helper pod
 	log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration+30))
-	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=node-memory-hog-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30)
+	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=node-memory-hog-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, "node-memory-hog")
 	if err != nil || podStatus == "Failed" {
 		return errors.Errorf("helper pod failed due to, err: %v", err)
 	}
diff --git a/chaoslib/litmus/pod_delete/pod-delete.go b/chaoslib/litmus/pod_delete/pod-delete.go
index 3f206d46f..12f3c9470 100644
--- a/chaoslib/litmus/pod_delete/pod-delete.go
+++ b/chaoslib/litmus/pod_delete/pod-delete.go
@@ -54,7 +54,7 @@ func PreparePodDelete(experimentsDetails *experimentTypes.ExperimentDetails, cli
 
 	// Wait till the completion of helper pod
 	log.Info("[Wait]: waiting till the completion of the helper pod")
-	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=pod-delete"+runID, clients, experimentsDetails.ChaosDuration+experimentsDetails.ChaosInterval+60)
+	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=pod-delete"+runID, clients, experimentsDetails.ChaosDuration+experimentsDetails.ChaosInterval+60, "pod-delete")
 	if err != nil || podStatus == "Failed" {
 		return errors.Errorf("helper pod failed due to, err: %v", err)
 	}
diff --git a/chaoslib/pumba/network_chaos/network-chaos.go b/chaoslib/pumba/network_chaos/network-chaos.go
index 415e02d2e..0199f72e5 100644
--- a/chaoslib/pumba/network_chaos/network-chaos.go
+++ b/chaoslib/pumba/network_chaos/network-chaos.go
@@ -73,7 +73,7 @@ func PreparePodNetworkChaos(experimentsDetails *experimentTypes.ExperimentDetail
 
 	// Wait till the completion of helper pod
 	log.Infof("[Wait]: Waiting for %vs till the completion of the helper pod", strconv.Itoa(experimentsDetails.ChaosDuration))
-	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=pumba-netem-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30)
+	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, "name=pumba-netem-"+experimentsDetails.RunID, clients, experimentsDetails.ChaosDuration+30, "pumba")
 	if err != nil || podStatus == "Failed" {
 		return errors.Errorf("helper pod failed due to, err: %v", err)
 	}
@@ -171,7 +171,7 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie
 			Args: []string{
 				"netem",
 				"--tc-image",
-				"gaiadocker/iproute2",
+				experimentsDetails.TCImage,
 				"--interface",
 				experimentsDetails.NetworkInterface,
 				"--duration",
diff --git a/experiments/generic/pod-network-corruption/pod-network-corruption-k8s-job.yml b/experiments/generic/pod-network-corruption/pod-network-corruption-k8s-job.yml
index 7b0c6fe91..d4970e029 100644
--- a/experiments/generic/pod-network-corruption/pod-network-corruption-k8s-job.yml
+++ b/experiments/generic/pod-network-corruption/pod-network-corruption-k8s-job.yml
@@ -37,6 +37,9 @@ spec:
             - name: NETWORK_INTERFACE
               value: 'eth0'
 
+            - name: TC_IMAGE
+              value: 'gaiadocker/iproute2'
+
             - name: NETWORK_PACKET_CORRUPTION_PERCENTAGE
               value: '100' # in percentage
 
diff --git a/experiments/generic/pod-network-duplication/pod-network-duplication-k8s-job.yml b/experiments/generic/pod-network-duplication/pod-network-duplication-k8s-job.yml
index 57e6cea84..89a2b9edc 100644
--- a/experiments/generic/pod-network-duplication/pod-network-duplication-k8s-job.yml
+++ b/experiments/generic/pod-network-duplication/pod-network-duplication-k8s-job.yml
@@ -37,6 +37,9 @@ spec:
             - name: NETWORK_INTERFACE
               value: 'eth0'
 
+            - name: TC_IMAGE
+              value: 'gaiadocker/iproute2'
+
             - name: NETWORK_PACKET_DUPLICATION_PERCENTAGE
               value: '100' # in percentage
 
diff --git a/experiments/generic/pod-network-latency/pod-network-latency-k8s-job.yml b/experiments/generic/pod-network-latency/pod-network-latency-k8s-job.yml
index 0f706b2aa..17524da97 100644
--- a/experiments/generic/pod-network-latency/pod-network-latency-k8s-job.yml
+++ b/experiments/generic/pod-network-latency/pod-network-latency-k8s-job.yml
@@ -37,6 +37,9 @@ spec:
             - name: NETWORK_INTERFACE
               value: 'eth0'
 
+            - name: TC_IMAGE
+              value: 'gaiadocker/iproute2'
+
             - name: NETWORK_LATENCY
               value: '60000' # in ms
 
diff --git a/experiments/generic/pod-network-loss/pod-network-loss-k8s-job.yml b/experiments/generic/pod-network-loss/pod-network-loss-k8s-job.yml
index e9748f0c3..a168c1865 100644
--- a/experiments/generic/pod-network-loss/pod-network-loss-k8s-job.yml
+++ b/experiments/generic/pod-network-loss/pod-network-loss-k8s-job.yml
@@ -37,6 +37,9 @@ spec:
             - name: NETWORK_INTERFACE
               value: 'eth0'
 
+            - name: TC_IMAGE
+              value: 'gaiadocker/iproute2'
+
             - name: NETWORK_PACKET_LOSS_PERCENTAGE
               value: '100' # in percentage
 
diff --git a/pkg/generic/network-chaos/environment/environment.go b/pkg/generic/network-chaos/environment/environment.go
index f2fb9026f..9108f9d57 100644
--- a/pkg/generic/network-chaos/environment/environment.go
+++ b/pkg/generic/network-chaos/environment/environment.go
@@ -31,6 +31,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails, expName string
 	experimentDetails.NetworkPacketCorruptionPercentage, _ = strconv.Atoi(Getenv("NETWORK_PACKET_CORRUPTION_PERCENTAGE", "100"))
 	experimentDetails.NetworkInterface = Getenv("NETWORK_INTERFACE", "eth0")
 	experimentDetails.TargetContainer = Getenv("TARGET_CONTAINER", "")
+	experimentDetails.TCImage = Getenv("TC_IMAGE", "gaiadocker/iproute2")
 }
 
 // Getenv fetch the env and set the default value, if any
diff --git a/pkg/generic/network-chaos/types/types.go b/pkg/generic/network-chaos/types/types.go
index b85f166f7..4224962e4 100644
--- a/pkg/generic/network-chaos/types/types.go
+++ b/pkg/generic/network-chaos/types/types.go
@@ -26,4 +26,5 @@ type ExperimentDetails struct {
 	NetworkLatency                    int
 	NetworkPacketLossPercentage       int
 	NetworkPacketCorruptionPercentage int
+	TCImage                           string
 }
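The GetENV change relies on the Getenv helper declared just below this hunk; its body is outside the diff context, but it is presumably a plain read-with-default. A sketch of that assumption — with TC_IMAGE unset, TCImage stays at the old hard-coded 'gaiadocker/iproute2', preserving prior behaviour:

```go
package main

import "os"

// getenv sketches what the Getenv helper in environment.go presumably does:
// return the env var's value when set, otherwise fall back to the default.
func getenv(key string, defaultValue string) string {
	if value := os.Getenv(key); value != "" {
		return value
	}
	return defaultValue
}
```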
diff --git a/pkg/result/chaosresult.go b/pkg/result/chaosresult.go
index eaa49e256..a5df9d2fb 100644
--- a/pkg/result/chaosresult.go
+++ b/pkg/result/chaosresult.go
@@ -1,38 +1,57 @@
 package result
 
 import (
+	"time"
+
 	"github.com/litmuschaos/chaos-operator/pkg/apis/litmuschaos/v1alpha1"
 	clients "github.com/litmuschaos/litmus-go/pkg/clients"
 	"github.com/litmuschaos/litmus-go/pkg/types"
+	"github.com/openebs/maya/pkg/util/retry"
+	"github.com/pkg/errors"
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 //ChaosResult Create and Update the chaos result
 func ChaosResult(chaosDetails *types.ChaosDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, state string) error {
 
-	result, _ := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(resultDetails.Name, metav1.GetOptions{})
-
-	if state == "SOT" {
-
-		if result.Name == resultDetails.Name {
-			err := PatchChaosResult(result, clients, chaosDetails, resultDetails)
-			if err != nil {
-				return err
-			}
-		} else {
-			err := InitializeChaosResult(chaosDetails, clients, resultDetails)
+	// It will list all the chaos-results with a matching label
+	// it retries until it gets the chaos-result list or meets the timeout (3 mins)
+	// Note: We have added labels inside the chaos result and look for matching labels to list the chaos-result
+	var resultList *v1alpha1.ChaosResultList
+	err := retry.
+		Times(90).
+		Wait(2 * time.Second).
+		Try(func(attempt uint) error {
+			result, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).List(metav1.ListOptions{LabelSelector: "name=" + resultDetails.Name})
 			if err != nil {
-				return err
+				return errors.Errorf("Unable to list the chaosresult, err: %v", err)
 			}
+			resultList = result
+			return nil
+		})
+
+	if err != nil {
+		return err
+	}
+
+	// if there is no chaos-result with the given label, it will create a new chaos-result
+	if len(resultList.Items) == 0 {
+		return InitializeChaosResult(chaosDetails, clients, resultDetails)
+	}
+
+	for _, result := range resultList.Items {
+
+		// the chaos-result is already present with matching labels
+		// it will patch the new parameters into the same chaos-result
+		if state == "SOT" {
+			return PatchChaosResult(&result, clients, chaosDetails, resultDetails)
 		}
-	} else {
+
+		// it will patch the chaos-result at the end of the experiment
 		resultDetails.Phase = "Completed"
-		err := PatchChaosResult(result, clients, chaosDetails, resultDetails)
-		if err != nil {
-			return err
-		}
+		return PatchChaosResult(&result, clients, chaosDetails, resultDetails)
 	}
-
 	return nil
 }
 
@@ -43,6 +62,9 @@ func InitializeChaosResult(chaosDetails *types.ChaosDetails, clients clients.Cli
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      resultDetails.Name,
 			Namespace: chaosDetails.ChaosNamespace,
+			Labels: map[string]string{
+				"name": resultDetails.Name,
+			},
 		},
 		Spec: v1alpha1.ChaosResultSpec{
 			EngineName: chaosDetails.EngineName,
@@ -56,8 +78,33 @@ func InitializeChaosResult(chaosDetails *types.ChaosDetails, clients clients.Cli
 			},
 		},
 	}
+
+	// It will create a new chaos-result CR
 	_, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Create(chaosResult)
-	return err
+
+	// if the chaos result is already present, it will patch the new parameters into the existing chaos result CR
+	// Note: We have added labels inside the chaos result and look for matching labels to list the chaos-result
+	// these labels were not present in earlier releases, so we retry/update in case someone has an existing result CR
+	// in their cluster, created earlier with an older release/version of litmus.
+	// it will override the params and add the labels so that it works as desired.
+	if k8serrors.IsAlreadyExists(err) {
+		chaosResult, err = clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(resultDetails.Name, metav1.GetOptions{})
+		if err != nil {
+			return errors.Errorf("Unable to get the chaosresult, err: %v", err)
+		}
+		// adding the labels to the chaosresult
+		chaosResult.ObjectMeta.Labels = map[string]string{
+			"name": resultDetails.Name,
+		}
+		// updating the chaosresult with new values
+		err = PatchChaosResult(chaosResult, clients, chaosDetails, resultDetails)
+		if err != nil {
+			return err
+		}
+
+	}
+
+	return nil
 }
 
 //PatchChaosResult Update the chaos result
@@ -68,7 +115,18 @@ func PatchChaosResult(result *v1alpha1.ChaosResult, clients clients.ClientSets,
 	result.Spec.InstanceID = chaosDetails.InstanceID
 	result.Status.ExperimentStatus.FailStep = resultDetails.FailStep
 
-	_, err := clients.LitmusClient.ChaosResults(result.Namespace).Update(result)
+	// It will update the existing chaos-result CR with new values
+	// it retries until it is able to update successfully or meets the timeout (3 mins)
+	err := retry.
+		Times(90).
+		Wait(2 * time.Second).
+		Try(func(attempt uint) error {
+			_, err := clients.LitmusClient.ChaosResults(result.Namespace).Update(result)
+			if err != nil {
+				return errors.Errorf("Unable to update the chaosresult, err: %v", err)
+			}
+			return nil
+		})
 
 	return err
 }
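Both the list and update paths in chaosresult.go wrap the API call in the openebs/maya retry builder; Times(90) with Wait(2 * time.Second) is where the "3 mins" in the comments comes from (90 attempts spaced 2s apart ≈ 180s). The same budget around an arbitrary operation, as a sketch:

```go
package main

import (
	"time"

	"github.com/openebs/maya/pkg/util/retry"
	"github.com/pkg/errors"
)

// withResultRetry applies the budget used above: up to 90 attempts, 2s apart,
// i.e. roughly a 3-minute ceiling before the last error is surfaced.
func withResultRetry(op func() error) error {
	return retry.
		Times(90).
		Wait(2 * time.Second).
		Try(func(attempt uint) error {
			if err := op(); err != nil {
				return errors.Errorf("attempt %d failed, err: %v", attempt, err)
			}
			return nil
		})
}
```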
diff --git a/pkg/status/application.go b/pkg/status/application.go
index 584d508f8..42ac8ec98 100644
--- a/pkg/status/application.go
+++ b/pkg/status/application.go
@@ -101,9 +101,10 @@ func CheckContainerStatus(appNs string, appLabel string, clients clients.ClientS
 }
 
 // WaitForCompletion wait until the completion of pod
-func WaitForCompletion(appNs string, appLabel string, clients clients.ClientSets, duration int) (string, error) {
+func WaitForCompletion(appNs string, appLabel string, clients clients.ClientSets, duration int, containerName string) (string, error) {
 	var podStatus string
-
+	// It will wait till the completion of the target container
+	// it retries until the target container completes or meets the timeout (chaos duration)
 	err := retry.
 		Times(uint(duration)).
 		Wait(1 * time.Second).
@@ -113,11 +114,20 @@ func WaitForCompletion(appNs string, appLabel string, clients clients.ClientSets
 				return errors.Errorf("Unable to get the pod, err: %v", err)
 			}
 			err = nil
+			// it will check the status of the helper pod; if it is Succeeded and the target container has completed, it marks it as completed and returns
+			// if it is still running, it checks the target container, as we can have multiple containers inside the helper pod (istio)
+			// if the target container is in the completed state (ready flag is false), we mark the helper pod as completed
+			// we retry till it meets the timeout (chaos duration)
 			for _, pod := range podSpec.Items {
 				podStatus = string(pod.Status.Phase)
 				log.Infof("helper pod status: %v", podStatus)
 				if podStatus != "Succeeded" && podStatus != "Failed" {
-					return errors.Errorf("Helper pod is not yet completed yet")
+					for _, container := range pod.Status.ContainerStatuses {
+
+						if container.Name == containerName && container.Ready {
+							return errors.Errorf("Container is not completed yet")
+						}
+					}
 				}
 				log.InfoWithValues("The running status of Pods are as follows", logrus.Fields{
 					"Pod": pod.Name, "Status": podStatus})
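Net effect of the application.go change: a helper pod whose phase is still Running now counts as complete once the named container's ready flag drops, so sidecars such as istio-proxy can no longer hold the wait open until the timeout. The predicate in isolation, as a sketch:

```go
package main

import corev1 "k8s.io/api/core/v1"

// targetContainerCompleted mirrors the check added above: while the pod phase
// is neither Succeeded nor Failed, the wait keeps retrying only as long as the
// named container is still ready; once it stops being ready, the helper pod is
// treated as completed even though sidecars may keep the pod phase Running.
func targetContainerCompleted(pod corev1.Pod, containerName string) bool {
	for _, container := range pod.Status.ContainerStatuses {
		if container.Name == containerName && container.Ready {
			return false // target container still running
		}
	}
	return true
}
```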