diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go index ef01e1f2f..5c7bee086 100755 --- a/bin/experiment/experiment.go +++ b/bin/experiment/experiment.go @@ -4,6 +4,7 @@ import ( "context" "errors" "flag" + "fmt" "os" // Uncomment to load all auth plugins @@ -68,6 +69,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) func init() { @@ -106,6 +108,8 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { log.Errorf("Unable to Get the kubeconfig, err: %v", err) + span.SetStatus(codes.Error, "Unable to Get the kubeconfig") + span.RecordError(err) return } @@ -211,6 +215,7 @@ func main() { k6Loadgen.Experiment(ctx, clients) default: log.Errorf("Unsupported -name %v, please provide the correct value of -name args", *experimentName) + span.SetStatus(codes.Error, fmt.Sprintf("Unsupported -name %v", *experimentName)) return } } diff --git a/chaoslib/litmus/pod-delete/lib/pod-delete.go b/chaoslib/litmus/pod-delete/lib/pod-delete.go index aa4fec6e8..39a0f9c35 100644 --- a/chaoslib/litmus/pod-delete/lib/pod-delete.go +++ b/chaoslib/litmus/pod-delete/lib/pod-delete.go @@ -21,6 +21,7 @@ import ( "github.com/palantir/stacktrace" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -46,14 +47,22 @@ func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.E switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, 
experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + errReason := fmt.Sprintf("sequence '%s' is not supported", experimentsDetails.Sequence) + span.SetStatus(codes.Error, errReason) + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: errReason} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -72,6 +81,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run the probes during chaos") + span.RecordError(err) return err } } @@ -85,11 +96,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + 
span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -97,6 +113,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment for _, pod := range targetPodList.Items { kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient) if err != nil { + span.SetStatus(codes.Error, "could not get pod owner name and kind") + span.RecordError(err) return stacktrace.Propagate(err, "could not get pod owner name and kind") } common.SetParentName(parentName, kind, pod.Namespace, chaosDetails) @@ -123,12 +141,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) } if err != nil { + span.SetStatus(codes.Error, "could not delete the target pod") + span.RecordError(err) return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} } switch chaosDetails.Randomness { case true: if err := common.RandomInterval(experimentsDetails.ChaosInterval); err != nil { + span.SetStatus(codes.Error, "could not get random chaos interval") + span.RecordError(err) return stacktrace.Propagate(err, "could not get random chaos interval") } default: @@ -149,6 +171,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment Namespace: parent.Namespace, } if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not check pod statuses by workload names") + span.RecordError(err) return stacktrace.Propagate(err, "could not check pod statuses by workload names") } } @@ -184,10 +208,15 @@ func injectChaosInParallelMode(ctx 
context.Context, experimentsDetails *experime // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "please provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -195,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, pod := range targetPodList.Items { kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient) if err != nil { + span.SetStatus(codes.Error, "could not get pod owner name and kind") + span.RecordError(err) return stacktrace.Propagate(err, "could not get pod owner name and kind") } common.SetParentName(parentName, kind, pod.Namespace, chaosDetails) @@ -221,6 +252,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) } if err != nil { + span.SetStatus(codes.Error, "could not delete the target pod") + span.RecordError(err) return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", 
err.Error())} } } @@ -228,6 +261,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime switch chaosDetails.Randomness { case true: if err := common.RandomInterval(experimentsDetails.ChaosInterval); err != nil { + span.SetStatus(codes.Error, "could not get random chaos interval") + span.RecordError(err) return stacktrace.Propagate(err, "could not get random chaos interval") } default: @@ -248,6 +283,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime Namespace: parent.Namespace, } if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not check pod statuses by workload names") + span.RecordError(err) return stacktrace.Propagate(err, "could not check pod statuses by workload names") } } diff --git a/experiments/generic/pod-delete/experiment/pod-delete.go b/experiments/generic/pod-delete/experiment/pod-delete.go index 0fb445f15..5caa4cf94 100644 --- a/experiments/generic/pod-delete/experiment/pod-delete.go +++ b/experiments/generic/pod-delete/experiment/pod-delete.go @@ -17,10 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodDelete inject the pod-delete chaos func PodDelete(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) + experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} eventsDetails := types.EventDetails{} @@ -40,6 +44,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine.
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -49,6 +55,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -56,6 +64,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { if err := result.SetResultUID(&resultDetails, clients, &chaosDetails); err != nil { log.Errorf("Unable to set the result uid, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to set the result uid") + span.RecordError(err) return } @@ -85,6 +95,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { log.Errorf("failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -104,6 +116,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { log.Errorf("failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -117,6 +131,8 @@ func PodDelete(ctx context.Context, clients 
clients.ClientSets) { if err := litmusLIB.PreparePodDelete(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -132,6 +148,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -150,6 +168,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { log.Errorf("failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -165,6 +185,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to update the chaosresult") + span.RecordError(err) return } diff --git a/pkg/probe/probe.go b/pkg/probe/probe.go index fe6e1a271..1237beb79 100644 --- a/pkg/probe/probe.go +++ b/pkg/probe/probe.go @@ -18,6 +18,7 @@ import ( "github.com/palantir/stacktrace" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" v1 
"k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -32,6 +33,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl // get the probes details from the chaosengine probes, err := getProbesFromChaosEngine(chaosDetails, clients) if err != nil { + span.SetStatus(codes.Error, "getProbesFromChaosEngine failed") + span.RecordError(err) return err } @@ -42,6 +45,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl switch strings.ToLower(probe.Mode) { case "sot", "edge", "continuous": if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name)) + span.RecordError(err) return err } } @@ -51,6 +56,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl for _, probe := range probes { if strings.ToLower(probe.Mode) == "onchaos" { if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name)) + span.RecordError(err) return err } } @@ -72,13 +79,19 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl } } if len(probeError) != 0 { - return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s]", strings.Join(probeError, ","))} + errString := fmt.Sprintf("[%s]", strings.Join(probeError, ",")) + span.SetStatus(codes.Error, errString) + err := cerrors.PreserveError{ErrString: errString} + span.RecordError(err) + return err } // executes the eot and edge modes for _, probe := range probes { switch strings.ToLower(probe.Mode) { case "eot", "edge": if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name)) + span.RecordError(err) return err } }