diff --git a/.github/workflows/dependency-review.yaml b/.github/workflows/dependency-review.yaml
index 678381339..7b962f441 100644
--- a/.github/workflows/dependency-review.yaml
+++ b/.github/workflows/dependency-review.yaml
@@ -10,7 +10,7 @@ permissions:
 
 jobs:
   dependency-review:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     steps:
      - name: 'Checkout Repository'
        uses: actions/checkout@v4
diff --git a/.github/workflows/ramen.yaml b/.github/workflows/ramen.yaml
index c2f464b94..0a93163be 100644
--- a/.github/workflows/ramen.yaml
+++ b/.github/workflows/ramen.yaml
@@ -32,7 +32,7 @@ defaults:
 jobs:
   lint:
     name: Linters
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     steps:
      - name: Checkout source
        uses: actions/checkout@v4
@@ -67,7 +67,7 @@ jobs:
 
   golangci:
     name: Golangci Lint
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     strategy:
       matrix:
         directory: [., api, e2e]
@@ -98,7 +98,7 @@ jobs:
 
   unit-test:
     name: Unit tests
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     steps:
      - name: Checkout source
        uses: actions/checkout@v4
@@ -113,7 +113,7 @@ jobs:
 
   build-image:
     name: Build image
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     steps:
      - name: Checkout source
        uses: actions/checkout@v4
@@ -139,7 +139,7 @@ jobs:
   deploy-check:
     name: Check artifacts and operator deployment
     needs: [build-image]
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     strategy:
       fail-fast: false
       matrix:
@@ -216,7 +216,7 @@ jobs:
       (github.ref == 'refs/heads/main' ||
        startsWith(github.ref, 'refs/heads/release-') ||
        startsWith(github.ref, 'refs/tags/v'))
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     steps:
      - name: Download image artifact
        uses: actions/download-artifact@v4
diff --git a/.github/workflows/tools.yaml b/.github/workflows/tools.yaml
index e25d186e0..c7fec9089 100644
--- a/.github/workflows/tools.yaml
+++ b/.github/workflows/tools.yaml
@@ -25,7 +25,7 @@ jobs:
     strategy:
       matrix:
         os:
-          - ubuntu-22.04
+          - ubuntu-24.04
         python-version:
          - "3.10"
          - "3.11"
@@ -150,7 +150,7 @@ jobs:
     strategy:
       matrix:
         os:
-          - ubuntu-22.04
+          - ubuntu-24.04
         python-version:
          - "3.10"
          - "3.11"
diff --git a/e2e/dractions/actions.go b/e2e/dractions/actions.go
index b4ee87408..cfbdc1c4c 100644
--- a/e2e/dractions/actions.go
+++ b/e2e/dractions/actions.go
@@ -5,7 +5,6 @@ package dractions
 
 import (
     "strings"
-    "time"
 
     ramen "github.com/ramendr/ramen/api/v1alpha1"
     "github.com/ramendr/ramen/e2e/deployers"
@@ -17,8 +16,6 @@ import (
 
 const (
     OcmSchedulingDisable = "cluster.open-cluster-management.io/experimental-scheduling-disable"
-
-    FiveSecondsDuration = 5 * time.Second
 )
 
 // If AppSet/Subscription, find Placement
@@ -126,7 +123,7 @@ func Failover(ctx types.Context) error {
 
     log.Info("Failing over workload")
 
-    return failoverRelocate(ctx, ramen.ActionFailover)
+    return failoverRelocate(ctx, ramen.ActionFailover, ramen.FailedOver)
 }
 
 // Determine DRPC
@@ -143,24 +140,23 @@ func Relocate(ctx types.Context) error {
 
     log.Info("Relocating workload")
 
-    return failoverRelocate(ctx, ramen.ActionRelocate)
+    return failoverRelocate(ctx, ramen.ActionRelocate, ramen.Relocated)
 }
 
-func failoverRelocate(ctx types.Context, action ramen.DRAction) error {
-    name := ctx.Name()
+func failoverRelocate(ctx types.Context, action ramen.DRAction, state ramen.DRState) error {
+    drpcName := ctx.Name()
     namespace := ctx.Namespace()
-    drpcName := name
     client := util.Ctx.Hub.Client
 
     if err := waitAndUpdateDRPC(ctx, client, namespace, drpcName, action); err != nil {
         return err
     }
 
-    if action == ramen.ActionFailover {
-        return waitDRPC(ctx, client, namespace, name, ramen.FailedOver)
+    if err := waitDRPCPhase(ctx, client, namespace, drpcName, state); err != nil {
+        return err
     }
 
-    return waitDRPC(ctx, client, namespace, name, ramen.Relocated)
+    return waitDRPCReady(ctx, client, namespace, drpcName)
 }
 
 func waitAndUpdateDRPC(
diff --git a/e2e/dractions/actionsdiscoveredapps.go b/e2e/dractions/actionsdiscoveredapps.go
index b515b8148..af169e510 100644
--- a/e2e/dractions/actionsdiscoveredapps.go
+++ b/e2e/dractions/actionsdiscoveredapps.go
@@ -90,18 +90,18 @@ func FailoverDiscoveredApps(ctx types.Context) error {
     log := ctx.Logger()
     log.Info("Failing over workload")
 
-    return failoverRelocateDiscoveredApps(ctx, ramen.ActionFailover)
+    return failoverRelocateDiscoveredApps(ctx, ramen.ActionFailover, ramen.FailedOver)
 }
 
 func RelocateDiscoveredApps(ctx types.Context) error {
     log := ctx.Logger()
     log.Info("Relocating workload")
 
-    return failoverRelocateDiscoveredApps(ctx, ramen.ActionRelocate)
+    return failoverRelocateDiscoveredApps(ctx, ramen.ActionRelocate, ramen.Relocated)
 }
 
 // nolint:funlen,cyclop
-func failoverRelocateDiscoveredApps(ctx types.Context, action ramen.DRAction) error {
+func failoverRelocateDiscoveredApps(ctx types.Context, action ramen.DRAction, state ramen.DRState) error {
     name := ctx.Name()
     log := ctx.Logger()
     namespace := ctx.Namespace() // this namespace is in hub
@@ -142,11 +142,11 @@ func failoverRelocateDiscoveredApps(ctx types.Context, action ramen.DRAction) er
         return err
     }
 
-    if err = waitDRPCProgression(ctx, client, namespace, name, ramen.ProgressionCompleted); err != nil {
+    if err := waitDRPCPhase(ctx, client, namespace, name, state); err != nil {
         return err
     }
 
-    if err = waitDRPCReady(ctx, client, namespace, name); err != nil {
+    if err := waitDRPCReady(ctx, client, namespace, name); err != nil {
         return err
     }
 
diff --git a/e2e/dractions/retry.go b/e2e/dractions/retry.go
index 1c062059e..241544e05 100644
--- a/e2e/dractions/retry.go
+++ b/e2e/dractions/retry.go
@@ -12,6 +12,8 @@ import (
     "github.com/ramendr/ramen/e2e/types"
     "github.com/ramendr/ramen/e2e/util"
     "k8s.io/apimachinery/pkg/api/errors"
+    "k8s.io/apimachinery/pkg/api/meta"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "open-cluster-management.io/api/cluster/v1beta1"
     "sigs.k8s.io/controller-runtime/pkg/client"
 )
@@ -48,58 +50,36 @@ func waitDRPCReady(ctx types.Context, client client.Client, namespace string, dr
     log := ctx.Logger()
     startTime := time.Now()
 
+    log.Info("Waiting until drpc is ready")
+
     for {
         drpc, err := getDRPC(client, namespace, drpcName)
         if err != nil {
             return err
         }
 
-        conditionReady := checkDRPCConditions(drpc)
-        if conditionReady && drpc.Status.LastGroupSyncTime != nil {
+        available := conditionMet(drpc.Status.Conditions, ramen.ConditionAvailable)
+        peerReady := conditionMet(drpc.Status.Conditions, ramen.ConditionPeerReady)
+
+        if available && peerReady && drpc.Status.LastGroupSyncTime != nil {
             log.Info("drpc is ready")
 
             return nil
         }
 
         if time.Since(startTime) > util.Timeout {
-            if !conditionReady {
-                log.Info("drpc condition 'Available' or 'PeerReady' is not True")
-            }
-
-            if conditionReady && drpc.Status.LastGroupSyncTime == nil {
-                log.Info("drpc LastGroupSyncTime is nil")
-            }
-
-            return fmt.Errorf("drpc %q is not ready yet before timeout, fail", drpcName)
+            return fmt.Errorf("timeout waiting for drpc to become ready (Available: %v, PeerReady: %v, lastGroupSyncTime: %v)",
+                available, peerReady, drpc.Status.LastGroupSyncTime)
         }
 
         time.Sleep(util.RetryInterval)
     }
 }
 
-func checkDRPCConditions(drpc *ramen.DRPlacementControl) bool {
-    available := false
-    peerReady := false
-
-    for _, cond := range drpc.Status.Conditions {
-        if cond.Type == "Available" {
-            if cond.Status != "True" {
-                return false
-            }
-
-            available = true
-        }
-
-        if cond.Type == "PeerReady" {
-            if cond.Status != "True" {
-                return false
-            }
+func conditionMet(conditions []metav1.Condition, conditionType string) bool {
+    condition := meta.FindStatusCondition(conditions, conditionType)
 
-            peerReady = true
-        }
-    }
-
-    return available && peerReady
+    return condition != nil && condition.Status == "True"
 }
 
 func waitDRPCPhase(ctx types.Context, client client.Client, namespace, name string, phase ramen.DRState) error {
@@ -161,16 +141,6 @@ func getTargetCluster(client client.Client, namespace, placementName string, drp
     return targetCluster, nil
 }
 
-// first wait DRPC to have the expected phase, then check DRPC conditions
-func waitDRPC(ctx types.Context, client client.Client, namespace, name string, expectedPhase ramen.DRState) error {
-    // check Phase
-    if err := waitDRPCPhase(ctx, client, namespace, name, expectedPhase); err != nil {
-        return err
-    }
-    // then check Conditions
-    return waitDRPCReady(ctx, client, namespace, name)
-}
-
 func waitDRPCDeleted(ctx types.Context, client client.Client, namespace string, name string) error {
     log := ctx.Logger()
     startTime := time.Now()
diff --git a/internal/controller/drplacementcontrol.go b/internal/controller/drplacementcontrol.go
index 7ae5a66a8..fea58b024 100644
--- a/internal/controller/drplacementcontrol.go
+++ b/internal/controller/drplacementcontrol.go
@@ -398,6 +398,9 @@ func (d *DRPCInstance) isValidFailoverTarget(cluster string) bool {
 
     vrg, err := d.reconciler.MCVGetter.GetVRGFromManagedCluster(d.instance.Name, d.vrgNamespace, cluster, annotations)
     if err != nil {
+        d.log.Info("Failed to get VRG from managed cluster", "name", d.instance.Name, "namespace", d.vrgNamespace,
+            "cluster", cluster, "annotations", annotations, "error", err)
+
         return false
     }
 
diff --git a/internal/controller/drplacementcontrol_controller.go b/internal/controller/drplacementcontrol_controller.go
index bfd8a862b..f3399490a 100644
--- a/internal/controller/drplacementcontrol_controller.go
+++ b/internal/controller/drplacementcontrol_controller.go
@@ -724,15 +724,15 @@ func (r *DRPlacementControlReconciler) cleanupVRGs(
         return fmt.Errorf("failed to retrieve VRGs. We'll retry later. Error (%w)", err)
     }
 
-    if !ensureVRGsManagedByDRPC(r.Log, mwu, vrgs, drpc, vrgNamespace) {
-        return fmt.Errorf("VRG adoption in progress")
+    // We have to ensure the secondary VRG is deleted before deleting the primary VRG. This will fail until there
+    // is no secondary VRG in the vrgs list.
+    if err := r.ensureVRGsDeleted(mwu, vrgs, drpc, vrgNamespace, rmn.Secondary); err != nil {
+        return err
     }
 
-    // delete VRG manifestwork
-    for _, drClusterName := range rmnutil.DRPolicyClusterNames(drPolicy) {
-        if err := mwu.DeleteManifestWork(mwu.BuildManifestWorkName(rmnutil.MWTypeVRG), drClusterName); err != nil {
-            return fmt.Errorf("%w", err)
-        }
+    // This will fail until there is no primary VRG in the vrgs list.
+    if err := r.ensureVRGsDeleted(mwu, vrgs, drpc, vrgNamespace, rmn.Primary); err != nil {
+        return err
     }
 
     if len(vrgs) != 0 {
@@ -747,6 +747,38 @@ func (r *DRPlacementControlReconciler) cleanupVRGs(
     return nil
 }
 
+// ensureVRGsDeleted ensures that secondary or primary VRGs are deleted. It returns an error if a VRG could not be
+// deleted or deletion is still in progress, and nil if no VRG with the specified replication state was found.
+func (r *DRPlacementControlReconciler) ensureVRGsDeleted(
+    mwu rmnutil.MWUtil,
+    vrgs map[string]*rmn.VolumeReplicationGroup,
+    drpc *rmn.DRPlacementControl,
+    vrgNamespace string,
+    replicationState rmn.ReplicationState,
+) error {
+    var inProgress bool
+
+    for cluster, vrg := range vrgs {
+        if vrg.Spec.ReplicationState == replicationState {
+            if !ensureVRGsManagedByDRPC(r.Log, mwu, vrgs, drpc, vrgNamespace) {
+                return fmt.Errorf("%s VRG adoption in progress", replicationState)
+            }
+
+            if err := mwu.DeleteManifestWork(mwu.BuildManifestWorkName(rmnutil.MWTypeVRG), cluster); err != nil {
+                return fmt.Errorf("failed to delete %s VRG manifestwork for cluster %q: %w", replicationState, cluster, err)
+            }
+
+            inProgress = true
+        }
+    }
+
+    if inProgress {
+        return fmt.Errorf("%s VRG manifestwork deletion in progress", replicationState)
+    }
+
+    return nil
+}
+
 func (r *DRPlacementControlReconciler) deleteAllManagedClusterViews(
     drpc *rmn.DRPlacementControl, clusterNames []string,
 ) error {
diff --git a/internal/controller/util/json_util.go b/internal/controller/util/json_util.go
index f5f9a846a..3f5cc3261 100644
--- a/internal/controller/util/json_util.go
+++ b/internal/controller/util/json_util.go
@@ -21,78 +21,62 @@ import (
 )
 
 const (
-    defaultTimeoutValue = 300
-    pollInterval        = 100
+    defaultTimeoutValue       = 300
+    pollInterval              = 100
+    expectedNumberOfJSONPaths = 2
 )
 
-func EvaluateCheckHook(client client.Client, hook *kubeobjects.HookSpec, log logr.Logger) (bool, error) {
-    timeout := getTimeoutValue(hook)
-    nsScopedName := types.NamespacedName{
-        Namespace: hook.Namespace,
-        Name:      hook.NameSelector,
-    }
+func getJSONObject(k8sClient client.Client, hook *kubeobjects.HookSpec) (client.Object, error) {
+    var obj client.Object
 
     switch hook.SelectResource {
     case "pod":
-        // handle pod type
-        resource := &corev1.Pod{}
-
-        err := WaitUntilResourceExists(client, resource, nsScopedName, time.Duration(timeout)*time.Second)
-        if err != nil {
-            return false, err
-        }
-
-        return EvaluateCheckHookExp(hook.Chk.Condition, resource)
+        obj = &corev1.Pod{}
     case "deployment":
-        // handle deployment type
-        resource := &appsv1.Deployment{}
-
-        err := WaitUntilResourceExists(client, resource, nsScopedName, time.Duration(timeout)*time.Second)
-        if err != nil {
-            return false, err
-        }
-
-        return EvaluateCheckHookExp(hook.Chk.Condition, resource)
+        obj = &appsv1.Deployment{}
     case "statefulset":
-        // handle statefulset type
-        resource := &appsv1.StatefulSet{}
-
-        err := WaitUntilResourceExists(client, resource, nsScopedName, time.Duration(timeout)*time.Second)
-        if err != nil {
-            return false, err
-        }
-
-        return EvaluateCheckHookExp(hook.Chk.Condition, resource)
+        obj = &appsv1.StatefulSet{}
+    default:
+        return obj, fmt.Errorf("unsupported resource type %s", hook.SelectResource)
     }
 
-    return false, nil
+    err := k8sClient.Get(context.Background(),
+        types.NamespacedName{Name: hook.NameSelector, Namespace: hook.Namespace},
+        obj)
+
+    return obj, err
 }
 
-func WaitUntilResourceExists(client client.Client, obj client.Object, nsScopedName types.NamespacedName,
-    timeout time.Duration,
-) error {
-    ctx, cancel := context.WithTimeout(context.Background(), timeout)
+func EvaluateCheckHook(k8sClient client.Client, hook *kubeobjects.HookSpec, log logr.Logger) (bool, error) {
+    timeout := getTimeoutValue(hook)
+
+    ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
     defer cancel()
 
-    ticker := time.NewTicker(pollInterval * time.Millisecond) // Poll every 100 milliseconds
+    ticker := time.NewTicker(pollInterval * time.Millisecond)
     defer ticker.Stop()
 
     for {
         select {
         case <-ctx.Done():
-            return fmt.Errorf("timeout waiting for resource %s to be ready: %w", nsScopedName.Name, ctx.Err())
+            return false, fmt.Errorf("timeout waiting for resource %s to be ready: %w", hook.NameSelector, ctx.Err())
         case <-ticker.C:
-            err := client.Get(context.Background(), nsScopedName, obj)
+            obj, err := getJSONObject(k8sClient, hook)
             if err != nil {
                 if k8serrors.IsNotFound(err) {
                     // Resource not found, continue polling
                     continue
                 }
 
-                return err // Some other error occurred, return it
+                return false, err // Some other error occurred, return it
+            }
+
+            res, err := EvaluateCheckHookExp(hook.Chk.Condition, obj)
+            if err != nil {
+                continue // This may mean that the expression cannot be evaluated yet
             }
 
-            return nil // Resource is ready
+            return res, nil
         }
     }
 }
@@ -235,27 +219,28 @@ func compareValues(val1, val2 interface{}, operator string) (bool, error) {
     case float64:
         v2, ok := val2.(float64)
         if !ok {
-            return false, fmt.Errorf("mismatched types")
+            return false, fmt.Errorf("types mismatch: expected %T, actual: %T", val1, val2)
         }
 
         return compareFloat(v1, v2, operator)
     case string:
         v2, ok := val2.(string)
         if !ok {
-            return false, fmt.Errorf("mismatched types")
+            return false, fmt.Errorf("types mismatch: expected %T, actual: %T", val1, val2)
        }
 
         return compareString(v1, v2, operator)
     case bool:
         v2, ok := val2.(bool)
         if !ok {
-            return false, fmt.Errorf("mismatched types")
+            return false, fmt.Errorf("types mismatch: expected %T, actual: %T", val1, val2)
         }
 
         return compareBool(v1, v2, operator)
     }
 
-    return false, fmt.Errorf("unsupported type or operator")
+    return false, fmt.Errorf("unsupported type or operator, types are %T and %T, operator is %s",
+        val1, val2, operator)
 }
 
 func isKindString(kind reflect.Kind) bool {
@@ -290,7 +275,7 @@ func parseBooleanExpression(booleanExpression string) (op string, jsonPaths []st
 
     jsonPaths = trimLeadingTrailingWhiteSpace(exprs)
 
-    if len(exprs) == 2 &&
+    if len(exprs) == expectedNumberOfJSONPaths &&
         IsValidJSONPathExpression(jsonPaths[0]) &&
         IsValidJSONPathExpression(jsonPaths[1]) {
         return op, jsonPaths, nil
diff --git a/internal/controller/vrg_kubeobjects.go b/internal/controller/vrg_kubeobjects.go
index fbc0cb816..766ccb8e6 100644
--- a/internal/controller/vrg_kubeobjects.go
+++ b/internal/controller/vrg_kubeobjects.go
@@ -905,8 +905,8 @@ func getResourceAndConvertToRecoverGroup(
 }
 
 func validateAndGetHookDetails(name string) (string, string, error) {
-    if !strings.Contains(name, "/") {
-        return "", "", errors.New("invalid format of hook name provided ")
+    if strings.Count(name, "/") != 1 {
+        return "", "", errors.New("invalid format: hook name provided should be of the form part1/part2")
     }
 
     parts := strings.Split(name, "/")
@@ -954,9 +954,10 @@ func convertRecipeHookToCaptureSpec(
 func convertRecipeHookToRecoverSpec(hook Recipe.Hook, suffix string) (*kubeobjects.RecoverSpec, error) {
     hookSpec := getHookSpecFromHook(hook, suffix)
 
+    // A RecoverSpec with KubeResourcesSpec.IsHook set to true is never sent to
+    // Velero. It will only be used by Ramen to execute the hook.
+    // We don't need a backup name for it.
     return &kubeobjects.RecoverSpec{
-        // BackupName: arbitrary fixed string to designate that this is will be a Backup, not Restore, object
-        BackupName: ramen.ReservedBackupName,
         Spec: kubeobjects.Spec{
             KubeResourcesSpec: kubeobjects.KubeResourcesSpec{
                 IncludedNamespaces: []string{hook.Namespace},
@@ -1036,8 +1037,13 @@ func getOpHookSpec(hook *Recipe.Hook, suffix string) kubeobjects.HookSpec {
 }
 
 func convertRecipeGroupToRecoverSpec(group Recipe.Group) (*kubeobjects.RecoverSpec, error) {
+    backupName := group.Name
+    if group.BackupRef != "" {
+        backupName = group.BackupRef
+    }
+
     return &kubeobjects.RecoverSpec{
-        BackupName: group.BackupRef,
+        BackupName: backupName,
         Spec: kubeobjects.Spec{
             KubeResourcesSpec: kubeobjects.KubeResourcesSpec{
                 IncludedNamespaces: group.IncludedNamespaces,
diff --git a/internal/controller/vrg_kubeobjects_test.go b/internal/controller/vrg_kubeobjects_test.go
index cd2f6e29c..1948f1389 100644
--- a/internal/controller/vrg_kubeobjects_test.go
+++ b/internal/controller/vrg_kubeobjects_test.go
@@ -12,7 +12,6 @@ import (
 
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
-    ramen "github.com/ramendr/ramen/api/v1alpha1"
     Recipe "github.com/ramendr/recipe/api/v1alpha1"
 )
 
@@ -100,7 +99,6 @@ var _ = Describe("VRG_KubeObjectProtection", func() {
 
         It("Hook to RecoverSpec", func() {
             targetRecoverSpec := &kubeobjects.RecoverSpec{
-                BackupName: ramen.ReservedBackupName,
                 Spec: kubeobjects.Spec{
                     KubeResourcesSpec: kubeobjects.KubeResourcesSpec{
                         IncludedNamespaces: []string{namespaceName},
diff --git a/test/addons/rbd-mirror/start b/test/addons/rbd-mirror/start
index ab8028a4d..55b6192ff 100755
--- a/test/addons/rbd-mirror/start
+++ b/test/addons/rbd-mirror/start
@@ -111,6 +111,10 @@ def configure_rbd_mirroring(cluster, peer_info):
         yaml = template.substitute(cluster=cluster)
         kubectl.apply("--filename=-", input=yaml, context=cluster)
 
+    template = drenv.template("start-data/vgrc-sample.yaml")
+    yaml = template.substitute(cluster=cluster, pool=POOL_NAME)
+    kubectl.apply("--filename=-", input=yaml, context=cluster)
+
     print(f"Apply rbd mirror to cluster '{cluster}'")
     kubectl.apply("--kustomize=start-data", context=cluster)
 
diff --git a/test/addons/rbd-mirror/start-data/kustomization.yaml b/test/addons/rbd-mirror/start-data/kustomization.yaml
index 1a51e23cc..d216a472e 100644
--- a/test/addons/rbd-mirror/start-data/kustomization.yaml
+++ b/test/addons/rbd-mirror/start-data/kustomization.yaml
@@ -3,7 +3,6 @@
 
 ---
 resources:
-- vgrc-sample.yaml
 - rbd-mirror.yaml
 
 namespace: rook-ceph
diff --git a/test/addons/rbd-mirror/start-data/vgrc-sample.yaml b/test/addons/rbd-mirror/start-data/vgrc-sample.yaml
index e4eae360d..2f44aa193 100644
--- a/test/addons/rbd-mirror/start-data/vgrc-sample.yaml
+++ b/test/addons/rbd-mirror/start-data/vgrc-sample.yaml
@@ -6,9 +6,14 @@ apiVersion: replication.storage.openshift.io/v1alpha1
 kind: VolumeGroupReplicationClass
 metadata:
   name: vgrc-sample
+  labels:
+    ramendr.openshift.io/storageid: rook-ceph-$cluster-1
+    ramendr.openshift.io/replicationid: rook-ceph-replication-1
 spec:
   provisioner: rook-ceph.rbd.csi.ceph.com
   parameters:
-    replication.storage.openshift.io/replication-secret-name: rook-csi-rbd-provisioner
-    replication.storage.openshift.io/replication-secret-namespace: rook-ceph
+    clusterID: rook-ceph
+    pool: $pool
+    replication.storage.openshift.io/group-replication-secret-name: rook-csi-rbd-provisioner
+    replication.storage.openshift.io/group-replication-secret-namespace: rook-ceph
   schedulingInterval: 1m