diff --git a/internal/controller/drplacementcontrol.go b/internal/controller/drplacementcontrol.go
index cc3ac787c..4ce206d5a 100644
--- a/internal/controller/drplacementcontrol.go
+++ b/internal/controller/drplacementcontrol.go
@@ -825,6 +825,14 @@ func (d *DRPCInstance) RunRelocate() (bool, error) {
const done = true
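+ // Relocation needs an up-to-date view of the VRGs on every DR cluster; defer
+ // (return "not done") until all clusters in the DRPolicy were queried successfully.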
+ if d.reconciler.numClustersQueriedSuccessfully != len(d.drPolicy.Spec.DRClusters) {
+ d.log.Info("Can't progress with relocation -- Not all clusters are reachable",
+ "numClustersQueriedSuccessfully", d.reconciler.numClustersQueriedSuccessfully,
+ "NumOfClusters", len(d.drPolicy.Spec.DRClusters))
+
+ return !done, nil
+ }
+
preferredCluster := d.instance.Spec.PreferredCluster
preferredClusterNamespace := d.instance.Spec.PreferredCluster
diff --git a/internal/controller/drplacementcontrol_controller.go b/internal/controller/drplacementcontrol_controller.go
index 59b0f9a15..227b2af7c 100644
--- a/internal/controller/drplacementcontrol_controller.go
+++ b/internal/controller/drplacementcontrol_controller.go
@@ -68,15 +68,16 @@ type ProgressCallback func(string, string)
// DRPlacementControlReconciler reconciles a DRPlacementControl object
type DRPlacementControlReconciler struct {
client.Client
- APIReader client.Reader
- Log logr.Logger
- MCVGetter rmnutil.ManagedClusterViewGetter
- Scheme *runtime.Scheme
- Callback ProgressCallback
- eventRecorder *rmnutil.EventReporter
- savedInstanceStatus rmn.DRPlacementControlStatus
- ObjStoreGetter ObjectStoreGetter
- RateLimiter *workqueue.TypedRateLimiter[reconcile.Request]
+ APIReader client.Reader
+ Log logr.Logger
+ MCVGetter rmnutil.ManagedClusterViewGetter
+ Scheme *runtime.Scheme
+ Callback ProgressCallback
+ eventRecorder *rmnutil.EventReporter
+ savedInstanceStatus rmn.DRPlacementControlStatus
+ ObjStoreGetter ObjectStoreGetter
+ RateLimiter *workqueue.TypedRateLimiter[reconcile.Request]
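+ // numClustersQueriedSuccessfully records how many DR clusters responded to the
+ // last VRG query; relocation is deferred unless it matches the DRPolicy cluster count.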
+ numClustersQueriedSuccessfully int
}
// SetupWithManager sets up the controller with the Manager.
@@ -373,11 +374,13 @@ func (r *DRPlacementControlReconciler) createDRPCInstance(
return nil, err
}
- vrgs, _, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log)
+ vrgs, cqs, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log)
if err != nil {
return nil, err
}
+ r.numClustersQueriedSuccessfully = cqs
+
d := &DRPCInstance{
reconciler: r,
ctx: ctx,
@@ -1097,7 +1100,7 @@ func getVRGsFromManagedClusters(
annotations[DRPCNameAnnotation] = drpc.Name
annotations[DRPCNamespaceAnnotation] = drpc.Namespace
- var clustersQueriedSuccessfully int
+ var numClustersQueriedSuccessfully int
var failedCluster string
@@ -1109,7 +1112,7 @@ func getVRGsFromManagedClusters(
// Only NotFound error is accepted
if errors.IsNotFound(err) {
log.Info(fmt.Sprintf("VRG not found on %q", drCluster.Name))
- clustersQueriedSuccessfully++
+ numClustersQueriedSuccessfully++
continue
}
@@ -1121,7 +1124,7 @@ func getVRGsFromManagedClusters(
continue
}
- clustersQueriedSuccessfully++
+ numClustersQueriedSuccessfully++
if rmnutil.ResourceIsDeleted(drCluster) {
log.Info("Skipping VRG on deleted drcluster", "drcluster", drCluster.Name, "vrg", vrg.Name)
@@ -1135,15 +1138,15 @@ func getVRGsFromManagedClusters(
}
// We are done if we successfully queried all drClusters
- if clustersQueriedSuccessfully == len(drClusters) {
- return vrgs, clustersQueriedSuccessfully, "", nil
+ if numClustersQueriedSuccessfully == len(drClusters) {
+ return vrgs, numClustersQueriedSuccessfully, "", nil
}
- if clustersQueriedSuccessfully == 0 {
+ if numClustersQueriedSuccessfully == 0 {
return vrgs, 0, "", fmt.Errorf("failed to retrieve VRGs from clusters")
}
- return vrgs, clustersQueriedSuccessfully, failedCluster, nil
+ return vrgs, numClustersQueriedSuccessfully, failedCluster, nil
}
func (r *DRPlacementControlReconciler) deleteClonedPlacementRule(ctx context.Context,
diff --git a/internal/controller/drplacementcontrol_controller_test.go b/internal/controller/drplacementcontrol_controller_test.go
index af1eca6bf..d65596972 100644
--- a/internal/controller/drplacementcontrol_controller_test.go
+++ b/internal/controller/drplacementcontrol_controller_test.go
@@ -2479,8 +2479,8 @@ var _ = Describe("DRPlacementControl Reconciler", func() {
clearFakeUserPlacementRuleStatus(UserPlacementRuleName, DefaultDRPCNamespace)
clearDRPCStatus()
expectedAction := rmn.ActionRelocate
- expectedPhase := rmn.Relocated
- exptectedPorgression := rmn.ProgressionCleaningUp
+ expectedPhase := rmn.DRState("")
+ exptectedPorgression := rmn.ProgressionStatus("")
verifyDRPCStateAndProgression(expectedAction, expectedPhase, exptectedPorgression)
// User intervention is required (simulate user intervention)
diff --git a/internal/controller/volsync/vshandler_test.go b/internal/controller/volsync/vshandler_test.go
index e9995845a..8de0105cd 100644
--- a/internal/controller/volsync/vshandler_test.go
+++ b/internal/controller/volsync/vshandler_test.go
@@ -1216,14 +1216,12 @@ var _ = Describe("VolSync_Handler", func() {
pvc := &corev1.PersistentVolumeClaim{}
JustBeforeEach(func() {
// Common checks for everything in this context - pvc should be created with correct spec
- Expect(ensurePVCErr).NotTo(HaveOccurred())
-
Eventually(func() error {
return k8sClient.Get(ctx, types.NamespacedName{
Name: pvcName,
Namespace: testNamespace.GetName(),
}, pvc)
- }, maxWait, interval).Should(Succeed())
+ }, maxWait, interval).Should(Succeed(), fmt.Sprintf("Original error %v", ensurePVCErr))
Expect(pvc.GetName()).To(Equal(pvcName))
Expect(pvc.Spec.AccessModes).To(Equal([]corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}))
diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go
index 73e4e3e20..74e9027e9 100644
--- a/internal/controller/vrg_volrep_test.go
+++ b/internal/controller/vrg_volrep_test.go
@@ -614,6 +614,150 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() {
})
})
+ // Test VRG deletion when VR failed validation
+ var vrgDeleteFailedVR *vrgTest
+ //nolint:dupl
+ Context("VR failed validation in primary state", func() {
+ createTestTemplate := &template{
+ ClaimBindInfo: corev1.ClaimBound,
+ VolumeBindInfo: corev1.VolumeBound,
+ schedulingInterval: "1h",
+ storageClassName: "manual",
+ replicationClassName: "test-replicationclass",
+ vrcProvisioner: "manual.storage.com",
+ scProvisioner: "manual.storage.com",
+ replicationClassLabels: map[string]string{"protection": "ramen"},
+ }
+ It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() {
+ createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName}
+ vrgDeleteFailedVR = newVRGTestCaseCreateAndStart(1, createTestTemplate, true, false)
+ })
+ It("waits for VRG to create a VR for each PVC", func() {
+ expectedVRCount := len(vrgDeleteFailedVR.pvcNames)
+ vrgDeleteFailedVR.waitForVRCountToMatch(expectedVRCount)
+ })
+ It("simulate VR with failed validation", func() {
+ vrgDeleteFailedVR.promoteVolRepsWithOptions(promoteOptions{ValidatedFailed: true})
+ })
+ It("VRG can be deleted", func() {
+ By("deleting the VRG")
+ vrg := vrgDeleteFailedVR.getVRG()
+ Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed())
+
+ By("ensuring VRG is deleted")
+ Eventually(func() error {
+ return apiReader.Get(context.TODO(), vrgDeleteFailedVR.vrgNamespacedName(), vrg)
+ }, vrgtimeout, vrginterval).
+ Should(MatchError(errors.NewNotFound(schema.GroupResource{
+ Group: ramendrv1alpha1.GroupVersion.Group,
+ Resource: "volumereplicationgroups",
+ }, vrgDeleteFailedVR.vrgName)))
+
+ vrgDeleteFailedVR.cleanupNamespace()
+ vrgDeleteFailedVR.cleanupSC()
+ vrgDeleteFailedVR.cleanupVRC()
+ })
+ })
+
+ // Test VRG deletion when VR failed validation and Validated condition is missing (csi-addons < 0.10.0)
+ var vrgDeleteIncompleteVR *vrgTest
+ //nolint:dupl
+ Context("VR failed validation in primary state and Validated condition is missing", func() {
+ createTestTemplate := &template{
+ ClaimBindInfo: corev1.ClaimBound,
+ VolumeBindInfo: corev1.VolumeBound,
+ schedulingInterval: "1h",
+ storageClassName: "manual",
+ replicationClassName: "test-replicationclass",
+ vrcProvisioner: "manual.storage.com",
+ scProvisioner: "manual.storage.com",
+ replicationClassLabels: map[string]string{"protection": "ramen"},
+ }
+ It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() {
+ createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName}
+ vrgDeleteIncompleteVR = newVRGTestCaseCreateAndStart(1, createTestTemplate, true, false)
+ })
+ It("waits for VRG to create a VR for each PVC", func() {
+ expectedVRCount := len(vrgDeleteIncompleteVR.pvcNames)
+ vrgDeleteIncompleteVR.waitForVRCountToMatch(expectedVRCount)
+ })
+ It("simulate incomplete VR", func() {
+ vrgDeleteIncompleteVR.promoteVolRepsWithOptions(promoteOptions{ValidatedFailed: true, ValidatedMissing: true})
+ })
+ It("VRG can not be deleted", func() {
+ By("deleting the VRG")
+ vrg := vrgDeleteIncompleteVR.getVRG()
+ Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed())
+
+ By("ensuring VRG cannot be deleted")
+ Consistently(func() error {
+ return apiReader.Get(context.TODO(), vrgDeleteIncompleteVR.vrgNamespacedName(), vrg)
+ }, vrgtimeout, vrginterval).
+ Should(Succeed(), "VRG %s was deleted when VR is incomplete", vrgDeleteIncompleteVR.vrgName)
+
+ By("deleting the VRs")
+ vrgDeleteIncompleteVR.deleteVolReps()
+
+ By("ensuring the VRG is deleted")
+ Eventually(func() error {
+ return apiReader.Get(context.TODO(), vrgDeleteIncompleteVR.vrgNamespacedName(), vrg)
+ }, vrgtimeout, vrginterval).
+ Should(MatchError(errors.NewNotFound(schema.GroupResource{
+ Group: ramendrv1alpha1.GroupVersion.Group,
+ Resource: "volumereplicationgroups",
+ }, vrgDeleteIncompleteVR.vrgName)))
+
+ vrgDeleteIncompleteVR.cleanupNamespace()
+ vrgDeleteIncompleteVR.cleanupSC()
+ vrgDeleteIncompleteVR.cleanupVRC()
+ })
+ })
+
+ // Test VRG deletion when VR completed and Validated condition is missing (csi-addons < 0.10.0)
+ var vrgDeleteCompletedVR *vrgTest
+ //nolint:dupl
+ Context("VR failed validation in primary state and Validated condition is missing", func() {
+ createTestTemplate := &template{
+ ClaimBindInfo: corev1.ClaimBound,
+ VolumeBindInfo: corev1.VolumeBound,
+ schedulingInterval: "1h",
+ storageClassName: "manual",
+ replicationClassName: "test-replicationclass",
+ vrcProvisioner: "manual.storage.com",
+ scProvisioner: "manual.storage.com",
+ replicationClassLabels: map[string]string{"protection": "ramen"},
+ }
+ It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() {
+ createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName}
+ vrgDeleteCompletedVR = newVRGTestCaseCreateAndStart(1, createTestTemplate, true, false)
+ })
+ It("waits for VRG to create a VR for each PVC", func() {
+ expectedVRCount := len(vrgDeleteCompletedVR.pvcNames)
+ vrgDeleteCompletedVR.waitForVRCountToMatch(expectedVRCount)
+ })
+ It("simulate completed VR", func() {
+ vrgDeleteCompletedVR.promoteVolRepsWithOptions(promoteOptions{ValidatedMissing: true})
+ })
+ It("VRG can be deleted", func() {
+ By("deleting the VRG")
+ vrg := vrgDeleteCompletedVR.getVRG()
+ Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed())
+
+ By("ensuring the VRG is deleted")
+ Eventually(func() error {
+ return apiReader.Get(context.TODO(), vrgDeleteCompletedVR.vrgNamespacedName(), vrg)
+ }, vrgtimeout, vrginterval).
+ Should(MatchError(errors.NewNotFound(schema.GroupResource{
+ Group: ramendrv1alpha1.GroupVersion.Group,
+ Resource: "volumereplicationgroups",
+ }, vrgDeleteCompletedVR.vrgName)))
+
+ vrgDeleteCompletedVR.cleanupNamespace()
+ vrgDeleteCompletedVR.cleanupSC()
+ vrgDeleteCompletedVR.cleanupVRC()
+ })
+ })
+
// Try the simple case of creating VRG, PVC, PV and
// check whether VolRep resources are created or not
var vrgTestCases []*vrgTest
@@ -2164,17 +2308,26 @@ func (v *vrgTest) waitForVRCountToMatch(vrCount int) {
}
func (v *vrgTest) promoteVolReps() {
- v.promoteVolRepsAndDo(func(index, count int) {
+ v.promoteVolRepsAndDo(promoteOptions{}, func(index, count int) {
// VRG should not be ready until last VolRep is ready.
v.verifyVRGStatusExpectation(index == count-1, vrgController.VRGConditionReasonReady)
})
}
func (v *vrgTest) promoteVolRepsWithoutVrgStatusCheck() {
- v.promoteVolRepsAndDo(func(index, count int) {})
+ v.promoteVolRepsAndDo(promoteOptions{}, func(index, count int) {})
+}
+
+func (v *vrgTest) promoteVolRepsWithOptions(options promoteOptions) {
+ v.promoteVolRepsAndDo(options, func(index, count int) {})
+}
+
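+// promoteOptions controls how the test fakes the VolumeReplication status:
+// ValidatedMissing omits the Validated condition (as with csi-addons < 0.10.0),
+// and ValidatedFailed reports validation, and hence promotion, as failed.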
+type promoteOptions struct {
+ ValidatedMissing bool
+ ValidatedFailed bool
}
-func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) {
+func (v *vrgTest) promoteVolRepsAndDo(options promoteOptions, do func(int, int)) {
By("Promoting VolumeReplication resources " + v.namespace)
volRepList := &volrep.VolumeReplicationList{}
@@ -2188,33 +2341,17 @@ func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) {
volRep := volRepList.Items[index]
volRepStatus := volrep.VolumeReplicationStatus{
- Conditions: []metav1.Condition{
- {
- Type: volrep.ConditionCompleted,
- Reason: volrep.Promoted,
- ObservedGeneration: volRep.Generation,
- Status: metav1.ConditionTrue,
- LastTransitionTime: metav1.NewTime(time.Now()),
- },
- {
- Type: volrep.ConditionDegraded,
- Reason: volrep.Healthy,
- ObservedGeneration: volRep.Generation,
- Status: metav1.ConditionFalse,
- LastTransitionTime: metav1.NewTime(time.Now()),
- },
- {
- Type: volrep.ConditionResyncing,
- Reason: volrep.NotResyncing,
- ObservedGeneration: volRep.Generation,
- Status: metav1.ConditionFalse,
- LastTransitionTime: metav1.NewTime(time.Now()),
- },
- },
+ Conditions: v.generateVRConditions(volRep.Generation, options),
+ ObservedGeneration: volRep.Generation,
+ State: volrep.PrimaryState,
+ Message: "volume is marked primary",
+ }
+
+ if options.ValidatedFailed {
+ volRepStatus.State = volrep.UnknownState
+ volRepStatus.Message = "precondition failed ..."
}
- volRepStatus.ObservedGeneration = volRep.Generation
- volRepStatus.State = volrep.PrimaryState
- volRepStatus.Message = "volume is marked primary"
+
volRep.Status = volRepStatus
err = k8sClient.Status().Update(context.TODO(), &volRep)
@@ -2224,12 +2361,88 @@ func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) {
Name: volRep.Name,
Namespace: volRep.Namespace,
}
- v.waitForVolRepPromotion(volrepKey)
+
+ if options.ValidatedFailed {
+ if options.ValidatedMissing {
+ v.waitForVolRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionFalse)
+ } else {
+ v.waitForVolRepCondition(volrepKey, volrep.ConditionValidated, metav1.ConditionFalse)
+ }
+ } else {
+ v.waitForVolRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionTrue)
+ v.waitForProtectedPVCs(volrepKey)
+ }
do(index, len(volRepList.Items))
}
}
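+// generateVRConditions builds the fake VolumeReplication conditions for a promoted
+// VR, adjusting the Validated and Completed conditions per the promoteOptions.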
+func (v *vrgTest) generateVRConditions(generation int64, options promoteOptions) []metav1.Condition {
+ var conditions []metav1.Condition
+
+ lastTransitionTime := metav1.NewTime(time.Now())
+
+ if !options.ValidatedMissing {
+ // Default to a successful validation; flip to failure below when requested.
+ validated := metav1.Condition{
+ Type: volrep.ConditionValidated,
+ Reason: volrep.PrerequisiteMet,
+ ObservedGeneration: generation,
+ Status: metav1.ConditionTrue,
+ LastTransitionTime: lastTransitionTime,
+ }
+
+ if options.ValidatedFailed {
+ validated.Status = metav1.ConditionFalse
+ validated.Reason = volrep.PrerequisiteNotMet
+ }
+
+ conditions = append(conditions, validated)
+ }
+
+ completed := metav1.Condition{
+ Type: volrep.ConditionCompleted,
+ Reason: volrep.Promoted,
+ ObservedGeneration: generation,
+ Status: metav1.ConditionTrue,
+ LastTransitionTime: lastTransitionTime,
+ }
+
+ if options.ValidatedFailed {
+ completed.Status = metav1.ConditionFalse
+ completed.Reason = volrep.FailedToPromote
+ }
+
+ degraded := metav1.Condition{
+ Type: volrep.ConditionDegraded,
+ Reason: volrep.Healthy,
+ ObservedGeneration: generation,
+ Status: metav1.ConditionFalse,
+ LastTransitionTime: lastTransitionTime,
+ }
+ resyncing := metav1.Condition{
+ Type: volrep.ConditionResyncing,
+ Reason: volrep.NotResyncing,
+ ObservedGeneration: generation,
+ Status: metav1.ConditionFalse,
+ LastTransitionTime: lastTransitionTime,
+ }
+
+ return append(conditions, completed, degraded, resyncing)
+}
+
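+// deleteVolReps deletes all VolumeReplication resources in the test namespace,
+// simulating external cleanup so that a blocked VRG deletion can complete.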
+func (v *vrgTest) deleteVolReps() {
+ vrList := &volrep.VolumeReplicationList{}
+ err := k8sClient.List(context.TODO(), vrList, &client.ListOptions{Namespace: v.namespace})
+ Expect(err).NotTo(HaveOccurred(), "failed to get a list of VRs in namespace %s", v.namespace)
+
+ for i := range vrList.Items {
+ vr := vrList.Items[i]
+
+ err := k8sClient.Delete(context.TODO(), &vr)
+ Expect(err).NotTo(HaveOccurred(), "failed to delete volRep %v/%s", vr.Namespace, vr.Name)
+ }
+}
+
func (v *vrgTest) protectDeletionOfVolReps() {
By("Adding a finalizer to protect VolumeReplication resources being deleted " + v.namespace)
@@ -2268,23 +2481,36 @@ func (v *vrgTest) unprotectDeletionOfVolReps() {
}
}
-func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName) {
+func (v *vrgTest) waitForVolRepCondition(
+ vrNamespacedName types.NamespacedName,
+ conditionType string,
+ conditionStatus metav1.ConditionStatus,
+) {
updatedVolRep := volrep.VolumeReplication{}
Eventually(func() bool {
err := k8sClient.Get(context.TODO(), vrNamespacedName, &updatedVolRep)
+ if err != nil {
+ return false
+ }
- return err == nil && len(updatedVolRep.Status.Conditions) == 3
+ condition := meta.FindStatusCondition(updatedVolRep.Status.Conditions, conditionType)
+ if condition == nil {
+ return false
+ }
+
+ return condition.Status == conditionStatus
}, vrgtimeout, vrginterval).Should(BeTrue(),
- "failed to wait for volRep condition type to change to 'ConditionCompleted' (%d)",
- len(updatedVolRep.Status.Conditions))
+ "failed to wait for volRep condition %q to become %q", conditionType, conditionStatus)
+}
+func (v *vrgTest) waitForProtectedPVCs(vrNamespacedName types.NamespacedName) {
Eventually(func() bool {
vrg := v.getVRG()
// as of now name of VolumeReplication resource created by the VolumeReplicationGroup
// is same as the pvc that it replicates. When that changes this has to be changed to
// use the right name to get the appropriate protected PVC condition from VRG status.
- protectedPVC := vrgController.FindProtectedPVC(vrg, updatedVolRep.Namespace, updatedVolRep.Name)
+ protectedPVC := vrgController.FindProtectedPVC(vrg, vrNamespacedName.Namespace, vrNamespacedName.Name)
// failed to get the protectedPVC. Returning false
if protectedPVC == nil {
@@ -2293,7 +2519,7 @@ func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName)
return v.checkProtectedPVCSuccess(vrg, protectedPVC)
}, vrgtimeout, vrginterval).Should(BeTrue(),
- "while waiting for protected pvc condition %s/%s", updatedVolRep.Namespace, updatedVolRep.Name)
+ "while waiting for protected pvc condition %s/%s", vrNamespacedName.Namespace, vrNamespacedName.Name)
}
func (v *vrgTest) checkProtectedPVCSuccess(vrg *ramendrv1alpha1.VolumeReplicationGroup,
diff --git a/test/Makefile b/test/Makefile
index 7008c364c..7b58f13e5 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -5,6 +5,9 @@
# hardware acceleration for VMs.
DRIVER ?= vm
+# drenv start timeout in seconds
+TIMEOUT ?= 600
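+# Override for slow hosts, for example: make cluster TIMEOUT=1200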
+
env := envs/$(DRIVER).yaml
prefix := drenv-test-
@@ -50,7 +53,7 @@ coverage-html:
xdg-open htmlcov/index.html
cluster:
- drenv start --name-prefix $(prefix) $(env) -v
+ drenv start --name-prefix $(prefix) $(env) --verbose --timeout $(TIMEOUT)
clean:
drenv delete --name-prefix $(prefix) $(env)
diff --git a/test/addons/rook-pool/start b/test/addons/rook-pool/start
index d41502433..b06b5ca2e 100755
--- a/test/addons/rook-pool/start
+++ b/test/addons/rook-pool/start
@@ -15,9 +15,13 @@ from drenv import kubectl
def deploy(cluster):
print("Creating RBD pool and storage/snapshot classes")
+
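+ # Render storage-class.yaml with the cluster name so every cluster gets a unique storageid label.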
+ template = drenv.template("storage-class.yaml")
+ yaml = template.substitute(cluster=cluster)
+
+ kubectl.apply("--filename=-", input=yaml, context=cluster)
kubectl.apply(
"--filename=replica-pool.yaml",
- "--filename=storage-class.yaml",
"--filename=snapshot-class.yaml",
context=cluster,
)
diff --git a/test/addons/rook-pool/storage-class.yaml b/test/addons/rook-pool/storage-class.yaml
index 389fe2378..bb4786d80 100644
--- a/test/addons/rook-pool/storage-class.yaml
+++ b/test/addons/rook-pool/storage-class.yaml
@@ -7,7 +7,7 @@ kind: StorageClass
metadata:
name: rook-ceph-block
labels:
- ramendr.openshift.io/storageid: rook-ceph-storage-id
+ ramendr.openshift.io/storageid: rook-ceph-$cluster-1
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
clusterID: rook-ceph
diff --git a/test/drenv/__main__.py b/test/drenv/__main__.py
index d81107262..7fd709483 100644
--- a/test/drenv/__main__.py
+++ b/test/drenv/__main__.py
@@ -76,6 +76,11 @@ def parse_args():
metavar="N",
help="maximum number of workers per profile",
)
+ p.add_argument(
+ "--timeout",
+ type=int,
+ help="time in seconds to wait until clsuter is started",
+ )
p = add_command(sp, "stop", do_stop, help="stop an environment")
p.add_argument(
@@ -379,7 +384,7 @@ def start_cluster(profile, hooks=(), args=None, **options):
provider = providers.get(profile["provider"])
existing = provider.exists(profile)
- provider.start(profile, verbose=args.verbose)
+ provider.start(profile, verbose=args.verbose, timeout=args.timeout)
provider.configure(profile, existing=existing)
if existing:
diff --git a/test/drenv/envfile.py b/test/drenv/envfile.py
index 2c8c19218..d166a7325 100644
--- a/test/drenv/envfile.py
+++ b/test/drenv/envfile.py
@@ -46,11 +46,11 @@
},
"darwin": {
PROVIDER: {
- "x86_64": "minikube",
+ "x86_64": "lima",
"arm64": "lima",
},
VM: {
- "x86_64": "hyperkit",
+ "x86_64": "",
"arm64": "",
},
CONTAINER: "podman",
@@ -136,7 +136,7 @@ def _validate_profile(profile, addons_root):
# If True, this is an external cluster and we don't have to start it.
profile.setdefault("external", False)
- # Properties for drenv managed cluster.
+ # Common properties.
profile.setdefault("provider", PROVIDER)
profile.setdefault("driver", VM)
profile.setdefault("container_runtime", "")
@@ -155,6 +155,9 @@ def _validate_profile(profile, addons_root):
profile.setdefault("containerd", None)
profile.setdefault("workers", [])
+ # Lima provider properties.
+ profile.setdefault("rosetta", True)
+
_validate_platform_defaults(profile)
for i, worker in enumerate(profile["workers"]):
diff --git a/test/drenv/kubeconfig.py b/test/drenv/kubeconfig.py
index 06c3bc8f3..cf2c78447 100644
--- a/test/drenv/kubeconfig.py
+++ b/test/drenv/kubeconfig.py
@@ -51,7 +51,7 @@ def remove(profile, target=DEFAULT_CONFIG):
return
for k in ("contexts", "clusters", "users"):
- old = config.get(k, [])
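+ # get() returns the stored None when a key exists with a null value; normalize it to an empty list.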
+ old = config.get(k) or []
new = [v for v in old if v["name"] != profile["name"]]
if len(new) < len(old):
config[k] = new
diff --git a/test/drenv/providers/external.py b/test/drenv/providers/external.py
index 2ed2a0cd9..d1f0ae835 100644
--- a/test/drenv/providers/external.py
+++ b/test/drenv/providers/external.py
@@ -25,7 +25,7 @@ def exists(profile):
return True
-def start(profile, verbose=False):
+def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Checking external cluster status", profile["name"])
diff --git a/test/drenv/providers/lima/__init__.py b/test/drenv/providers/lima/__init__.py
index 7e6570f89..d2db1c75c 100644
--- a/test/drenv/providers/lima/__init__.py
+++ b/test/drenv/providers/lima/__init__.py
@@ -61,7 +61,7 @@ def exists(profile):
return False
-def start(profile, verbose=False):
+def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Starting lima cluster", profile["name"])
@@ -76,7 +76,7 @@ def start(profile, verbose=False):
# Get vm before starting to detect a stopped vm.
vm = _get_vm(profile)
- _start_vm(profile)
+ _start_vm(profile, timeout=timeout)
_add_kubeconfig(profile, vm)
debug = partial(logging.debug, f"[{profile['name']}] %s")
@@ -181,7 +181,9 @@ def _write_config(profile, path):
# The "vz" type is required to support amd64 images on arm64, needed for
# OCM, and also provide the best performance.
config["vmType"] = "vz"
- config["rosetta"] = {"enabled": True, "binfmt": True}
+
+ if profile["rosetta"]:
+ config["rosetta"] = {"enabled": True, "binfmt": True}
# We always use socket_vmnet to get shared network.
config["networks"] = [{"socket": "/var/run/socket_vmnet"}]
@@ -270,8 +272,12 @@ def _create_vm(profile, config):
_watch("create", "--name", profile["name"], config, context=profile["name"])
-def _start_vm(profile):
- _watch("start", profile["name"], context=profile["name"])
+def _start_vm(profile, timeout=None):
+ args = ["start"]
+ if timeout:
+ args.append(f"--timeout={timeout}s")
+ args.append(profile["name"])
+ _watch(*args, context=profile["name"])
def _stop_vm(profile):
diff --git a/test/drenv/providers/lima/k8s.yaml b/test/drenv/providers/lima/k8s.yaml
index d578755ec..3d016bc5c 100644
--- a/test/drenv/providers/lima/k8s.yaml
+++ b/test/drenv/providers/lima/k8s.yaml
@@ -13,6 +13,8 @@
images:
- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img"
arch: "aarch64"
+ - location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img"
+ arch: "x86_64"
mounts: []
@@ -24,23 +26,11 @@ containerd:
# forwarding cannot work for multiple clusters since same port from multiple
# clusters is mapped to the same host port.
portForwards:
- - guestPortRange: [1, 65535]
- guestIP: "0.0.0.0"
- ignore: true
+ - ignore: true
+ proto: any
provision:
- - mode: system
- script: |
- #!/bin/bash
- set -eux -o pipefail
- echo "Promoting default route on shared network"
- route=$(ip route show default dev lima0)
- if [ -n "$route" ] && ! echo $route | grep -E -q "metric 1\b"; then
- ip route add $route metric 1
- ip route del $route
- fi
-
# See
- mode: system
script: |
@@ -48,20 +38,20 @@ provision:
set -eux -o pipefail
command -v kubeadm >/dev/null 2>&1 && exit 0
# Install and configure prerequisites
- cat </dev/null 2>&1; do sleep 3; done"; then
- echo >&2 "kubernetes cluster is not up and running yet"
+ if ! timeout 300s bash -c "until kubectl get --raw /readyz >/dev/null 2>&1; do sleep 3; done"; then
+ echo >&2 "kubernetes cluster is not ready yet"
exit 1
fi
- - description: "coredns deployment to be running"
- script: |
- #!/bin/bash
- set -eux -o pipefail
- kubectl wait -n kube-system --timeout=180s --for=condition=available deploy coredns
-
copyToHost:
- guest: "/etc/kubernetes/admin.conf"
host: "{{.Dir}}/copied-from-guest/kubeconfig.yaml"
diff --git a/test/drenv/providers/minikube.py b/test/drenv/providers/minikube.py
index 480ceb3af..f03bd39f5 100644
--- a/test/drenv/providers/minikube.py
+++ b/test/drenv/providers/minikube.py
@@ -61,7 +61,7 @@ def exists(profile):
return False
-def start(profile, verbose=False):
+def start(profile, verbose=False, timeout=None):
start = time.monotonic()
logging.info("[%s] Starting minikube cluster", profile["name"])
@@ -119,7 +119,7 @@ def start(profile, verbose=False):
# TODO: Use --interactive=false when the bug is fixed.
# https://github.com/kubernetes/minikube/issues/19518
- _watch("start", *args, profile=profile["name"])
+ _watch("start", *args, profile=profile["name"], timeout=timeout)
logging.info(
"[%s] Cluster started in %.2f seconds",
@@ -364,11 +364,11 @@ def _run(command, *args, profile=None, output=None):
return commands.run(*cmd)
-def _watch(command, *args, profile=None):
+def _watch(command, *args, profile=None, timeout=None):
cmd = ["minikube", command, "--profile", profile]
cmd.extend(args)
logging.debug("[%s] Running %s", profile, cmd)
- for line in commands.watch(*cmd):
+ for line in commands.watch(*cmd, timeout=timeout):
logging.debug("[%s] %s", profile, line)
diff --git a/test/envs/vm.yaml b/test/envs/vm.yaml
index 85da2db03..d947ba806 100644
--- a/test/envs/vm.yaml
+++ b/test/envs/vm.yaml
@@ -8,7 +8,9 @@ profiles:
- name: cluster
driver: $vm
container_runtime: containerd
- memory: "3g"
+ cpus: 1
+ memory: "2g"
+ rosetta: false
workers:
- addons:
- name: example