Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 2304182: Disallow relocation execution when a cluster is unreachable #370

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions internal/controller/drplacementcontrol.go
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,14 @@ func (d *DRPCInstance) RunRelocate() (bool, error) {

const done = true

if d.reconciler.numClustersQueriedSuccessfully != len(d.drPolicy.Spec.DRClusters) {
d.log.Info("Can't progress with relocation -- Not all clusters are reachable",
"numClustersQueriedSuccessfully", d.reconciler.numClustersQueriedSuccessfully,
"NumOfClusters", len(d.drPolicy.Spec.DRClusters))

return !done, nil
}

preferredCluster := d.instance.Spec.PreferredCluster
preferredClusterNamespace := d.instance.Spec.PreferredCluster

Expand Down
37 changes: 20 additions & 17 deletions internal/controller/drplacementcontrol_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,16 @@ type ProgressCallback func(string, string)
// DRPlacementControlReconciler reconciles a DRPlacementControl object
type DRPlacementControlReconciler struct {
client.Client
APIReader client.Reader
Log logr.Logger
MCVGetter rmnutil.ManagedClusterViewGetter
Scheme *runtime.Scheme
Callback ProgressCallback
eventRecorder *rmnutil.EventReporter
savedInstanceStatus rmn.DRPlacementControlStatus
ObjStoreGetter ObjectStoreGetter
RateLimiter *workqueue.TypedRateLimiter[reconcile.Request]
APIReader client.Reader
Log logr.Logger
MCVGetter rmnutil.ManagedClusterViewGetter
Scheme *runtime.Scheme
Callback ProgressCallback
eventRecorder *rmnutil.EventReporter
savedInstanceStatus rmn.DRPlacementControlStatus
ObjStoreGetter ObjectStoreGetter
RateLimiter *workqueue.TypedRateLimiter[reconcile.Request]
numClustersQueriedSuccessfully int
}

func ManifestWorkPredicateFunc() predicate.Funcs {
Expand Down Expand Up @@ -939,11 +940,13 @@ func (r *DRPlacementControlReconciler) createDRPCInstance(
return nil, err
}

vrgs, _, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log)
vrgs, cqs, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log)
if err != nil {
return nil, err
}

r.numClustersQueriedSuccessfully = cqs

d := &DRPCInstance{
reconciler: r,
ctx: ctx,
Expand Down Expand Up @@ -1663,7 +1666,7 @@ func getVRGsFromManagedClusters(
annotations[DRPCNameAnnotation] = drpc.Name
annotations[DRPCNamespaceAnnotation] = drpc.Namespace

var clustersQueriedSuccessfully int
var numClustersQueriedSuccessfully int

var failedCluster string

Expand All @@ -1675,7 +1678,7 @@ func getVRGsFromManagedClusters(
// Only NotFound error is accepted
if errors.IsNotFound(err) {
log.Info(fmt.Sprintf("VRG not found on %q", drCluster.Name))
clustersQueriedSuccessfully++
numClustersQueriedSuccessfully++

continue
}
Expand All @@ -1687,7 +1690,7 @@ func getVRGsFromManagedClusters(
continue
}

clustersQueriedSuccessfully++
numClustersQueriedSuccessfully++

if rmnutil.ResourceIsDeleted(drCluster) {
log.Info("Skipping VRG on deleted drcluster", "drcluster", drCluster.Name, "vrg", vrg.Name)
Expand All @@ -1701,15 +1704,15 @@ func getVRGsFromManagedClusters(
}

// We are done if we successfully queried all drClusters
if clustersQueriedSuccessfully == len(drClusters) {
return vrgs, clustersQueriedSuccessfully, "", nil
if numClustersQueriedSuccessfully == len(drClusters) {
return vrgs, numClustersQueriedSuccessfully, "", nil
}

if clustersQueriedSuccessfully == 0 {
if numClustersQueriedSuccessfully == 0 {
return vrgs, 0, "", fmt.Errorf("failed to retrieve VRGs from clusters")
}

return vrgs, clustersQueriedSuccessfully, failedCluster, nil
return vrgs, numClustersQueriedSuccessfully, failedCluster, nil
}

func (r *DRPlacementControlReconciler) deleteClonedPlacementRule(ctx context.Context,
Expand Down
4 changes: 2 additions & 2 deletions internal/controller/drplacementcontrol_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2479,8 +2479,8 @@ var _ = Describe("DRPlacementControl Reconciler", func() {
clearFakeUserPlacementRuleStatus(UserPlacementRuleName, DefaultDRPCNamespace)
clearDRPCStatus()
expectedAction := rmn.ActionRelocate
expectedPhase := rmn.Relocated
exptectedPorgression := rmn.ProgressionCleaningUp
expectedPhase := rmn.DRState("")
exptectedPorgression := rmn.ProgressionStatus("")
verifyDRPCStateAndProgression(expectedAction, expectedPhase, exptectedPorgression)

// User intervention is required (simulate user intervention)
Expand Down