Skip to content

Commit

Permalink
Improve logging of MCP pause manager
Browse files Browse the repository at this point in the history
  • Loading branch information
bastjan committed Oct 16, 2024
1 parent 89143cc commit a8486a8
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
2 changes: 1 addition & 1 deletion controllers/upgradeconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (r *UpgradeConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques

jobSchedRequeue, schedErr := r.scheduleJob(ctx, &uc, sched, location)
statusRequeue, stErr := r.updateNextPossibleSchedulesStatus(ctx, &uc, sched, location)
if err := multierr.Append(schedErr, stErr); err != nil {
if err := multierr.Combine(schedErr, stErr); err != nil {
return ctrl.Result{}, err
}

Expand Down
16 changes: 13 additions & 3 deletions controllers/upgradejob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -877,13 +877,19 @@ func findTrackedHookJob(ujhookName, event string, uj managedupgradev1beta1.Upgra
// It sets a timeout condition and returns an error if the delay is expired.
// It also returns an error if the machine config pools cannot be listed or updated.
func (r *UpgradeJobReconciler) pauseUnpauseMachineConfigPools(ctx context.Context, uj *managedupgradev1beta1.UpgradeJob, ensureUnpause bool) error {
l := log.FromContext(ctx).WithName("UpgradeJobReconciler.pauseUnpauseMachineConfigPools")

var controllerManagesPools bool
var controllerPausedPools bool
for _, pool := range uj.Spec.MachineConfigPools {
if pool.DelayUpgrade == (managedupgradev1beta1.UpgradeJobMachineConfigPoolDelayUpgradeSpec{}) {
continue
}
shouldPause := !ensureUnpause && r.timeSinceStartAfter(uj) < pool.DelayUpgrade.DelayMin.Duration
timeSinceStart := r.timeSinceStartAfter(uj)
minDelayReached := timeSinceStart < pool.DelayUpgrade.DelayMin.Duration
shouldPause := !ensureUnpause && minDelayReached
l = l.WithValues("poolconfig_matchLabels", pool.MatchLabels, "shouldPause", shouldPause, "minDelayReached", minDelayReached, "ensureUnpause", ensureUnpause, "timeSinceStart", timeSinceStart)

sel, err := metav1.LabelSelectorAsSelector(pool.MatchLabels)
if err != nil {
return fmt.Errorf("failed to parse machine config pool selector: %w", err)
Expand All @@ -895,20 +901,24 @@ func (r *UpgradeJobReconciler) pauseUnpauseMachineConfigPools(ctx context.Contex
return fmt.Errorf("failed to list machine config pools %q: %w", pool.MatchLabels, err)
}
for _, mcp := range mcpl.Items {
l = l.WithValues("pool", mcp.Name, "paused", mcp.Spec.Paused)
controllerManagesPools = true
controllerPausedPools = controllerPausedPools || shouldPause
// optional delay expiry
if mcp.Spec.Paused && !shouldPause && pool.DelayUpgrade.DelayMax.Duration > 0 && r.timeSinceStartAfter(uj) >= pool.DelayUpgrade.DelayMax.Duration {
if mcp.Spec.Paused && !shouldPause && pool.DelayUpgrade.DelayMax.Duration > 0 && timeSinceStart >= pool.DelayUpgrade.DelayMax.Duration {
r.setStatusCondition(&uj.Status.Conditions, metav1.Condition{
Type: managedupgradev1beta1.UpgradeJobConditionFailed,
Status: metav1.ConditionTrue,
Reason: managedupgradev1beta1.UpgradeJobReasonUnpausingPoolsExpired,
Message: fmt.Sprintf("unpausing of pool %q expired", mcp.Name),
})
// always return an error so the current reconcile is stopped and requeued
return multierr.Append(fmt.Errorf("unpausing of pool %q expired", mcp.Name), r.Status().Update(ctx, uj))
err := fmt.Errorf("unpausing of pool %q expired", mcp.Name)
l.Error(err, "unpausing of pool expired", "maxDelay", pool.DelayUpgrade.DelayMax.Duration)
return multierr.Combine(err, r.Status().Update(ctx, uj))
}
if mcp.Spec.Paused != shouldPause {
l.Info("Updating machine config pools pause field", "from", mcp.Spec.Paused, "to", shouldPause)
mcp.Spec.Paused = shouldPause
if err := r.Update(ctx, &mcp); err != nil {
return fmt.Errorf("failed to pause/unpause machine config pool %q: %w", mcp.Name, err)
Expand Down

0 comments on commit a8486a8

Please sign in to comment.