Skip to content

Commit

Permalink
Create backups on release upgrade (k0rdent#937)
Browse files Browse the repository at this point in the history
* add new flag to scheduled backups to enable autobackups during
  upgrades
* create auto backups if velero is installed and upgrade is progressing
* block upgrades until all of the backups are completed
* new indexer for backups with autoupgrades
* new RBAC permission for apps/deployments
* update signature of func adding kcm component labels
* put mgmtbackups into backups
* reconcile mgmtbackups statuses after restoration
  • Loading branch information
zerospiel authored Jan 27, 2025
1 parent 401d0fa commit e7d9a1c
Show file tree
Hide file tree
Showing 20 changed files with 444 additions and 49 deletions.
4 changes: 0 additions & 4 deletions PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ resources:
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: k0rdent.mirantis.com
group: k0rdent.mirantis.com
Expand Down Expand Up @@ -47,7 +46,6 @@ resources:
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: k0rdent.mirantis.com
group: k0rdent.mirantis.com
Expand All @@ -65,7 +63,6 @@ resources:
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: k0rdent.mirantis.com
group: k0rdent.mirantis.com
Expand Down Expand Up @@ -106,7 +103,6 @@ resources:
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: k0rdent.mirantis.com
group: k0rdent.mirantis.com
Expand Down
23 changes: 17 additions & 6 deletions api/v1alpha1/indexers.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func SetupIndexers(ctx context.Context, mgr ctrl.Manager) error {
setupMultiClusterServiceServicesIndexer,
setupOwnerReferenceIndexers,
setupManagementBackupIndexer,
setupManagementBackupAutoUpgradesIndexer,
} {
merr = errors.Join(merr, f(ctx, mgr))
}
Expand Down Expand Up @@ -252,15 +253,25 @@ func setupManagementBackupIndexer(ctx context.Context, mgr ctrl.Manager) error {
return nil
}

const trueVal = "true"
if mb.Spec.Schedule != "" {
return []string{trueVal}
if mb.Spec.Schedule != "" || !mb.IsCompleted() {
return []string{"true"}
}

if mb.Status.LastBackup == nil || mb.Status.LastBackup.CompletionTimestamp.IsZero() {
return []string{trueVal}
return nil
})
}

// ManagementBackupAutoUpgradeIndexKey is the indexer field name used to select only
// those [ManagementBackup] objects that have a schedule and the
// perform-on-management-upgrade flag set. Such objects are indexed under the value "true".
const ManagementBackupAutoUpgradeIndexKey = "k0rdent.management-backup-upgrades"

// setupManagementBackupAutoUpgradesIndexer registers a field index on [ManagementBackup]
// under [ManagementBackupAutoUpgradeIndexKey]. An object is indexed with the value "true"
// only when it has a non-empty schedule and PerformOnManagementUpgrade enabled;
// every other object (or an object of an unexpected type) yields no index entry.
func setupManagementBackupAutoUpgradesIndexer(ctx context.Context, mgr ctrl.Manager) error {
	extract := func(obj client.Object) []string {
		backup, isBackup := obj.(*ManagementBackup)
		if !isBackup {
			return nil
		}

		if backup.Spec.Schedule != "" && backup.Spec.PerformOnManagementUpgrade {
			return []string{"true"}
		}

		return nil
	}

	return mgr.GetFieldIndexer().IndexField(ctx, &ManagementBackup{}, ManagementBackupAutoUpgradeIndexKey, extract)
}
9 changes: 9 additions & 0 deletions api/v1alpha1/management_backup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ type ManagementBackupSpec struct {
// Schedule is a Cron expression defining when to run the scheduled [ManagementBackup].
// If not set, the object is considered to be run only once.
Schedule string `json:"schedule,omitempty"`
// PerformOnManagementUpgrade indicates that a single [ManagementBackup]
// should be created before the [Management] release upgrade and stored
// in the storage location of this [ManagementBackup] (if a non-default one is set).
PerformOnManagementUpgrade bool `json:"performOnManagementUpgrade,omitempty"`
}

// ManagementBackupStatus defines the observed state of ManagementBackup
Expand All @@ -59,6 +63,11 @@ func (s *ManagementBackup) IsSchedule() bool {
return s.Spec.Schedule != ""
}

// IsCompleted reports whether the latest underlying backup has been completed,
// i.e. the last backup status is recorded and carries a non-zero completion timestamp.
func (s *ManagementBackup) IsCompleted() bool {
	last := s.Status.LastBackup
	if last == nil {
		return false
	}

	return !last.CompletionTimestamp.IsZero()
}

// TimestampedBackupName returns the backup name related to scheduled [ManagementBackup] based on the given timestamp.
func (s *ManagementBackup) TimestampedBackupName(timestamp time.Time) string {
return s.Name + "-" + timestamp.Format("20060102150405")
Expand Down
6 changes: 4 additions & 2 deletions internal/controller/accessmanagement_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ func (r *AccessManagementReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, err
}

if err := utils.AddKCMComponentLabel(ctx, r.Client, accessMgmt); err != nil {
l.Error(err, "adding component label")
if updated, err := utils.AddKCMComponentLabel(ctx, r.Client, accessMgmt); updated || err != nil {
if err != nil {
l.Error(err, "adding component label")
}
return ctrl.Result{}, err
}

Expand Down
1 change: 1 addition & 0 deletions internal/controller/accessmanagement_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ var _ = Describe("Template Management Controller", func() {
am := am.NewAccessManagement(
am.WithName(amName),
am.WithAccessRules(accessRules),
am.WithLabels(kcm.GenericComponentNameLabel, kcm.GenericComponentLabelValueKCM),
)

ctChain := tc.NewClusterTemplateChain(tc.WithName(ctChainName), tc.WithNamespace(systemNamespace.Name), tc.ManagedByKCM())
Expand Down
130 changes: 125 additions & 5 deletions internal/controller/backup/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"errors"
"fmt"
"strings"
"time"

cron "github.com/robfig/cron/v3"
Expand All @@ -30,6 +31,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

kcmv1alpha1 "github.com/K0rdent/kcm/api/v1alpha1"
"github.com/K0rdent/kcm/internal/utils"
)

// scheduleMgmtNameLabel holds a reference to the [github.com/K0rdent/kcm/api/v1alpha1.ManagementBackup] object name.
Expand All @@ -40,7 +42,16 @@ func (r *Reconciler) ReconcileBackup(ctx context.Context, mgmtBackup *kcmv1alpha
return ctrl.Result{}, nil
}

l := ctrl.LoggerFrom(ctx)
if updated, err := utils.AddKCMComponentLabel(ctx, r.cl, mgmtBackup); updated || err != nil { // put all mgmtbackup to backup
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to add component label: %w", err)
}
return ctrl.Result{}, nil
}

if isRestored(mgmtBackup) {
return r.updateAfterRestoration(ctx, mgmtBackup)
}

if mgmtBackup.IsSchedule() { // schedule-creation path
cronSchedule, err := cron.ParseStandard(mgmtBackup.Spec.Schedule)
Expand All @@ -51,6 +62,7 @@ func (r *Reconciler) ReconcileBackup(ctx context.Context, mgmtBackup *kcmv1alpha
isDue, nextAttemptTime := getNextAttemptTime(mgmtBackup, cronSchedule)

// here we can put as many conditions as we want, e.g. if upgrade is progressing
// TODO: add a condition to check if management upgrade is progressing
isOkayToCreateBackup := isDue && !r.isVeleroBackupProgressing(ctx, mgmtBackup)

if isOkayToCreateBackup {
Expand All @@ -69,10 +81,11 @@ func (r *Reconciler) ReconcileBackup(ctx context.Context, mgmtBackup *kcmv1alpha
if mgmtBackup.Status.LastBackupName == "" { // is not due, nothing to do
return ctrl.Result{}, nil
}
} else if mgmtBackup.Status.LastBackupName == "" { // single mgmtbackup, velero backup has not been created yet
} else if mgmtBackup.Status.LastBackupName == "" && !isRestored(mgmtBackup) { // single mgmtbackup, velero backup has not been created yet
return r.createSingleBackup(ctx, mgmtBackup)
}

l := ctrl.LoggerFrom(ctx)
l.V(1).Info("Collecting backup status")

backupName := mgmtBackup.Name
Expand All @@ -96,8 +109,76 @@ func (r *Reconciler) ReconcileBackup(ctx context.Context, mgmtBackup *kcmv1alpha
return ctrl.Result{}, nil
}

// updateAfterRestoration reconciles a [kcmv1alpha1.ManagementBackup] that carries the velero
// restore/backup labels, i.e. an object that has been restored from a backup.
//
// The flow is:
//   - if any of the last-backup status fields is already set, only the velero labels
//     are removed and the object is updated;
//   - otherwise the velero Backups are listed and the status is rebuilt either from the
//     most recent backup produced by the schedule, or (for a single backup) from the
//     velero Backup with the same name;
//   - if a source backup was found, only the status is updated and the labels are left
//     untouched in this pass (NOTE(review): presumably a subsequent reconcile then takes
//     the first branch and removes the labels — confirm);
//   - if nothing was found, the velero labels are removed and the object is updated.
func (r *Reconciler) updateAfterRestoration(ctx context.Context, mgmtBackup *kcmv1alpha1.ManagementBackup) (ctrl.Result, error) {
	logger := ctrl.LoggerFrom(ctx).V(1)

	// dropVeleroLabels logs the given reason, strips the velero labels and persists the object.
	dropVeleroLabels := func(reason string) (ctrl.Result, error) {
		logger.Info(reason)

		delete(mgmtBackup.Labels, velerov1.BackupNameLabel)
		delete(mgmtBackup.Labels, velerov1.RestoreNameLabel)

		if err := r.cl.Update(ctx, mgmtBackup); err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to update ManagementBackup labels after restoration: %w", err)
		}

		return ctrl.Result{}, nil
	}

	status := &mgmtBackup.Status

	statusAlreadySet := status.LastBackup != nil || status.LastBackupName != "" || !status.LastBackupTime.IsZero()
	if statusAlreadySet { // fast-track
		return dropVeleroLabels("Removing velero labels after restoration when status is already set")
	}

	backupList := &velerov1.BackupList{}
	if err := r.cl.List(ctx, backupList); err != nil {
		return ctrl.Result{}, fmt.Errorf("failed to list velero Backups: %w", err)
	}

	// source is the velero Backup the status is rebuilt from; nil means none was found.
	var source *velerov1.Backup
	switch {
	case mgmtBackup.IsSchedule():
		logger.Info("Updating schedule after restoration")
		// the next attempt time will be computed on the next event
		if latest, found := getMostRecentProducedBackup(mgmtBackup.Name, backupList.Items); found {
			logger.Info("Found last backup", "last_backup_name", latest.Name)
			source = latest
		} else { // nothing found means the schedule has not produced any backups yet
			logger.Info("No last backup has been found")
		}
	default:
		logger.Info("Updating single backup after restoration")
		for i := range backupList.Items {
			if backupList.Items[i].Name == mgmtBackup.Name {
				source = &backupList.Items[i]
				break
			}
		}
	}

	if source == nil {
		return dropVeleroLabels("Removing velero labels after restoration without status set")
	}

	status.LastBackup = &source.Status
	status.LastBackupName = source.Name
	status.LastBackupTime = source.Status.StartTimestamp

	logger.Info("Updating status after restoration")
	if err := r.cl.Status().Update(ctx, mgmtBackup); err != nil {
		return ctrl.Result{}, fmt.Errorf("failed to update ManagementBackup status after restoration: %w", err)
	}

	return ctrl.Result{}, nil
}

func (r *Reconciler) createScheduleBackup(ctx context.Context, mgmtBackup *kcmv1alpha1.ManagementBackup, nextAttemptTime time.Time) (ctrl.Result, error) {
now := time.Now()
now := time.Now().In(time.UTC)
backupName := mgmtBackup.TimestampedBackupName(now)

if err := r.createNewVeleroBackup(ctx, backupName, withScheduleLabel(mgmtBackup.Name), withStorageLocation(mgmtBackup.Spec.StorageLocation)); err != nil {
Expand Down Expand Up @@ -127,7 +208,7 @@ func (r *Reconciler) createSingleBackup(ctx context.Context, mgmtBackup *kcmv1al
}

mgmtBackup.Status.LastBackupName = mgmtBackup.Name
mgmtBackup.Status.LastBackupTime = &metav1.Time{Time: time.Now()}
mgmtBackup.Status.LastBackupTime = &metav1.Time{Time: time.Now().In(time.UTC)}

if err := r.cl.Status().Update(ctx, mgmtBackup); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update ManagementBackup %s status: %w", mgmtBackup.Name, err)
Expand Down Expand Up @@ -219,14 +300,53 @@ func (r *Reconciler) propagateMetaError(ctx context.Context, mgmtBackup *kcmv1al
return ctrl.Result{}, nil // no need to requeue if got such error
}

// getMostRecentProducedBackup returns the newest velero Backup produced by the scheduled
// ManagementBackup named mgmtBackupName and reports whether such a backup was found.
//
// A backup is considered only if all of the following hold:
//   - its scheduleMgmtNameLabel label equals mgmtBackupName;
//   - its name has the form "<mgmtBackupName>-<timestamp>", where the timestamp uses
//     the "20060102150405" layout;
//   - the parsed timestamp is not later than the current UTC time.
//
// When a backup is found, the returned pointer refers to the matching element of backups.
// When nothing matches (including an empty slice), an empty non-nil Backup and false
// are returned, so callers never receive an unrelated backup.
func getMostRecentProducedBackup(mgmtBackupName string, backups []velerov1.Backup) (*velerov1.Backup, bool) {
	const timeFormat = "20060102150405"

	var (
		now        = time.Now().In(time.UTC)
		prefix     = mgmtBackupName + "-"
		mostRecent time.Time
		latestIdx  = -1 // index of the most recent matching backup, -1 if none yet
	)
	// Iterate by index to avoid copying every Backup and to hand out a pointer into the slice.
	for i := range backups {
		backup := &backups[i]
		if backup.Labels[scheduleMgmtNameLabel] != mgmtBackupName {
			continue // process only backups produced by this schedule
		}

		// Require the schedule prefix instead of silently accepting names without it.
		ts, hasPrefix := strings.CutPrefix(backup.Name, prefix)
		if !hasPrefix {
			continue
		}

		t, err := time.Parse(timeFormat, ts)
		if err != nil {
			continue // the name suffix is not a timestamp produced by the schedule
		}

		if !t.After(now) && (latestIdx < 0 || t.After(mostRecent)) {
			mostRecent, latestIdx = t, i
		}
	}

	if latestIdx < 0 {
		return &velerov1.Backup{}, false
	}

	return &backups[latestIdx], true
}

// isRestored reports whether the given ManagementBackup carries both the velero
// restore-name and backup-name labels with non-empty values.
func isRestored(mgmtBackup *kcmv1alpha1.ManagementBackup) bool {
	labels := mgmtBackup.Labels
	if labels[velerov1.RestoreNameLabel] == "" {
		return false
	}

	return labels[velerov1.BackupNameLabel] != ""
}

func getNextAttemptTime(schedule *kcmv1alpha1.ManagementBackup, cronSchedule cron.Schedule) (bool, time.Time) {
lastBackupTime := schedule.CreationTimestamp.Time
if !schedule.Status.LastBackupTime.IsZero() {
lastBackupTime = schedule.Status.LastBackupTime.Time
}

nextAttemptTime := cronSchedule.Next(lastBackupTime) // might be in past so rely on now
now := time.Now()
now := time.Now().In(time.UTC)
isDue := now.After(nextAttemptTime)
if isDue {
nextAttemptTime = now
Expand Down
Loading

0 comments on commit e7d9a1c

Please sign in to comment.