diff --git a/api/v1beta1/constants.go b/api/v1beta1/constants.go index 60df6257..6c588781 100644 --- a/api/v1beta1/constants.go +++ b/api/v1beta1/constants.go @@ -35,6 +35,9 @@ const ( LabelElementalHostBootstrapped = "elementalhost.infrastructure.cluster.x-k8s.io/bootstrapped" LabelElementalHostNeedsReset = "elementalhost.infrastructure.cluster.x-k8s.io/needs-reset" LabelElementalHostReset = "elementalhost.infrastructure.cluster.x-k8s.io/reset" + LabelElementalHostInPlaceUpgrade = "elementalhost.infrastructure.cluster.x-k8s.io/in-place-upgrade" + InPlaceUpgradePending = "pending" + InPlaceUpgradeDone = "done" ) // HostPhases. @@ -48,6 +51,7 @@ const ( PhaseRunning = HostPhase("Running") PhaseTriggeringReset = HostPhase("Triggering Reset") PhaseResetting = HostPhase("Resetting") + PhaseOSVersionReconcile = HostPhase("Reconciling OS Version") ) // Conditions. @@ -88,6 +92,14 @@ const ( WaitingForResetReasonSeverity clusterv1.ConditionSeverity = clusterv1.ConditionSeverityInfo // ResetFailedReason indicates that the Host reset failed. ResetFailedReason = "ResetFailed" + + // OSVersionReady describes the Host OS version reconciliation phase. + OSVersionReady clusterv1.ConditionType = "OSVersionReady" + // OSVersionReconciliationFailedReason indicates that the attempted Host OS version reconciliation failed. + OSVersionReconciliationFailedReason = "OSVersionReconciliationFailed" + // WaitingForPostReconcileRebootReason indicates that the Host OS version was applied and the Host is going to reboot. + WaitingForPostReconcileRebootReason = "WaitingForPostReconcileReboot" + WaitingForPostReconcileRebootReasonSeverity clusterv1.ConditionSeverity = clusterv1.ConditionSeverityInfo ) // ElementalMachine Conditions and Reasons. 
diff --git a/api/v1beta1/elementalhost_types.go b/api/v1beta1/elementalhost_types.go index 4a7e1d16..3c58e00e 100644 --- a/api/v1beta1/elementalhost_types.go +++ b/api/v1beta1/elementalhost_types.go @@ -19,6 +19,7 @@ package v1beta1 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) @@ -35,6 +36,13 @@ type ElementalHostSpec struct { // PubKey is the host public key to verify when authenticating // Elemental API requests for this host. PubKey string `json:"pubKey,omitempty"` + // OSVersionManagement defines the OS Version and options to be reconciled + // on the host. The supported schema depends on the OSPlugin in use by + // the elemental-agent. + // +optional + // +kubebuilder:validation:Schemaless + // +kubebuilder:validation:XPreserveUnknownFields + OSVersionManagement map[string]runtime.RawExtension `json:"osVersionManagement,omitempty" yaml:"osVersionManagement,omitempty"` } // ElementalHostStatus defines the observed state of ElementalHost. diff --git a/api/v1beta1/elementalmachine_types.go b/api/v1beta1/elementalmachine_types.go index 733933f3..39a335db 100644 --- a/api/v1beta1/elementalmachine_types.go +++ b/api/v1beta1/elementalmachine_types.go @@ -19,6 +19,7 @@ package v1beta1 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) @@ -37,6 +38,15 @@ type ElementalMachineSpec struct { // using this host. // +optional HostRef *corev1.ObjectReference `json:"hostRef,omitempty"` + + // OSVersionManagement defines the OS Version and options to be reconciled + // on the host. The supported schema depends on the OSPlugin in use by + // the elemental-agent. Whenever an ElementalHost is associated to this + // ElementalMachine, the OSVersionManagement will be applied to it. 
+ // +optional + // +kubebuilder:validation:Schemaless + // +kubebuilder:validation:XPreserveUnknownFields + OSVersionManagement map[string]runtime.RawExtension `json:"osVersionManagement,omitempty" yaml:"osVersionManagement,omitempty"` } // ElementalMachineStatus defines the observed state of ElementalMachine. diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index a187f420..b44004e7 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -366,6 +366,13 @@ func (in *ElementalHostSpec) DeepCopyInto(out *ElementalHostSpec) { *out = new(v1.ObjectReference) **out = **in } + if in.OSVersionManagement != nil { + in, out := &in.OSVersionManagement, &out.OSVersionManagement + *out = make(map[string]runtime.RawExtension, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElementalHostSpec. @@ -477,6 +484,13 @@ func (in *ElementalMachineSpec) DeepCopyInto(out *ElementalMachineSpec) { *out = new(v1.ObjectReference) **out = **in } + if in.OSVersionManagement != nil { + in, out := &in.OSVersionManagement, &out.OSVersionManagement + *out = make(map[string]runtime.RawExtension, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ElementalMachineSpec. diff --git a/cmd/agent/common.go b/cmd/agent/common.go new file mode 100644 index 00000000..50abe5d7 --- /dev/null +++ b/cmd/agent/common.go @@ -0,0 +1,26 @@ +package agent + +import ( + infrastructurev1 "github.com/rancher-sandbox/cluster-api-provider-elemental/api/v1beta1" + "github.com/rancher-sandbox/cluster-api-provider-elemental/internal/agent/log" + "github.com/rancher-sandbox/cluster-api-provider-elemental/pkg/agent/osplugin" +) + +// handlePost handles post conditions such as Reboot or PowerOff. 
+// A true flag is returned if any of the conditions is true, to highlight the program should exit. +func handlePost(osPlugin osplugin.Plugin, post infrastructurev1.PostAction) bool { + if post.PowerOff { + log.Info("Powering off system") + if err := osPlugin.PowerOff(); err != nil { + log.Error(err, "Powering off system") + } + return true + } else if post.Reboot { + log.Info("Rebooting system") + if err := osPlugin.Reboot(); err != nil { + log.Error(err, "Rebooting system") + } + return true + } + return false +} diff --git a/cmd/agent/run.go b/cmd/agent/run.go index 116052ff..1d7b0342 100644 --- a/cmd/agent/run.go +++ b/cmd/agent/run.go @@ -7,7 +7,6 @@ import ( "github.com/rancher-sandbox/cluster-api-provider-elemental/internal/agent/log" "github.com/rancher-sandbox/cluster-api-provider-elemental/internal/agent/phase" "github.com/rancher-sandbox/cluster-api-provider-elemental/internal/api" - "github.com/rancher-sandbox/cluster-api-provider-elemental/pkg/agent/osplugin" "github.com/spf13/cobra" ) @@ -60,6 +59,23 @@ This command will reconcile the remote ElementalHost resource describing this ho return } + // Handle Upgrade + if !host.Bootstrapped || host.InPlaceUpgrade == infrastructurev1.InPlaceUpgradePending { + log.Info("Reconciling OS Version") + osVersionHandler := phase.NewOSVersionHandler(*agentContext) + post, err := osVersionHandler.Reconcile(host.OSVersionManagement) + if err != nil { + log.Error(err, "handling OS reconciliation") + log.Debugf("Waiting %s...", agentContext.Config.Agent.Reconciliation.String()) + time.Sleep(agentContext.Config.Agent.Reconciliation) + continue + } + if handlePost(agentContext.Plugin, post) { + // Exit the program if we are rebooting to apply the reconciled OS version + return + } + } + // Handle bootstrap if needed if host.BootstrapReady && !host.Bootstrapped { log.Info("Handling bootstrap application") @@ -86,22 +102,3 @@ This command will reconcile the remote ElementalHost resource describing this ho func init() { 
rootCmd.AddCommand(runCmd) } - -// handlePost handles post conditions such as Reboot or PowerOff. -// A true flag is returned if any of the conditions is true, to highlight the program should exit. -func handlePost(osPlugin osplugin.Plugin, post infrastructurev1.PostAction) bool { - if post.PowerOff { - log.Info("Powering off system") - if err := osPlugin.PowerOff(); err != nil { - log.Error(err, "Powering off system") - } - return true - } else if post.Reboot { - log.Info("Rebooting system") - if err := osPlugin.Reboot(); err != nil { - log.Error(err, "Rebooting system") - } - return true - } - return false -} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalhosts.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalhosts.yaml index 04c21839..1d7b6454 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalhosts.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalhosts.yaml @@ -154,6 +154,12 @@ spec: type: string type: object x-kubernetes-map-type: atomic + osVersionManagement: + description: |- + OSVersionManagement defines the OS Version and options to be reconciled + on the host. The supported schema depends on the OSPlugin in use by + the elemental-agent. + x-kubernetes-preserve-unknown-fields: true pubKey: description: |- PubKey is the host public key to verify when authenticating diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachines.yaml index d6400f66..3338688a 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachines.yaml @@ -105,6 +105,13 @@ spec: type: string type: object x-kubernetes-map-type: atomic + osVersionManagement: + description: |- + OSVersionManagement defines the OS Version and options to be reconciled + on the host. 
The supported schema depends on the OSPlugin in use by + the elemental-agent. Whenever an ElementalHost is associated to this + ElementalMachine, the OSVersionManagement will be applied to it. + x-kubernetes-preserve-unknown-fields: true providerID: description: |- ProviderID references the associated ElementalHost. diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachinetemplates.yaml index 91ee9313..555a97f5 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_elementalmachinetemplates.yaml @@ -101,6 +101,13 @@ spec: type: string type: object x-kubernetes-map-type: atomic + osVersionManagement: + description: |- + OSVersionManagement defines the OS Version and options to be reconciled + on the host. The supported schema depends on the OSPlugin in use by + the elemental-agent. Whenever an ElementalHost is associated to this + ElementalMachine, the OSVersionManagement will be applied to it. + x-kubernetes-preserve-unknown-fields: true providerID: description: |- ProviderID references the associated ElementalHost. 
diff --git a/doc/QUICKSTART.md b/doc/QUICKSTART.md index fceaee5b..8fcf6f52 100644 --- a/doc/QUICKSTART.md +++ b/doc/QUICKSTART.md @@ -20,15 +20,14 @@ ```bash # Install dependencies - zypper install -y docker helm kubernetes1.27-client + zypper install -y docker helm kubernetes-client make yq # Install kind - [ $(uname -m) = x86_64 ] && curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64 - chmod +x ./kind - mv ./kind /usr/local/bin/kind + curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.24.0/kind-linux-amd64 + install -o root -g root -m 0755 kind /usr/local/bin/kind # Install clusterctl - curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.5.3/clusterctl-linux-amd64 -o clusterctl + curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.8.1/clusterctl-linux-amd64 -o clusterctl install -o root -g root -m 0755 clusterctl /usr/local/bin/clusterctl systemctl enable docker @@ -48,7 +47,7 @@ apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane - image: kindest/node:v1.26.6 + image: kindest/node:v1.31.0 kubeadmConfigPatches: - | kind: InitConfiguration diff --git a/elemental-openapi.yaml b/elemental-openapi.yaml index a864d481..e54262f0 100644 --- a/elemental-openapi.yaml +++ b/elemental-openapi.yaml @@ -337,6 +337,9 @@ components: type: boolean condition: $ref: '#/components/schemas/V1Beta1Condition' + inPlaceUpgrade: + nullable: true + type: string installed: nullable: true type: boolean @@ -361,6 +364,8 @@ components: type: boolean bootstrapped: type: boolean + inPlaceUpgrade: + type: string installed: type: boolean labels: @@ -371,6 +376,10 @@ components: type: string needsReset: type: boolean + osVersionManagement: + additionalProperties: + $ref: '#/components/schemas/RuntimeRawExtension' + type: object type: object ApiRegistrationResponse: properties: diff --git a/internal/agent/elementalcli/runner.go b/internal/agent/elementalcli/runner.go index b8763bd0..d9219ecd 100644 --- 
a/internal/agent/elementalcli/runner.go +++ b/internal/agent/elementalcli/runner.go @@ -38,9 +38,16 @@ type Reset struct { Debug bool `json:"debug,omitempty" mapstructure:"debug"` } +type Upgrade struct { + ImageURI string `json:"imageUri,omitempty" mapstructure:"imageUri"` + UpgradeRecovery bool `json:"upgradeRecovery,omitempty" mapstructure:"upgradeRecovery"` + Debug bool `json:"debug,omitempty" mapstructure:"debug"` +} + type Runner interface { Install(Install) error Reset(Reset) error + Upgrade(Upgrade, string) error } func NewRunner() Runner { @@ -103,6 +110,30 @@ func (r *runner) Reset(conf Reset) error { return nil } +func (r *runner) Upgrade(conf Upgrade, correlationID string) error { + log.Debug("Running elemental upgrade") + installerOpts := []string{"elemental"} + // There are no env var bindings in elemental-cli for elemental root options + // so root flags should be passed within the command line + if conf.Debug { + installerOpts = append(installerOpts, "--debug") + } + installerOpts = append(installerOpts, "upgrade") + + cmd := exec.Command("elemental") + environmentVariables := mapToUpgradeEnv(conf, correlationID) + cmd.Env = append(os.Environ(), environmentVariables...) 
+ cmd.Stdout = os.Stdout + cmd.Args = installerOpts + cmd.Stdin = os.Stdin + cmd.Stderr = os.Stderr + log.Debugf("running: %s\n with ENV:\n%s", strings.Join(installerOpts, " "), strings.Join(environmentVariables, "\n")) + if err := cmd.Run(); err != nil { + return fmt.Errorf("running elemental upgrade: %w", err) + } + return nil +} + func mapToInstallEnv(conf Install) []string { var variables []string // See GetInstallKeyEnvMap() in https://github.com/rancher/elemental-toolkit/blob/main/pkg/constants/constants.go @@ -130,6 +161,15 @@ func mapToResetEnv(conf Reset) []string { return variables } +func mapToUpgradeEnv(conf Upgrade, correlationID string) []string { + var variables []string + // See GetUpgradeKeyEnvMap() in https://github.com/rancher/elemental-toolkit/blob/main/pkg/constants/constants.go + variables = append(variables, formatEV("ELEMENTAL_UPGRADE_RECOVERY", strconv.FormatBool(conf.UpgradeRecovery))) + variables = append(variables, formatEV("ELEMENTAL_UPGRADE_SYSTEM", conf.ImageURI)) + variables = append(variables, formatEV("ELEMENTAL_UPGRADE_CORRELATION_ID", correlationID)) + return variables +} + func formatEV(key string, value string) string { return fmt.Sprintf("%s=%s", key, value) } diff --git a/internal/agent/elementalcli/runner_mocks.go b/internal/agent/elementalcli/runner_mocks.go index 482424d2..89744128 100644 --- a/internal/agent/elementalcli/runner_mocks.go +++ b/internal/agent/elementalcli/runner_mocks.go @@ -81,3 +81,17 @@ func (mr *MockRunnerMockRecorder) Reset(arg0 any) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Reset", reflect.TypeOf((*MockRunner)(nil).Reset), arg0) } + +// Upgrade mocks base method. +func (m *MockRunner) Upgrade(arg0 Upgrade, arg1 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Upgrade", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// Upgrade indicates an expected call of Upgrade. 
+func (mr *MockRunnerMockRecorder) Upgrade(arg0, arg1 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Upgrade", reflect.TypeOf((*MockRunner)(nil).Upgrade), arg0, arg1) +} diff --git a/internal/agent/phase/upgrade.go b/internal/agent/phase/upgrade.go new file mode 100644 index 00000000..6e8b82d1 --- /dev/null +++ b/internal/agent/phase/upgrade.go @@ -0,0 +1,83 @@ +package phase + +import ( + "encoding/json" + "fmt" + + infrastructurev1 "github.com/rancher-sandbox/cluster-api-provider-elemental/api/v1beta1" + "github.com/rancher-sandbox/cluster-api-provider-elemental/internal/agent/context" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" +) + +type OSVersionHandler interface { + Reconcile(map[string]runtime.RawExtension) (infrastructurev1.PostAction, error) +} + +var _ OSVersionHandler = (*osVersionHandler)(nil) + +func NewOSVersionHandler(agentContext context.AgentContext) OSVersionHandler { + return &osVersionHandler{ + agentContext: agentContext, + } +} + +type osVersionHandler struct { + agentContext context.AgentContext +} + +func (o *osVersionHandler) Reconcile(osVersionManagement map[string]runtime.RawExtension) (infrastructurev1.PostAction, error) { + post := infrastructurev1.PostAction{} + // Serialize input to JSON + bytes, err := json.Marshal(osVersionManagement) + if err != nil { + err := fmt.Errorf("marshalling Host osVersionManagement to JSON: %w", err) + updateCondition(o.agentContext.Client, o.agentContext.Hostname, clusterv1.Condition{ + Type: infrastructurev1.OSVersionReady, + Status: corev1.ConditionFalse, + Severity: clusterv1.ConditionSeverityError, + Reason: infrastructurev1.OSVersionReconciliationFailedReason, + Message: err.Error(), + }) + return post, err + } + // Ask the OSPlugin to reconcile + reboot, err := o.agentContext.Plugin.ReconcileOSVersion(bytes) + if err != nil { + err := fmt.Errorf("reconciling osVersion: 
%w", err) + updateCondition(o.agentContext.Client, o.agentContext.Hostname, clusterv1.Condition{ + Type: infrastructurev1.OSVersionReady, + Status: corev1.ConditionFalse, + Severity: clusterv1.ConditionSeverityError, + Reason: infrastructurev1.OSVersionReconciliationFailedReason, + Message: err.Error(), + }) + return post, err + } + if reboot { + // We only set this phase if we have to reboot, otherwise it will be most likely transitory and too spammy. + setPhase(o.agentContext.Client, o.agentContext.Hostname, infrastructurev1.PhaseOSVersionReconcile) + updateCondition(o.agentContext.Client, o.agentContext.Hostname, clusterv1.Condition{ + Type: infrastructurev1.OSVersionReady, + Status: corev1.ConditionFalse, + Severity: infrastructurev1.WaitingForPostReconcileRebootReasonSeverity, + Reason: infrastructurev1.WaitingForPostReconcileRebootReason, + Message: "Waiting for Host to reboot after OS Version has been reconciled.", + }) + post.Reboot = reboot + return post, nil + } + + // If we are not rebooting, assume there's nothing left to do for the elemental-agent. 
+ if err := updateConditionOrFail(o.agentContext.Client, o.agentContext.Hostname, clusterv1.Condition{ + Type: infrastructurev1.OSVersionReady, + Status: corev1.ConditionTrue, + Severity: clusterv1.ConditionSeverityInfo, + Reason: "", + Message: "", + }); err != nil { + return post, fmt.Errorf("updating OSVersionReady=true condition: %w", err) + } + return post, nil +} diff --git a/internal/agent/plugin/dummy/dummy.go b/internal/agent/plugin/dummy/dummy.go index d7528c15..bb08522f 100644 --- a/internal/agent/plugin/dummy/dummy.go +++ b/internal/agent/plugin/dummy/dummy.go @@ -18,6 +18,7 @@ import ( const ( cloudInitFile = "cloud-init.yaml" installFile = "install.yaml" + osVersionFile = "os-version.yaml" resetFile = "reset.yaml" sentinelFileResetNeeded = "reset.needed" bootstrapCloudInitPath = "/etc/cloud/cloud.cfg.d/elemental-capi-bootstrap.cfg" @@ -139,6 +140,19 @@ func (p *DummyPlugin) Bootstrap(format string, input []byte) error { return nil } +func (p *DummyPlugin) ReconcileOSVersion(input []byte) (bool, error) { + path := fmt.Sprintf("%s/%s", p.workDir, osVersionFile) + log.Debugf("Copying OS Version config to file: %s", path) + bytes, err := plugin.UnmarshalRawJSONToYaml(input) + if err != nil { + return false, fmt.Errorf("unmarshalling OS Version config: %w", err) + } + if err := p.fs.WriteFile(path, bytes, os.ModePerm); err != nil { + return false, fmt.Errorf("writing OS Version config: %w", err) + } + return false, nil +} + func (p *DummyPlugin) TriggerReset() error { log.Debug("Triggering Unmanaged OS reset") sentinelFile := p.resetSentinelFilePath() diff --git a/internal/agent/plugin/elemental/elemental.go b/internal/agent/plugin/elemental/elemental.go index 5dfd0c6b..fba0ec9a 100644 --- a/internal/agent/plugin/elemental/elemental.go +++ b/internal/agent/plugin/elemental/elemental.go @@ -1,6 +1,8 @@ package main import ( + "bufio" + "crypto/sha256" "encoding/json" "errors" "fmt" @@ -30,6 +32,7 @@ const ( resetCloudConfigPath = 
"/oem/reset-cloud-config.yaml" bootstrapPath = "/oem/bootstrap-cloud-config.yaml" liveModeFile = "/run/elemental/live_mode" + grubEnvsFile = "/run/elemental/efi/grub_oem_env" bootstrapSentinelPath = "/run/cluster-api/bootstrap-success.complete" ) @@ -351,6 +354,76 @@ func (p *ElementalPlugin) Reset(input []byte) error { return nil } +func (p *ElementalPlugin) ReconcileOSVersion(input []byte) (bool, error) { + log.Debug("Reconciling Elemental OS Version") + oSVersionManagement := elementalcli.Upgrade{} + // Try to unmarshal first to validate the config. + if err := json.Unmarshal(input, &oSVersionManagement); err != nil { + return false, fmt.Errorf("unmarshalling oSVersionManagement config: %w", err) + } + // No version was defined, nothing to do. + if len(oSVersionManagement.ImageURI) == 0 { + log.Info("No imageURI to upgrade to was defined. Nothing to do.") + return false, nil + } + // Generate the expected correlation_id value from this config + correlationID, err := p.osVersionHash(input) + if err != nil { + return false, fmt.Errorf("calculating osVersion hash: %w", err) + } + log.Debugf("Calculated osVersion hash: %s", correlationID) + // Verify current active system has same correlation_id + osReconciled, err := p.verifyCorrelationID(correlationID) + if err != nil { + return false, fmt.Errorf("verifying correlation_id: %w", err) + } + // Already reconciled, all good! + if osReconciled { + log.Info("OSVersion is reconciled. 
Nothing to do") + return false, nil + } + // Do the upgrade + if err := p.cliRunner.Upgrade(oSVersionManagement, correlationID); err != nil { + return false, fmt.Errorf("invoking elemental upgrade: %w", err) + } + + return true, nil +} + +// verifyCorrelationID reports whether the first correlation_id entry in grubEnvsFile equals correlationID; it returns false if no such entry exists. +func (p *ElementalPlugin) verifyCorrelationID(correlationID string) (bool, error) { + envsFile, err := p.fs.Open(grubEnvsFile) + if err != nil { + return false, fmt.Errorf("opening file '%s': %w", grubEnvsFile, err) + } + defer envsFile.Close() + scanner := bufio.NewScanner(envsFile) + + for scanner.Scan() { + env := scanner.Text() + currentCorrelationID, found := strings.CutPrefix(env, "correlation_id=") + if found { + return currentCorrelationID == correlationID, nil + } + } + if err := scanner.Err(); err != nil { + return false, fmt.Errorf("reading file '%s': %w", grubEnvsFile, err) + } + + log.Debug("Could not find correlation_id boot environment variable") + return false, nil +} + +func (p *ElementalPlugin) osVersionHash(osVersion []byte) (string, error) { + hash := sha256.New() + if _, err := hash.Write(osVersion); err != nil { + return "", fmt.Errorf("writing hash: %w", err) + } + + result := hash.Sum(nil) + return fmt.Sprintf("%x", result), nil +} + func (p *ElementalPlugin) PowerOff() error { if err := p.hostManager.PowerOff(); err != nil { return fmt.Errorf("powering off system: %w", err) diff --git a/internal/api/types.go b/internal/api/types.go index 418e320c..cd71155a 100644 --- a/internal/api/types.go +++ b/internal/api/types.go @@ -7,6 +7,7 @@ import ( "golang.org/x/exp/maps" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/conditions" @@ -56,11 +57,12 @@ type HostPatchRequest struct { RegistrationName string `path:"registrationName"` 
HostName string `path:"hostName"` - Annotations map[string]string `json:"annotations,omitempty"` - Labels map[string]string `json:"labels,omitempty"` - Bootstrapped *bool `json:"bootstrapped,omitempty"` - Installed *bool `json:"installed,omitempty"` - Reset 
*bool `json:"reset,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + Bootstrapped *bool `json:"bootstrapped,omitempty"` + Installed *bool `json:"installed,omitempty"` + Reset *bool `json:"reset,omitempty"` + InPlaceUpgrade *string `json:"inPlaceUpgrade,omitempty"` Condition *clusterv1.Condition `json:"condition,omitempty"` Phase *infrastructurev1.HostPhase `json:"phase,omitempty"` @@ -95,6 +97,9 @@ func (h *HostPatchRequest) applyToElementalHost(elementalHost *infrastructurev1. if h.Reset != nil { elementalHost.Labels[infrastructurev1.LabelElementalHostReset] = "true" } + if h.InPlaceUpgrade != nil { + elementalHost.Labels[infrastructurev1.LabelElementalHostInPlaceUpgrade] = *h.InPlaceUpgrade + } if elementalHost.Status.Conditions == nil { elementalHost.Status.Conditions = clusterv1.Conditions{} } @@ -109,13 +114,15 @@ func (h *HostPatchRequest) applyToElementalHost(elementalHost *infrastructurev1. } type HostResponse struct { - Name string `json:"name,omitempty"` - Annotations map[string]string `json:"annotations,omitempty"` - Labels map[string]string `json:"labels,omitempty"` - BootstrapReady bool `json:"bootstrapReady,omitempty"` - Bootstrapped bool `json:"bootstrapped,omitempty"` - Installed bool `json:"installed,omitempty"` - NeedsReset bool `json:"needsReset,omitempty"` + Name string `json:"name,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + BootstrapReady bool `json:"bootstrapReady,omitempty"` + Bootstrapped bool `json:"bootstrapped,omitempty"` + Installed bool `json:"installed,omitempty"` + NeedsReset bool `json:"needsReset,omitempty"` + InPlaceUpgrade string `json:"inPlaceUpgrade,omitempty"` + OSVersionManagement map[string]runtime.RawExtension `json:"osVersionManagement,omitempty" yaml:"osVersionManagement,omitempty"` } func (h *HostResponse) fromElementalHost(elementalHost 
infrastructurev1.ElementalHost) { @@ -136,6 +143,10 @@ func (h *HostResponse) fromElementalHost(elementalHost infrastructurev1.Elementa if value, found := elementalHost.Labels[infrastructurev1.LabelElementalHostNeedsReset]; found && value == "true" { h.NeedsReset = true } + if value, found := elementalHost.Labels[infrastructurev1.LabelElementalHostInPlaceUpgrade]; found { + h.InPlaceUpgrade = value + } + h.OSVersionManagement = elementalHost.Spec.OSVersionManagement } type RegistrationGetRequest struct { diff --git a/internal/controller/elementalhost_controller.go b/internal/controller/elementalhost_controller.go index 8d8320b4..8aad4f54 100644 --- a/internal/controller/elementalhost_controller.go +++ b/internal/controller/elementalhost_controller.go @@ -30,6 +30,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" infrastructurev1 "github.com/rancher-sandbox/cluster-api-provider-elemental/api/v1beta1" @@ -46,12 +47,41 @@ type ElementalHostReconciler struct { func (r *ElementalHostReconciler) SetupWithManager(mgr ctrl.Manager) error { if err := ctrl.NewControllerManagedBy(mgr). For(&infrastructurev1.ElementalHost{}). + Watches( + &infrastructurev1.ElementalMachine{}, + handler.EnqueueRequestsFromMapFunc(r.ElementalMachineToElementalHost), + ). Complete(r); err != nil { return fmt.Errorf("initializing ElementalHostReconciler builder: %w", err) } return nil } +func (r *ElementalHostReconciler) ElementalMachineToElementalHost(ctx context.Context, obj client.Object) []ctrl.Request { //nolint: dupl + logger := log.FromContext(ctx). + WithValues(ilog.KeyNamespace, obj.GetNamespace()). 
+ WithValues(ilog.KeyElementalMachine, obj.GetName()) + logger.Info("Enqueueing ElementalHost reconciliation from ElementalMachine") + + requests := []ctrl.Request{} + + // Verify we are actually handling an ElementalMachine object + machine, ok := obj.(*infrastructurev1.ElementalMachine) + if !ok { + logger.Error(ErrEnqueueing, fmt.Sprintf("Expected a ElementalMachine object, but got %T", obj)) + return []ctrl.Request{} + } + + // Check the ElementalMachine was associated to any ElementalHost + if machine.Spec.HostRef != nil { + logger.Info("Adding ElementalHost to reconciliation request", ilog.KeyElementalHost, machine.Spec.HostRef.Name) + name := client.ObjectKey{Namespace: machine.Spec.HostRef.Namespace, Name: machine.Spec.HostRef.Name} + requests = append(requests, ctrl.Request{NamespacedName: name}) + } + + return requests +} + //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=elementalhosts,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=elementalhosts/status,verbs=get;update;patch //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=elementalhosts/finalizers,verbs=update @@ -128,6 +158,12 @@ func (r *ElementalHostReconciler) reconcileNormal(ctx context.Context, host *inf WithValues(ilog.KeyNamespace, host.Namespace). WithValues(ilog.KeyElementalHost, host.Name) logger.Info("Normal ElementalHost reconcile") + if host.Spec.MachineRef != nil { + if err := r.reconcileOSVersionManagement(ctx, host); err != nil { + return ctrl.Result{}, fmt.Errorf("reconciling OSVersionManagement: %w", err) + } + } + // Reconcile Registered/Installed Condition (if the host is installed, assume it is registered as well) if value, found := host.Labels[infrastructurev1.LabelElementalHostInstalled]; found && value == "true" { conditions.Set(host, &v1beta1.Condition{ @@ -154,6 +190,29 @@ func (r *ElementalHostReconciler) reconcileNormal(ctx context.Context, host *inf return ctrl.Result{}, nil } +func (r 
*ElementalHostReconciler) reconcileOSVersionManagement(ctx context.Context, host *infrastructurev1.ElementalHost) error { + logger := log.FromContext(ctx). + WithValues(ilog.KeyNamespace, host.Namespace). + WithValues(ilog.KeyElementalHost, host.Name). + WithValues(ilog.KeyElementalMachine, host.Spec.MachineRef.Name) + logger.Info("Reconciling OSVersionManagement from associated ElementalMachine") + machine := &infrastructurev1.ElementalMachine{} + err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: host.Spec.MachineRef.Namespace, + Name: host.Spec.MachineRef.Name, + }, machine) + if apierrors.IsNotFound(err) { + logger.Info("Not going to reconcile OSVersionManagement for no longer existing ElementalMachine") + return nil + } + if err != nil { + return fmt.Errorf("fetching associated ElementalMachine '%s': %w", host.Spec.MachineRef.Name, err) + } + + host.Spec.OSVersionManagement = machine.Spec.OSVersionManagement + return nil +} + func (r *ElementalHostReconciler) reconcileDelete(ctx context.Context, host *infrastructurev1.ElementalHost) (ctrl.Result, error) { logger := log.FromContext(ctx). WithValues(ilog.KeyNamespace, host.Namespace). 
diff --git a/internal/controller/elementalmachine_controller.go b/internal/controller/elementalmachine_controller.go index 518eb622..5066205f 100644 --- a/internal/controller/elementalmachine_controller.go +++ b/internal/controller/elementalmachine_controller.go @@ -598,6 +598,9 @@ func (r *ElementalMachineReconciler) linkElementalHostToElementalMachine(ctx con elementalHostCandidate.Labels[clusterv1.ClusterNameLabel] = name } + // Propagate OSVersionManagement + elementalHostCandidate.Spec.OSVersionManagement = elementalMachine.Spec.OSVersionManagement + // Reconciliation step #10: Set status.addresses to the provider-specific set of instance addresses // TODO: Fetch the addresses from ElementalHost to update the associated ElementalMachine diff --git a/pkg/agent/osplugin/plugin.go b/pkg/agent/osplugin/plugin.go index d9b5edd1..47d96230 100644 --- a/pkg/agent/osplugin/plugin.go +++ b/pkg/agent/osplugin/plugin.go @@ -39,6 +39,9 @@ type Plugin interface { // Bootstrap should apply the CAPI bootstrap config to the machine. // The format can be either "cloud-init" or "ignition". Bootstrap(format string, input []byte) error + // ReconcileOSVersion should reconcile the OS version on the host according to the input (in JSON format). + // You can trigger a Reboot by returning a true value. Note that in case of error this is ignored. + ReconcileOSVersion(input []byte) (bool, error) // TriggerReset should prepare the machine for reset. TriggerReset() error // Reset should reset the machine to an installable state, given an input reset config (in JSON format). 
diff --git a/pkg/agent/osplugin/plugin_mocks.go b/pkg/agent/osplugin/plugin_mocks.go index 6e76ec2a..7e9396dd 100644 --- a/pkg/agent/osplugin/plugin_mocks.go +++ b/pkg/agent/osplugin/plugin_mocks.go @@ -219,6 +219,21 @@ func (mr *MockPluginMockRecorder) Reboot() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Reboot", reflect.TypeOf((*MockPlugin)(nil).Reboot)) } +// ReconcileOSVersion mocks base method. +func (m *MockPlugin) ReconcileOSVersion(arg0 []byte) (bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ReconcileOSVersion", arg0) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ReconcileOSVersion indicates an expected call of ReconcileOSVersion. +func (mr *MockPluginMockRecorder) ReconcileOSVersion(arg0 any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReconcileOSVersion", reflect.TypeOf((*MockPlugin)(nil).ReconcileOSVersion), arg0) +} + // Reset mocks base method. func (m *MockPlugin) Reset(arg0 []byte) error { m.ctrl.T.Helper() diff --git a/test/scripts/setup_kind_cluster.sh b/test/scripts/setup_kind_cluster.sh index afd8cb00..47761388 100755 --- a/test/scripts/setup_kind_cluster.sh +++ b/test/scripts/setup_kind_cluster.sh @@ -6,7 +6,7 @@ kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane - image: kindest/node:v1.26.6 + image: kindest/node:v1.31.0 kubeadmConfigPatches: - | kind: InitConfiguration @@ -23,6 +23,9 @@ nodes: - containerPort: 30009 hostPort: 30009 protocol: TCP + - containerPort: 30000 + hostPort: 30000 + protocol: TCP EOF # Build the Elemental provider docker image and load it to the kind cluster @@ -131,6 +134,52 @@ spec: resetPersistent: true EOF +# Create a test registry +cat << EOF | kubectl apply -f - +apiVersion: v1 +kind: Namespace +metadata: + name: test-registry +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-registry + namespace: test-registry + labels: + app: test-registry 
+spec: + replicas: 1 + selector: + matchLabels: + app: test-registry + template: + metadata: + labels: + app: test-registry + spec: + containers: + - name: registry + image: registry:2 + ports: + - containerPort: 5000 +--- +apiVersion: v1 +kind: Service +metadata: + name: registry-nodeport + namespace: test-registry +spec: + type: NodePort + selector: + app: test-registry + ports: + - nodePort: 30000 + port: 5000 + protocol: TCP + targetPort: 5000 +EOF + # Wait for registration to be initialized kubectl wait --for=condition=ready elementalregistration my-registration --timeout=120s