From e2f60290287cd7f6af8cc6090fb7e18db08a2762 Mon Sep 17 00:00:00 2001 From: Hardik Dodiya Date: Tue, 30 Jul 2024 17:17:26 +0200 Subject: [PATCH] Implement ServerClaim-handling in MetalMachine Controller (#9) --- .gitignore | 1 + api/v1alpha1/metalmachine_types.go | 2 +- cmd/main.go | 2 + config/rbac/role.yaml | 12 ++ docs/api-reference/api.md | 72 +++++++ go.mod | 3 +- go.sum | 6 + .../controller/metalmachine_controller.go | 193 ++++++++++++++++-- 8 files changed, 267 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 786b5aa..d966260 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.so *.dylib bin/* +dev/* Dockerfile.cross .tiltbuild/* diff --git a/api/v1alpha1/metalmachine_types.go b/api/v1alpha1/metalmachine_types.go index 7265b83..e437c7e 100644 --- a/api/v1alpha1/metalmachine_types.go +++ b/api/v1alpha1/metalmachine_types.go @@ -17,7 +17,7 @@ const ( MachineFinalizer = "metalmachine.infrastructure.cluster.x-k8s.io" // DefaultReconcilerRequeue is the default value for the reconcile retry. 
- DefaultReconcilerRequeue = 10 * time.Second + DefaultReconcilerRequeue = 5 * time.Second ) // MetalMachineSpec defines the desired state of MetalMachine diff --git a/cmd/main.go b/cmd/main.go index 0b4f64e..67d47cb 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -27,6 +27,7 @@ import ( infrastructurev1alpha1 "github.com/ironcore-dev/cluster-api-provider-metal/api/v1alpha1" "github.com/ironcore-dev/cluster-api-provider-metal/internal/controller" + metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1" // +kubebuilder:scaffold:imports ) @@ -39,6 +40,7 @@ func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(clusterv1.AddToScheme(scheme)) utilruntime.Must(infrastructurev1alpha1.AddToScheme(scheme)) + utilruntime.Must(metalv1alpha1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 2a13e17..1663c84 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -129,3 +129,15 @@ rules: - get - patch - update +- apiGroups: + - metal.ironcore.dev + resources: + - serverclaims + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md index 1ee4fc6..9ecac63 100644 --- a/docs/api-reference/api.md +++ b/docs/api-reference/api.md @@ -288,6 +288,78 @@ This is used to claim specific Server types for a MetalMachine.

MetalMachineStatus defines the observed state of MetalMachine

+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+ready
+ +bool + +
+(Optional) +

Ready indicates the Machine infrastructure has been provisioned and is ready.

+
+failureReason
+ +sigs.k8s.io/cluster-api/errors.MachineStatusError + +
+(Optional) +

FailureReason will be set in the event that there is a terminal problem +reconciling the Machine and will contain a succinct value suitable +for machine interpretation.

+

This field should not be set for transient errors that a controller +faces that are expected to be fixed automatically over +time (like service outages), but instead indicate that something is +fundamentally wrong with the Machine’s spec or the configuration of +the controller, and that manual intervention is required. Examples +of terminal errors would be invalid combinations of settings in the +spec, values that are unsupported by the controller, or the +responsible controller itself being critically misconfigured.

+

Any transient errors that occur during the reconciliation of Machines +can be added as events to the Machine object and/or logged in the +controller’s output.

+
+failureMessage
+ +string + +
+(Optional) +

FailureMessage will be set in the event that there is a terminal problem +reconciling the Machine and will contain a more verbose string suitable +for logging and human consumption.

+

This field should not be set for transient errors that a controller +faces that are expected to be fixed automatically over +time (like service outages), but instead indicate that something is +fundamentally wrong with the Machine’s spec or the configuration of +the controller, and that manual intervention is required. Examples +of terminal errors would be invalid combinations of settings in the +spec, values that are unsupported by the controller, or the +responsible controller itself being critically misconfigured.

+

Any transient errors that occur during the reconciliation of Machines +can be added as events to the Machine object and/or logged in the +controller’s output.

+

MetalMachineTemplate

diff --git a/go.mod b/go.mod index 7bda0c5..3d2e95d 100644 --- a/go.mod +++ b/go.mod @@ -6,10 +6,12 @@ toolchain go1.22.5 require ( github.com/go-logr/logr v1.4.2 + github.com/ironcore-dev/controller-utils v0.9.3 github.com/ironcore-dev/metal-operator v0.0.0-20240723113059-17e10339810f github.com/onsi/ginkgo/v2 v2.19.1 github.com/onsi/gomega v1.34.0 github.com/pkg/errors v0.9.1 + k8s.io/api v0.30.3 k8s.io/apimachinery v0.30.3 k8s.io/client-go v0.30.3 k8s.io/klog/v2 v2.130.1 @@ -86,7 +88,6 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/api v0.30.3 // indirect k8s.io/apiextensions-apiserver v0.30.3 // indirect k8s.io/apiserver v0.30.3 // indirect k8s.io/component-base v0.30.3 // indirect diff --git a/go.sum b/go.sum index 0fb512e..3361476 100644 --- a/go.sum +++ b/go.sum @@ -83,6 +83,8 @@ github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4 github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/ironcore-dev/controller-utils v0.9.3 h1:sTrnxSzX5RrLf4B8KrAH2axSC+gxfJXphkV6df2GSsw= +github.com/ironcore-dev/controller-utils v0.9.3/go.mod h1:djKnxDs0Hwxhhc0VmVY8tZnrOrElvrRV2jov/LiCZ2Y= github.com/ironcore-dev/metal-operator v0.0.0-20240723113059-17e10339810f h1:FeavQ1QSA1RQdz4Fup6KTj7nXAROBn1Fzviu5MzfOP0= github.com/ironcore-dev/metal-operator v0.0.0-20240723113059-17e10339810f/go.mod h1:u0x9uFC6MgN5c6mYqJ/QgTLSjUPHsGTKTN/jjsG9JnY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -139,6 +141,8 @@ github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod 
h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -166,6 +170,8 @@ go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lI go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= +go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= diff --git a/internal/controller/metalmachine_controller.go b/internal/controller/metalmachine_controller.go index 4bbc25f..c06c6d3 100644 --- a/internal/controller/metalmachine_controller.go +++ b/internal/controller/metalmachine_controller.go @@ -5,15 +5,22 @@ package controller import ( "context" + "fmt" + "github.com/go-logr/logr" "github.com/ironcore-dev/cluster-api-provider-metal/internal/scope" + "github.com/ironcore-dev/controller-utils/clientutils" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" - clusterv1 
"sigs.k8s.io/cluster-api/api/v1beta1" + clusterapiv1beta1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -22,7 +29,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - infrav1 "github.com/ironcore-dev/cluster-api-provider-metal/api/v1alpha1" + infrav1alpha1 "github.com/ironcore-dev/cluster-api-provider-metal/api/v1alpha1" + metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1" ) // MetalMachineReconciler reconciles a MetalMachine object @@ -31,6 +39,11 @@ type MetalMachineReconciler struct { Scheme *runtime.Scheme } +const ( + MetalMachineFinalizer = "infrastructure.cluster.x-k8s.io/metalmachine" + DefaultIgnitionSecretKeyName = "ignition" +) + // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metalmachines,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metalmachines/status,verbs=get;update;patch // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=metalmachines/finalizers,verbs=update @@ -38,6 +51,7 @@ type MetalMachineReconciler struct { // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets,verbs=get;list;watch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=kubeadmcontrolplanes,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=serverclaims,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete @@ -45,7 
+59,7 @@ func (r *MetalMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request logger := log.FromContext(ctx) // Fetch the MetalMachine. - metalMachine := &infrav1.MetalMachine{} + metalMachine := &infrav1alpha1.MetalMachine{} err := r.Get(ctx, req.NamespacedName, metalMachine) if err != nil { if apierrors.IsNotFound(err) { @@ -85,7 +99,7 @@ func (r *MetalMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request Name: cluster.Spec.InfrastructureRef.Name, } - metalCluster := &infrav1.MetalCluster{} + metalCluster := &infrav1alpha1.MetalCluster{} if err := r.Client.Get(ctx, metalClusterName, metalCluster); err != nil { if apierrors.IsNotFound(err) || !metalCluster.Status.Ready { logger.Info("MetalCluster is not available yet") @@ -146,30 +160,28 @@ func (r *MetalMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request // SetupWithManager sets up the controller with the Manager. func (r *MetalMachineReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - For(&infrav1.MetalMachine{}). + For(&infrav1alpha1.MetalMachine{}). Watches( - &clusterv1.Machine{}, - handler.EnqueueRequestsFromMapFunc(util.MachineToInfrastructureMapFunc(infrav1.GroupVersion.WithKind("MetalMachine"))), + &clusterapiv1beta1.Machine{}, + handler.EnqueueRequestsFromMapFunc(util.MachineToInfrastructureMapFunc(infrav1alpha1.GroupVersion.WithKind("MetalMachine"))), ). 
Complete(r)
 }
 
-// TODO: remove nolint tag
-//
-//nolint:unparam
-func (r *MetalMachineReconciler) reconcileDelete(_ context.Context, machineScope *scope.MachineScope) (ctrl.Result, error) {
-	machineScope.Logger.Info("Handling deleted MetalMachine")
+// reconcileDelete removes the MetalMachine finalizer; a NotFound error means the object is already gone and is ignored.
+func (r *MetalMachineReconciler) reconcileDelete(ctx context.Context, machineScope *scope.MachineScope) (ctrl.Result, error) {
+	machineScope.Logger.Info("Deleting MetalMachine")
 
 	// insert ServerClaim deletion logic here
 
-	// ServerClaim is being deleted
-	return reconcile.Result{RequeueAfter: infrav1.DefaultReconcilerRequeue}, nil
+	if _, err := clientutils.PatchEnsureNoFinalizer(ctx, r.Client, machineScope.MetalMachine, MetalMachineFinalizer); client.IgnoreNotFound(err) != nil {
+		return ctrl.Result{}, err
+	}
+	machineScope.Logger.Info("Ensured that the finalizer has been removed")
+	return ctrl.Result{}, nil
 }
 
-// TODO: remove nolint tag
-//
-//nolint:unparam
-func (r *MetalMachineReconciler) reconcileNormal(_ context.Context, machineScope *scope.MachineScope, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
+func (r *MetalMachineReconciler) reconcileNormal(ctx context.Context, machineScope *scope.MachineScope, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
 	clusterScope.Logger.V(4).Info("Reconciling MetalMachine")
 
 	// If the MetalMachine is in an error state, return early.
@@ -191,14 +203,164 @@ func (r *MetalMachineReconciler) reconcileNormal(_ context.Context, machineScope
 		return ctrl.Result{}, nil
 	}
 
-	// TBD add finalizer
+	if modified, err := clientutils.PatchEnsureFinalizer(ctx, r.Client, machineScope.MetalMachine, MetalMachineFinalizer); err != nil || modified {
+		return ctrl.Result{}, err
+	}
+	machineScope.Logger.Info("Ensured finalizer has been added")
+
+	// Fetch the bootstrap data secret.
+	bootstrapSecret := &corev1.Secret{}
+	secretName := types.NamespacedName{
+		Namespace: machineScope.Machine.Namespace,
+		Name:      *machineScope.Machine.Spec.Bootstrap.DataSecretName,
+	}
+	if err := r.Client.Get(ctx, secretName, bootstrapSecret); err != nil {
+		machineScope.Error(err, "failed to get bootstrap data secret")
+		return ctrl.Result{}, err
+	}
+
+	machineScope.Info("Creating IgnitionSecret", "Secret", machineScope.MetalMachine.Name)
+	ignitionSecret, err := r.applyIgnitionSecret(ctx, machineScope.Logger, bootstrapSecret)
+	if err != nil {
+		machineScope.Error(err, "failed to create or patch ignition secret")
+		return ctrl.Result{}, err
+	}
+
+	machineScope.Info("Creating ServerClaim", "ServerClaim", machineScope.MetalMachine.Name)
+	serverClaim, err := r.applyServerClaim(ctx, machineScope.Logger, machineScope.MetalMachine, ignitionSecret)
+	if err != nil {
+		machineScope.Error(err, "failed to create or patch ServerClaim")
+		return ctrl.Result{}, err
+	}
 
-	// Get or create the ServerClaim.
-	// TBD
-	machineScope.Info("Creating ServerClaim", "claim", machineScope.MetalMachine.Name)
+	// The ProviderID is derived from the Server the claim points at, so wait until
+	// the claim is Bound before looking that Server up.
+	if serverClaim.Status.Phase != metalv1alpha1.PhaseBound {
+		machineScope.Info("Waiting for ServerClaim to be Bound")
+		return ctrl.Result{
+			RequeueAfter: infrav1alpha1.DefaultReconcilerRequeue,
+		}, nil
+	}
+
+	machineScope.Info("Patching ProviderID in MetalMachine")
+	if err := r.patchMetalMachineProviderID(ctx, machineScope.Logger, machineScope.MetalMachine, serverClaim); err != nil {
+		machineScope.Error(err, "failed to patch the MetalMachine with providerid")
+		return ctrl.Result{}, err
+	}
 
 	machineScope.SetReady()
 	machineScope.Logger.Info("MetalMachine is ready")
 
 	return reconcile.Result{}, nil
 }
+
+// applyIgnitionSecret creates or patches the Secret named "ignition-<bootstrap secret name>",
+// which carries the bootstrap data under DefaultIgnitionSecretKeyName and is controlled
+// by the bootstrap data Secret.
+func (r *MetalMachineReconciler) applyIgnitionSecret(ctx context.Context, log *logr.Logger, capidatasecret *corev1.Secret) (*corev1.Secret, error) {
+	secretObj := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      fmt.Sprintf("ignition-%s", capidatasecret.Name),
+			Namespace: capidatasecret.Namespace,
+		},
+	}
+
+	// The desired state is set in the mutate function: CreateOrPatch first loads an
+	// existing object into secretObj, which would discard values assigned beforehand.
+	opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, secretObj, func() error {
+		// TODO: Make Metal Specific changes in the Ignition if necessary
+		secretObj.Data = map[string][]byte{
+			DefaultIgnitionSecretKeyName: capidatasecret.Data["value"],
+		}
+		if err := controllerutil.SetControllerReference(capidatasecret, secretObj, r.Client.Scheme()); err != nil {
+			return fmt.Errorf("failed to set ControllerReference: %w", err)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create or patch the IgnitionSecret: %w", err)
+	}
+	log.Info("Created or Patched IgnitionSecret", "IgnitionSecret", secretObj.Name, "Operation", opResult)
+
+	return secretObj, nil
+}
+
+// applyServerClaim creates or patches the ServerClaim named after the MetalMachine.
+// The claim references the ignition Secret, selects Servers by the control-plane or
+// workload label, and is controlled by the MetalMachine.
+func (r *MetalMachineReconciler) applyServerClaim(ctx context.Context, log *logr.Logger, metalmachine *infrav1alpha1.MetalMachine, ignitionsecret *corev1.Secret) (*metalv1alpha1.ServerClaim, error) {
+	serverClaimObj := &metalv1alpha1.ServerClaim{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      metalmachine.Name,
+			Namespace: metalmachine.Namespace,
+		},
+	}
+
+	// TODO: Define proper contract for ServerSelectors.
+	selectorLabel := "clusterapi-workload"
+	if _, exists := metalmachine.Labels["cluster.x-k8s.io/control-plane"]; exists {
+		selectorLabel = "clusterapi-controlplane"
+	}
+
+	// Only individual fields are assigned in the mutate function: CreateOrPatch loads
+	// an existing claim into serverClaimObj first, and fields this function does not
+	// manage (for example Spec.ServerRef) must survive the patch.
+	opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, serverClaimObj, func() error {
+		serverClaimObj.Spec.Power = metalv1alpha1.PowerOn
+		serverClaimObj.Spec.IgnitionSecretRef = &corev1.LocalObjectReference{
+			Name: ignitionsecret.Name,
+		}
+		// TODO: Allow configuring os-image.
+		serverClaimObj.Spec.Image = "ghcr.io/ironcore-dev/os-images/gardenlinux:1443.3"
+		serverClaimObj.Spec.ServerSelector = &metav1.LabelSelector{
+			MatchLabels: map[string]string{selectorLabel: ""},
+		}
+		if err := controllerutil.SetControllerReference(metalmachine, serverClaimObj, r.Client.Scheme()); err != nil {
+			return fmt.Errorf("failed to set ControllerReference: %w", err)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("failed to create or patch ServerClaim: %w", err)
+	}
+	log.Info("Created or Patched ServerClaim", "ServerClaim", serverClaimObj.Name, "Operation", opResult)
+
+	return serverClaimObj, nil
+}
+
+// patchMetalMachineProviderID sets Spec.ProviderID of the MetalMachine to
+// "metal:///<claim namespace>/<claim name>/<server UUID>", reading the UUID from
+// the Server referenced by the ServerClaim. It returns an error when the claim
+// has no Server reference.
+func (r *MetalMachineReconciler) patchMetalMachineProviderID(ctx context.Context, log *logr.Logger, metalmachine *infrav1alpha1.MetalMachine, serverClaim *metalv1alpha1.ServerClaim) error {
+	// ServerRef is presumably unset until the claim is bound to a Server; fail with
+	// an error instead of dereferencing a nil reference.
+	if serverClaim.Spec.ServerRef == nil {
+		return fmt.Errorf("ServerClaim %s/%s has no server reference yet", serverClaim.Namespace, serverClaim.Name)
+	}
+
+	server := &metalv1alpha1.Server{}
+	serverRefName := types.NamespacedName{
+		Name: serverClaim.Spec.ServerRef.Name,
+	}
+	if err := r.Client.Get(ctx, serverRefName, server); err != nil {
+		log.Error(err, "failed to fetch server associated with the server claim")
+		return err
+	}
+
+	providerID := fmt.Sprintf("metal:///%s/%s/%s", serverClaim.Namespace, serverClaim.Name, server.Spec.UUID)
+	// Skip the API call when the ProviderID is already up to date.
+	if metalmachine.Spec.ProviderID != nil && *metalmachine.Spec.ProviderID == providerID {
+		return nil
+	}
+
+	patch := client.MergeFrom(metalmachine.DeepCopy())
+	metalmachine.Spec.ProviderID = &providerID
+	if err := r.Client.Patch(ctx, metalmachine, patch); err != nil {
+		log.Error(err, "failed to patch MetalMachine with ProviderID")
+		return err
+	}
+
+	log.Info("Successfully patched MetalMachine with ProviderID", "ProviderID", providerID)
+	return nil
+}