diff --git a/api/v1/helper.go b/api/v1/helper.go index 300992acb..1f30be618 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -12,6 +12,7 @@ import ( "sort" "strconv" "strings" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1005,3 +1006,43 @@ func GenerateBridgeName(iface *InterfaceExt) string { func NeedToUpdateBridges(bridgeSpec, bridgeStatus *Bridges) bool { return !reflect.DeepEqual(bridgeSpec, bridgeStatus) } + +// SetKeepUntilTime sets an annotation to hold the "keep until time" for the node’s state. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +func (s *SriovNetworkNodeState) SetKeepUntilTime(t time.Time) { + ts := t.Format(time.RFC3339) + annotations := s.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[consts.NodeStateKeepUntilAnnotation] = ts + s.SetAnnotations(annotations) +} + +// GetKeepUntilTime returns the value that is stored in the "keep until time" annotation. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Return zero time instant if annotaion is not found on the object or if it has a wrong format. +func (s *SriovNetworkNodeState) GetKeepUntilTime() time.Time { + t, err := time.Parse(time.RFC3339, s.GetAnnotations()[consts.NodeStateKeepUntilAnnotation]) + if err != nil { + return time.Time{} + } + return t +} + +// ResetKeepUntilTime removes "keep until time" annotation from the state object. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Returns true if the value was removed, false otherwise. +func (s *SriovNetworkNodeState) ResetKeepUntilTime() bool { + annotations := s.GetAnnotations() + _, exist := annotations[consts.NodeStateKeepUntilAnnotation] + if !exist { + return false + } + delete(annotations, consts.NodeStateKeepUntilAnnotation) + s.SetAnnotations(annotations) + return true +} diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 29438b176..f8811ed97 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -20,8 +20,10 @@ import ( "context" "encoding/json" "fmt" + "os" "reflect" "sort" + "strconv" "strings" "time" @@ -338,10 +340,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name) return err } - logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) - err = r.Delete(ctx, &ns, &client.DeleteOptions{}) - if err != nil { - logger.Error(err, "Fail to Delete", "SriovNetworkNodeState CR:", ns.GetName()) + if err := r.handleStaleNodeState(ctx, &ns); err != nil { return err } } @@ -350,6 +349,56 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con return nil } +// handleStaleNodeState handles stale SriovNetworkNodeState CR (the CR which no longer have a corresponding node with the daemon). +// If the CR has the "keep until time" annotation, indicating the earliest time the state object can be removed, +// this function will compare it to the current time to determine if deletion is permissible and do deletion if allowed. +// If the annotation is absent, the function will create one with a timestamp in future, using either the default or a configured offset. +// If STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable is set to 0, removes the CR immediately +func (r *SriovNetworkNodePolicyReconciler) handleStaleNodeState(ctx context.Context, ns *sriovnetworkv1.SriovNetworkNodeState) error { + logger := log.Log.WithName("handleStaleNodeState") + + var delayMinutes int + var err error + + envValue, found := os.LookupEnv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES") + if found { + delayMinutes, err = strconv.Atoi(envValue) + if err != nil || delayMinutes < 0 { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + logger.Error(err, "invalid value in STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable, use default delay", + "delay", delayMinutes) + } + } else { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + } + + if delayMinutes != 0 { + now := time.Now().UTC() + keepUntilTime := ns.GetKeepUntilTime() + if keepUntilTime.IsZero() { + keepUntilTime = now.Add(time.Minute * time.Duration(delayMinutes)) + logger.V(2).Info("SriovNetworkNodeState has no matching node, configure cleanup delay for the state object", + "nodeStateName", ns.Name, "delay", delayMinutes, "keepUntilTime", keepUntilTime.String()) + ns.SetKeepUntilTime(keepUntilTime) + if err := r.Update(ctx, ns); err != nil { + logger.Error(err, "Fail to update SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil + } + if now.Before(keepUntilTime) { + return nil + } + } + // remove the object if delayMinutes is 0 or if keepUntilTime is already passed + logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) + if err := r.Delete(ctx, ns, &client.DeleteOptions{}); err != nil { + logger.Error(err, "Fail to delete SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil +} + func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig, npl *sriovnetworkv1.SriovNetworkNodePolicyList, @@ -375,9 +424,16 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context return fmt.Errorf("failed to get SriovNetworkNodeState: %v", err) } } else { + keepUntilAnnotationUpdated := found.ResetKeepUntilTime() + if len(found.Status.Interfaces) == 0 { logger.Info("SriovNetworkNodeState Status Interfaces are empty. Skip update of policies in spec", "namespace", ns.Namespace, "name", ns.Name) + if keepUntilAnnotationUpdated { + if err := r.Update(ctx, found); err != nil { + return fmt.Errorf("couldn't update SriovNetworkNodeState: %v", err) + } + } return nil } @@ -420,7 +476,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context // Note(adrianc): we check same ownerReferences since SriovNetworkNodeState // was owned by a default SriovNetworkNodePolicy. if we encounter a descripancy // we need to update. - if reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && + if !keepUntilAnnotationUpdated && reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && equality.Semantic.DeepEqual(newVersion.Spec, found.Spec) { logger.V(1).Info("SriovNetworkNodeState did not change, not updating") return nil diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index d5534f55e..b955e0133 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ b/controllers/sriovnetworknodepolicy_controller_test.go @@ -3,6 +3,7 @@ package controllers import ( "context" "encoding/json" + "os" "sync" "testing" "time" @@ -11,18 +12,17 @@ import ( . "github.com/onsi/gomega" "github.com/google/go-cmp/cmp" + dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" - sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" @@ -48,7 +48,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testVirtioVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVirtio, @@ -67,7 +67,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { }, { tname: "testVhostVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVhost, @@ -87,7 +87,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testExcludeTopology", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", ExcludeTopology: true, }, @@ -138,6 +138,10 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { var ctx context.Context BeforeAll(func() { + // disable stale state cleanup delay to check that the controller can cleanup state objects + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + By("Create SriovOperatorConfig controller k8s objs") config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) @@ -321,3 +325,65 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { }) }) }) + +var _ = Describe("SriovNetworkNodePolicyReconciler", Ordered, func() { + Context("handleStaleNodeState", func() { + var ( + ctx context.Context + r *SriovNetworkNodePolicyReconciler + nodeState *sriovnetworkv1.SriovNetworkNodeState + ) + + BeforeEach(func() { + ctx = context.Background() + scheme := runtime.NewScheme() + utilruntime.Must(sriovnetworkv1.AddToScheme(scheme)) + nodeState = &sriovnetworkv1.SriovNetworkNodeState{ObjectMeta: metav1.ObjectMeta{Name: "node1"}} + r = &SriovNetworkNodePolicyReconciler{Client: fake.NewClientBuilder().WithObjects(nodeState).Build()} + }) + It("should set default delay", func() { + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Now().UTC().Before(nodeState.GetKeepUntilTime())).To(BeTrue()) + }) + It("should remove CR if wait time expired", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(-time.Minute)) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + It("should keep existing wait time if already set", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(time.Minute)) + testTime := nodeState.GetKeepUntilTime() + r.Update(ctx, nodeState) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(nodeState.GetKeepUntilTime()).To(Equal(testTime)) + }) + It("non default dealy", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "60") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 30*time.Minute).To(BeTrue()) + }) + It("invalid non default delay - should use default", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "-20") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 20*time.Minute).To(BeTrue()) + }) + It("should remove CR if delay is zero", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + }) +}) diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 0e89d1959..c2a813fc8 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -112,6 +112,8 @@ spec: value: {{ .Values.operator.cniBinPath }} - name: CLUSTER_TYPE value: {{ .Values.operator.clusterType }} + - name: STALE_NODE_STATE_CLEANUP_DELAY_MINUTES + value: "{{ .Values.operator.staleNodeStateCleanupDelayMinutes }}" {{- if .Values.operator.admissionControllers.enabled }} - name: ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_SECRET_NAME value: {{ .Values.operator.admissionControllers.certificates.secretNames.operator }} diff --git a/deployment/sriov-network-operator-chart/values.yaml b/deployment/sriov-network-operator-chart/values.yaml index c70d6e323..ec9323bf7 100644 --- a/deployment/sriov-network-operator-chart/values.yaml +++ b/deployment/sriov-network-operator-chart/values.yaml @@ -27,6 +27,10 @@ operator: resourcePrefix: "openshift.io" cniBinPath: "/opt/cni/bin" clusterType: "kubernetes" + # minimal amount of time (in minutes) the operator will wait before removing + # stale SriovNetworkNodeState objects (objects that doesn't match node with the daemon) + # "0" means no extra delay, in this case the CR will be removed by the next reconcilation cycle (may take up to 5 minutes) + staleNodeStateCleanupDelayMinutes: "30" metricsExporter: port: "9110" certificates: diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 4ce478730..6aadef648 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -93,6 +93,14 @@ const ( MCPPauseAnnotationState = "sriovnetwork.openshift.io/state" MCPPauseAnnotationTime = "sriovnetwork.openshift.io/time" + // NodeStateKeepUntilAnnotation contains name of the "keep until time" annotation for SriovNetworkNodeState object. + // The "keep until time" specifies the earliest time at which the state object can be removed + // if the daemon's pod is not found on the node. + NodeStateKeepUntilAnnotation = "sriovnetwork.openshift.io/keep-state-until" + // DefaultNodeStateCleanupDelayMinutes contains default delay before removing stale SriovNetworkNodeState CRs + // (the CRs that no longer have a corresponding node with the daemon). + DefaultNodeStateCleanupDelayMinutes = 30 + CheckpointFileName = "sno-initial-node-state.json" Unknown = "Unknown"