From c986251aa3fe8967bdaa3ae399c2f384ae22bed5 Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Sat, 23 Mar 2024 18:34:28 +0200 Subject: [PATCH] Implement RDMA subsystem mode change Now it's possible to configure the RDMA subsystem mode using the SR-IOV Network Operator in systemd mode. We can't configure the RDMA subsystem in daemon mode because it must be set on the host before any network namespace is created. --- api/v1/sriovnetworkpoolconfig_types.go | 4 + api/v1/zz_generated.deepcopy.go | 1 - cmd/sriov-network-config-daemon/service.go | 11 ++ ...vnetwork.openshift.io_sriovibnetworks.yaml | 30 +++--- ...openshift.io_sriovnetworknodepolicies.yaml | 20 ++-- ...k.openshift.io_sriovnetworknodestates.yaml | 20 ++-- ....openshift.io_sriovnetworkpoolconfigs.yaml | 82 ++++++++------ ...iovnetwork.openshift.io_sriovnetworks.yaml | 42 ++++---- ...ork.openshift.io_sriovoperatorconfigs.yaml | 26 +++-- controllers/drain_controller.go | 98 +---------------- ...vnetwork.openshift.io_sriovibnetworks.yaml | 30 +++--- ...openshift.io_sriovnetworknodepolicies.yaml | 20 ++-- ...k.openshift.io_sriovnetworknodestates.yaml | 20 ++-- ....openshift.io_sriovnetworkpoolconfigs.yaml | 82 ++++++++------ ...iovnetwork.openshift.io_sriovnetworks.yaml | 42 ++++---- ...ork.openshift.io_sriovoperatorconfigs.yaml | 26 +++-- pkg/consts/constants.go | 3 + pkg/daemon/daemon.go | 15 ++- pkg/host/internal/kernel/kernel.go | 28 +++++ pkg/host/types/interfaces.go | 4 + pkg/systemd/systemd.go | 9 +- pkg/utils/cluster.go | 101 ++++++++++++++++++ 22 files changed, 432 insertions(+), 282 deletions(-) diff --git a/api/v1/sriovnetworkpoolconfig_types.go b/api/v1/sriovnetworkpoolconfig_types.go index c6e710a99b..011ffc7d91 100644 --- a/api/v1/sriovnetworkpoolconfig_types.go +++ b/api/v1/sriovnetworkpoolconfig_types.go @@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct { // Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards, // even if maxUnavailable is greater than one. 
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` + + // +kubebuilder:validation:Enum=shared;exclusive + // RDMA subsystem. Allowed value "shared", "exclusive". + RdmaMode string `json:"rdmaMode,omitempty"` } type OvsHardwareOffloadConfig struct { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 5e13174ee9..f0baac9b60 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -1,5 +1,4 @@ //go:build !ignore_autogenerated -// +build !ignore_autogenerated /* Copyright 2021. diff --git a/cmd/sriov-network-config-daemon/service.go b/cmd/sriov-network-config-daemon/service.go index 1917ed63cc..45658c155d 100644 --- a/cmd/sriov-network-config-daemon/service.go +++ b/cmd/sriov-network-config-daemon/service.go @@ -152,6 +152,17 @@ func phasePre(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helpe hostHelpers.TryEnableTun() hostHelpers.TryEnableVhostNet() + rdmaSubsystem, err := hostHelpers.GetRDMASubsystem() + if err != nil { + setupLog.Error(err, "failed to get RDMA subsystem mode") + } + if rdmaSubsystem != conf.RdmaMode { + err = hostHelpers.SetRDMASubsystem(conf.RdmaMode) + if err != nil { + setupLog.Error(err, "failed to set RDMA subsystem mode") + } + } + return callPlugin(setupLog, PhasePre, conf, hostHelpers) } diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml index d619d53628..4b4b44d92f 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovibnetworks.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -21,14 +20,19 @@ spec: 
description: SriovIBNetwork is the Schema for the sriovibnetworks API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -36,8 +40,9 @@ spec: description: SriovIBNetworkSpec defines the desired state of SriovIBNetwork properties: capabilities: - description: 'Capabilities to be configured for this network. Capabilities - supported: (infinibandGUID), e.g. ''{"infinibandGUID": true}''' + description: |- + Capabilities to be configured for this network. + Capabilities supported: (infinibandGUID), e.g. '{"infinibandGUID": true}' type: string ipam: description: IPAM configuration to be used for this network. 
@@ -50,8 +55,9 @@ spec: - disable type: string metaPlugins: - description: MetaPluginsConfig configuration to be used in order to - chain metaplugins to the sriov interface returned by the operator. + description: |- + MetaPluginsConfig configuration to be used in order to chain metaplugins to the sriov interface returned + by the operator. type: string networkNamespace: description: Namespace of the NetworkAttachmentDefinition custom resource diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index d5ae89ccab..9fc6c3026e 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworknodepolicies.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -22,14 +21,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. 
Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index 9686248215..5c9df83df9 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworknodestates.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -35,14 +34,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. 
Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index b819999761..3d8a6a1052 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworkpoolconfigs.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -22,14 +21,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -40,13 +44,15 @@ spec: anyOf: - type: integer - type: string - description: "maxUnavailable defines either an integer number or percentage - of nodes in the pool that can go Unavailable during an update. \n - A value larger than 1 will mean multiple nodes going unavailable - during the update, which may affect your workload stress on the - remaining nodes. Drain will respect Pod Disruption Budgets (PDBs) - such as etcd quorum guards, even if maxUnavailable is greater than - one." + description: |- + maxUnavailable defines either an integer number or percentage + of nodes in the pool that can go Unavailable during an update. + + + A value larger than 1 will mean multiple nodes going unavailable during + the update, which may affect your workload stress on the remaining nodes. + Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards, + even if maxUnavailable is greater than one. x-kubernetes-int-or-string: true nodeSelector: description: nodeSelector specifies a label selector for Nodes @@ -55,24 +61,24 @@ spec: description: matchExpressions is a list of label selector requirements. The requirements are ANDed. 
items: - description: A label selector requirement is a selector that - contains values, a key, and an operator that relates the key - and values. + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. properties: key: description: key is the label key that the selector applies to. type: string operator: - description: operator represents a key's relationship to - a set of values. Valid operators are In, NotIn, Exists - and DoesNotExist. + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. type: string values: - description: values is an array of string values. If the - operator is In or NotIn, the values array must be non-empty. - If the operator is Exists or DoesNotExist, the values - array must be empty. This array is replaced during a strategic + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic merge patch. items: type: string @@ -85,24 +91,32 @@ spec: matchLabels: additionalProperties: type: string - description: matchLabels is a map of {key,value} pairs. A single - {key,value} in the matchLabels map is equivalent to an element - of matchExpressions, whose key field is "key", the operator - is "In", and the values array contains only "value". The requirements - are ANDed. + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
type: object type: object + x-kubernetes-map-type: atomic ovsHardwareOffloadConfig: description: OvsHardwareOffloadConfig describes the OVS HWOL configuration for selected Nodes properties: name: - description: 'Name is mandatory and must be unique. On Kubernetes: - Name is the name of OvsHardwareOffloadConfig On OpenShift: Name - is the name of MachineConfigPool to be enabled with OVS hardware - offload' + description: |- + Name is mandatory and must be unique. + On Kubernetes: + Name is the name of OvsHardwareOffloadConfig + On OpenShift: + Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml index 15e1bfd3f8..e33b9a3ed5 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworks.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworks.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -21,14 +20,19 @@ spec: description: SriovNetwork is the Schema for the sriovnetworks API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -36,8 +40,9 @@ spec: description: SriovNetworkSpec defines the desired state of SriovNetwork properties: capabilities: - description: 'Capabilities to be configured for this network. Capabilities - supported: (mac|ips), e.g. ''{"mac": true}''' + description: |- + Capabilities to be configured for this network. + Capabilities supported: (mac|ips), e.g. '{"mac": true}' type: string ipam: description: IPAM configuration to be used for this network. @@ -50,15 +55,15 @@ spec: - disable type: string logFile: - description: LogFile sets the log file of the SRIOV CNI plugin logs. - If unset (default), this will log to stderr and thus to multus and - container runtime logs. + description: |- + LogFile sets the log file of the SRIOV CNI plugin logs. If unset (default), this will log to stderr and thus + to multus and container runtime logs. 
type: string logLevel: default: info - description: LogLevel sets the log level of the SRIOV CNI plugin - - either of panic, error, warning, info, debug. Defaults to info if - left blank. + description: |- + LogLevel sets the log level of the SRIOV CNI plugin - either of panic, error, warning, info, debug. Defaults + to info if left blank. enum: - panic - error @@ -73,8 +78,9 @@ spec: minimum: 0 type: integer metaPlugins: - description: MetaPluginsConfig configuration to be used in order to - chain metaplugins to the sriov interface returned by the operator. + description: |- + MetaPluginsConfig configuration to be used in order to chain metaplugins to the sriov interface returned + by the operator. type: string minTxRate: description: Minimum tx rate, in Mbps, for the VF. Defaults to 0 (no diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml index 74b7752abc..5d944910d2 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovoperatorconfigs.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -22,14 +21,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -42,9 +46,9 @@ spec: description: NodeSelector selects the nodes to be configured type: object configurationMode: - description: 'Flag to enable the sriov-network-config-daemon to use - a systemd service to configure SR-IOV devices on boot Default mode: - daemon' + description: |- + Flag to enable the sriov-network-config-daemon to use a systemd service to configure SR-IOV devices on boot + Default mode: daemon enum: - daemon - systemd diff --git a/controllers/drain_controller.go b/controllers/drain_controller.go index 2869e9a515..09e02ac9e4 100644 --- a/controllers/drain_controller.go +++ b/controllers/drain_controller.go @@ -24,11 +24,8 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" @@ -48,13 +45,6 @@ import ( 
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var ( - oneNode = intstr.FromInt32(1) - defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ - MaxUnavailable: &oneNode, - NodeSelector: &metav1.LabelSelector{}}} -) - type DrainReconcile struct { client.Client Scheme *runtime.Scheme @@ -345,94 +335,8 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) ( } func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { - logger := log.FromContext(ctx) - logger.Info("findNodePoolConfig():") - // get all the sriov network pool configs - npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} - err := dr.List(ctx, npcl) - if err != nil { - logger.Error(err, "failed to list sriovNetworkPoolConfig") - return nil, nil, err - } - - selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} - nodesInPools := map[string]interface{}{} - - for _, npc := range npcl.Items { - // we skip hw offload objects - if npc.Spec.OvsHardwareOffloadConfig.Name != "" { - continue - } - - if npc.Spec.NodeSelector == nil { - npc.Spec.NodeSelector = &metav1.LabelSelector{} - } - - selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - if selector.Matches(labels.Set(node.Labels)) { - selectedNpcl = append(selectedNpcl, npc.DeepCopy()) - } - - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", - "machineConfigPoolName", npc, - "nodeSelector", npc.Spec.NodeSelector) - return nil, nil, err - } - - for _, nodeName := range nodeList.Items { - nodesInPools[nodeName.Name] 
= nil - } - } - - if len(selectedNpcl) > 1 { - // don't allow the node to be part of multiple pools - err = fmt.Errorf("node is part of more then one pool") - logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) - return nil, nil, err - } else if len(selectedNpcl) == 1 { - // found one pool for our node - logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) - selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) - if err != nil { - logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) - return nil, nil, err - } - - // list all the nodes that are also part of this pool and return them - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) - if err != nil { - logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) - return nil, nil, err - } - - return selectedNpcl[0], nodeList.Items, nil - } else { - // in this case we get all the nodes and remove the ones that already part of any pool - logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl) - nodeList := &corev1.NodeList{} - err = dr.List(ctx, nodeList) - if err != nil { - logger.Error(err, "failed to list all the nodes") - return nil, nil, err - } + return utils.FindNodePoolConfig(ctx, node, dr.Client) - defaultNodeLists := []corev1.Node{} - for _, nodeObj := range nodeList.Items { - if _, exist := nodesInPools[nodeObj.Name]; !exist { - defaultNodeLists = append(defaultNodeLists, nodeObj) - } - } - return defaultNpcl, defaultNodeLists, nil - } } // SetupWithManager sets up the controller with the Manager. 
diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml index d619d53628..4b4b44d92f 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovibnetworks.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovibnetworks.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -21,14 +20,19 @@ spec: description: SriovIBNetwork is the Schema for the sriovibnetworks API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. 
+ In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -36,8 +40,9 @@ spec: description: SriovIBNetworkSpec defines the desired state of SriovIBNetwork properties: capabilities: - description: 'Capabilities to be configured for this network. Capabilities - supported: (infinibandGUID), e.g. ''{"infinibandGUID": true}''' + description: |- + Capabilities to be configured for this network. + Capabilities supported: (infinibandGUID), e.g. '{"infinibandGUID": true}' type: string ipam: description: IPAM configuration to be used for this network. @@ -50,8 +55,9 @@ spec: - disable type: string metaPlugins: - description: MetaPluginsConfig configuration to be used in order to - chain metaplugins to the sriov interface returned by the operator. + description: |- + MetaPluginsConfig configuration to be used in order to chain metaplugins to the sriov interface returned + by the operator. type: string networkNamespace: description: Namespace of the NetworkAttachmentDefinition custom resource diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index d5ae89ccab..9fc6c3026e 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworknodepolicies.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -22,14 +21,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this 
representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index 9686248215..5c9df83df9 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworknodestates.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -35,14 +34,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml index b819999761..3d8a6a1052 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworkpoolconfigs.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -22,14 +21,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -40,13 +44,15 @@ spec: anyOf: - type: integer - type: string - description: "maxUnavailable defines either an integer number or percentage - of nodes in the pool that can go Unavailable during an update. \n - A value larger than 1 will mean multiple nodes going unavailable - during the update, which may affect your workload stress on the - remaining nodes. Drain will respect Pod Disruption Budgets (PDBs) - such as etcd quorum guards, even if maxUnavailable is greater than - one." + description: |- + maxUnavailable defines either an integer number or percentage + of nodes in the pool that can go Unavailable during an update. + + + A value larger than 1 will mean multiple nodes going unavailable during + the update, which may affect your workload stress on the remaining nodes. + Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards, + even if maxUnavailable is greater than one. x-kubernetes-int-or-string: true nodeSelector: description: nodeSelector specifies a label selector for Nodes @@ -55,24 +61,24 @@ spec: description: matchExpressions is a list of label selector requirements. The requirements are ANDed. 
items: - description: A label selector requirement is a selector that - contains values, a key, and an operator that relates the key - and values. + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. properties: key: description: key is the label key that the selector applies to. type: string operator: - description: operator represents a key's relationship to - a set of values. Valid operators are In, NotIn, Exists - and DoesNotExist. + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. type: string values: - description: values is an array of string values. If the - operator is In or NotIn, the values array must be non-empty. - If the operator is Exists or DoesNotExist, the values - array must be empty. This array is replaced during a strategic + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic merge patch. items: type: string @@ -85,24 +91,32 @@ spec: matchLabels: additionalProperties: type: string - description: matchLabels is a map of {key,value} pairs. A single - {key,value} in the matchLabels map is equivalent to an element - of matchExpressions, whose key field is "key", the operator - is "In", and the values array contains only "value". The requirements - are ANDed. + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
type: object type: object + x-kubernetes-map-type: atomic ovsHardwareOffloadConfig: description: OvsHardwareOffloadConfig describes the OVS HWOL configuration for selected Nodes properties: name: - description: 'Name is mandatory and must be unique. On Kubernetes: - Name is the name of OvsHardwareOffloadConfig On OpenShift: Name - is the name of MachineConfigPool to be enabled with OVS hardware - offload' + description: |- + Name is mandatory and must be unique. + On Kubernetes: + Name is the name of OvsHardwareOffloadConfig + On OpenShift: + Name is the name of MachineConfigPool to be enabled with OVS hardware offload type: string type: object + rdmaMode: + description: RDMA subsystem. Allowed value "shared", "exclusive". + enum: + - shared + - exclusive + type: string type: object status: description: SriovNetworkPoolConfigStatus defines the observed state of diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworks.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworks.yaml index 15e1bfd3f8..e33b9a3ed5 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworks.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworks.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovnetworks.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -21,14 +20,19 @@ spec: description: SriovNetwork is the Schema for the sriovnetworks API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -36,8 +40,9 @@ spec: description: SriovNetworkSpec defines the desired state of SriovNetwork properties: capabilities: - description: 'Capabilities to be configured for this network. Capabilities - supported: (mac|ips), e.g. ''{"mac": true}''' + description: |- + Capabilities to be configured for this network. + Capabilities supported: (mac|ips), e.g. '{"mac": true}' type: string ipam: description: IPAM configuration to be used for this network. @@ -50,15 +55,15 @@ spec: - disable type: string logFile: - description: LogFile sets the log file of the SRIOV CNI plugin logs. - If unset (default), this will log to stderr and thus to multus and - container runtime logs. + description: |- + LogFile sets the log file of the SRIOV CNI plugin logs. If unset (default), this will log to stderr and thus + to multus and container runtime logs. 
type: string logLevel: default: info - description: LogLevel sets the log level of the SRIOV CNI plugin - - either of panic, error, warning, info, debug. Defaults to info if - left blank. + description: |- + LogLevel sets the log level of the SRIOV CNI plugin - either of panic, error, warning, info, debug. Defaults + to info if left blank. enum: - panic - error @@ -73,8 +78,9 @@ spec: minimum: 0 type: integer metaPlugins: - description: MetaPluginsConfig configuration to be used in order to - chain metaplugins to the sriov interface returned by the operator. + description: |- + MetaPluginsConfig configuration to be used in order to chain metaplugins to the sriov interface returned + by the operator. type: string minTxRate: description: Minimum tx rate, in Mbps, for the VF. Defaults to 0 (no diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml index 74b7752abc..5d944910d2 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.9.0 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.14.0 name: sriovoperatorconfigs.sriovnetwork.openshift.io spec: group: sriovnetwork.openshift.io @@ -22,14 +21,19 @@ spec: API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object @@ -42,9 +46,9 @@ spec: description: NodeSelector selects the nodes to be configured type: object configurationMode: - description: 'Flag to enable the sriov-network-config-daemon to use - a systemd service to configure SR-IOV devices on boot Default mode: - daemon' + description: |- + Flag to enable the sriov-network-config-daemon to use a systemd service to configure SR-IOV devices on boot + Default mode: daemon enum: - daemon - systemd diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 86af30beff..caefee3531 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -47,6 +47,9 @@ const ( VdpaTypeVirtio = "virtio" VdpaTypeVhost = "vhost" + RdmaSubsystemModeShared = "shared" + RdmaSubsystemModeExclusive = "exclusive" + ClusterTypeOpenshift = "openshift" ClusterTypeKubernetes = "kubernetes" diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go 
index fce5cf57e8..f6e4def11a 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "fmt" + corev1 "k8s.io/api/core/v1" "math/rand" "os" "os/exec" @@ -453,7 +454,19 @@ func (dn *Daemon) nodeStateSyncHandler() error { // When using systemd configuration we write the file if vars.UsingSystemdMode { log.Log.V(0).Info("nodeStateSyncHandler(): writing systemd config file to host") - systemdConfModified, err := systemd.WriteConfFile(dn.desiredNodeState) + // get node object + node := &corev1.Node{} + err := dn.client.Get(context.TODO(), client.ObjectKey{Name: vars.NodeName}, node) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to get node object") + return err + } + netPoolConfig, _, err := utils.FindNodePoolConfig(context.Background(), node, dn.client) + if err != nil { + log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node") + } + + systemdConfModified, err := systemd.WriteConfFile(dn.desiredNodeState, netPoolConfig) if err != nil { log.Log.Error(err, "nodeStateSyncHandler(): failed to write configuration file for systemd mode") return err diff --git a/pkg/host/internal/kernel/kernel.go b/pkg/host/internal/kernel/kernel.go index 958f2590a7..f6df31f43f 100644 --- a/pkg/host/internal/kernel/kernel.go +++ b/pkg/host/internal/kernel/kernel.go @@ -522,6 +522,40 @@ func (k *kernel) InstallRDMA(packageManager string) error { return nil } +// GetRDMASubsystem runs "rdma system show" and returns the second whitespace-separated field of its output. +func (k *kernel) GetRDMASubsystem() (string, error) { + log.Log.Info("GetRDMASubsystem(): retrieving RDMA subsystem mode") + chrootDefinition := utils.GetChrootExtension() + + stdout, stderr, err := k.utilsHelper.RunCommand("/bin/sh", "-c", fmt.Sprintf("%s /usr/bin/rdma system show", chrootDefinition)) + if err != nil && len(stderr) != 0 { + log.Log.Error(err, "GetRDMASubsystem(): failed to get RDMA subsystem mode", "stdout", stdout, "stderr", stderr) + return "", err + } + + // Example of an output: netns shared
copy-on-fork on + // the mode is the second field; guard the index so empty or short output returns an error instead of panicking + fields := strings.Fields(stdout) + if len(fields) < 2 { + return "", fmt.Errorf("GetRDMASubsystem(): unexpected output of rdma system show: %q", stdout) + } + return fields[1], nil +} + +// SetRDMASubsystem runs "rdma system set net <mode>" with the given mode. +func (k *kernel) SetRDMASubsystem(mode string) error { + log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode") + chrootDefinition := utils.GetChrootExtension() + + stdout, stderr, err := k.utilsHelper.RunCommand("/bin/sh", "-c", fmt.Sprintf("%s /usr/bin/rdma system set net %s", chrootDefinition, mode)) + if err != nil && len(stderr) != 0 { + log.Log.Error(err, "SetRDMASubsystem(): failed to update RDMA subsystem mode", "stdout", stdout, "stderr", stderr) + return err + } + + return nil +} + func (k *kernel) TriggerUdevEvent() error { log.Log.Info("TriggerUdevEvent(): installing RDMA") diff --git a/pkg/host/types/interfaces.go b/pkg/host/types/interfaces.go index 248d6d71eb..1fbdc161e9 100644 --- a/pkg/host/types/interfaces.go +++ b/pkg/host/types/interfaces.go @@ -70,6 +70,10 @@ type KernelInterface interface { EnableRDMA(conditionFilePath, serviceName, packageManager string) (bool, error) // InstallRDMA install RDMA packages on the system InstallRDMA(packageManager string) error + // GetRDMASubsystem returns RDMA subsystem mode + GetRDMASubsystem() (string, error) + // SetRDMASubsystem changes RDMA subsystem mode + SetRDMASubsystem(mode string) error // EnableRDMAOnRHELMachine enable RDMA on a RHEL base system EnableRDMAOnRHELMachine() (bool, error) // GetOSPrettyName returns OS name diff --git a/pkg/systemd/systemd.go b/pkg/systemd/systemd.go index 2b4d6bd08f..0df9b82aa8 100644 --- a/pkg/systemd/systemd.go +++ b/pkg/systemd/systemd.go @@ -46,6 +46,7 @@ type SriovConfig struct { Spec sriovnetworkv1.SriovNetworkNodeStateSpec `yaml:"spec"` UnsupportedNics bool `yaml:"unsupportedNics"` PlatformType consts.PlatformTypes `yaml:"platformType"` + RdmaMode string `yaml:"rdmaMode"` } type SriovResult struct { @@ -64,15 +65,21 @@ func ReadConfFile() (spec *SriovConfig, err error) { return spec, err } -func WriteConfFile(newState *sriovnetworkv1.SriovNetworkNodeState) (bool,
error) { +func WriteConfFile(newState *sriovnetworkv1.SriovNetworkNodeState, netPoolConfig *sriovnetworkv1.SriovNetworkPoolConfig) (bool, error) { newFile := false // remove the device plugin revision as we don't need it here newState.Spec.DpConfigVersion = "" + // shared mode is the OS default; keep it unless the pool config sets rdmaMode explicitly (the field is optional and may be empty) + rdmaMode := consts.RdmaSubsystemModeShared + if netPoolConfig != nil && netPoolConfig.Spec.RdmaMode != "" { + rdmaMode = netPoolConfig.Spec.RdmaMode + } sriovConfig := &SriovConfig{ newState.Spec, vars.DevMode, vars.PlatformType, + rdmaMode, } _, err := os.Stat(utils.GetHostExtensionPath(SriovSystemdConfigPath)) diff --git a/pkg/utils/cluster.go b/pkg/utils/cluster.go index 6f8d72e079..0dba72de9b 100644 --- a/pkg/utils/cluster.go +++ b/pkg/utils/cluster.go @@ -3,6 +3,9 @@ package utils import ( "context" "fmt" + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/intstr" "os" "sigs.k8s.io/controller-runtime/pkg/log" @@ -26,6 +29,13 @@ const ( controlPlaneNodeLabelKey = "node-role.kubernetes.io/control-plane" ) +var ( + oneNode = intstr.FromInt32(1) + DefaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{ + MaxUnavailable: &oneNode, + NodeSelector: &metav1.LabelSelector{}}} +) + func getNodeRole(node corev1.Node) string { for k := range node.Labels { if k == workerNodeLabelKey { @@ -161,3 +171,94 @@ func AnnotateNode(ctx context.Context, nodeName string, key, value string, c cli return AnnotateObject(ctx, node, key, value, c) } + +func FindNodePoolConfig(ctx context.Context, node *corev1.Node, c client.Client) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) { + logger := log.FromContext(ctx) + logger.Info("findNodePoolConfig():") + // get all the sriov network pool configs + npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{} + err := c.List(ctx, npcl) + if err != nil { + logger.Error(err, "failed to list sriovNetworkPoolConfig") + return nil, nil, err +
} + + selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{} + nodesInPools := map[string]interface{}{} + + for _, npc := range npcl.Items { + // we skip hw offload objects + if npc.Spec.OvsHardwareOffloadConfig.Name != "" { + continue + } + + if npc.Spec.NodeSelector == nil { + npc.Spec.NodeSelector = &metav1.LabelSelector{} + } + + selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + if selector.Matches(labels.Set(node.Labels)) { + selectedNpcl = append(selectedNpcl, npc.DeepCopy()) + } + + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) + if err != nil { + logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector", + "machineConfigPoolName", npc, + "nodeSelector", npc.Spec.NodeSelector) + return nil, nil, err + } + + for _, nodeName := range nodeList.Items { + nodesInPools[nodeName.Name] = nil + } + } + + if len(selectedNpcl) > 1 { + // don't allow the node to be part of multiple pools + err = fmt.Errorf("node is part of more then one pool") + logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl) + return nil, nil, err + } else if len(selectedNpcl) == 1 { + // found one pool for our node + logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0]) + selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector) + if err != nil { + logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector) + return nil, nil, err + } + + // list all the nodes that are also part of this pool and return them + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector}) + if err != nil { + 
logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector) + return nil, nil, err + } + + return selectedNpcl[0], nodeList.Items, nil + } else { + // in this case we get all the nodes and remove the ones that already part of any pool + logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *DefaultNpcl) + nodeList := &corev1.NodeList{} + err = c.List(ctx, nodeList) + if err != nil { + logger.Error(err, "failed to list all the nodes") + return nil, nil, err + } + + defaultNodeLists := []corev1.Node{} + for _, nodeObj := range nodeList.Items { + if _, exist := nodesInPools[nodeObj.Name]; !exist { + defaultNodeLists = append(defaultNodeLists, nodeObj) + } + } + return nil, defaultNodeLists, nil + } +}