From d520ad7a19263d709993cd75b3efcd962f7c021d Mon Sep 17 00:00:00 2001 From: rohan2794 Date: Tue, 29 Oct 2024 22:55:36 +0530 Subject: [PATCH] feat(rdma): disable and enable dev link port on node via e2e agent Signed-off-by: rohan2794 --- common/e2e_agent/client.go | 74 +++++++++++++++++++++++ common/k8stest/util_rdma.go | 107 +++++++++++++++++++++++++-------- tools/e2e-agent/build.sh | 2 +- tools/e2e-agent/e2e-agent.yaml | 2 +- tools/e2e-agent/rdma.go | 73 ++++++++++++++++++++++ tools/e2e-agent/server.go | 7 +++ 6 files changed, 238 insertions(+), 27 deletions(-) diff --git a/common/e2e_agent/client.go b/common/e2e_agent/client.go index 2c27242..53a2379 100644 --- a/common/e2e_agent/client.go +++ b/common/e2e_agent/client.go @@ -109,6 +109,10 @@ type NetworkInterface struct { NetworkInterface string `json:"networkInterface"` } +type DevLinkPort struct { + DevLinkPort string `json:"devLinkPort"` +} + func sendRequest(reqType, url string, data interface{}) error { _, err := sendRequestGetResponse(reqType, url, data, true) return err @@ -1271,3 +1275,73 @@ func DisableNetworkInterface(serverAddr string, interfaceName string) (string, e logf.Log.Info("DeleteRdmaDevice succeeded", "output", out) return out, err } + +// ListDevLink asks the e2e agent at serverAddr (POST /listdevlink) for its dev link listing and returns the unwrapped output; the error is non-nil when the request, the unwrap, or the agent-side command fails. +func ListDevLink(serverAddr string) (string, error) { + logf.Log.Info("Executing ListDevLink", "addr", serverAddr) + url := "http://" + getAgentAddress(serverAddr) + "/listdevlink" + encodedresult, err := sendRequestGetResponse("POST", url, nil, false) + if err != nil { + logf.Log.Info("sendRequestGetResponse", "encodedresult", encodedresult, "error", err.Error()) + return encodedresult, err + } + + out, e2eagenterrcode, err := UnwrapResult(encodedresult) + if err != nil { + logf.Log.Info("unwrap failed", "encodedresult", encodedresult, "error", err.Error()) + return encodedresult, err + } + if e2eagenterrcode != ErrNone { + return out, fmt.Errorf("failed to list available dev link , errcode %d", e2eagenterrcode) + } + 
logf.Log.Info("ListDevLink succeeded", "output", out) + return out, err +} + +// EnableDevLink sends devLinkPortName to the e2e agent at serverAddr (POST /enabledevlink) and returns the unwrapped output; the error is non-nil when the request, the unwrap, or the agent-side command fails. +func EnableDevLink(serverAddr string, devLinkPortName string) (string, error) { + data := DevLinkPort{ + DevLinkPort: devLinkPortName, + } + logf.Log.Info("Executing EnableDevLink", "addr", serverAddr, "data", data) + url := "http://" + getAgentAddress(serverAddr) + "/enabledevlink" + encodedresult, err := sendRequestGetResponse("POST", url, data, false) + if err != nil { + logf.Log.Info("sendRequestGetResponse", "encodedresult", encodedresult, "error", err.Error()) + return encodedresult, err + } + out, e2eagenterrcode, err := UnwrapResult(encodedresult) + if err != nil { + logf.Log.Info("unwrap failed", "encodedresult", encodedresult, "error", err.Error()) + return encodedresult, err + } + if e2eagenterrcode != ErrNone { + return out, fmt.Errorf("failed to enable dev link, errcode %d", e2eagenterrcode) + } + logf.Log.Info("EnableDevLink succeeded", "output", out) + return out, err +} + +// DisableDevLink sends devLinkPortName to the e2e agent at serverAddr (POST /disabledevlink) and returns the unwrapped output; the error is non-nil when the request, the unwrap, or the agent-side command fails. +func DisableDevLink(serverAddr string, devLinkPortName string) (string, error) { + data := DevLinkPort{ + DevLinkPort: devLinkPortName, + } + logf.Log.Info("Executing DisableDevLink", "addr", serverAddr, "data", data) + url := "http://" + getAgentAddress(serverAddr) + "/disabledevlink" + encodedresult, err := sendRequestGetResponse("POST", url, data, false) + if err != nil { + logf.Log.Info("sendRequestGetResponse", "encodedresult", encodedresult, "error", err.Error()) + return encodedresult, err + } + out, e2eagenterrcode, err := UnwrapResult(encodedresult) + if err != nil { + logf.Log.Info("unwrap failed", "encodedresult", encodedresult, "error", err.Error()) + return encodedresult, err + } + if e2eagenterrcode != ErrNone { + return out, fmt.Errorf("failed to disable dev link, errcode %d", e2eagenterrcode) + } + logf.Log.Info("DisableDevLink succeeded", "output", out) + return out, err +} diff --git a/common/k8stest/util_rdma.go 
b/common/k8stest/util_rdma.go index 6b6f06b..5983a72 100644 --- a/common/k8stest/util_rdma.go +++ b/common/k8stest/util_rdma.go @@ -24,6 +24,18 @@ type RdmaDeviceNetworkInterface struct { NetDevIndex int `json:"netdev_index"` } +type PortInfo struct { + PCI string `json:"pci"` + Type string `json:"type"` + Netdev string `json:"netdev"` + Flavour string `json:"flavour"` + Port int `json:"port"` +} + +type PortMap struct { + Port map[string]PortInfo `json:"port"` +} + func ListRdmaDevice(node string) ([]RdmaDeviceNetworkInterface, error) { var rdmaDeiceList []RdmaDeviceNetworkInterface nodeIp, err := GetNodeIPAddress(node) @@ -170,29 +182,33 @@ func RemoveRdmaDeviceOnNode(node string) error { func DisableRdmaOnNode(node string) error { logf.Log.Info("Disable rdma from IO engine node", "name", node) - //disable rdma on the io-engine node - //FIXME: figure out whether it's hardware RDMA device or software rdma device and disable RDMA device appropriately - platformName := e2e_config.GetConfig().Platform.Name - if platformName == "Maas" { + + iface := e2e_config.GetConfig().NetworkInterface + // resolve the dev link name for iface; an empty name means no dev link port reports iface as its netdev, and the else branch below removes the rdma device instead + rdmaDevName, err := GetDevLinkName(node, iface) + if err != nil { + return err + } + + logf.Log.Info("rdma dev", "Name", rdmaDevName, "node", node) + + if rdmaDevName != "" { nodeIp, err := GetNodeIPAddress(node) if err != nil { return fmt.Errorf("failed to get node %s ip, error: %v", node, err) } - // get interface name - iface := e2e_config.GetConfig().NetworkInterface - out, err := e2e_agent.DisableNetworkInterface(*nodeIp, iface) + + out, err := e2e_agent.DisableDevLink(*nodeIp, rdmaDevName) if err != nil { - logf.Log.Info("failed to disable network interface", "platform", platformName, "node", node, "iface", iface, "output", out) + logf.Log.Info("failed to disable rdma dev link", "node", node, "dev link", rdmaDevName, "output", out) return err } - } else if platformName == "Hetzner" { + } else { err := RemoveRdmaDeviceOnNode(node) if err 
!= nil { - logf.Log.Info("failed to remove rdma device", "platform", platformName, "node", node, "device", RdmaDeviceName) + logf.Log.Info("failed to remove rdma device", "node", node, "device", RdmaDeviceName) return err } - } else { - return fmt.Errorf("unsupported platform") } return nil @@ -231,31 +247,72 @@ func EnableRdmaDeviceOnAllWorkerNodes() error { } func EnableRdmaOnNode(node string) error { - logf.Log.Info("Enable rdma from IO engine node", "name", node) - //enable rdma on the io-engine node - platformName := e2e_config.GetConfig().Platform.Name - //FIXME: figure out whether it's hardware RDMA device or software rdma device and disable RDMA device appropriately - if platformName == "Maas" { + logf.Log.Info("Enable rdma on IO engine node", "name", node) + // network interface name taken from the e2e configuration + iface := e2e_config.GetConfig().NetworkInterface + // resolve the dev link name for iface; an empty name means no dev link port reports iface as its netdev, and the else branch below creates the rdma device instead + rdmaDevPortName, err := GetDevLinkName(node, iface) + if err != nil { + return err + } + + if rdmaDevPortName != "" { nodeIp, err := GetNodeIPAddress(node) if err != nil { return fmt.Errorf("failed to get node %s ip, error: %v", node, err) } - // get interface name - iface := e2e_config.GetConfig().NetworkInterface - out, err := e2e_agent.EnableNetworkInterface(*nodeIp, iface) + + out, err := e2e_agent.EnableDevLink(*nodeIp, rdmaDevPortName) if err != nil { - logf.Log.Info("failed to enable network interface", "platform", platformName, "node", node, "iface", iface, "output", out) + logf.Log.Info("failed to enable rdma dev link", "node", node, "dev link", rdmaDevPortName, "output", out) return err } - } else if platformName == "Hetzner" { + } else { err := CreateRdmaDeviceOnNode(node) if err != nil { - logf.Log.Info("failed to create rdma device", "platform", platformName, "node", node, "device", RdmaDeviceName) + logf.Log.Info("failed to create rdma device", "node", node, "device", RdmaDeviceName) return err } - } else { - return fmt.Errorf("unsupported platform") } return nil } + +func 
ListDevLink(node string) (PortMap, error) { + var devLink PortMap + nodeIp, err := GetNodeIPAddress(node) + if err != nil { + return devLink, fmt.Errorf("failed to get node %s ip, error: %v", node, err) + } + + devLinkOut, err := agent.ListDevLink(*nodeIp) + if err != nil { + return devLink, fmt.Errorf("failed to list dev link on node %s , error: %v", node, err) + } + if devLinkOut == "" { + logf.Log.Info("Dev link list failed with empty string", "output", devLinkOut) + return devLink, fmt.Errorf("failed to list dev link on node %s", node) + } + output := trimForJson(devLinkOut) + if err = json.Unmarshal([]byte(output), &devLink); err != nil { + logf.Log.Info("Failed to unmarshal dev link list", "output", output) + return devLink, fmt.Errorf("failed to unmarshal dev link list on node %s , output: %s,error: %v", node, output, err) + } + logf.Log.Info("Dev link", "node", node, "list", devLink) + return devLink, nil +} + +func GetDevLinkName(node, iface string) (string, error) { + devLinkList, err := ListDevLink(node) + if err != nil { + return "", err + } + for key, val := range devLinkList.Port { + if val.Netdev == iface { + // a dev link port key looks like pci/0000:3b:00.0/65535; dropping the trailing port gives the dev link pci/0000:3b:00.0 + // NOTE(review): strings.LastIndex returns a negative index for a key without "/", which would make this slice expression panic; confirm every key carries the port suffix + return key[:strings.LastIndex(key, "/")], nil + } + } + return "", nil +} diff --git a/tools/e2e-agent/build.sh b/tools/e2e-agent/build.sh index 1e22568..c76ec5f 100755 --- a/tools/e2e-agent/build.sh +++ b/tools/e2e-agent/build.sh @@ -9,7 +9,7 @@ # as long as we do not make breaking changes. 
set -e IMAGE="openebs/e2e-agent" -TAG="v3.0.6" +TAG="v3.0.7" registry="" tag_as_latest="" diff --git a/tools/e2e-agent/e2e-agent.yaml b/tools/e2e-agent/e2e-agent.yaml index 7519839..e07000c 100644 --- a/tools/e2e-agent/e2e-agent.yaml +++ b/tools/e2e-agent/e2e-agent.yaml @@ -68,7 +68,7 @@ spec: securityContext: privileged: true allowPrivilegeEscalation: true - image: openebs/e2e-agent:v3.0.6 + image: openebs/e2e-agent:v3.0.7 imagePullPolicy: Always volumeMounts: - name: host-root diff --git a/tools/e2e-agent/rdma.go b/tools/e2e-agent/rdma.go index 7b5ab5f..dba7a84 100644 --- a/tools/e2e-agent/rdma.go +++ b/tools/e2e-agent/rdma.go @@ -94,3 +94,76 @@ func DeleteRdmaDevice(w http.ResponseWriter, r *http.Request) { } WrapResult(string(output), ErrNone, w) } + +// ListDevLink is the HTTP handler that runs "devlink port show -j" through bashLocal and wraps its output, or an ErrExecFailed message when the command fails, into the response. +func ListDevLink(w http.ResponseWriter, r *http.Request) { + var msg string + klog.Info("List available dev link") + + devLinkPortCommand := "devlink port show -j" + output, err := bashLocal(devLinkPortCommand) + if err != nil { + msg = fmt.Sprintf("cannot list dev links. 
Error %s", err.Error()) + klog.Error(msg) + WrapResult(msg, ErrExecFailed, w) + return + } + WrapResult(string(output), ErrNone, w) +} + +// EnableDevLink is the HTTP handler that sets enable_rdma to true on the dev link named in the request body and then reloads it; NOTE(review): the name is interpolated unescaped into the command given to bashLocal, confirm callers are trusted. +func EnableDevLink(w http.ResponseWriter, r *http.Request) { + setDevLinkState(w, r, true) +} + +// DisableDevLink is the HTTP handler that sets enable_rdma to false on the dev link named in the request body and then reloads it; NOTE(review): the name is interpolated unescaped into the command given to bashLocal, confirm callers are trusted. +func DisableDevLink(w http.ResponseWriter, r *http.Request) { + setDevLinkState(w, r, false) +} + +func setDevLinkState(w http.ResponseWriter, r *http.Request, enable bool) { + var msg string + var devLink DevLinkPort + d := json.NewDecoder(r.Body) + if err := d.Decode(&devLink); err != nil { + msg = fmt.Sprintf("failed to read JSON encoded data, Error: %s", err.Error()) + klog.Error(msg) + WrapResult(msg, ErrJsonDecode, w) + return + } + if devLink.DevLinkPort == "" { + msg = "no dev link name passed" + klog.Error(msg) + WrapResult(msg, UnprocessableEntityErrorCode, w) + return + } + klog.Infof("set dev link, data: %v, enable_rdma value: %v", devLink, enable) + + devLinkDownCommand := fmt.Sprintf("devlink dev param set %s name enable_rdma value %v cmode driverinit", devLink.DevLinkPort, enable) + output, err := bashLocal(devLinkDownCommand) + if err != nil { + msg = fmt.Sprintf("cannot set %s dev link enable_rdma value to %v, Error %s", devLink.DevLinkPort, enable, err.Error()) + klog.Error(msg) + WrapResult(msg, ErrExecFailed, w) + return + } + // the parameter was set with cmode driverinit, so reload the dev link for the new value to be applied + err = devLinkDriverReload(devLink.DevLinkPort) + if err != nil { + msg = fmt.Sprintf("cannot reload dev link %s, Error %s", devLink.DevLinkPort, err.Error()) + klog.Error(msg) + WrapResult(msg, ErrExecFailed, w) + return + } + WrapResult(string(output), ErrNone, w) +} + +func devLinkDriverReload(devLink string) error { + klog.Infof("reload dev link, link: %v", devLink) + reloadCommand := fmt.Sprintf("devlink dev reload %s", devLink) + output, err := bashLocal(reloadCommand) + if err != nil { + return fmt.Errorf("failed to reload dev link %s, output: %s, error: %v", devLink, output, err) + } + 
return nil +} diff --git a/tools/e2e-agent/server.go b/tools/e2e-agent/server.go index 04d259c..3021417 100644 --- a/tools/e2e-agent/server.go +++ b/tools/e2e-agent/server.go @@ -70,6 +70,10 @@ type NetworkInterface struct { NetworkInterface string `json:"networkInterface"` } +type DevLinkPort struct { + DevLinkPort string `json:"devLinkPort"` +} + func homePage(w http.ResponseWriter, r *http.Request) { fmt.Fprint(w, "Welcome home!\n") } @@ -186,6 +190,9 @@ func handleRequests() { router.HandleFunc("/deleterdmadevice", DeleteRdmaDevice).Methods("POST") router.HandleFunc("/enablenetworkinterface", EnableNetworkInterface).Methods("POST") router.HandleFunc("/disablenetworkinterface", DisableNetworkInterface).Methods("POST") + router.HandleFunc("/enabledevlink", EnableDevLink).Methods("POST") + router.HandleFunc("/disabledevlink", DisableDevLink).Methods("POST") + router.HandleFunc("/listdevlink", ListDevLink).Methods("POST") log.Fatal(http.ListenAndServe(podIP+":"+restPort, router)) }