diff --git a/.reuse/dep5 b/.reuse/dep5 index 745600e60..5e251ba97 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -14,6 +14,7 @@ Files: Dockerfile config/* hack/* + test/benchmark_test/config_templates/* *.json *meson.build proto/dpdk.proto diff --git a/docs/deployment/help_dpservice-bin.md b/docs/deployment/help_dpservice-bin.md index bceb7a451..b28e0ade0 100644 --- a/docs/deployment/help_dpservice-bin.md +++ b/docs/deployment/help_dpservice-bin.md @@ -7,6 +7,7 @@ | --pf0 | IFNAME | first physical interface (e.g. eth0) | | | --pf1 | IFNAME | second physical interface (e.g. eth1) | | | --pf1-proxy | IFNAME | VF representor to use as a proxy for pf1 packets | | +| --pf1-proxy-vf | IFNAME | VF interface of the pf1-proxy VF representor | | | --ipv6 | ADDR6 | IPv6 underlay address | | | --vf-pattern | PATTERN | virtual interface name pattern (e.g. 'eth1vf') | | | --dhcp-mtu | SIZE | set the mtu field in DHCP responses (68 - 1500) | | diff --git a/docs/deployment/mellanox.md b/docs/deployment/mellanox.md index c7214cf7a..2c6103360 100644 --- a/docs/deployment/mellanox.md +++ b/docs/deployment/mellanox.md @@ -46,3 +46,6 @@ In some cases (looks like a nic/switch combination) performance is severly affec ## Dp-service setup Either `prepare.sh` script or `preparedp.service` systemd unit needs to be run before dp-service can work properly. This should already be done automatically if using the Docker image provided. Make sure this does not produce any errors. + +### Multiport-eswitch +The `prepare.sh` script supports the `--multiport-eswitch` argument to set the card up in multiport-eswitch mode. The additional `--pf1-proxy` argument also creates a VF on PF1 for proxying PF1 traffic. Currently both arguments are needed to properly run dpservice in multiport-eswitch mode due to a (suspected) driver bug. 
diff --git a/docs/development/running.md b/docs/development/running.md index 8f938e86d..69b976b28 100644 --- a/docs/development/running.md +++ b/docs/development/running.md @@ -74,3 +74,16 @@ Without the help of scripts or config files, you can run the service directly (a `--vf_pattern` defines the prefix used by the virtual functions created by the smartnic and which need to be controlled by dp-service. These interfaces are then to be used by running VMs. `--ipv6` sets the underlay IPv6 address which should be used by dp-service for ingress/egress packets coming to/leaving the smartnic. + +#### Multiport-eswitch +In this mode, only the PCI device of PF0 (which is bonded with PF1) needs to be specified via `-a`: +```bash +./dpservice-bin -a 0000:03:00.0,class=rxq_cqe_comp_en=0,rx_vec_en=1,dv_flow_en=2,dv_esw_en=1,fdb_def_rule_en=1,representor=pf[0-1]vf[0-5] -l 0,1 -- --pf0=enp59s0f1 --pf1=enp59s0f1 --vf-pattern=enp59s0f0_ --ipv6=2a10:afc0:e01f:209:: --no-stats --no-offload --multiport-eswitch +``` + +#### PF1-proxy +In multiport-eswitch mode, PF1 is currently not usable (suspected driver problem), so dpservice provides a way to proxy the communication over a separate VF on PF1. +```bash +./dpservice-bin -a 0000:03:00.0,class=rxq_cqe_comp_en=0,rx_vec_en=1,dv_flow_en=2,dv_esw_en=1,fdb_def_rule_en=1,representor=pf[0-1]vf[0-5] -l 0,1 -- --pf0=enp59s0f1 --pf1=enp59s0f1 --vf-pattern=enp59s0f0_ --ipv6=2a10:afc0:e01f:209:: --no-stats --no-offload --multiport-eswitch --pf1-proxy enp59s0f1npf1vf0 --pf1-proxy-vf enp59s0f1v0 +``` +The `--pf1-proxy` argument is the VF representor used by dpservice for proxying packets. The `--pf1-proxy-vf` argument is the VF used by the Linux kernel to receive packets, i.e. the replacement for PF1. Without `--pf1-proxy-vf`, dpservice is unable to determine the MAC address to use for host-host overlay traffic. 
diff --git a/hack/dp_conf.json b/hack/dp_conf.json index d89582c63..75387ee4d 100644 --- a/hack/dp_conf.json +++ b/hack/dp_conf.json @@ -28,6 +28,15 @@ "array_size": "IF_NAMESIZE", "ifdef": "ENABLE_PF1_PROXY" }, + { + "lgopt": "pf1-proxy-vf", + "arg": "IFNAME", + "help": "VF interface of the pf1-proxy VF representor", + "var": "pf1_proxy_vf", + "type": "char", + "array_size": "IF_NAMESIZE", + "ifdef": "ENABLE_PF1_PROXY" + }, { "lgopt": "ipv6", "arg": "ADDR6", diff --git a/hack/prepare.sh b/hack/prepare.sh index 4dc64f1e4..54bf807b1 100755 --- a/hack/prepare.sh +++ b/hack/prepare.sh @@ -13,7 +13,8 @@ OPT_MULTIPORT=false OPT_PF1_PROXY=false BLUEFIELD_IDENTIFIERS=("MT_0000000543", "MT_0000000541") -NUMVFS=126 +MAX_NUMVFS_POSSIBLE=126 +NUMVFS_DESIRED=126 CONFIG="/tmp/dp_service.conf" IS_X86_WITH_BLUEFIELD=false IS_ARM_WITH_BLUEFIELD=false @@ -143,8 +144,12 @@ function create_vf() { local pf0="${devs[0]}" local pf1="${devs[1]}" + if [[ "$OPT_MULTIPORT" == "true" && "$NUMVFS_DESIRED" -eq "$MAX_NUMVFS_POSSIBLE" ]]; then + NUMVFS_DESIRED=$((NUMVFS_DESIRED - 1)) + fi + if [[ "$IS_ARM_WITH_BLUEFIELD" == "true" ]]; then - actualvfs=$NUMVFS + actualvfs=$NUMVFS_DESIRED log "Skipping VF creation for BlueField card on ARM" # enable switchdev mode, this operation takes most time process_switchdev_mode "$pf0" @@ -185,7 +190,7 @@ function create_vf() { # calculating amount of VFs to create, 126 if more are available, or maximum available totalvfs=$(cat /sys/bus/pci/devices/$pf0/sriov_totalvfs) - actualvfs=$((NUMVFS /sys/bus/pci/devices/$pf0/sriov_numvfs if [[ "$OPT_PF1_PROXY" == "true" ]]; then @@ -246,9 +251,10 @@ function get_ipv6() { while read -r l1; do if [ "$l1" != "::1/128" ]; then echo ${l1%/*} - break + return fi done < <(ip -6 -o addr show lo | awk '{print $4}') + err "no ipv6 found" } function make_config() { @@ -257,16 +263,27 @@ function make_config() { return fi + # To make error propagation work, need to assign separately + conf_pf0="$(get_ifname 0)" + 
conf_pf1="$(get_ifname 1)" + conf_vf_pattern="$(get_pattern ${devs[0]})" + conf_ipv6="$(get_ipv6)" + if [[ "$OPT_MULTIPORT" == "true" && "$OPT_PF1_PROXY" == "true" ]]; then + conf_pf1_proxy="$(get_pf1_proxy ${devs[1]})" + conf_pf1_proxy_vf="$(get_pf1_proxy_vf)" + fi + { echo "# This has been generated by prepare.sh" echo "no-stats" - echo "pf0 $(get_ifname 0)" - echo "pf1 $(get_ifname 1)" - echo "vf-pattern $(get_pattern ${devs[0]})" - echo "ipv6 $(get_ipv6)" + echo "pf0 $conf_pf0" + echo "pf1 $conf_pf1" + echo "vf-pattern $conf_vf_pattern" + echo "ipv6 $conf_ipv6" if [[ "$OPT_MULTIPORT" == "true" ]]; then echo "a-pf0 ${devs[0]},class=rxq_cqe_comp_en=0,rx_vec_en=1,dv_flow_en=2,dv_esw_en=1,fdb_def_rule_en=1,representor=pf[0-1]vf[0-$[$actualvfs-1]]" if [[ "$OPT_PF1_PROXY" == "true" ]]; then - echo "pf1-proxy $(get_pf1_proxy ${devs[1]})" + echo "pf1-proxy $conf_pf1_proxy" + echo "pf1-proxy-vf $conf_pf1_proxy_vf" fi echo "multiport-eswitch" else @@ -277,7 +294,7 @@ function make_config() { if [[ "$OPT_MULTIPORT" == "true" ]]; then log "dpservice configured in multiport-eswitch mode" if [[ "$OPT_PF1_PROXY" == "true" ]]; then - log "dpservice will create a PF1-proxy" + log "dpservice will create a pf1-proxy" fi else log "dpservice configured in normal mode" diff --git a/include/dp_conf_opts.h b/include/dp_conf_opts.h index c3e8b0a87..a7c0b2dd1 100644 --- a/include/dp_conf_opts.h +++ b/include/dp_conf_opts.h @@ -30,6 +30,9 @@ const char *dp_conf_get_pf1_name(void); #ifdef ENABLE_PF1_PROXY const char *dp_conf_get_pf1_proxy(void); #endif +#ifdef ENABLE_PF1_PROXY +const char *dp_conf_get_pf1_proxy_vf(void); +#endif const char *dp_conf_get_vf_pattern(void); int dp_conf_get_dhcp_mtu(void); int dp_conf_get_wcmp_perc(void); diff --git a/include/dp_log.h b/include/dp_log.h index d596dc2ec..bf73ed725 100644 --- a/include/dp_log.h +++ b/include/dp_log.h @@ -53,6 +53,7 @@ extern "C" { #define DP_LOG_IFNAME(VALUE) _DP_LOG_STR("interface_name", VALUE) #define DP_LOG_LCORE(VALUE) _DP_LOG_UINT("lcore_id", VALUE) 
#define DP_LOG_RTE_GROUP(VALUE) _DP_LOG_UINT("rte_group", VALUE) +#define DP_LOG_LINKSTATE(VALUE) _DP_LOG_STR("link_state", (VALUE) ? "up" : "down") // networking stack #define DP_LOG_IPV4(VALUE) _DP_LOG_IPV4("ipv4", VALUE) #define DP_LOG_IPV6(VALUE) _DP_LOG_IPV6("ipv6", VALUE) diff --git a/include/dp_netlink.h b/include/dp_netlink.h index e731be7c0..39e9a689e 100644 --- a/include/dp_netlink.h +++ b/include/dp_netlink.h @@ -4,6 +4,7 @@ #ifndef __INCLUDE_DP_NETLINK_H__ #define __INCLUDE_DP_NETLINK_H__ +#include #include #include @@ -25,7 +26,7 @@ struct dp_nlnk_req { struct dp_nl_tlv if_tlv; }; -int dp_get_pf_neigh_mac(int if_idx, struct rte_ether_addr *neigh, const struct rte_ether_addr *own_mac); +int dp_get_pf_neigh_mac(uint32_t if_idx, struct rte_ether_addr *neigh, const struct rte_ether_addr *own_mac); #ifdef __cplusplus } diff --git a/include/dp_port.h b/include/dp_port.h index 464d49a7b..74886899e 100644 --- a/include/dp_port.h +++ b/include/dp_port.h @@ -7,8 +7,9 @@ #include #include #include -#include #include +#include +#include #include "dp_conf.h" #include "dp_firewall.h" #include "dp_internal_stats.h" @@ -89,6 +90,7 @@ struct dp_port { char dev_name[RTE_ETH_NAME_MAX_LEN]; uint8_t peer_pf_hairpin_tx_rx_queue_offset; uint16_t peer_pf_port_id; + uint32_t if_index; struct rte_ether_addr own_mac; struct rte_ether_addr neigh_mac; struct dp_port_iface iface; @@ -106,6 +108,8 @@ struct dp_port { struct rte_flow *default_flows[DP_PORT_ASYNC_FLOW_COUNT]; } default_async_rules; }; + struct rte_timer neighmac_timer; + uint8_t neighmac_period; }; struct dp_ports { @@ -129,11 +133,16 @@ void dp_ports_stop(void); void dp_ports_free(void); int dp_start_port(struct dp_port *port); +int dp_start_pf_port(uint16_t index); #ifdef ENABLE_PF1_PROXY int dp_start_pf1_proxy_port(void); #endif int dp_stop_port(struct dp_port *port); +void dp_start_acquiring_neigh_mac(struct dp_port *port); +void dp_stop_acquiring_neigh_mac(struct dp_port *port); +int dp_set_neigh_mac(uint16_t 
port_id, const struct rte_ether_addr *mac); + int dp_port_meter_config(struct dp_port *port, uint64_t total_flow_rate_cap, uint64_t public_flow_rate_cap); static __rte_always_inline diff --git a/include/monitoring/dp_event.h b/include/monitoring/dp_event.h index 76367b07d..8f39da2d6 100644 --- a/include/monitoring/dp_event.h +++ b/include/monitoring/dp_event.h @@ -11,6 +11,7 @@ extern "C" { #include #include #include +#include int dp_link_status_change_event_callback(uint16_t port_id, enum rte_eth_event_type type, @@ -19,9 +20,11 @@ int dp_link_status_change_event_callback(uint16_t port_id, void dp_process_event_link_msg(struct rte_mbuf *m); int dp_send_event_flow_aging_msg(void); - void dp_process_event_flow_aging_msg(struct rte_mbuf *m); +int dp_send_event_neighmac_msg(uint16_t port_id, struct rte_ether_addr *neighmac); +void dp_process_event_neighmac_msg(struct rte_mbuf *m); + #ifdef __cplusplus } #endif diff --git a/include/monitoring/dp_monitoring.h b/include/monitoring/dp_monitoring.h index 36af9b452..68691cf3e 100644 --- a/include/monitoring/dp_monitoring.h +++ b/include/monitoring/dp_monitoring.h @@ -5,6 +5,7 @@ #define __INCLUDE_DP_MONITORING_H__ #include +#include #include #include "dp_ipaddr.h" @@ -17,6 +18,7 @@ extern "C" { enum dp_event_type { DP_EVENT_TYPE_LINK_STATUS, DP_EVENT_TYPE_FLOW_AGING, + DP_EVENT_TYPE_NEIGHMAC, }; struct dp_event_msg_head { @@ -28,10 +30,16 @@ struct dp_link_status { uint8_t status; }; +struct dp_neighmac { + uint16_t port_id; + struct rte_ether_addr mac; +}; + struct dp_event_msg { struct dp_event_msg_head msg_head; union { struct dp_link_status link_status; + struct dp_neighmac neighmac; } event_entry; }; diff --git a/src/dp_conf_opts.c b/src/dp_conf_opts.c index e63a8674d..a06d718cd 100644 --- a/src/dp_conf_opts.c +++ b/src/dp_conf_opts.c @@ -22,6 +22,9 @@ _OPT_SHOPT_MAX = 255, OPT_PF1, #ifdef ENABLE_PF1_PROXY OPT_PF1_PROXY, +#endif +#ifdef ENABLE_PF1_PROXY + OPT_PF1_PROXY_VF, #endif OPT_IPV6, OPT_VF_PATTERN, @@ -61,6 
+64,9 @@ static const struct option dp_conf_longopts[] = { { "pf1", 1, 0, OPT_PF1 }, #ifdef ENABLE_PF1_PROXY { "pf1-proxy", 1, 0, OPT_PF1_PROXY }, +#endif +#ifdef ENABLE_PF1_PROXY + { "pf1-proxy-vf", 1, 0, OPT_PF1_PROXY_VF }, #endif { "ipv6", 1, 0, OPT_IPV6 }, { "vf-pattern", 1, 0, OPT_VF_PATTERN }, @@ -114,6 +120,9 @@ static char pf1_name[IF_NAMESIZE]; #ifdef ENABLE_PF1_PROXY static char pf1_proxy[IF_NAMESIZE]; #endif +#ifdef ENABLE_PF1_PROXY +static char pf1_proxy_vf[IF_NAMESIZE]; +#endif static char vf_pattern[IF_NAMESIZE]; static int dhcp_mtu = 1500; static int wcmp_perc = 100; @@ -149,6 +158,13 @@ const char *dp_conf_get_pf1_proxy(void) return pf1_proxy; } +#endif +#ifdef ENABLE_PF1_PROXY +const char *dp_conf_get_pf1_proxy_vf(void) +{ + return pf1_proxy_vf; +} + #endif const char *dp_conf_get_vf_pattern(void) { @@ -248,6 +264,9 @@ static inline void dp_argparse_help(const char *progname, FILE *outfile) " --pf1=IFNAME second physical interface (e.g. eth1)\n" #ifdef ENABLE_PF1_PROXY " --pf1-proxy=IFNAME VF representor to use as a proxy for pf1 packets\n" +#endif +#ifdef ENABLE_PF1_PROXY + " --pf1-proxy-vf=IFNAME VF interface of the pf1-proxy VF representor\n" #endif " --ipv6=ADDR6 IPv6 underlay address\n" " --vf-pattern=PATTERN virtual interface name pattern (e.g. 
'eth1vf')\n" @@ -290,6 +309,10 @@ static int dp_conf_parse_arg(int opt, const char *arg) #ifdef ENABLE_PF1_PROXY case OPT_PF1_PROXY: return dp_argparse_string(arg, pf1_proxy, ARRAY_SIZE(pf1_proxy)); +#endif +#ifdef ENABLE_PF1_PROXY + case OPT_PF1_PROXY_VF: + return dp_argparse_string(arg, pf1_proxy_vf, ARRAY_SIZE(pf1_proxy_vf)); #endif case OPT_IPV6: return dp_argparse_opt_ipv6(arg); diff --git a/src/dp_netlink.c b/src/dp_netlink.c index 0685cf417..adbd4a8b4 100644 --- a/src/dp_netlink.c +++ b/src/dp_netlink.c @@ -66,7 +66,7 @@ static int dp_recv_msg(struct sockaddr_nl sock_addr, int sock, char *buf, int bu return (int)msg_len; } -int dp_get_pf_neigh_mac(int if_idx, struct rte_ether_addr *neigh, const struct rte_ether_addr *own_mac) +int dp_get_pf_neigh_mac(uint32_t if_idx, struct rte_ether_addr *neigh, const struct rte_ether_addr *own_mac) { struct sockaddr_nl sa = { .nl_family = AF_NETLINK, @@ -119,11 +119,7 @@ int dp_get_pf_neigh_mac(int if_idx, struct rte_ether_addr *neigh, const struct r goto cleanup; } - // TODO this should be an error in production - if (DP_FAILED(dp_read_neigh((struct nlmsghdr *)reply, reply_len, neigh, own_mac))) - DPS_LOG_WARNING("No neighboring router found"); - - ret = DP_OK; + ret = dp_read_neigh((struct nlmsghdr *)reply, reply_len, neigh, own_mac); cleanup: close(sock); diff --git a/src/dp_port.c b/src/dp_port.c index aea93a7b7..0f42c1192 100644 --- a/src/dp_port.c +++ b/src/dp_port.c @@ -1,14 +1,14 @@ // SPDX-FileCopyrightText: 2023 SAP SE or an SAP affiliate company and IronCore contributors // SPDX-License-Identifier: Apache-2.0 -#include "dp_error.h" +#include "dp_port.h" #include #include "dp_conf.h" +#include "dp_error.h" #include "dp_hairpin.h" #include "dp_log.h" #include "dp_lpm.h" #include "dp_netlink.h" -#include "dp_port.h" #ifdef ENABLE_VIRTSVC # include "dp_virtsvc.h" #endif @@ -29,6 +29,10 @@ #define DP_PORT_PROXIED true #define DP_PORT_NORMAL false +#define DP_PORT_NEIGHMAC_INITIAL_PERIOD 1 +#define 
DP_PORT_NEIGHMAC_BACKOFF_COEF 2 +#define DP_PORT_NEIGHMAC_MAX_PERIOD 60 + #define DP_METER_CIR_BASE_VALUE (1024 * 1024) // 1 Mbits #define DP_METER_EBS_BREAK_VALUE 100 // 100 Mbits/s, it used to differentiate different ebs calculation strategy to achieve relative stable metering results. epirical value. #define DP_METER_MBITS_TO_BYTES (1024 * 1024 / 8) @@ -91,20 +95,9 @@ struct dp_port *dp_get_port_by_name(const char *pci_name) return _dp_port_table[port_id]; } -static void dp_set_neighmac(struct dp_port *port, const struct rte_ether_addr *mac) -{ - char strmac[18]; - - rte_ether_addr_copy(mac, &port->neigh_mac); - - snprintf(strmac, sizeof(strmac), RTE_ETHER_ADDR_PRT_FMT, RTE_ETHER_ADDR_BYTES(&port->neigh_mac)); - DPS_LOG_INFO("Setting neighboring MAC", _DP_LOG_STR("mac", strmac), DP_LOG_PORT(port)); -} - static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *dev_info) { struct dp_dpdk_layer *dp_layer = get_dpdk_layer(); - struct rte_ether_addr pf_neigh_mac = {0}; struct rte_eth_txconf txq_conf; struct rte_eth_rxconf rxq_conf; struct rte_eth_conf port_conf = port_conf_default; @@ -182,16 +175,6 @@ static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *de static_assert(sizeof(port->dev_name) == RTE_ETH_NAME_MAX_LEN, "Incompatible port dev_name size"); rte_eth_dev_get_name_by_port(port->port_id, port->dev_name); - if (port->is_pf) { - if (DP_FAILED(dp_get_pf_neigh_mac(dev_info->if_index, &pf_neigh_mac, &port->own_mac))) - return DP_ERROR; - dp_set_neighmac(port, &pf_neigh_mac); - } -#ifdef ENABLE_PF1_PROXY - else if (dp_conf_is_pf1_proxy_enabled() && port == dp_get_pf1_proxy()) - dp_set_neighmac(port, &dp_get_pf1()->neigh_mac); -#endif - if (dp_conf_is_multiport_eswitch() && DP_FAILED(dp_configure_async_flows(port->port_id))) return DP_ERROR; @@ -260,6 +243,7 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d port->is_pf = is_pf; port->port_id = port_id; port->socket_id = socket_id; + 
port->if_index = dev_info->if_index; _dp_port_table[port_id] = port; if (is_pf && DP_FAILED(dp_port_register_pf(port))) @@ -274,6 +258,7 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d DPS_LOG_ERR("Cannot register link status callback", DP_LOG_RET(ret)); return NULL; } + rte_timer_init(&port->neighmac_timer); } else { // All VFs belong to pf0, assign a tx queue from pf1 for it if (dp_conf_is_offload_enabled()) { @@ -294,16 +279,25 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d static struct dp_port *dp_port_init_pf1_proxy_interface(uint16_t port_id, struct rte_eth_dev_info *dev_info) { struct dp_port *port; + uint32_t if_index; int socket_id; socket_id = dp_get_port_socket_id(port_id); if (DP_FAILED(socket_id) && socket_id != SOCKET_ID_ANY) return NULL; + if_index = if_nametoindex(dp_conf_get_pf1_proxy_vf()); + if (if_index == 0) { + DPS_LOG_ERR("Cannot get pf1-proxy vf interface index", DP_LOG_IFACE(dp_conf_get_pf1_proxy_vf())); + return NULL; + } + port = &_dp_pf1_proxy_port; port->is_pf = false; port->port_id = port_id; port->socket_id = socket_id; + port->if_index = if_index; + rte_timer_init(&port->neighmac_timer); _dp_port_table[port_id] = port; if (DP_FAILED(dp_port_init_ethdev(port, dev_info))) @@ -462,6 +456,8 @@ static int dp_stop_eth_port(struct dp_port *port) { int ret; + DPS_LOG_INFO("Stopping port", DP_LOG_PORT(port)); + if (dp_conf_is_multiport_eswitch()) { #ifdef ENABLE_VIRTSVC if (port->is_pf) @@ -580,6 +576,83 @@ static int dp_port_create_default_pf_async_templates(struct dp_port *port) return DP_OK; } + +static void dp_acquire_neigh_mac(struct dp_port *port); + +static void dp_neighmac_timer_cb(__rte_unused struct rte_timer *timer, void *arg) +{ + struct dp_port *port = arg; + + port->neighmac_period *= DP_PORT_NEIGHMAC_BACKOFF_COEF; + if (port->neighmac_period > DP_PORT_NEIGHMAC_MAX_PERIOD) + port->neighmac_period = DP_PORT_NEIGHMAC_MAX_PERIOD; + + 
dp_acquire_neigh_mac(port); +} + +static void dp_acquire_neigh_mac(struct dp_port *port) +{ + struct rte_ether_addr pf_neigh_mac = {0}; + int ret; + + if (DP_FAILED(dp_get_pf_neigh_mac(port->if_index, &pf_neigh_mac, &port->own_mac))) { + DPS_LOG_WARNING("No neighboring router, setting timer", DP_LOG_VALUE(port->neighmac_period), DP_LOG_PORT(port)); + + // need to use the same lcore each time, thus staying on main one even when called from the worker + ret = rte_timer_reset(&port->neighmac_timer, port->neighmac_period * rte_get_timer_hz(), + SINGLE, rte_get_main_lcore(), dp_neighmac_timer_cb, port); + if (DP_FAILED(ret)) + DPS_LOG_WARNING("Cannot start neighboring router timer", DP_LOG_PORT(port), DP_LOG_RET(ret)); + + return; + } + +#ifdef ENABLE_PF1_PROXY + if (dp_conf_is_pf1_proxy_enabled() && port == dp_get_pf1_proxy()) + port = dp_get_port_by_pf_index(1); +#endif + if (DP_FAILED(dp_send_event_neighmac_msg(port->port_id, &pf_neigh_mac))) + DPS_LOG_WARNING("Cannot send neighboring router mac to worker thread"); +} + +void dp_start_acquiring_neigh_mac(struct dp_port *port) +{ +#ifdef ENABLE_PF1_PROXY + if (dp_conf_is_pf1_proxy_enabled() && port == dp_get_pf1()) + port = &_dp_pf1_proxy_port; +#endif + port->neighmac_period = DP_PORT_NEIGHMAC_INITIAL_PERIOD; + dp_acquire_neigh_mac(port); +} + +void dp_stop_acquiring_neigh_mac(struct dp_port *port) +{ +#ifdef ENABLE_PF1_PROXY + if (dp_conf_is_pf1_proxy_enabled() && port == dp_get_pf1()) + port = &_dp_pf1_proxy_port; +#endif + rte_timer_stop_sync(&port->neighmac_timer); +} + +int dp_set_neigh_mac(uint16_t port_id, const struct rte_ether_addr *mac) +{ + struct dp_port *port; + char strmac[18]; + + port = dp_get_port_by_id(port_id); + if (!port) { + DPS_LOG_WARNING("Cannot set neighboring router, port invalid", DP_LOG_PORTID(port_id)); + return DP_ERROR; + } + + rte_ether_addr_copy(mac, &port->neigh_mac); + + snprintf(strmac, sizeof(strmac), RTE_ETHER_ADDR_PRT_FMT, RTE_ETHER_ADDR_BYTES(&port->neigh_mac)); + 
DPS_LOG_INFO("Setting PF neighboring router", _DP_LOG_STR("mac", strmac), DP_LOG_PORT(port)); + return DP_OK; +} + + static int dp_init_port(struct dp_port *port) { // TAP devices do not support offloading/isolation @@ -614,8 +687,13 @@ static int dp_init_port(struct dp_port *port) int dp_start_port(struct dp_port *port) { + struct rte_eth_link link = { + .link_status = RTE_ETH_LINK_DOWN + }; int ret; + DPS_LOG_INFO("Starting port", DP_LOG_PORT(port)); + ret = rte_eth_dev_start(port->port_id); if (DP_FAILED(ret)) { DPS_LOG_ERR("Cannot start ethernet port", DP_LOG_PORT(port), DP_LOG_RET(ret)); @@ -628,11 +706,43 @@ int dp_start_port(struct dp_port *port) return ret; } - port->link_status = RTE_ETH_LINK_UP; + if (port->is_pf) { + // this really only fails on bad arguments (or incompatible driver) + ret = rte_eth_link_get(port->port_id, &link); + if (DP_FAILED(ret)) + DPS_LOG_WARNING("Unable to get the initial link status, assuming it down", DP_LOG_PORT(port), DP_LOG_RET(ret)); + } else + link.link_status = RTE_ETH_LINK_UP; + + port->link_status = link.link_status; port->allocated = true; return DP_OK; } +int dp_start_pf_port(uint16_t index) +{ + struct dp_port *port = dp_get_port_by_pf_index(index); + + if (!port) { + DPS_LOG_ERR("Invalid PF index", DP_LOG_VALUE(index), DP_LOG_MAX(DP_MAX_PF_PORTS)); + return DP_ERROR; + } + + if (DP_FAILED(dp_start_port(port))) + return DP_ERROR; + + DPS_LOG_INFO("Received initial PF link state", DP_LOG_LINKSTATE(port->link_status), DP_LOG_PORT(port)); + + if (port->link_status == RTE_ETH_LINK_UP) +#ifdef ENABLE_PF1_PROXY + // Do not use PF1 in pf1-proxy mode as Linux does not use it then (thus the mac will never be there) + if (!dp_conf_is_pf1_proxy_enabled() || port != dp_get_pf1()) +#endif + dp_start_acquiring_neigh_mac(port); + + return DP_OK; +} + #ifdef ENABLE_PF1_PROXY int dp_start_pf1_proxy_port(void) { @@ -644,6 +754,9 @@ int dp_start_pf1_proxy_port(void) return ret; } + if (dp_get_pf1()->link_status == RTE_ETH_LINK_UP) + 
dp_start_acquiring_neigh_mac(&_dp_pf1_proxy_port); + _dp_pf1_proxy_port.allocated = true; return DP_OK; } @@ -661,6 +774,7 @@ int dp_stop_port(struct dp_port *port) return DP_OK; } + static int dp_port_total_flow_meter_config(struct dp_port *port, uint64_t total_flow_rate_cap) { return dp_set_vf_rate_limit(port->port_id, total_flow_rate_cap); diff --git a/src/dp_service.c b/src/dp_service.c index 8701f0977..5b58aa33a 100644 --- a/src/dp_service.c +++ b/src/dp_service.c @@ -160,10 +160,10 @@ static int init_interfaces(void) || DP_FAILED(dp_telemetry_init())) return DP_ERROR; - if (DP_FAILED(dp_start_port(dp_get_port_by_pf_index(0)))) + if (DP_FAILED(dp_start_pf_port(0))) return DP_ERROR; - if (DP_FAILED(dp_start_port(dp_get_port_by_pf_index(1)))) + if (DP_FAILED(dp_start_pf_port(1))) return DP_ERROR; #ifdef ENABLE_PF1_PROXY @@ -236,9 +236,26 @@ static int run_service(void) return DP_ERROR; } - if (dp_conf_is_multiport_eswitch() && dp_conf_is_offload_enabled()) { - DP_EARLY_ERR("HW offloading is currently not supported for multi-port eswitch mode"); - return DP_ERROR; + if (dp_conf_is_multiport_eswitch()) { + if (dp_conf_is_offload_enabled()) { + DP_EARLY_ERR("HW offloading is currently not supported for multi-port eswitch mode"); + return DP_ERROR; + } +#ifdef ENABLE_PF1_PROXY + if (dp_conf_is_pf1_proxy_enabled()) { + if (*dp_conf_get_pf1_proxy_vf() == '\0') { + DP_EARLY_ERR("PF1-proxy also requires --pf1-proxy-vf argument to be set"); + return DP_ERROR; + } + } +#endif + } else { +#ifdef ENABLE_PF1_PROXY + if (dp_conf_is_pf1_proxy_enabled()) { + DP_EARLY_ERR("PF1-proxy is only supported for multiport-eswitch mode"); + return DP_ERROR; + } +#endif } if (DP_FAILED(dp_log_init())) diff --git a/src/monitoring/dp_event.c b/src/monitoring/dp_event.c index 5956425ee..a2d1c66ee 100644 --- a/src/monitoring/dp_event.c +++ b/src/monitoring/dp_event.c @@ -60,6 +60,7 @@ int dp_link_status_change_event_callback(uint16_t port_id, __rte_unused void *ret_param) { struct 
rte_eth_link link; + struct dp_port *port; int ret; ret = rte_eth_link_get_nowait(port_id, &link); @@ -68,10 +69,18 @@ int dp_link_status_change_event_callback(uint16_t port_id, return ret; } - if (DP_FAILED(dp_send_event_link_msg(port_id, link.link_status))) + port = dp_get_port_by_id(port_id); + if (!port) { + DPS_LOG_ERR("Link change failed to get port", DP_LOG_PORTID(port_id), DP_LOG_VALUE(link.link_status)); return DP_ERROR; + } - return DP_OK; + if (link.link_status == RTE_ETH_LINK_UP) + dp_start_acquiring_neigh_mac(port); + else + dp_stop_acquiring_neigh_mac(port); + + return dp_send_event_link_msg(port_id, link.link_status); } void dp_process_event_link_msg(struct rte_mbuf *m) @@ -88,6 +97,7 @@ void dp_process_event_link_msg(struct rte_mbuf *m) } port->link_status = status; + DPS_LOG_INFO("PF link state changed", DP_LOG_LINKSTATE(port->link_status), DP_LOG_PORT(port)); } // Flow-aging message - sent periodically to age-out conntracked flows @@ -119,3 +129,28 @@ void dp_process_event_flow_aging_msg(__rte_unused struct rte_mbuf *m) // which enables fully control of hw rules' lifecycle from the software path for tcp flows. 
dp_process_aged_flows_non_offload(); } + +// Neighboring router MAC message - sent after acquiring it (sometimes asynchronously from a timer) + +int dp_send_event_neighmac_msg(uint16_t port_id, struct rte_ether_addr *neighmac) +{ + struct dp_event_msg neighmac_msg = { + .msg_head = { + .type = DP_EVENT_TYPE_NEIGHMAC, + }, + .event_entry = { + .neighmac = { + .port_id = port_id, + .mac = *neighmac, + } + } + }; + return dp_send_event_msg(&neighmac_msg); +} + +void dp_process_event_neighmac_msg(struct rte_mbuf *m) +{ + struct dp_event_msg *neighmac_msg = rte_pktmbuf_mtod(m, struct dp_event_msg *); + + dp_set_neigh_mac(neighmac_msg->event_entry.neighmac.port_id, &neighmac_msg->event_entry.neighmac.mac); +} diff --git a/src/monitoring/dp_monitoring.c b/src/monitoring/dp_monitoring.c index 3ab020e76..baae36e67 100644 --- a/src/monitoring/dp_monitoring.c +++ b/src/monitoring/dp_monitoring.c @@ -21,6 +21,9 @@ void dp_process_event_msg(struct rte_mbuf *m) case DP_EVENT_TYPE_FLOW_AGING: dp_process_event_flow_aging_msg(m); break; + case DP_EVENT_TYPE_NEIGHMAC: + dp_process_event_neighmac_msg(m); + break; } rte_pktmbuf_free(m); diff --git a/test/local/reflector.py b/test/local/reflector.py index 74416f44b..714b0e5eb 100755 --- a/test/local/reflector.py +++ b/test/local/reflector.py @@ -1,5 +1,8 @@ #!/usr/bin/env python3 +# SPDX-FileCopyrightText: 2023 SAP SE or an SAP affiliate company and IronCore contributors +# SPDX-License-Identifier: Apache-2.0 + import argparse import multiprocessing import sys