diff --git a/Dockerfile b/Dockerfile index c8ea6dae5..b5f5dc811 100644 --- a/Dockerfile +++ b/Dockerfile @@ -100,6 +100,7 @@ ARG DPSERVICE_FEATURES="" RUN meson setup release_build $DPSERVICE_FEATURES --buildtype=release && ninja -C release_build RUN CC=clang CXX=clang++ meson setup clang_build $DPSERVICE_FEATURES && ninja -C clang_build RUN meson setup xtratest_build $DPSERVICE_FEATURES -Denable_tests=true && ninja -C xtratest_build +RUN meson setup pf1_proxy_build $DPSERVICE_FEATURES -Denable_pf1_proxy=true && ninja -C pf1_proxy_build # Test-image to run pytest diff --git a/docs/deployment/help_dpservice-bin.md b/docs/deployment/help_dpservice-bin.md index 960fb61e0..bceb7a451 100644 --- a/docs/deployment/help_dpservice-bin.md +++ b/docs/deployment/help_dpservice-bin.md @@ -6,6 +6,7 @@ | -v, --version | None | display version and exit | | | --pf0 | IFNAME | first physical interface (e.g. eth0) | | | --pf1 | IFNAME | second physical interface (e.g. eth1) | | +| --pf1-proxy | IFNAME | VF representor to use as a proxy for pf1 packets | | | --ipv6 | ADDR6 | IPv6 underlay address | | | --vf-pattern | PATTERN | virtual interface name pattern (e.g. 'eth1vf') | | | --dhcp-mtu | SIZE | set the mtu field in DHCP responses (68 - 1500) | | diff --git a/docs/deployment/mellanox.md b/docs/deployment/mellanox.md index 2fb4c5f01..c7214cf7a 100644 --- a/docs/deployment/mellanox.md +++ b/docs/deployment/mellanox.md @@ -38,6 +38,11 @@ Set the number of VFs to the needed value (max 126 at the moment) and enable bot Restart the machine for the changes to take effect. > These changes are done in the NIC itself, it does not matter if the host is an ephemeral image or if another host OS will boot later. +### Multiport-eswitch +For this mode to be functional, the additional firmware setting `LAG_RESOURCE_ALLOCATION=1` is needed. + +In some cases (apparently depending on the NIC/switch combination), performance is severely affected while VM traffic is flowing. Setting `ROCE_CONTROL=1` (meaning "disabled"; the default is `2`, meaning "enabled") has been observed to fix this. The actual cause is yet to be discovered. + ## Dp-service setup Either `prepare.sh` script or `preparedp.service` systemd unit needs to be run before dp-service can work properly. This should already be done automatically if using the Docker image provided. Make sure this does not produce any errors. diff --git a/docs/sys_design/README.md b/docs/sys_design/README.md new file mode 100644 index 000000000..1d07e49a6 --- /dev/null +++ b/docs/sys_design/README.md @@ -0,0 +1,22 @@ +# Graph Framework +This is the graph topology for packets handled by dpservice. Offloaded packets never enter dpservice (and thus the graph) itself. + +![dpservice graph schema](dpservice_dataplane.drawio.png "dpservice graph schema") + +Note that every graph node actually has one additional edge that leads to a **"Drop"** node; these edges are omitted for clarity. As the name suggests, that node has no outgoing edges and simply drops packets without sending them anywhere. + +## PF1-proxy +When the (conditionally compiled-in) pf1-proxy feature is used, all traffic for the host (i.e. traffic that is not underlay traffic for dpservice) needs to be forwarded to a special VF on PF1 called "pf1-proxy", and back. + +### Traffic from proxy to PF1 +Since **all packets** without exception need to be forwarded directly to PF1, an rte-rule is installed to do just that, so all packets are offloaded and never enter the graph. + +### Traffic from PF1 to proxy +Only non-underlay IPv6 packets, i.e. 
IPv6 packets with destination IP matching the host's IP (`--ipv6` command-line argument), are directly forwarded by offloading via an rte-rule. The remaining packets enter dpservice normally, and if they are classified as "unusable" (i.e. they would be dropped by the "Classify" node), they are instead forwarded to the pf1-proxy. See the dashed graph edge above. + +## Virtual services +If virtual services are compiled-in, there is another path for packets to take. Packets going from a virtual IPv4 and TCP/UDP port to a specific web-service (i.e. specific IPv6 and TCP/UDP port) undergo an IP header replacement (from IPv4 to IPv6 and back) to enable VMs to contact IPv6 web-services without the use of NAT. This is useful for services that are heavily used by many connections, like DNS, k8s api-servers, etc. + +For this to work, some changes to the graph topology are needed. For simplicity, this schema is separate and should be imagined as an "overlay" over the standard schema above. + +![dpservice virtual services schema](dpservice_virtsvc.drawio.png "virtual services graph schema") diff --git a/docs/sys_design/dpservice_dataplane.drawio.png b/docs/sys_design/dpservice_dataplane.drawio.png index 3757288ec..2c3a5dd4f 100644 Binary files a/docs/sys_design/dpservice_dataplane.drawio.png and b/docs/sys_design/dpservice_dataplane.drawio.png differ diff --git a/docs/sys_design/dpservice_virtsvc.drawio.png b/docs/sys_design/dpservice_virtsvc.drawio.png new file mode 100644 index 000000000..ab920ae93 Binary files /dev/null and b/docs/sys_design/dpservice_virtsvc.drawio.png differ diff --git a/hack/dp_conf.json b/hack/dp_conf.json index c4c3c40c7..d89582c63 100644 --- a/hack/dp_conf.json +++ b/hack/dp_conf.json @@ -19,6 +19,15 @@ "type": "char", "array_size": "IF_NAMESIZE" }, + { + "lgopt": "pf1-proxy", + "arg": "IFNAME", + "help": "VF representor to use as a proxy for pf1 packets", + "var": "pf1_proxy", + "type": "char", + "array_size": "IF_NAMESIZE", + "ifdef": "ENABLE_PF1_PROXY" + }, { "lgopt": "ipv6", "arg": "ADDR6", diff --git a/hack/prepare.sh b/hack/prepare.sh index b392bf8b5..4dc64f1e4 100755 --- a/hack/prepare.sh +++ b/hack/prepare.sh @@ -140,32 +140,31 @@ process_multiport_eswitch_mode() { } function create_vf() { - local pf="${devs[0]}" + local pf0="${devs[0]}" + local pf1="${devs[1]}" if [[ "$IS_ARM_WITH_BLUEFIELD" == "true" ]]; then actualvfs=$NUMVFS log "Skipping VF creation for BlueField card on ARM" # enable switchdev mode, this operation takes most time - process_switchdev_mode "$pf" + process_switchdev_mode "$pf0" return fi if [[ "$CONFIG_ONLY" == "true" ]]; then - actualvfs=$(cat /sys/bus/pci/devices/$pf/sriov_numvfs) + actualvfs=$(cat /sys/bus/pci/devices/$pf0/sriov_numvfs) log "Skipping VF creation as requested" return fi # we disable automatic binding so that VFs don't get created, saves a lot of time # plus we don't need to unbind them before enabling switchdev mode - log "disabling automatic binding of VFs on pf: $pf" - echo 0 > /sys/bus/pci/devices/$pf/sriov_drivers_autoprobe - - # calculating amount of VFs to create, 126 if more are available, or maximum available - totalvfs=$(cat /sys/bus/pci/devices/$pf/sriov_totalvfs) - actualvfs=$((NUMVFS<totalvfs ? NUMVFS : totalvfs)) - echo $actualvfs > /sys/bus/pci/devices/$pf/sriov_numvfs + log "disabling automatic binding of VFs on pf0 '$pf0'" + echo 0 > /sys/bus/pci/devices/$pf0/sriov_drivers_autoprobe + if [[ "$OPT_PF1_PROXY" == "true" ]]; then + log "enabling automatic binding of VFs on pf1 '$pf1'" + echo 1 > /sys/bus/pci/devices/$pf1/sriov_drivers_autoprobe + fi if [[ "$IS_X86_WITH_MLX" == 
"true" ]]; then # enable switchdev mode, this operation takes most time @@ -174,7 +173,7 @@ function create_vf() { process_switchdev_mode "$pf" done else - process_switchdev_mode "$pf" + process_switchdev_mode "$pf0" fi fi @@ -183,19 +182,60 @@ function create_vf() { process_multiport_eswitch_mode "$pf" done fi + + # calculating amount of VFs to create, 126 if more are available, or maximum available + totalvfs=$(cat /sys/bus/pci/devices/$pf0/sriov_totalvfs) + actualvfs=$((NUMVFS<totalvfs ? NUMVFS : totalvfs)) + echo $actualvfs > /sys/bus/pci/devices/$pf0/sriov_numvfs + if [[ "$OPT_PF1_PROXY" == "true" ]]; then + log "creating pf1-proxy virtual function" + echo 1 > /sys/bus/pci/devices/$pf1/sriov_numvfs + log "configuring pf1-proxy" + local pf1proxy=$(get_pf1_proxy $pf1) + ip link set $pf1proxy mtu 9100 + ip link set $pf1proxy up + local pf1_name=$(get_ifname 1) + local pf1_mac=$(cat /sys/class/net/$pf1_name/address) + local pf1proxy_vf=$(get_pf1_proxy_vf) + ip link set $pf1proxy_vf mtu 9100 + ip link set $pf1proxy_vf address $pf1_mac + ip link set $pf1proxy_vf up + fi } function get_pattern() { local dev=$1 pattern=$(devlink port | grep pci/$dev/ | grep "virtual\|pcivf" | awk '{print $5}' | sed -rn 's/(.*[a-z_])[0-9]{1,3}$/\1/p' | uniq) if [ -z "$pattern" ]; then - err "can't determine the pattern for $dev" + err "can't determine the vf pattern for $dev" elif [ $(wc -l <<< "$pattern") -ne 1 ]; then - err "multiple patterns found for $dev" + err "multiple vf patterns found for $dev" fi echo "$pattern" } +function get_pf1_proxy() { + local dev=$1 + proxy=$(devlink port | grep pci/$dev/ | grep "virtual\|pcivf" | awk '{print $5}' | uniq) + if [ -z "$proxy" ]; then + err "can't determine the pf1-proxy vf for $dev" + elif [ $(wc -l <<< "$proxy") -ne 1 ]; then + err "multiple pf1-proxy devices found for $dev" + fi + echo "$proxy" +} + +function get_pf1_proxy_vf() { + vf=$(devlink port | grep auxiliary/mlx5_core.eth.2/ | grep virtual | awk '{print $5}' | uniq) + if [ -z "$vf" ]; then + err "can't determine the pf1-proxy vf" + elif [ $(wc -l <<< "$vf") -ne 1 ]; then + err "multiple pf1-proxy vfs found" + fi + echo "$vf" +} + function get_ifname() { local port=$1 devlink port | grep "physical port $port" | awk '{ print $5}' @@ -211,13 +251,6 @@ function get_ipv6() { done < <(ip -6 -o addr show lo | awk '{print $4}') } - -function get_pf_mac() { - local pci_dev=${devs[$1]} - local pf=$(get_ifname $1) - cat /sys/bus/pci/devices/$pci_dev/net/$pf/address -} - function make_config() { if [[ "$IS_X86_WITH_BLUEFIELD" == "true" ]]; then log "Skipping config file creation on AMD/Intel 64-bit host with Bluefield" @@ -233,7 +266,7 @@ function make_config() { if [[ "$OPT_MULTIPORT" == "true" ]]; then echo "a-pf0 ${devs[0]},class=rxq_cqe_comp_en=0,rx_vec_en=1,dv_flow_en=2,dv_esw_en=1,fdb_def_rule_en=1,representor=pf[0-1]vf[0-$[$actualvfs-1]]" if [[ "$OPT_PF1_PROXY" == "true" ]]; then - echo "pf1-proxy $(get_pf_mac 1)" + echo "pf1-proxy $(get_pf1_proxy ${devs[1]})" fi echo "multiport-eswitch" else @@ -244,7 +277,7 @@ function make_config() { if [[ "$OPT_MULTIPORT" == "true" ]]; then log "dpservice configured in multiport-eswitch mode" if [[ "$OPT_PF1_PROXY" == "true" ]]; then - log "dpservice will create a TAP device to proxy PF1" + log "dpservice will create a PF1-proxy" fi else log "dpservice configured in normal mode" diff --git a/include/dp_conf.h b/include/dp_conf.h index 7acb20b75..318b8e412 100644 --- a/include/dp_conf.h +++ b/include/dp_conf.h @@ -49,9 +49,6 @@ const struct dp_conf_dhcp_dns *dp_conf_get_dhcp_dns(void); const struct dp_conf_dhcp_dns 
*dp_conf_get_dhcpv6_dns(void); #ifdef ENABLE_PF1_PROXY -const char *dp_get_eal_pf1_proxy_mac_addr(void); -const char *dp_get_eal_pf1_proxy_dev_name(void); -const char *dp_generate_eal_pf1_proxy_params(void); bool dp_conf_is_pf1_proxy_enabled(void); #endif diff --git a/include/dp_conf_opts.h b/include/dp_conf_opts.h index 33d9f8260..c3e8b0a87 100644 --- a/include/dp_conf_opts.h +++ b/include/dp_conf_opts.h @@ -27,6 +27,9 @@ enum dp_conf_log_format { const char *dp_conf_get_pf0_name(void); const char *dp_conf_get_pf1_name(void); +#ifdef ENABLE_PF1_PROXY +const char *dp_conf_get_pf1_proxy(void); +#endif const char *dp_conf_get_vf_pattern(void); int dp_conf_get_dhcp_mtu(void); int dp_conf_get_wcmp_perc(void); diff --git a/include/dp_port.h b/include/dp_port.h index b824038b6..464d49a7b 100644 --- a/include/dp_port.h +++ b/include/dp_port.h @@ -57,6 +57,10 @@ struct dp_port_async_template { enum dp_port_async_template_type { DP_PORT_ASYNC_TEMPLATE_PF_ISOLATION, +#ifdef ENABLE_PF1_PROXY + DP_PORT_ASYNC_TEMPLATE_PF1_FROM_PROXY, + DP_PORT_ASYNC_TEMPLATE_PF1_TO_PROXY, +#endif #ifdef ENABLE_VIRTSVC DP_PORT_ASYNC_TEMPLATE_VIRTSVC_TCP_ISOLATION, DP_PORT_ASYNC_TEMPLATE_VIRTSVC_UDP_ISOLATION, @@ -67,6 +71,10 @@ enum dp_port_async_template_type { enum dp_port_async_flow_type { DP_PORT_ASYNC_FLOW_ISOLATE_IPIP, DP_PORT_ASYNC_FLOW_ISOLATE_IPV6, +#ifdef ENABLE_PF1_PROXY + DP_PORT_ASYNC_FLOW_PF1_FROM_PROXY, + DP_PORT_ASYNC_FLOW_PF1_TO_PROXY, +#endif DP_PORT_ASYNC_FLOW_COUNT, }; @@ -108,11 +116,10 @@ struct dp_ports { // hidden structures for inline functions to access extern struct dp_port *_dp_port_table[DP_MAX_PORTS]; extern struct dp_port *_dp_pf_ports[DP_MAX_PF_PORTS]; -extern struct dp_ports _dp_ports; - #ifdef ENABLE_PF1_PROXY -extern struct dp_port _dp_pf_proxy_tap_port; +extern struct dp_port _dp_pf1_proxy_port; #endif +extern struct dp_ports _dp_ports; struct dp_port *dp_get_port_by_name(const char *pci_name); @@ -123,7 +130,7 @@ void dp_ports_free(void); int dp_start_port(struct dp_port *port); #ifdef ENABLE_PF1_PROXY -int dp_start_pf_proxy_tap_port(void); +int dp_start_pf1_proxy_port(void); #endif int dp_stop_port(struct dp_port *port); @@ -158,11 +165,6 @@ struct dp_port *dp_get_out_port(struct dp_flow *df) static __rte_always_inline struct dp_port *dp_get_port_by_id(uint16_t port_id) { -#ifdef ENABLE_PF1_PROXY - if (unlikely(dp_conf_is_pf1_proxy_enabled() && port_id == _dp_pf_proxy_tap_port.port_id)) - return &_dp_pf_proxy_tap_port; -#endif - if (unlikely(port_id >= RTE_DIM(_dp_port_table))) { DPS_LOG_ERR("Port not registered in dpservice", DP_LOG_PORTID(port_id)); return NULL; @@ -201,9 +203,9 @@ struct dp_port *dp_get_port_by_pf_index(uint16_t index) #ifdef ENABLE_PF1_PROXY static __rte_always_inline -const struct dp_port *dp_get_pf_proxy_tap_port(void) +const struct dp_port *dp_get_pf1_proxy(void) { - return &_dp_pf_proxy_tap_port; + return &_dp_pf1_proxy_port; } #endif diff --git a/include/dp_virtsvc.h b/include/dp_virtsvc.h index 4a34ad9e8..73012971b 100644 --- a/include/dp_virtsvc.h +++ b/include/dp_virtsvc.h @@ -46,6 +46,7 @@ struct dp_virtsvc { rte_be16_t service_port; uint8_t proto; uint16_t last_assigned_port; + union dp_ipv6 ul_addr; struct rte_hash *open_ports; struct dp_virtsvc_conn connections[DP_VIRTSVC_PORTCOUNT]; struct rte_flow *isolation_rules[DP_MAX_PF_PORTS]; diff --git a/include/nodes/cls_node.h b/include/nodes/cls_node.h new file mode 100644 index 000000000..04ca2f57c --- /dev/null +++ b/include/nodes/cls_node.h @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: 2023 SAP SE or an 
SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +#ifndef __INCLUDE_CLS_NODE_H__ +#define __INCLUDE_CLS_NODE_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef ENABLE_PF1_PROXY +int cls_node_append_tx(uint16_t port_id, const char *tx_node_name); +#endif + +#ifdef __cplusplus +} +#endif +#endif diff --git a/include/rte_flow/dp_rte_async_flow_isolation.h b/include/rte_flow/dp_rte_async_flow_isolation.h index 080ccc1a0..2fa7a40de 100644 --- a/include/rte_flow/dp_rte_async_flow_isolation.h +++ b/include/rte_flow/dp_rte_async_flow_isolation.h @@ -22,7 +22,8 @@ int dp_create_virtsvc_async_isolation_templates(struct dp_port *port, uint8_t pr struct rte_flow *dp_create_virtsvc_async_isolation_rule(uint16_t port_id, uint8_t proto_id, const union dp_ipv6 *svc_ipv6, rte_be16_t svc_port, - struct rte_flow_template_table *template_table); + struct rte_flow_template_table *template_table, + const union dp_ipv6 *ul_addr); #endif #ifdef __cplusplus diff --git a/include/rte_flow/dp_rte_async_flow_pf1_proxy.h b/include/rte_flow/dp_rte_async_flow_pf1_proxy.h new file mode 100644 index 000000000..cf9cd1efe --- /dev/null +++ b/include/rte_flow/dp_rte_async_flow_pf1_proxy.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: 2023 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +#ifndef __INCLUDE_DP_RTE_FLOW_ASYNC_FLOW_PF1_PROXY_H__ +#define __INCLUDE_DP_RTE_FLOW_ASYNC_FLOW_PF1_PROXY_H__ + +#define DP_PF1_PROXY_RULE_COUNT 2 + +#ifdef __cplusplus +extern "C" { +#endif + +#include "dp_port.h" + +int dp_create_pf_async_from_proxy_templates(struct dp_port *port); +int dp_create_pf_async_to_proxy_templates(struct dp_port *port); + +uint16_t dp_create_pf1_proxy_async_isolation_rules(struct dp_port *port); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rte_flow/dp_rte_flow_helpers.h b/include/rte_flow/dp_rte_flow_helpers.h index 27689886a..0953c97c0 100644 --- a/include/rte_flow/dp_rte_flow_helpers.h +++ b/include/rte_flow/dp_rte_flow_helpers.h @@ -38,6 +38,12 @@ union dp_flow_item_l4 { struct rte_flow_item_icmp6 icmp6; }; +#ifdef ENABLE_PF1_PROXY +static const struct rte_flow_item_ethdev dp_flow_item_ethdev_mask = { + .port_id = 0xffff, +}; +#endif + static const struct rte_flow_item_eth dp_flow_item_eth_mask = { .hdr.ether_type = 0xffff, }; @@ -62,6 +68,18 @@ static const struct rte_flow_item_ipv6 dp_flow_item_ipv6_dst_mask = { .hdr.dst_addr = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", .hdr.proto = 0xff, }; +#ifdef ENABLE_VIRTSVC +static const struct rte_flow_item_ipv6 dp_flow_item_ipv6_src_dst_mask = { + .hdr.src_addr = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + .hdr.dst_addr = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + .hdr.proto = 0xff, +}; +#endif +#ifdef ENABLE_PF1_PROXY +static const struct rte_flow_item_ipv6 dp_flow_item_ipv6_dst_only_mask = { + .hdr.dst_addr = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", +}; +#endif static const struct rte_flow_item_ipv4 dp_flow_item_ipv4_dst_mask = { .hdr.dst_addr = 0xffffffff, diff --git a/src/dp_conf.c b/src/dp_conf.c index 4e7ff0efc..2e6767d34 100644 --- a/src/dp_conf.c +++ b/src/dp_conf.c @@ -39,32 +39,9 @@ static struct dp_conf_virtual_services virtual_services = {0}; #endif #ifdef ENABLE_PF1_PROXY -#define DP_EAL_PF1_MAC_ADDR_SIZE 18 -static char eal_pf1_proxy_mac_addr_str[DP_EAL_PF1_MAC_ADDR_SIZE] = {0}; -static const char 
*eal_pf1_proxy_dev_name = "pf1-tap"; -static char eal_pf1_proxy_params[DP_EAL_A_MAXLEN] = {0}; -static bool dp_conf_pf1_proxy_enabled = false; - -const char *dp_get_eal_pf1_proxy_mac_addr(void) -{ - return eal_pf1_proxy_mac_addr_str; -} - -const char *dp_get_eal_pf1_proxy_dev_name(void) -{ - return eal_pf1_proxy_dev_name; -} - -const char *dp_generate_eal_pf1_proxy_params(void) -{ - snprintf(eal_pf1_proxy_params, sizeof(eal_pf1_proxy_params), "net_tap0,iface=%s,mac=%s", - dp_get_eal_pf1_proxy_dev_name(), dp_get_eal_pf1_proxy_mac_addr()); - return eal_pf1_proxy_params; -} - bool dp_conf_is_pf1_proxy_enabled(void) { - return dp_conf_pf1_proxy_enabled; + return *pf1_proxy; } #endif @@ -299,16 +276,8 @@ static int parse_line(char *line, int lineno) if (!strcmp(key, "a-pf0")) return dp_argparse_string(value, eal_a_pf0, sizeof(eal_a_pf0)); - if (!strcmp(key, "a-pf1")) // TODO: throw an error when pf0 and pf1 are present and nic is in the mpesw mode + if (!strcmp(key, "a-pf1")) return dp_argparse_string(value, eal_a_pf1, sizeof(eal_a_pf1)); -#ifdef ENABLE_PF1_PROXY - else { - if (!strcmp(key, "pf1-proxy")) { - dp_conf_pf1_proxy_enabled = true; - return dp_argparse_string(value, eal_pf1_proxy_mac_addr_str, sizeof(eal_pf1_proxy_mac_addr_str)); - } - } -#endif // Otherwise support all long options if (!longopt) { diff --git a/src/dp_conf_opts.c b/src/dp_conf_opts.c index 049f4adc3..e63a8674d 100644 --- a/src/dp_conf_opts.c +++ b/src/dp_conf_opts.c @@ -20,6 +20,9 @@ enum { _OPT_SHOPT_MAX = 255, OPT_PF0, OPT_PF1, +#ifdef ENABLE_PF1_PROXY + OPT_PF1_PROXY, +#endif OPT_IPV6, OPT_VF_PATTERN, OPT_DHCP_MTU, @@ -56,6 +59,9 @@ static const struct option dp_conf_longopts[] = { { "version", 0, 0, OPT_VERSION }, { "pf0", 1, 0, OPT_PF0 }, { "pf1", 1, 0, OPT_PF1 }, +#ifdef ENABLE_PF1_PROXY + { "pf1-proxy", 1, 0, OPT_PF1_PROXY }, +#endif { "ipv6", 1, 0, OPT_IPV6 }, { "vf-pattern", 1, 0, OPT_VF_PATTERN }, { "dhcp-mtu", 1, 0, OPT_DHCP_MTU }, @@ -105,6 +111,9 @@ static const char *log_format_choices[] = { static char pf0_name[IF_NAMESIZE]; static char pf1_name[IF_NAMESIZE]; +#ifdef ENABLE_PF1_PROXY +static char pf1_proxy[IF_NAMESIZE]; +#endif static char vf_pattern[IF_NAMESIZE]; static int dhcp_mtu = 1500; static int wcmp_perc = 100; @@ -134,6 +143,13 @@ const char *dp_conf_get_pf1_name(void) return pf1_name; } +#ifdef ENABLE_PF1_PROXY +const char *dp_conf_get_pf1_proxy(void) +{ + return pf1_proxy; +} + +#endif const char *dp_conf_get_vf_pattern(void) { return vf_pattern; @@ -201,13 +217,15 @@ int dp_conf_get_flow_timeout(void) { return flow_timeout; } -#endif +#endif bool dp_conf_is_multiport_eswitch(void) { return multiport_eswitch; } + + /* These functions need to be implemented by the user of this generated code */ static void dp_argparse_version(void); static int dp_argparse_opt_ipv6(const char *arg); @@ -228,6 +246,9 @@ static inline void dp_argparse_help(const char *progname, FILE *outfile) " -v, --version display version and exit\n" " --pf0=IFNAME first physical interface (e.g. eth0)\n" " --pf1=IFNAME second physical interface (e.g. eth1)\n" +#ifdef ENABLE_PF1_PROXY + " --pf1-proxy=IFNAME VF representor to use as a proxy for pf1 packets\n" +#endif " --ipv6=ADDR6 IPv6 underlay address\n" " --vf-pattern=PATTERN virtual interface name pattern (e.g. 
'eth1vf')\n" " --dhcp-mtu=SIZE set the mtu field in DHCP responses (68 - 1500)\n" @@ -266,6 +287,10 @@ static int dp_conf_parse_arg(int opt, const char *arg) return dp_argparse_string(arg, pf0_name, ARRAY_SIZE(pf0_name)); case OPT_PF1: return dp_argparse_string(arg, pf1_name, ARRAY_SIZE(pf1_name)); +#ifdef ENABLE_PF1_PROXY + case OPT_PF1_PROXY: + return dp_argparse_string(arg, pf1_proxy, ARRAY_SIZE(pf1_proxy)); +#endif case OPT_IPV6: return dp_argparse_opt_ipv6(arg); case OPT_VF_PATTERN: diff --git a/src/dp_graph.c b/src/dp_graph.c index c6dd6c602..76c28e454 100644 --- a/src/dp_graph.c +++ b/src/dp_graph.c @@ -10,6 +10,7 @@ #include "dp_timers.h" #include "monitoring/dp_graphtrace.h" #include "nodes/arp_node.h" +#include "nodes/cls_node.h" #include "nodes/dhcp_node.h" #include "nodes/dhcpv6_node.h" #include "nodes/ipip_encap_node.h" @@ -121,16 +122,30 @@ static rte_graph_t dp_graph_create(unsigned int lcore_id) } #ifdef ENABLE_PF1_PROXY -static int dp_graph_init_proxy_tap(void) +static int dp_graph_init_pf1_proxy(void) { + char name[RTE_NODE_NAMESIZE]; + uint16_t port_id; + if (!dp_conf_is_pf1_proxy_enabled()) return DP_OK; - const struct dp_port *port = dp_get_pf_proxy_tap_port(); - uint16_t port_id = port->port_id; + // pf1-proxy is not part of dp_ports list + // so create a separate Tx and wire Tx to CLS node + port_id = dp_get_pf1_proxy()->port_id; - if (DP_FAILED(rx_node_create(port_id, 0)) - || DP_FAILED(tx_node_create(port_id))) + // Note that there is no Rx node since all pf1-proxy -> pf1 packets are offloaded + if (DP_FAILED(tx_node_create(port_id))) + return DP_ERROR; + + snprintf(name, sizeof(name), "tx-%u", port_id); + if (DP_FAILED(cls_node_append_tx(port_id, name))) + return DP_ERROR; + + // also wire the PF1 Tx for return path + port_id = dp_get_pf1()->port_id; + snprintf(name, sizeof(name), "tx-%u", port_id); + if (DP_FAILED(cls_node_append_tx(port_id, name))) return DP_ERROR; return DP_OK; @@ -189,7 +204,7 @@ int dp_graph_init(void) return DP_ERROR; #ifdef ENABLE_PF1_PROXY - if (DP_FAILED(dp_graph_init_proxy_tap())) + if (DP_FAILED(dp_graph_init_pf1_proxy())) return DP_ERROR; #endif diff --git a/src/dp_port.c b/src/dp_port.c index 7d2b7329a..aea93a7b7 100644 --- a/src/dp_port.c +++ b/src/dp_port.c @@ -17,6 +17,7 @@ #include "nodes/rx_node.h" #include "rte_flow/dp_rte_async_flow.h" #include "rte_flow/dp_rte_async_flow_isolation.h" +#include "rte_flow/dp_rte_async_flow_pf1_proxy.h" #include "rte_flow/dp_rte_async_flow_template.h" #include "rte_flow/dp_rte_flow.h" #include "rte_flow/dp_rte_flow_capture.h" @@ -25,6 +26,9 @@ #define DP_PORT_INIT_PF true #define DP_PORT_INIT_VF false +#define DP_PORT_PROXIED true +#define DP_PORT_NORMAL false + #define DP_METER_CIR_BASE_VALUE (1024 * 1024) // 1 Mbits #define DP_METER_EBS_BREAK_VALUE 100 // 100 Mbits/s, it used to differentiate different ebs calculation strategy to achieve relative stable metering results. epirical value. 
#define DP_METER_MBITS_TO_BYTES (1024 * 1024 / 8) @@ -53,11 +57,10 @@ static const struct rte_meter_srtcm_params dp_srtcm_params_base = { struct dp_port *_dp_port_table[DP_MAX_PORTS]; struct dp_port *_dp_pf_ports[DP_MAX_PF_PORTS]; -struct dp_ports _dp_ports; - #ifdef ENABLE_PF1_PROXY -struct dp_port _dp_pf_proxy_tap_port; +struct dp_port _dp_pf1_proxy_port; #endif +struct dp_ports _dp_ports; static int dp_port_register_pf(struct dp_port *port) { @@ -88,10 +91,20 @@ struct dp_port *dp_get_port_by_name(const char *pci_name) return _dp_port_table[port_id]; } +static void dp_set_neighmac(struct dp_port *port, const struct rte_ether_addr *mac) +{ + char strmac[18]; + + rte_ether_addr_copy(mac, &port->neigh_mac); + + snprintf(strmac, sizeof(strmac), RTE_ETHER_ADDR_PRT_FMT, RTE_ETHER_ADDR_BYTES(&port->neigh_mac)); + DPS_LOG_INFO("Setting neighboring MAC", _DP_LOG_STR("mac", strmac), DP_LOG_PORT(port)); +} + static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *dev_info) { struct dp_dpdk_layer *dp_layer = get_dpdk_layer(); - struct rte_ether_addr pf_neigh_mac; + struct rte_ether_addr pf_neigh_mac = {0}; struct rte_eth_txconf txq_conf; struct rte_eth_rxconf rxq_conf; struct rte_eth_conf port_conf = port_conf_default; @@ -125,7 +138,7 @@ static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *de for (uint16_t i = 0; i < DP_NR_STD_RX_QUEUES; ++i) { mempool = dp_layer->rte_mempool; #ifdef ENABLE_PF1_PROXY - if (dp_conf_is_pf1_proxy_enabled() && (port == dp_get_pf1() || port == &_dp_pf_proxy_tap_port)) + if (dp_conf_is_pf1_proxy_enabled() && (port == dp_get_pf1() || port == dp_get_pf1_proxy())) mempool = dp_layer->rte_jumbo_mempool; #endif ret = rte_eth_rx_queue_setup(port->port_id, i, 1024, @@ -172,8 +185,15 @@ static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *de if (port->is_pf) { if (DP_FAILED(dp_get_pf_neigh_mac(dev_info->if_index, &pf_neigh_mac, &port->own_mac))) return DP_ERROR; - rte_ether_addr_copy(&pf_neigh_mac, &port->neigh_mac); + dp_set_neighmac(port, &pf_neigh_mac); } +#ifdef ENABLE_PF1_PROXY + else if (dp_conf_is_pf1_proxy_enabled() && port == dp_get_pf1_proxy()) + dp_set_neighmac(port, &dp_get_pf1()->neigh_mac); +#endif + + if (dp_conf_is_multiport_eswitch() && DP_FAILED(dp_configure_async_flows(port->port_id))) + return DP_ERROR; return DP_OK; } @@ -196,34 +216,45 @@ static int dp_port_flow_isolate(uint16_t port_id) return DP_OK; } -static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_dev_info *dev_info, bool is_pf) +static int dp_get_port_socket_id(uint16_t port_id) { - static int last_pf1_hairpin_tx_rx_queue_offset = 1; - struct dp_port *port; int socket_id; - int ret; if (port_id >= RTE_DIM(_dp_port_table)) { DPS_LOG_ERR("Invalid port id", DP_LOG_PORTID(port_id), DP_LOG_MAX(RTE_DIM(_dp_port_table))); - return NULL; - } - - if (is_pf) { - if (dp_conf_get_nic_type() != DP_CONF_NIC_TYPE_TAP) - if (DP_FAILED(dp_port_flow_isolate(port_id))) - return NULL; + return DP_ERROR; } socket_id = rte_eth_dev_socket_id(port_id); if (DP_FAILED(socket_id)) { if (socket_id == SOCKET_ID_ANY) { - DPS_LOG_WARNING("Cannot get numa socket", DP_LOG_PORTID(port_id)); + DPS_LOG_WARNING("Cannot get numa socket, using 'any'", DP_LOG_PORTID(port_id)); } else { DPS_LOG_ERR("Cannot get numa socket", DP_LOG_PORTID(port_id), DP_LOG_RET(rte_errno)); - return NULL; + return DP_ERROR; } } + return socket_id; +} + +static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_dev_info *dev_info, bool 
is_pf, bool is_proxied) +{ + static int last_pf1_hairpin_tx_rx_queue_offset = 1; + struct dp_port *port; + int socket_id; + int ret; + + socket_id = dp_get_port_socket_id(port_id); + if (DP_FAILED(socket_id) && socket_id != SOCKET_ID_ANY) + return NULL; + + if (is_pf && !is_proxied) { + if (dp_conf_get_nic_type() != DP_CONF_NIC_TYPE_TAP) + if (DP_FAILED(dp_port_flow_isolate(port_id))) + return NULL; + } + // oveflow check done by liming the number of calls to this function port = _dp_ports.end++; port->is_pf = is_pf; @@ -237,9 +268,6 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d if (DP_FAILED(dp_port_init_ethdev(port, dev_info))) return NULL; - if (dp_conf_is_multiport_eswitch() && DP_FAILED(dp_configure_async_flows(port->port_id))) - return NULL; - if (is_pf) { ret = rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, dp_link_status_change_event_callback, NULL); if (DP_FAILED(ret)) { @@ -263,91 +291,24 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d } #ifdef ENABLE_PF1_PROXY -static struct dp_port *dp_port_init_proxied_pf_interface(uint16_t port_id, struct rte_eth_dev_info *dev_info) +static struct dp_port *dp_port_init_pf1_proxy_interface(uint16_t port_id, struct rte_eth_dev_info *dev_info) { struct dp_port *port; int socket_id; - int ret; - if (port_id >= RTE_DIM(_dp_port_table)) { - DPS_LOG_ERR("Invalid port id", DP_LOG_PORTID(port_id), DP_LOG_MAX(RTE_DIM(_dp_port_table))); + socket_id = dp_get_port_socket_id(port_id); + if (DP_FAILED(socket_id) && socket_id != SOCKET_ID_ANY) return NULL; - } - socket_id = rte_eth_dev_socket_id(port_id); - if (DP_FAILED(socket_id)) { - if (socket_id == SOCKET_ID_ANY) { - DPS_LOG_WARNING("Cannot get numa socket", DP_LOG_PORTID(port_id)); - } else { - DPS_LOG_ERR("Cannot get numa socket", DP_LOG_PORTID(port_id), DP_LOG_RET(rte_errno)); - return NULL; - } - } - - // oveflow check done by liming the number of calls to this function - port = _dp_ports.end++; - port->is_pf = true; - port->port_id = port_id; - port->socket_id = socket_id; - _dp_port_table[port_id] = port; - - if (DP_FAILED(dp_port_init_ethdev(port, dev_info))) - return NULL; - - if (dp_conf_is_multiport_eswitch() && DP_FAILED(dp_configure_async_flows(port->port_id))) - return NULL; - - DPS_LOG_INFO("INIT setting proxied pf port to promiscuous mode", DP_LOG_PORT(port)); - ret = rte_eth_promiscuous_enable(port->port_id); - if (DP_FAILED(ret)) { - DPS_LOG_ERR("Promiscuous mode setting failed", DP_LOG_PORT(port), DP_LOG_RET(ret)); - return NULL; - } - - if (DP_FAILED(dp_port_register_pf(port))) - return NULL; - ret = rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, dp_link_status_change_event_callback, NULL); - if (DP_FAILED(ret)) { - DPS_LOG_ERR("Cannot register link status callback", DP_LOG_RET(ret)); - return NULL; - } - - return port; -} - -static struct dp_port *dp_port_init_proxy_tap(uint16_t port_id, struct rte_eth_dev_info *dev_info) -{ - // struct dp_port *port; - struct dp_port *port = &_dp_pf_proxy_tap_port; - int socket_id; - int ret; - - socket_id = rte_eth_dev_socket_id(port_id); - if (DP_FAILED(socket_id)) { - if (socket_id == SOCKET_ID_ANY) { - DPS_LOG_WARNING("Cannot get numa socket", DP_LOG_PORTID(port_id)); - } else { - DPS_LOG_ERR("Cannot get numa socket", DP_LOG_PORTID(port_id), DP_LOG_RET(rte_errno)); - return NULL; - } - } - - // oveflow check done by liming the number of calls to this function - // port = _dp_ports.end++; + port = &_dp_pf1_proxy_port; port->is_pf = false; 
port->port_id = port_id; port->socket_id = socket_id; + _dp_port_table[port_id] = port; if (DP_FAILED(dp_port_init_ethdev(port, dev_info))) return NULL; - DPS_LOG_INFO("INIT setting proxy tap to promiscuous mode", DP_LOG_PORT(port)); - ret = rte_eth_promiscuous_enable(port->port_id); - if (DP_FAILED(ret)) { - DPS_LOG_ERR("Promiscuous mode setting failed", DP_LOG_PORT(port), DP_LOG_RET(ret)); - return NULL; - } - return port; } #endif @@ -368,61 +329,67 @@ static int dp_port_set_up_hairpins(void) return DP_OK; } -static int dp_port_init_pf(const char *pf_name) +static int dp_find_port(const char *iface_name, uint16_t *out_port_id, struct rte_eth_dev_info *out_dev_info) { uint16_t port_id; - struct rte_eth_dev_info dev_info; char ifname[IF_NAMESIZE] = {0}; - struct dp_port *port; RTE_ETH_FOREACH_DEV(port_id) { - if (DP_FAILED(dp_get_dev_info(port_id, &dev_info, ifname))) + if (DP_FAILED(dp_get_dev_info(port_id, out_dev_info, ifname))) return DP_ERROR; - if (!strncmp(pf_name, ifname, sizeof(ifname))) { - DPS_LOG_INFO("INIT initializing PF port", DP_LOG_PORTID(port_id), DP_LOG_IFNAME(ifname)); -#ifdef ENABLE_PF1_PROXY - if (dp_conf_is_pf1_proxy_enabled() && strncmp(pf_name, dp_conf_get_pf1_name(), sizeof(ifname)) == 0) - port = dp_port_init_proxied_pf_interface(port_id, &dev_info); - else - port = dp_port_init_interface(port_id, &dev_info, DP_PORT_INIT_PF); -#else - port = dp_port_init_interface(port_id, &dev_info, DP_PORT_INIT_PF); -#endif - if (!port) - return DP_ERROR; - snprintf(port->port_name, sizeof(port->port_name), "%s", pf_name); + if (!strncmp(iface_name, ifname, sizeof(ifname))) { + *out_port_id = port_id; return DP_OK; } } - DPS_LOG_ERR("No such PF", DP_LOG_NAME(pf_name)); + DPS_LOG_ERR("No such interface", DP_LOG_NAME(iface_name)); return DP_ERROR; } -#ifdef ENABLE_PF1_PROXY -static int dp_port_init_tap_proxy(const char *pf_tap_proxy_name) +static int dp_port_init_pf(const char *pf_name) { - if (!dp_conf_is_pf1_proxy_enabled()) - return DP_OK; + uint16_t port_id; + struct rte_eth_dev_info dev_info; + struct dp_port *port; + bool proxied; + + if (DP_FAILED(dp_find_port(pf_name, &port_id, &dev_info))) + return DP_ERROR; + + DPS_LOG_INFO("INIT initializing PF port", DP_LOG_PORTID(port_id), DP_LOG_IFNAME(pf_name)); +#ifdef ENABLE_PF1_PROXY + proxied = dp_conf_is_pf1_proxy_enabled() && !strcmp(pf_name, dp_conf_get_pf1_name()); +#else + proxied = false; +#endif + port = dp_port_init_interface(port_id, &dev_info, DP_PORT_INIT_PF, proxied); + if (!port) + return DP_ERROR; + snprintf(port->port_name, sizeof(port->port_name), "%s", pf_name); + return DP_OK; +} + +#ifdef ENABLE_PF1_PROXY +static int dp_port_init_pf1_proxy(const char *pf1_proxy_name) +{ uint16_t port_id; struct rte_eth_dev_info dev_info; - char ifname[IF_NAMESIZE] = {0}; struct dp_port *port; - RTE_ETH_FOREACH_DEV(port_id) { - if (DP_FAILED(dp_get_dev_info(port_id, &dev_info, ifname))) - return DP_ERROR; - if (!strncmp(pf_tap_proxy_name, ifname, sizeof(ifname))) { - DPS_LOG_INFO("INIT initializing PF proxy tap port", DP_LOG_PORTID(port_id), DP_LOG_IFNAME(ifname)); - port = dp_port_init_proxy_tap(port_id, &dev_info); - if (!port) - return DP_ERROR; - snprintf(port->port_name, sizeof(port->port_name), "%s", pf_tap_proxy_name); - return DP_OK; - } - } - DPS_LOG_ERR("No such PF proxy tap port", DP_LOG_NAME(pf_tap_proxy_name)); - return DP_ERROR; + if (!dp_conf_is_pf1_proxy_enabled()) + return DP_OK; + + if (DP_FAILED(dp_find_port(pf1_proxy_name, &port_id, &dev_info))) + return DP_ERROR; + + DPS_LOG_INFO("INIT initializing PF1 
proxy port", DP_LOG_PORTID(port_id), DP_LOG_IFNAME(pf1_proxy_name)); + port = dp_port_init_pf1_proxy_interface(port_id, &dev_info); + if (!port) + return DP_ERROR; + + snprintf(port->port_name, sizeof(port->port_name), "%s", pf1_proxy_name); + return DP_OK; } #endif @@ -439,7 +406,7 @@ static int dp_port_init_vfs(const char *vf_pattern, int num_of_vfs) return DP_ERROR; if (strstr(ifname, vf_pattern) && ++vf_count <= num_of_vfs) { DPS_LOG_INFO("INIT initializing VF port", DP_LOG_PORTID(port_id), DP_LOG_IFNAME(ifname)); - port = dp_port_init_interface(port_id, &dev_info, DP_PORT_INIT_VF); + port = dp_port_init_interface(port_id, &dev_info, DP_PORT_INIT_VF, DP_PORT_NORMAL); if (!port) return DP_ERROR; snprintf(port->port_name, sizeof(port->port_name), "%s", vf_pattern); @@ -472,7 +439,7 @@ int dp_ports_init(void) if (DP_FAILED(dp_port_init_pf(dp_conf_get_pf0_name())) || DP_FAILED(dp_port_init_pf(dp_conf_get_pf1_name())) #ifdef ENABLE_PF1_PROXY - || DP_FAILED(dp_port_init_tap_proxy(dp_get_eal_pf1_proxy_dev_name())) + || DP_FAILED(dp_port_init_pf1_proxy(dp_conf_get_pf1_proxy())) #endif || DP_FAILED(dp_port_init_vfs(dp_conf_get_vf_pattern(), num_of_vfs))) return DP_ERROR; @@ -508,7 +475,7 @@ static int dp_stop_eth_port(struct dp_port *port) ret = rte_eth_dev_stop(port->port_id); if (DP_FAILED(ret)) - DPS_LOG_ERR("Cannot stop ethernet port", DP_LOG_PORTID(port->port_id), DP_LOG_RET(ret)); + DPS_LOG_ERR("Cannot stop ethernet port", DP_LOG_PORT(port), DP_LOG_RET(ret)); return ret; } @@ -518,12 +485,17 @@ void dp_ports_stop(void) // in multiport-mode, PF0 needs to be stopped last struct dp_port *pf0 = dp_get_port_by_pf_index(0); +#ifdef ENABLE_PF1_PROXY + if (_dp_pf1_proxy_port.allocated) + dp_stop_eth_port(&_dp_pf1_proxy_port); +#endif + // without stopping started ports, DPDK complains DP_FOREACH_PORT(&_dp_ports, port) { if (port->allocated && port != pf0) dp_stop_eth_port(port); } - if (pf0->allocated) + if (pf0 && pf0->allocated) dp_stop_eth_port(pf0); } @@ -581,16 +553,27 @@ static int dp_port_install_async_isolated_mode(struct dp_port *port) static int dp_port_create_default_pf_async_templates(struct dp_port *port) { - DPS_LOG_INFO("Installing PF async templates", DP_LOG_PORTID(port->port_id)); + DPS_LOG_INFO("Installing PF async templates", DP_LOG_PORT(port)); if (DP_FAILED(dp_create_pf_async_isolation_templates(port))) { - DPS_LOG_ERR("Failed to create pf async isolation templates", DP_LOG_PORTID(port->port_id)); + DPS_LOG_ERR("Failed to create pf async isolation templates", DP_LOG_PORT(port)); return DP_ERROR; } +#ifdef ENABLE_PF1_PROXY + // Even though this is PF1 linking to VF on PF1, the rules need to be created in PF0 (multiport-eswitch mode) + if (dp_conf_is_pf1_proxy_enabled() && port == dp_get_pf0()) { + if (DP_FAILED(dp_create_pf_async_from_proxy_templates(port)) + || DP_FAILED(dp_create_pf_async_to_proxy_templates(port)) + ) { + DPS_LOG_ERR("Failed to create pf async proxy templates", DP_LOG_PORT(port)); + return DP_ERROR; + } + } +#endif #ifdef ENABLE_VIRTSVC if (DP_FAILED(dp_create_virtsvc_async_isolation_templates(port, IPPROTO_TCP)) || DP_FAILED(dp_create_virtsvc_async_isolation_templates(port, IPPROTO_UDP)) ) { - DPS_LOG_ERR("Failed to create virtsvc async isolation templates", DP_LOG_PORTID(port->port_id)); + DPS_LOG_ERR("Failed to create virtsvc async isolation templates", DP_LOG_PORT(port)); return DP_ERROR; } #endif @@ -605,9 +588,13 @@ static int dp_init_port(struct dp_port *port) if (port->is_pf) { if (dp_conf_is_multiport_eswitch()) { - if 
(DP_FAILED(dp_port_create_default_pf_async_templates(port)) - || DP_FAILED(dp_port_install_async_isolated_mode(port))) - return DP_ERROR; + // no isolation on proxied PF +#ifdef ENABLE_PF1_PROXY + if (port == dp_get_pf0() || !dp_conf_is_pf1_proxy_enabled()) +#endif + if (DP_FAILED(dp_port_create_default_pf_async_templates(port)) + || DP_FAILED(dp_port_install_async_isolated_mode(port))) + return DP_ERROR; } else if (DP_FAILED(dp_port_install_sync_isolated_mode(port->port_id))) return DP_ERROR; @@ -647,9 +634,18 @@ int dp_start_port(struct dp_port *port) } #ifdef ENABLE_PF1_PROXY -int dp_start_pf_proxy_tap_port(void) +int dp_start_pf1_proxy_port(void) { - return dp_start_port(&_dp_pf_proxy_tap_port); + int ret; + + ret = rte_eth_dev_start(_dp_pf1_proxy_port.port_id); + if (DP_FAILED(ret)) { + DPS_LOG_ERR("Cannot start ethernet port", DP_LOG_PORT(&_dp_pf1_proxy_port), DP_LOG_RET(ret)); + return ret; + } + + _dp_pf1_proxy_port.allocated = true; + return DP_OK; } #endif diff --git a/src/dp_service.c b/src/dp_service.c index 05caf2829..8701f0977 100644 --- a/src/dp_service.c +++ b/src/dp_service.c @@ -60,23 +60,13 @@ static int dp_args_add_mellanox(int *orig_argc, char ***orig_argv) // add mellanox args (remember that they can be written to, so strdup()) dp_mlx_args[0] = dp_argv[curarg++] = strdup("-a"); dp_mlx_args[1] = dp_argv[curarg++] = strdup(dp_conf_get_eal_a_pf0()); - if (dp_conf_get_eal_a_pf1()[0] == '\0') { -#ifdef ENABLE_PF1_PROXY - if (dp_conf_is_pf1_proxy_enabled()) { - dp_mlx_args[2] = dp_argv[curarg++] = strdup("--vdev"); - dp_mlx_args[3] = dp_argv[curarg++] = strdup(dp_generate_eal_pf1_proxy_params()); - } else -#endif - { - dp_mlx_args[2] = dp_argv[curarg++] = strdup(""); - dp_mlx_args[3] = dp_argv[curarg++] = strdup(""); - } + dp_mlx_args[2] = dp_argv[curarg++] = strdup(""); + dp_mlx_args[3] = dp_argv[curarg++] = strdup(""); } else { dp_mlx_args[2] = dp_argv[curarg++] = strdup("-a"); dp_mlx_args[3] = dp_argv[curarg++] = strdup(dp_conf_get_eal_a_pf1()); } - if (!dp_mlx_args[0] || !dp_mlx_args[1] || !dp_mlx_args[2] || !dp_mlx_args[3]) { DP_EARLY_ERR("Cannot allocate Mellanox arguments"); return DP_ERROR; @@ -177,7 +167,8 @@ static int init_interfaces(void) return DP_ERROR; #ifdef ENABLE_PF1_PROXY - if (DP_FAILED(dp_start_pf_proxy_tap_port())) + if (dp_conf_is_pf1_proxy_enabled() + && DP_FAILED(dp_start_pf1_proxy_port())) return DP_ERROR; #endif diff --git a/src/dp_virtsvc.c b/src/dp_virtsvc.c index 84d2d2cba..46132a148 100644 --- a/src/dp_virtsvc.c +++ b/src/dp_virtsvc.c @@ -169,6 +169,7 @@ int dp_virtsvc_init(int socket_id) dp_virtservices_end->virtual_port = rule->virtual_port; dp_virtservices_end->service_port = rule->service_port; dp_copy_ipv6(&dp_virtservices_end->service_addr, &rule->service_addr); + dp_generate_ul_ipv6(&dp_virtservices_end->ul_addr); // last_assigned_port is 0 due to zmalloc() snprintf(hashtable_name, sizeof(hashtable_name), "virtsvc_table_%u", i); dp_virtservices_end->open_ports = dp_create_jhash_table(DP_VIRTSVC_PORTCOUNT, @@ -264,7 +265,8 @@ uint16_t dp_create_virtsvc_async_isolation_rules(uint16_t port_id, service->service_port, service->proto == IPPROTO_TCP ? 
tcp_template_table - : udp_template_table); + : udp_template_table, + &service->ul_addr); if (!flow) { DPS_LOG_ERR("Cannot create async virtsvc isolation rule", DP_LOG_VIRTSVC(service)); break; diff --git a/src/dpdk_layer.c b/src/dpdk_layer.c index 109cd04c7..be4b87283 100644 --- a/src/dpdk_layer.c +++ b/src/dpdk_layer.c @@ -46,9 +46,9 @@ static int dp_dpdk_layer_init_unsafe(void) #ifdef ENABLE_PF1_PROXY if (dp_conf_is_pf1_proxy_enabled()) { dp_layer.rte_jumbo_mempool = rte_pktmbuf_pool_create("jumbo_mbuf_pool", DP_JUMBO_MBUF_POOL_SIZE, - DP_MEMPOOL_CACHE_SIZE, DP_MBUF_PRIV_DATA_SIZE, - DP_JUMBO_MBUF_BUF_SIZE, - rte_socket_id()); + DP_MEMPOOL_CACHE_SIZE, DP_MBUF_PRIV_DATA_SIZE, + DP_JUMBO_MBUF_BUF_SIZE, + rte_socket_id()); if (!dp_layer.rte_jumbo_mempool) { DPS_LOG_ERR("Cannot create jumbo mbuf pool", DP_LOG_RET(rte_errno)); return DP_ERROR; diff --git a/src/meson.build b/src/meson.build index 298ceef9b..80d523ae4 100644 --- a/src/meson.build +++ b/src/meson.build @@ -75,7 +75,7 @@ if get_option('enable_virtual_services') endif if get_option('enable_pf1_proxy') dp_sources += [ - 'nodes/pf1_proxy_node.c', + 'rte_flow/dp_rte_async_flow_pf1_proxy.c', ] endif diff --git a/src/nodes/cls_node.c b/src/nodes/cls_node.c index a6a369d18..4fdab3221 100644 --- a/src/nodes/cls_node.c +++ b/src/nodes/cls_node.c @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2023 SAP SE or an SAP affiliate company and IronCore contributors // SPDX-License-Identifier: Apache-2.0 +#include "nodes/cls_node.h" #include #include #include @@ -23,31 +24,43 @@ # define VIRTSVC_NEXT(NEXT) #endif -#ifdef ENABLE_PF1_PROXY -#define PF1_PROXY_NEXT(NEXT) NEXT(CLS_NEXT_PF1_PROXY, "pf1_proxy") -#else -#define PF1_PROXY_NEXT(NEXT) -#endif - #define NEXT_NODES(NEXT) \ NEXT(CLS_NEXT_ARP, "arp") \ NEXT(CLS_NEXT_IPV6_ND, "ipv6_nd") \ NEXT(CLS_NEXT_CONNTRACK, "conntrack") \ NEXT(CLS_NEXT_IPIP_DECAP, "ipip_decap") \ - PF1_PROXY_NEXT(NEXT) \ VIRTSVC_NEXT(NEXT) -#ifdef ENABLE_VIRTSVC DP_NODE_REGISTER(CLS, cls, NEXT_NODES); + +#ifdef ENABLE_PF1_PROXY +static bool pf1_proxy_enabled = false; +static uint16_t pf1_port_id; +static uint16_t pf1_proxy_port_id; +#endif + static int cls_node_init(__rte_unused const struct rte_graph *graph, __rte_unused struct rte_node *node) { +#ifdef ENABLE_PF1_PROXY + pf1_proxy_enabled = dp_conf_is_pf1_proxy_enabled(); + pf1_port_id = dp_get_pf1()->port_id; + pf1_proxy_port_id = dp_get_pf1_proxy()->port_id; +#endif +#ifdef ENABLE_VIRTSVC virtsvc_present = dp_virtsvc_get_count() > 0; virtsvc_ipv4_tree = dp_virtsvc_get_ipv4_tree(); virtsvc_ipv6_tree = dp_virtsvc_get_ipv6_tree(); +#endif return DP_OK; } -#else -DP_NODE_REGISTER_NOINIT(CLS, cls, NEXT_NODES); + +#ifdef ENABLE_PF1_PROXY +static uint16_t next_tx_index[DP_MAX_PORTS]; + +int cls_node_append_tx(uint16_t port_id, const char *tx_node_name) +{ + return dp_node_append_tx(DP_NODE_GET_SELF(cls), next_tx_index, port_id, tx_node_name); +} #endif static __rte_always_inline int is_arp(const struct rte_ether_hdr *ether_hdr) @@ -126,47 +139,6 @@ static __rte_always_inline struct dp_virtsvc *get_incoming_virtsvc(const struct } #endif -#ifdef ENABLE_PF1_PROXY -static __rte_always_inline bool pf1_tap_proxy_forward(struct rte_mbuf *m) -{ - if (!dp_conf_is_pf1_proxy_enabled()) - return false; - - const struct rte_ether_hdr *ether_hdr; - const struct rte_ipv6_hdr *ipv6_hdr; - uint32_t l3_type; - - if (m->port == dp_get_pf_proxy_tap_port()->port_id) - return true; - - // this duplicates code from the main classifier, to pass underlay/virtsvc packets - // TODO needs reworking if proxy 
is kept as a long-term solution - if (m->port == dp_get_pf1()->port_id) { - if (unlikely((m->packet_type & RTE_PTYPE_L2_MASK) != RTE_PTYPE_L2_ETHER)) - return true; - - ether_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); - l3_type = m->packet_type & RTE_PTYPE_L3_MASK; - - if (RTE_ETH_IS_IPV6_HDR(l3_type)) { - ipv6_hdr = (const struct rte_ipv6_hdr *)(ether_hdr + 1); - if (ipv6_hdr->proto == IPPROTO_IPIP || ipv6_hdr->proto == IPPROTO_IPV6) - return false; -#ifdef ENABLE_VIRTSVC - if (virtsvc_present) { - if (get_incoming_virtsvc(ipv6_hdr)) - return false; - } -#endif - } - - return true; - } - - return false; -} -#endif - static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_node *node, struct rte_mbuf *m) { const struct rte_ether_hdr *ether_hdr; @@ -178,11 +150,7 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod struct dp_virtsvc *virtsvc; #endif -#ifdef ENABLE_PF1_PROXY - // TODO this is not the best way as this function duplicates work, needs reworking if pf1-proxy is kept in the future - if (unlikely(pf1_tap_proxy_forward(m))) - return CLS_NEXT_PF1_PROXY; -#endif + // this is where pf1-proxy -> pf1 should happen, but that is done via rte_flow rule if (unlikely((m->packet_type & RTE_PTYPE_L2_MASK) != RTE_PTYPE_L2_ETHER)) return CLS_NEXT_DROP; @@ -244,8 +212,7 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod df->l3_type = RTE_ETHER_TYPE_IPV6; break; default: - df->l3_type = ntohs(ether_hdr->ether_type); - return CLS_NEXT_CONNTRACK; + return CLS_NEXT_DROP; } df->tun_info.l3_type = ntohs(ether_hdr->ether_type); dp_extract_underlay_header(df, ipv6_hdr); @@ -261,6 +228,19 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod return CLS_NEXT_DROP; } +#ifdef ENABLE_PF1_PROXY +static __rte_always_inline rte_edge_t get_next_index_proxy(__rte_unused struct rte_node *node, struct rte_mbuf *m) +{ + rte_edge_t next = get_next_index(node, m); + + if (next == CLS_NEXT_DROP && pf1_proxy_enabled && m->port == pf1_port_id) + return next_tx_index[pf1_proxy_port_id]; + + return next; +} +#define get_next_index get_next_index_proxy +#endif + static uint16_t cls_node_process(struct rte_graph *graph, struct rte_node *node, void **objs, diff --git a/src/nodes/pf1_proxy_node.c b/src/nodes/pf1_proxy_node.c deleted file mode 100644 index 72f742ece..000000000 --- a/src/nodes/pf1_proxy_node.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-FileCopyrightText: 2023 SAP SE or an SAP affiliate company and IronCore contributors -// SPDX-License-Identifier: Apache-2.0 - -#include -#include -#include "nodes/common_node.h" - -DP_NODE_REGISTER(PF1_PROXY, pf1_proxy, DP_NODE_DEFAULT_NEXT_ONLY); - -static uint16_t pf1_port_id; -static uint16_t pf1_tap_port_id; - -static int pf1_proxy_node_init(__rte_unused const struct rte_graph *graph, __rte_unused struct rte_node *node) -{ - pf1_port_id = dp_get_pf1()->port_id; - pf1_tap_port_id = dp_get_pf_proxy_tap_port()->port_id; - return DP_OK; -} - -static __rte_always_inline int pf1_proxy_packet(struct rte_node *node, - struct rte_mbuf *pkt) -{ - uint16_t port_id; - uint16_t sent_count; - - if (pkt->port == pf1_tap_port_id) { - port_id = pf1_port_id; - } else if (pkt->port == pf1_port_id) { - port_id = pf1_tap_port_id; - } else { - DPNODE_LOG_WARNING(node, "Unexpected packet in PF1 Proxy node", DP_LOG_PORTID(pkt->port)); - return DP_ERROR; - } - - sent_count = rte_eth_tx_burst(port_id, 0, &pkt, 1); - if (sent_count != 1) { - DPNODE_LOG_WARNING(node, "Unable to send 
packet through PF1 Proxy node", DP_LOG_PORTID(pkt->port)); - return DP_ERROR; - } - - dp_graphtrace_tx_burst(node, (void **)&pkt, 1, port_id); - return DP_OK; -} - -static uint16_t pf1_proxy_node_process(struct rte_graph *graph, - struct rte_node *node, - void **objs, - uint16_t nb_objs) -{ - dp_graphtrace_node_burst(node, objs, nb_objs); - - // since this node is emitting packets, dp_forward_* wrapper functions cannot be used - // this code should closely resemble the one inside those functions - - for (uint16_t i = 0; i < nb_objs; ++i) { - if (DP_FAILED(pf1_proxy_packet(node, objs[i]))) { - dp_graphtrace_next_burst(node, &objs[i], 1, PF1_PROXY_NEXT_DROP); - rte_node_enqueue(graph, node, PF1_PROXY_NEXT_DROP, &objs[i], 1); - } - } - - return nb_objs; -} diff --git a/src/nodes/rx_node.c b/src/nodes/rx_node.c index 7d71e6168..45f2396c9 100644 --- a/src/nodes/rx_node.c +++ b/src/nodes/rx_node.c @@ -19,7 +19,7 @@ DP_NODE_REGISTER_SOURCE(RX, rx, NEXT_NODES); // there are multiple Tx nodes, one per port, node context is needed struct rx_node_ctx { - struct dp_port *port; + const struct dp_port *port; uint16_t queue_id; }; static_assert(sizeof(struct rx_node_ctx) <= RTE_NODE_CTX_SZ, @@ -53,7 +53,7 @@ static int rx_node_init(const struct rte_graph *graph, struct rte_node *node) { struct rx_node_ctx *ctx = (struct rx_node_ctx *)node->ctx; uint16_t port_id; - struct dp_port *port; + const struct dp_port *port; // Find this node's dedicated port to be used in processing for (port_id = 0; port_id < RTE_DIM(rx_node_ids); ++port_id) diff --git a/src/nodes/virtsvc_node.c b/src/nodes/virtsvc_node.c index 8f2459d1b..7cf468676 100644 --- a/src/nodes/virtsvc_node.c +++ b/src/nodes/virtsvc_node.c @@ -16,7 +16,7 @@ #include "nodes/common_node.h" #include "rte_flow/dp_rte_flow.h" -DP_NODE_REGISTER(VIRTSVC, virtsvc, DP_NODE_DEFAULT_NEXT_ONLY); +DP_NODE_REGISTER_NOINIT(VIRTSVC, virtsvc, DP_NODE_DEFAULT_NEXT_ONLY); static uint16_t next_tx_index[DP_MAX_PORTS]; @@ -25,15 +25,6 @@ int virtsvc_node_append_tx(uint16_t port_id, const char *tx_node_name) return dp_node_append_tx(DP_NODE_GET_SELF(virtsvc), next_tx_index, port_id, tx_node_name); } -// runtime constant, precompute -static const union dp_ipv6 *service_ul_ip; - -static int virtsvc_node_init(__rte_unused const struct rte_graph *graph, __rte_unused struct rte_node *node) -{ - service_ul_ip = dp_conf_get_underlay_ip(); - return DP_OK; -} - static __rte_always_inline void virtsvc_tcp_state_change(struct dp_virtsvc_conn *conn, uint8_t tcp_flags) { if (DP_TCP_PKT_FLAG_RST(tcp_flags)) { @@ -102,7 +93,7 @@ static __rte_always_inline uint16_t virtsvc_request_next(struct rte_node *node, ipv6_hdr->payload_len = htons((uint16_t)(hdr_total_len - sizeof(struct rte_ipv4_hdr))); ipv6_hdr->proto = proto; ipv6_hdr->hop_limits = ttl; - dp_set_src_ipv6(ipv6_hdr, service_ul_ip); + dp_set_src_ipv6(ipv6_hdr, &virtsvc->ul_addr); dp_set_dst_ipv6(ipv6_hdr, &virtsvc->service_addr); m->ol_flags |= RTE_MBUF_F_TX_IPV6; m->tx_offload = 0; diff --git a/src/rte_flow/dp_rte_async_flow_isolation.c b/src/rte_flow/dp_rte_async_flow_isolation.c index c20a6217d..ad118c5f5 100644 --- a/src/rte_flow/dp_rte_async_flow_isolation.c +++ b/src/rte_flow/dp_rte_async_flow_isolation.c @@ -8,6 +8,9 @@ #include "dp_virtsvc.h" #endif #include "rte_flow/dp_rte_async_flow.h" +#ifdef ENABLE_PF1_PROXY +#include "rte_flow/dp_rte_async_flow_pf1_proxy.h" +#endif #include "rte_flow/dp_rte_async_flow_template.h" #include "rte_flow/dp_rte_flow_helpers.h" @@ -23,15 +26,15 @@ enum dp_isolation_actions_type { 
DP_ISOLATION_ACTIONS_COUNT, }; -static const struct rte_flow_pattern_template_attr default_pattern_template_attr = { +static const struct rte_flow_pattern_template_attr ingress_pattern_template_attr = { .ingress = 1 }; -static const struct rte_flow_actions_template_attr default_actions_template_attr = { +static const struct rte_flow_actions_template_attr ingress_actions_template_attr = { .ingress = 1 }; -static const struct rte_flow_template_table_attr pf_default_template_table_attr = { +static const struct rte_flow_template_table_attr pf_ingress_template_table_attr = { .flow_attr = { .group = 0, .ingress = 1, @@ -39,6 +42,7 @@ static const struct rte_flow_template_table_attr pf_default_template_table_attr .nb_flows = DP_ISOLATION_DEFAULT_TABLE_MAX_RULES, }; + int dp_create_pf_async_isolation_templates(struct dp_port *port) { struct dp_port_async_template *tmpl; @@ -58,20 +62,19 @@ int dp_create_pf_async_isolation_templates(struct dp_port *port) { .type = RTE_FLOW_ITEM_TYPE_IPV6, .mask = &dp_flow_item_ipv6_mask, }, - { .type = RTE_FLOW_ITEM_TYPE_END, - }, + { .type = RTE_FLOW_ITEM_TYPE_END }, }; tmpl->pattern_templates[DP_ISOLATION_PATTERN_IPV6_PROTO] - = dp_create_async_pattern_template(port->port_id, &default_pattern_template_attr, pattern); + = dp_create_async_pattern_template(port->port_id, &ingress_pattern_template_attr, pattern); static const struct rte_flow_action actions[] = { { .type = RTE_FLOW_ACTION_TYPE_QUEUE, }, { .type = RTE_FLOW_ACTION_TYPE_END, }, }; tmpl->actions_templates[DP_ISOLATION_ACTIONS_QUEUE] - = dp_create_async_actions_template(port->port_id, &default_actions_template_attr, actions, actions); + = dp_create_async_actions_template(port->port_id, &ingress_actions_template_attr, actions, actions); - tmpl->table_attr = &pf_default_template_table_attr; + tmpl->table_attr = &pf_ingress_template_table_attr; return dp_init_async_template(port->port_id, tmpl); } @@ -79,56 +82,55 @@ int dp_create_pf_async_isolation_templates(struct dp_port *port) #ifdef ENABLE_VIRTSVC int dp_create_virtsvc_async_isolation_templates(struct dp_port *port, uint8_t proto_id) { - struct dp_port_async_template *template; + struct dp_port_async_template *tmpl; - template = dp_alloc_async_template(DP_ISOLATION_PATTERN_COUNT, DP_ISOLATION_ACTIONS_COUNT); - if (!template) + tmpl = dp_alloc_async_template(DP_ISOLATION_PATTERN_COUNT, DP_ISOLATION_ACTIONS_COUNT); + if (!tmpl) return DP_ERROR; if (proto_id == IPPROTO_TCP) - port->default_async_rules.default_templates[DP_PORT_ASYNC_TEMPLATE_VIRTSVC_TCP_ISOLATION] = template; + port->default_async_rules.default_templates[DP_PORT_ASYNC_TEMPLATE_VIRTSVC_TCP_ISOLATION] = tmpl; else - port->default_async_rules.default_templates[DP_PORT_ASYNC_TEMPLATE_VIRTSVC_UDP_ISOLATION] = template; + port->default_async_rules.default_templates[DP_PORT_ASYNC_TEMPLATE_VIRTSVC_UDP_ISOLATION] = tmpl; const struct rte_flow_item tcp_src_pattern[] = { { .type = RTE_FLOW_ITEM_TYPE_ETH, .mask = &dp_flow_item_eth_mask, }, { .type = RTE_FLOW_ITEM_TYPE_IPV6, - .mask = &dp_flow_item_ipv6_src_mask, + .mask = &dp_flow_item_ipv6_src_dst_mask, }, { .type = proto_id == IPPROTO_TCP ? RTE_FLOW_ITEM_TYPE_TCP : RTE_FLOW_ITEM_TYPE_UDP, .mask = proto_id == IPPROTO_TCP ? 
(const void *)&dp_flow_item_tcp_src_mask : (const void *)&dp_flow_item_udp_src_mask, }, - { .type = RTE_FLOW_ITEM_TYPE_END, - }, + { .type = RTE_FLOW_ITEM_TYPE_END }, }; - template->pattern_templates[DP_ISOLATION_PATTERN_IPV6_PROTO] - = dp_create_async_pattern_template(port->port_id, &default_pattern_template_attr, tcp_src_pattern); + tmpl->pattern_templates[DP_ISOLATION_PATTERN_IPV6_PROTO] + = dp_create_async_pattern_template(port->port_id, &ingress_pattern_template_attr, tcp_src_pattern); static const struct rte_flow_action actions[] = { { .type = RTE_FLOW_ACTION_TYPE_QUEUE, }, { .type = RTE_FLOW_ACTION_TYPE_END, }, }; - template->actions_templates[DP_ISOLATION_ACTIONS_QUEUE] - = dp_create_async_actions_template(port->port_id, &default_actions_template_attr, actions, actions); + tmpl->actions_templates[DP_ISOLATION_ACTIONS_QUEUE] + = dp_create_async_actions_template(port->port_id, &ingress_actions_template_attr, actions, actions); - template->table_attr = &pf_default_template_table_attr; + tmpl->table_attr = &pf_ingress_template_table_attr; - return dp_init_async_template(port->port_id, template); + return dp_init_async_template(port->port_id, tmpl); } #endif static struct rte_flow *dp_create_pf_async_isolation_rule(uint16_t port_id, uint8_t proto, struct rte_flow_template_table *template_table) { - struct rte_flow_item_eth eth_spec = { + const struct rte_flow_item_eth eth_spec = { .hdr.ether_type = htons(RTE_ETHER_TYPE_IPV6), }; - struct rte_flow_item_ipv6 ipv6_spec = { + const struct rte_flow_item_ipv6 ipv6_spec = { .hdr.proto = proto, }; - struct rte_flow_item pattern[] = { + const struct rte_flow_item pattern[] = { { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = ð_spec, }, @@ -156,14 +158,16 @@ static struct rte_flow *dp_create_pf_async_isolation_rule(uint16_t port_id, uint #ifdef ENABLE_VIRTSVC struct rte_flow *dp_create_virtsvc_async_isolation_rule(uint16_t port_id, uint8_t proto_id, const union dp_ipv6 *svc_ipv6, rte_be16_t svc_port, - struct rte_flow_template_table *template_table) + struct rte_flow_template_table *template_table, + const union dp_ipv6 *ul_addr) { const struct rte_flow_item_eth eth_spec = { .hdr.ether_type = htons(RTE_ETHER_TYPE_IPV6), }; - struct rte_flow_item_ipv6 ipv6_spec = { + const struct rte_flow_item_ipv6 ipv6_spec = { .hdr.proto = proto_id, .hdr.src_addr = DP_INIT_FROM_IPV6(svc_ipv6), + .hdr.dst_addr = DP_INIT_FROM_IPV6(ul_addr), }; const struct rte_flow_item_tcp tcp_spec = { .hdr.src_port = svc_port, @@ -187,7 +191,7 @@ struct rte_flow *dp_create_virtsvc_async_isolation_rule(uint16_t port_id, uint8_ static const struct rte_flow_action_queue queue_action = { .index = 0, }; - struct rte_flow_action actions[] = { + const struct rte_flow_action actions[] = { { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_action, }, @@ -210,7 +214,7 @@ int dp_create_pf_async_isolation_rules(struct dp_port *port) flow = dp_create_pf_async_isolation_rule(port->port_id, IPPROTO_IPIP, templates[DP_PORT_ASYNC_TEMPLATE_PF_ISOLATION]->template_table); if (!flow) { - DPS_LOG_ERR("Failed to install PF async IPIP isolation rule", DP_LOG_PORTID(port->port_id)); + DPS_LOG_ERR("Failed to install PF async IPIP isolation rule", DP_LOG_PORT(port)); return DP_ERROR; } else { port->default_async_rules.default_flows[DP_PORT_ASYNC_FLOW_ISOLATE_IPIP] = flow; @@ -220,31 +224,37 @@ int dp_create_pf_async_isolation_rules(struct dp_port *port) flow = dp_create_pf_async_isolation_rule(port->port_id, IPPROTO_IPV6, templates[DP_PORT_ASYNC_TEMPLATE_PF_ISOLATION]->template_table); if (!flow) { - 
DPS_LOG_ERR("Failed to install PF async IPV6 isolation rule", DP_LOG_PORTID(port->port_id)); - // cannot return, need to commit all previous rules + DPS_LOG_ERR("Failed to install PF async IPV6 isolation rule", DP_LOG_PORT(port)); + // cannot return, need to push all previous rules and then return error } else { port->default_async_rules.default_flows[DP_PORT_ASYNC_FLOW_ISOLATE_IPV6] = flow; rule_count++; } +#ifdef ENABLE_PF1_PROXY + if (dp_conf_is_pf1_proxy_enabled() && port == dp_get_pf0()) { + rules_required += DP_PF1_PROXY_RULE_COUNT; + rule_count += dp_create_pf1_proxy_async_isolation_rules(port); + // cannot return, need to push all previous rules and then return error + } +#endif + #ifdef ENABLE_VIRTSVC + rules_required += dp_virtsvc_get_count(); rule_count += dp_create_virtsvc_async_isolation_rules(port->port_id, templates[DP_PORT_ASYNC_TEMPLATE_VIRTSVC_TCP_ISOLATION]->template_table, templates[DP_PORT_ASYNC_TEMPLATE_VIRTSVC_UDP_ISOLATION]->template_table); - // cannot end on error, need to commit partial success + // cannot return, need to push all previous rules and then return error #endif if (dp_blocking_commit_async_rules(port->port_id, rule_count)) { - DPS_LOG_ERR("Failed to commit PF async isolation rules", DP_LOG_PORTID(port->port_id)); + DPS_LOG_ERR("Failed to commit PF async isolation rules", DP_LOG_PORT(port)); return DP_ERROR; } - -#ifdef ENABLE_VIRTSVC - rules_required += dp_virtsvc_get_count(); -#endif + // only now we can fail due to previous errors if (rule_count != rules_required) { - DPS_LOG_ERR("Not all PF async isolation rules were installed", DP_LOG_VALUE(rule_count), DP_LOG_MAX(rules_required), DP_LOG_PORTID(port->port_id)); + DPS_LOG_ERR("Not all PF async isolation rules were installed", DP_LOG_VALUE(rule_count), DP_LOG_MAX(rules_required), DP_LOG_PORT(port)); return DP_ERROR; } diff --git a/src/rte_flow/dp_rte_async_flow_pf1_proxy.c b/src/rte_flow/dp_rte_async_flow_pf1_proxy.c new file mode 100644 index 000000000..0d70a7650 --- /dev/null +++ b/src/rte_flow/dp_rte_async_flow_pf1_proxy.c @@ -0,0 +1,207 @@ +// SPDX-FileCopyrightText: 2023 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +#include "rte_flow/dp_rte_async_flow_pf1_proxy.h" +#include "rte_flow/dp_rte_async_flow.h" +#include "rte_flow/dp_rte_async_flow_template.h" +#include "rte_flow/dp_rte_flow_helpers.h" + +// having MAX equal to actual number of rules impacts throughput (for unknown reason) +#define DP_PF1_DEFAULT_TABLE_MAX_RULES (DP_PF1_PROXY_RULE_COUNT+1) + +enum dp_pf1_proxy_pattern_type { + DP_PF1_PROXY_PATTERN_REPR_PORT, + DP_PF1_PROXY_PATTERN_COUNT, +}; + +enum dp_pf1_proxy_actions_type { + DP_PF1_PROXY_ACTIONS_REPR_PORT, + DP_PF1_PROXY_ACTIONS_COUNT, +}; + +static const struct rte_flow_pattern_template_attr transfer_pattern_template_attr = { + .transfer = 1 +}; + +static const struct rte_flow_actions_template_attr transfer_actions_template_attr = { + .transfer = 1 +}; + +static const struct rte_flow_template_table_attr pf_transfer_template_table_attr = { + .flow_attr = { + .group = 0, + .transfer = 1, + }, + .nb_flows = DP_PF1_DEFAULT_TABLE_MAX_RULES, +}; + + +int dp_create_pf_async_from_proxy_templates(struct dp_port *port) +{ + struct dp_port_async_template *tmpl; + + tmpl = dp_alloc_async_template(DP_PF1_PROXY_PATTERN_COUNT, DP_PF1_PROXY_ACTIONS_COUNT); + if (!tmpl) + return DP_ERROR; + + port->default_async_rules.default_templates[DP_PORT_ASYNC_TEMPLATE_PF1_FROM_PROXY] = tmpl; + + static const struct rte_flow_item pattern[] = { + { 
.type = RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT, + .mask = &dp_flow_item_ethdev_mask, + }, + { .type = RTE_FLOW_ITEM_TYPE_END }, + }; + tmpl->pattern_templates[DP_PF1_PROXY_PATTERN_REPR_PORT] + = dp_create_async_pattern_template(port->port_id, &transfer_pattern_template_attr, pattern); + + static const struct rte_flow_action actions[] = { + { .type = RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT, }, + { .type = RTE_FLOW_ACTION_TYPE_END, }, + }; + tmpl->actions_templates[DP_PF1_PROXY_ACTIONS_REPR_PORT] + = dp_create_async_actions_template(port->port_id, &transfer_actions_template_attr, actions, actions); + + tmpl->table_attr = &pf_transfer_template_table_attr; + + return dp_init_async_template(port->port_id, tmpl); +} + +int dp_create_pf_async_to_proxy_templates(struct dp_port *port) +{ + struct dp_port_async_template *tmpl; + + tmpl = dp_alloc_async_template(DP_PF1_PROXY_PATTERN_COUNT, DP_PF1_PROXY_ACTIONS_COUNT); + if (!tmpl) + return DP_ERROR; + + port->default_async_rules.default_templates[DP_PORT_ASYNC_TEMPLATE_PF1_TO_PROXY] = tmpl; + + static const struct rte_flow_item pattern[] = { + { .type = RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT, + .mask = &dp_flow_item_ethdev_mask, + }, + { .type = RTE_FLOW_ITEM_TYPE_ETH, + .mask = &dp_flow_item_eth_mask, + }, + { .type = RTE_FLOW_ITEM_TYPE_IPV6, + .mask = &dp_flow_item_ipv6_dst_only_mask, + }, + { .type = RTE_FLOW_ITEM_TYPE_END }, + }; + tmpl->pattern_templates[DP_PF1_PROXY_PATTERN_REPR_PORT] + = dp_create_async_pattern_template(port->port_id, &transfer_pattern_template_attr, pattern); + + static const struct rte_flow_action actions[] = { + { .type = RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT, }, + { .type = RTE_FLOW_ACTION_TYPE_END, }, + }; + tmpl->actions_templates[DP_PF1_PROXY_ACTIONS_REPR_PORT] + = dp_create_async_actions_template(port->port_id, &transfer_actions_template_attr, actions, actions); + + tmpl->table_attr = &pf_transfer_template_table_attr; + + return dp_init_async_template(port->port_id, tmpl); +} + + +static struct rte_flow *dp_create_pf_async_from_proxy_rule(uint16_t port_id, + uint16_t src_port_id, uint16_t dst_port_id, + struct rte_flow_template_table *template_table) +{ + const struct rte_flow_item_ethdev src_port_pattern = { + .port_id = src_port_id, + }; + const struct rte_flow_item pattern[] = { + { .type = RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT, + .spec = &src_port_pattern, + }, + { .type = RTE_FLOW_ITEM_TYPE_END }, + }; + + const struct rte_flow_item_ethdev dst_port_action = { + .port_id = dst_port_id, + }; + const struct rte_flow_action actions[] = { + { .type = RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT, + .conf = &dst_port_action, + }, + { .type = RTE_FLOW_ACTION_TYPE_END }, + }; + + return dp_create_async_rule(port_id, template_table, + pattern, DP_PF1_PROXY_PATTERN_REPR_PORT, + actions, DP_PF1_PROXY_ACTIONS_REPR_PORT); +} + +static struct rte_flow *dp_create_pf_async_to_proxy_rule(uint16_t port_id, + uint16_t src_port_id, uint16_t dst_port_id, + struct rte_flow_template_table *template_table) +{ + const struct rte_flow_item_ethdev src_port_pattern = { + .port_id = src_port_id, + }; + const struct rte_flow_item_eth eth_ipv6_pattern = { + .type = htons(RTE_ETHER_TYPE_IPV6), + }; + const struct rte_flow_item_ipv6 ipv6_dst_pattern = { + .hdr.dst_addr = DP_INIT_FROM_IPV6(dp_conf_get_underlay_ip()), + }; + const struct rte_flow_item pattern[] = { + { .type = RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT, + .spec = &src_port_pattern, + }, + { .type = RTE_FLOW_ITEM_TYPE_ETH, + .spec = ð_ipv6_pattern, + }, + { .type = RTE_FLOW_ITEM_TYPE_IPV6, + .spec = 
&ipv6_dst_pattern, + }, + { .type = RTE_FLOW_ITEM_TYPE_END }, + }; + + const struct rte_flow_item_ethdev dst_port_action = { + .port_id = dst_port_id, + }; + const struct rte_flow_action actions[] = { + { .type = RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT, + .conf = &dst_port_action, + }, + { .type = RTE_FLOW_ACTION_TYPE_END }, + }; + + return dp_create_async_rule(port_id, template_table, + pattern, DP_PF1_PROXY_PATTERN_REPR_PORT, + actions, DP_PF1_PROXY_ACTIONS_REPR_PORT); +} + +uint16_t dp_create_pf1_proxy_async_isolation_rules(struct dp_port *port) +{ + uint16_t pf1_port_id = dp_get_pf1()->port_id; + uint16_t proxy_port_id = dp_get_pf1_proxy()->port_id; + struct dp_port_async_template **templates = port->default_async_rules.default_templates; + struct rte_flow *flow; + uint16_t rule_count = 0; + + flow = dp_create_pf_async_from_proxy_rule(port->port_id, proxy_port_id, pf1_port_id, + templates[DP_PORT_ASYNC_TEMPLATE_PF1_FROM_PROXY]->template_table); + if (!flow) { + DPS_LOG_ERR("Failed to install PF async pf1 from proxy rule", DP_LOG_PORT(port)); + return rule_count; + } + + port->default_async_rules.default_flows[DP_PORT_ASYNC_FLOW_PF1_FROM_PROXY] = flow; + rule_count++; + + flow = dp_create_pf_async_to_proxy_rule(port->port_id, pf1_port_id, proxy_port_id, + templates[DP_PORT_ASYNC_TEMPLATE_PF1_TO_PROXY]->template_table); + if (!flow) { + DPS_LOG_ERR("Failed to install PF async pf1 to proxy rule", DP_LOG_PORT(port)); + return rule_count; + } + + port->default_async_rules.default_flows[DP_PORT_ASYNC_FLOW_PF1_TO_PROXY] = flow; + rule_count++; + + return rule_count; +} diff --git a/test/local/dp_service.py b/test/local/dp_service.py index 048af8cd6..e376c4645 100755 --- a/test/local/dp_service.py +++ b/test/local/dp_service.py @@ -126,7 +126,7 @@ def reconfigure_tests(self, cfgfile): elif key == "pf1": PF1.tap = value elif key == "pf1-proxy": - PF1.tap = "pf1-tap" + PF1.tap = "pf1-tap" # TODO works for now, but make better elif key == "vf-pattern": # MACs cannot be changed for VFs, use actual values VM1.mac = get_if_hwaddr(f"{value}0") diff --git a/test/local/test_telemetry.py b/test/local/test_telemetry.py index a6c98e7f1..0c88e5185 100644 --- a/test/local/test_telemetry.py +++ b/test/local/test_telemetry.py @@ -69,7 +69,6 @@ def get_telemetry(request): return response def check_tel_graph(key): - expected_tel_rx_node_count = 7 if PF1.tap == "pf1-tap" else 6 tel = get_telemetry(f"/dp_service/graph/{key}") assert tel is not None, \ "Missing graph telemetry" @@ -78,8 +77,8 @@ def check_tel_graph(key): # Check for rx-X-0 pattern where X can be any number rx_nodes = [node for node in tel["Node_0_to_255"] if re.match(r'rx-\d+-0', node)] - assert len(rx_nodes) == expected_tel_rx_node_count, \ - f"Expected {expected_tel_rx_node_count} 'rx-X-0' nodes, found {len(rx_nodes)} in {key} graph telemetry" + assert len(rx_nodes) == 6, \ + f"Expected 6 'rx-X-0' nodes, found {len(rx_nodes)} in {key} graph telemetry" def test_telemetry_graph(request, prepare_ifaces): @@ -139,17 +138,15 @@ def test_telemetry_exporter(request, prepare_ifaces, start_exporter): else: assert metric.startswith("#"), \ f"Unknown exported metric '{metric.split('{')[0]}' found" - # meson options (e.g. enable_pf1_proxy) are hard to do in these scripts, so just check manually + # meson options (e.g. 
enable_pf1_proxy) are hard to do in these scripts, so just check manually + # meson options (e.g. enable_virtual_services) are hard to do in these scripts, so just check manually graph_nodes = GRAPH_NODES iface_stats = IFACE_STATS - if 'pf1_proxy' in graph_stats: - graph_nodes += ('pf1_proxy',) if 'virtsvc' in graph_stats: graph_nodes += ('virtsvc',) if request.config.getoption("--hw"): iface_stats += HW_IFACE_STATS if PF1.tap == "pf1-tap": - graph_nodes += ('rx-6-0',) + graph_nodes += ('tx-6',) if 'rx_q1_bytes' in interface_stats: iface_stats += HW_PF1_IFACE_STATS assert graph_stats == set(graph_nodes), \