diff --git a/docs/sys_design/dpservice_dataplane.drawio.png b/docs/sys_design/dpservice_dataplane.drawio.png index 8131cb9d9..ddc69f274 100755 Binary files a/docs/sys_design/dpservice_dataplane.drawio.png and b/docs/sys_design/dpservice_dataplane.drawio.png differ diff --git a/include/dp_mbuf_dyn.h b/include/dp_mbuf_dyn.h index d1bbff668..4736a66ff 100644 --- a/include/dp_mbuf_dyn.h +++ b/include/dp_mbuf_dyn.h @@ -19,9 +19,8 @@ extern "C" { struct dp_flow { struct { - uint16_t flow_type : 2; // local,outgoing,incoming uint16_t public_flow : 1; - uint16_t overlay_type: 1; // supported overlay type + uint16_t overlay_type : 1; // supported overlay type uint16_t nat : 3; uint16_t offload_ipv6 : 1; // tmp solution to set if we should offload ipv6 pkts uint16_t dir : 2; // store the direction of each packet @@ -61,8 +60,8 @@ struct dp_flow { uint8_t proto_id; //proto_id in outer ipv6 header uint32_t dst_vni; } tun_info; - uint8_t vnf_type; - uint8_t nxt_hop; + uint8_t vnf_type; // TODO(plague) enum? + uint8_t nxt_hop; struct flow_value *conntrack; #ifdef ENABLE_VIRTSVC struct dp_virtsvc *virtsvc; diff --git a/include/dp_port.h b/include/dp_port.h index e7d0ca39d..bd298d500 100644 --- a/include/dp_port.h +++ b/include/dp_port.h @@ -16,11 +16,6 @@ extern "C" { #define VM_IFACE_ID_MAX_LEN 64 -enum dp_port_type { - DP_PORT_PF, - DP_PORT_VF, -}; - struct macip_entry { struct rte_ether_addr own_mac; struct rte_ether_addr neigh_mac; @@ -43,7 +38,7 @@ struct vm_entry { }; struct dp_port { - enum dp_port_type port_type; + bool is_pf; uint16_t port_id; char port_name[IF_NAMESIZE]; int socket_id; diff --git a/include/rte_flow/dp_rte_flow.h b/include/rte_flow/dp_rte_flow.h index 4f77a3ae9..6cceed3cb 100644 --- a/include/rte_flow/dp_rte_flow.h +++ b/include/rte_flow/dp_rte_flow.h @@ -15,10 +15,6 @@ extern "C" #include "dp_lpm.h" #include "dp_mbuf_dyn.h" -#define DP_FLOW_TYPE_LOCAL 1 -#define DP_FLOW_TYPE_OUTGOING 2 -#define DP_FLOW_TYPE_INCOMING 3 - #define DP_FLOW_WEST_EAST 0 #define DP_FLOW_SOUTH_NORTH 1 diff --git a/src/dp_cntrack.c b/src/dp_cntrack.c index 9545f5c23..e25580c8d 100644 --- a/src/dp_cntrack.c +++ b/src/dp_cntrack.c @@ -93,31 +93,30 @@ static __rte_always_inline void dp_cntrack_init_flow_offload_flags(struct flow_v static __rte_always_inline void dp_cntrack_change_flow_offload_flags(struct rte_mbuf *m, struct flow_value *flow_val, struct dp_flow *df) { - bool offload_check = false; + bool offload_other_pf = false; + struct dp_port *port = dp_get_port(m); if (!offload_mode_enabled) return; - if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) { - if (m->port == dp_get_pf0()->port_id) - offload_check = flow_val->incoming_flow_offloaded_flag.pf0; + if (port->is_pf) { + if (port == dp_get_pf0()) + offload_other_pf = !flow_val->incoming_flow_offloaded_flag.pf0; else - offload_check = flow_val->incoming_flow_offloaded_flag.pf1; + offload_other_pf = !flow_val->incoming_flow_offloaded_flag.pf1; } if (df->flags.dir == DP_FLOW_DIR_ORG) { /* Despite the incoming flow is offloaded to one of the pf ports, pkts can arrive on another one */ /* So we need to check if the incoming flow is offloaded on the current port, */ /* if not, we do another offloading */ - if (flow_val->offload_flags.orig == DP_FLOW_NON_OFFLOAD || - (df->flags.flow_type == DP_FLOW_TYPE_INCOMING && !offload_check)) + if (flow_val->offload_flags.orig == DP_FLOW_NON_OFFLOAD || offload_other_pf) flow_val->offload_flags.orig = DP_FLOW_OFFLOAD_INSTALL; else if (flow_val->offload_flags.orig == DP_FLOW_OFFLOAD_INSTALL) 
flow_val->offload_flags.orig = DP_FLOW_OFFLOADED; } else if (df->flags.dir == DP_FLOW_DIR_REPLY) { - if (flow_val->offload_flags.reply == DP_FLOW_NON_OFFLOAD || - (df->flags.flow_type == DP_FLOW_TYPE_INCOMING && !offload_check)) + if (flow_val->offload_flags.reply == DP_FLOW_NON_OFFLOAD || offload_other_pf) flow_val->offload_flags.reply = DP_FLOW_OFFLOAD_INSTALL; else if (flow_val->offload_flags.reply == DP_FLOW_OFFLOAD_INSTALL) flow_val->offload_flags.reply = DP_FLOW_OFFLOADED; @@ -170,7 +169,7 @@ static __rte_always_inline struct flow_value *flow_table_insert_entry(struct flo /* This will be an uni-directional traffic, which does not expect its corresponding reverse traffic */ /* Details can be found in https://github.com/onmetal/net-dpservice/pull/341 */ if (offload_mode_enabled - && (df->flags.flow_type != DP_FLOW_TYPE_INCOMING) + && !port->is_pf && !DP_FAILED(dp_get_vnf_entry(&vnf_val, DP_VNF_TYPE_LB_ALIAS_PFX, port, DP_VNF_MATCH_ALL_PORT_ID)) ) flow_val->nf_info.nat_type = DP_FLOW_LB_TYPE_LOCAL_NEIGH_TRAFFIC; diff --git a/src/dp_firewall.c b/src/dp_firewall.c index 7928aa272..c7f015105 100644 --- a/src/dp_firewall.c +++ b/src/dp_firewall.c @@ -166,11 +166,11 @@ enum dp_fwall_action dp_get_firewall_action(struct dp_flow *df, struct dp_fwall_rule *rule; /* Outgoing traffic to PF (VF Egress, PF Ingress), PF has no Ingress rules */ - if (dst_port->port_type == DP_PORT_PF) + if (dst_port->is_pf) return dp_get_egress_action(df, &src_port->vm.fwall_head); /* Incoming from PF, PF has no Egress rules */ - if (src_port->port_type == DP_PORT_PF) + if (src_port->is_pf) egress_action = DP_FWALL_ACCEPT; /* Incoming from VF. Check originating VF's Egress rules */ else diff --git a/src/dp_flow.c b/src/dp_flow.c index d1bba88fb..bb0a292f3 100644 --- a/src/dp_flow.c +++ b/src/dp_flow.c @@ -100,7 +100,7 @@ static __rte_always_inline void dp_mark_vnf_type(struct dp_flow *df, const struc struct snat_data *s_data; struct dp_vnf_value vnf_val; - if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) { + if (port->is_pf) { if (df->vnf_type == DP_VNF_TYPE_NAT || df->vnf_type == DP_VNF_TYPE_LB_ALIAS_PFX) key->vnf = (uint8_t)df->vnf_type; else @@ -129,7 +129,7 @@ int dp_build_flow_key(struct flow_key *key /* out */, struct rte_mbuf *m /* in * key->proto = df->l4_type; - if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) + if (port->is_pf) key->vni = df->tun_info.dst_vni; else key->vni = port->vm.vni; diff --git a/src/dp_graph.c b/src/dp_graph.c index 49834ae5d..703cd3dc5 100644 --- a/src/dp_graph.c +++ b/src/dp_graph.c @@ -133,8 +133,10 @@ static int dp_graph_init_nodes(void) // some nodes need a direct Tx connection to all PF/VF ports, add them dynamically snprintf(name, sizeof(name), "tx-%u", port_id); - switch (port->port_type) { - case DP_PORT_VF: + if (port->is_pf) { + if (DP_FAILED(ipip_encap_node_append_pf_tx(port_id, name))) + return DP_ERROR; + } else { if (DP_FAILED(arp_node_append_vf_tx(port_id, name)) || DP_FAILED(dhcp_node_append_vf_tx(port_id, name)) || DP_FAILED(dhcpv6_node_append_vf_tx(port_id, name)) @@ -142,11 +144,6 @@ static int dp_graph_init_nodes(void) || DP_FAILED(firewall_node_append_vf_tx(port_id, name)) || DP_FAILED(rx_periodic_node_append_vf_tx(port_id, name))) return DP_ERROR; - break; - case DP_PORT_PF: - if (DP_FAILED(ipip_encap_node_append_pf_tx(port_id, name))) - return DP_ERROR; - break; } #ifdef ENABLE_VIRTSVC // virtual services node is bi-directional diff --git a/src/dp_hairpin.c b/src/dp_hairpin.c index 389d4ee54..45d61c00e 100644 --- a/src/dp_hairpin.c +++ b/src/dp_hairpin.c @@ 
-65,15 +65,14 @@ static int setup_hairpin_rx_tx_queues(uint16_t port_id, int dp_hairpin_setup(const struct dp_port *port) { - uint16_t hairpin_queue_id = 0; uint16_t peer_hairpin_queue_id = 0; hairpin_queue_id = DP_NR_STD_RX_QUEUES; - if (port->port_type == DP_PORT_VF) - peer_hairpin_queue_id = DP_NR_RESERVED_TX_QUEUES - 1 + port->peer_pf_hairpin_tx_rx_queue_offset; - else + if (port->is_pf) peer_hairpin_queue_id = DP_NR_STD_TX_QUEUES - 1 + port->peer_pf_hairpin_tx_rx_queue_offset; + else + peer_hairpin_queue_id = DP_NR_RESERVED_TX_QUEUES - 1 + port->peer_pf_hairpin_tx_rx_queue_offset; if (DP_FAILED(setup_hairpin_rx_tx_queues(port->port_id, port->peer_pf_port_id, @@ -85,7 +84,7 @@ int dp_hairpin_setup(const struct dp_port *port) } // PF's hairpin queue is configured one by one - if (port->port_type == DP_PORT_VF) { + if (!port->is_pf) { if (DP_FAILED(setup_hairpin_rx_tx_queues(port->peer_pf_port_id, port->port_id, peer_hairpin_queue_id, @@ -95,6 +94,7 @@ int dp_hairpin_setup(const struct dp_port *port) return DP_ERROR; } } + return DP_OK; } diff --git a/src/dp_internal_stats.c b/src/dp_internal_stats.c index caccc5f14..6b812f6a1 100644 --- a/src/dp_internal_stats.c +++ b/src/dp_internal_stats.c @@ -14,7 +14,7 @@ int dp_nat_get_used_ports_telemetry(struct rte_tel_data *dict) int ret; DP_FOREACH_PORT(ports, port) { - if (port->port_type != DP_PORT_VF || !port->allocated) + if (port->is_pf || !port->allocated) continue; ret = rte_tel_data_add_dict_u64(dict, port->vm.machineid, port->stats.nat_stats.used_port_cnt); diff --git a/src/dp_lpm.c b/src/dp_lpm.c index 4f1d0bc5e..458c84b86 100644 --- a/src/dp_lpm.c +++ b/src/dp_lpm.c @@ -101,7 +101,7 @@ int dp_add_route(const struct dp_port *port, uint32_t vni, uint32_t t_vni, uint3 // can only fail if node is NULL rte_rib_set_nh(node, port->port_id); /* This is an external route */ - if (port->port_type == DP_PORT_PF) { + if (port->is_pf) { route = rte_rib_get_ext(node); route->vni = t_vni; rte_memcpy(route->nh_ipv6, ip6, sizeof(route->nh_ipv6)); @@ -164,7 +164,7 @@ static int dp_list_route_entry(struct rte_rib_node *node, if (unlikely(!dst_port)) return DP_GRPC_ERR_NO_VM; - if ((ext_routes && dst_port->port_type == DP_PORT_PF) + if ((ext_routes && dst_port->is_pf) || (!ext_routes && dst_port->port_id == port->port_id && !dp_route_in_dhcp_range(node, port)) ) { reply = dp_grpc_add_reply(responder); @@ -240,7 +240,7 @@ int dp_add_route6(const struct dp_port *port, uint32_t vni, uint32_t t_vni, cons // can only fail if node is NULL rte_rib6_set_nh(node, port->port_id); /* This is an external route */ - if (port->port_type == DP_PORT_PF) { + if (port->is_pf) { route = rte_rib6_get_ext(node); route->vni = t_vni; rte_memcpy(route->nh_ipv6, ext_ip6, sizeof(route->nh_ipv6)); @@ -302,7 +302,7 @@ const struct dp_port *dp_get_ip4_dst_port(const struct dp_port *port, if (!dst_port) return NULL; - if (dst_port->port_type == DP_PORT_PF) + if (dst_port->is_pf) *route = *(struct vm_route *)rte_rib_get_ext(node); if (DP_FAILED(rte_rib_get_ip(node, route_key))) @@ -339,7 +339,7 @@ const struct dp_port *dp_get_ip6_dst_port(const struct dp_port *port, if (!dst_port) return NULL; - if (dst_port->port_type == DP_PORT_PF) + if (dst_port->is_pf) *route = *(struct vm_route *)rte_rib6_get_ext(node); return dst_port; diff --git a/src/dp_periodic_msg.c b/src/dp_periodic_msg.c index be4d8690e..a1707874a 100644 --- a/src/dp_periodic_msg.c +++ b/src/dp_periodic_msg.c @@ -25,7 +25,7 @@ void send_to_all_vfs(const struct rte_mbuf *pkt, uint16_t eth_type) int ret; 
DP_FOREACH_PORT(ports, port) { - if (port->port_type != DP_PORT_VF || !port->allocated) + if (port->is_pf || !port->allocated) continue; clone_buf = rte_pktmbuf_copy(pkt, dp_layer->rte_mempool, 0, UINT32_MAX); diff --git a/src/dp_port.c b/src/dp_port.c index 7f441a1f0..5a9765ad5 100644 --- a/src/dp_port.c +++ b/src/dp_port.c @@ -16,6 +16,9 @@ #include "rte_flow/dp_rte_flow_capture.h" #include "monitoring/dp_graphtrace.h" +#define DP_PORT_INIT_PF true +#define DP_PORT_INIT_VF false + static const struct rte_eth_conf port_conf_default = { .rxmode = { .mq_mode = RTE_ETH_MQ_RX_NONE, @@ -71,7 +74,7 @@ struct dp_port *dp_get_port_by_name(const char *pci_name) return _dp_port_table[port_id]; } -static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *dev_info, enum dp_port_type port_type) +static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *dev_info) { struct dp_dpdk_layer *dp_layer = get_dpdk_layer(); struct rte_ether_addr pf_neigh_mac; @@ -84,9 +87,9 @@ static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *de /* Default config */ port_conf.txmode.offloads &= dev_info->tx_offload_capa; - nr_hairpin_queues = port_type == DP_PORT_VF - ? DP_NR_VF_HAIRPIN_RX_TX_QUEUES - : (DP_NR_PF_HAIRPIN_RX_TX_QUEUES + DP_NR_VF_HAIRPIN_RX_TX_QUEUES * dp_layer->num_of_vfs); + nr_hairpin_queues = port->is_pf + ? (DP_NR_PF_HAIRPIN_RX_TX_QUEUES + DP_NR_VF_HAIRPIN_RX_TX_QUEUES * dp_layer->num_of_vfs) + : DP_NR_VF_HAIRPIN_RX_TX_QUEUES; ret = rte_eth_dev_configure(port->port_id, DP_NR_STD_RX_QUEUES + nr_hairpin_queues, @@ -126,7 +129,7 @@ static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *de } /* dp-service specific config */ - if (port_type == DP_PORT_VF) { + if (!port->is_pf) { DPS_LOG_INFO("INIT setting port to promiscuous mode", DP_LOG_PORT(port)); ret = rte_eth_promiscuous_enable(port->port_id); if (DP_FAILED(ret)) { @@ -143,7 +146,7 @@ static int dp_port_init_ethdev(struct dp_port *port, struct rte_eth_dev_info *de static_assert(sizeof(port->dev_name) == RTE_ETH_NAME_MAX_LEN, "Incompatible port dev_name size"); rte_eth_dev_get_name_by_port(port->port_id, port->dev_name); - if (port_type == DP_PORT_PF) { + if (port->is_pf) { if (DP_FAILED(dp_get_pf_neigh_mac(dev_info->if_index, &pf_neigh_mac, &port->vm.info.own_mac))) return DP_ERROR; rte_ether_addr_copy(&pf_neigh_mac, &port->vm.info.neigh_mac); @@ -170,7 +173,7 @@ static int dp_port_flow_isolate(uint16_t port_id) return DP_OK; } -static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_dev_info *dev_info, enum dp_port_type type) +static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_dev_info *dev_info, bool is_pf) { static int last_pf1_hairpin_tx_rx_queue_offset = 1; struct dp_port *port; @@ -182,7 +185,7 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d return NULL; } - if (type == DP_PORT_PF) { + if (is_pf) { if (dp_conf_get_nic_type() != DP_CONF_NIC_TYPE_TAP) if (DP_FAILED(dp_port_flow_isolate(port_id))) return NULL; @@ -200,16 +203,15 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d // oveflow check done by liming the number of calls to this function port = _dp_ports.end++; - port->port_type = type; + port->is_pf = is_pf; port->port_id = port_id; port->socket_id = socket_id; _dp_port_table[port_id] = port; - if (DP_FAILED(dp_port_init_ethdev(port, dev_info, type))) + if (DP_FAILED(dp_port_init_ethdev(port, dev_info))) return NULL; - switch (type) { - 
case DP_PORT_PF: + if (is_pf) { if (DP_FAILED(dp_port_register_pf(port))) return NULL; ret = rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, dp_link_status_change_event_callback, NULL); @@ -217,8 +219,7 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d DPS_LOG_ERR("Cannot register link status callback", DP_LOG_RET(ret)); return NULL; } - break; - case DP_PORT_VF: + } else { // All VFs belong to pf0, assign a tx queue from pf1 for it if (dp_conf_is_offload_enabled()) { port->peer_pf_port_id = dp_get_pf1()->port_id; @@ -229,8 +230,8 @@ static struct dp_port *dp_port_init_interface(uint16_t port_id, struct rte_eth_d } } // No link status callback, VFs are not critical for cross-hypervisor communication - break; } + return port; } @@ -240,7 +241,7 @@ static int dp_port_set_up_hairpins(void) const struct dp_port *pf1 = dp_get_pf1(); DP_FOREACH_PORT(&_dp_ports, port) { - if (port->port_type == DP_PORT_PF) { + if (port->is_pf) { port->peer_pf_port_id = (port->port_id == pf0->port_id ? pf1 : pf0)->port_id; port->peer_pf_hairpin_tx_rx_queue_offset = 1; } @@ -262,7 +263,7 @@ static int dp_port_init_pf(const char *pf_name) return DP_ERROR; if (!strncmp(pf_name, ifname, sizeof(ifname))) { DPS_LOG_INFO("INIT initializing PF port", DP_LOG_PORTID(port_id), DP_LOG_IFNAME(ifname)); - port = dp_port_init_interface(port_id, &dev_info, DP_PORT_PF); + port = dp_port_init_interface(port_id, &dev_info, DP_PORT_INIT_PF); if (!port) return DP_ERROR; snprintf(port->port_name, sizeof(port->port_name), "%s", pf_name); @@ -286,7 +287,7 @@ static int dp_port_init_vfs(const char *vf_pattern, int num_of_vfs) return DP_ERROR; if (strstr(ifname, vf_pattern) && ++vf_count <= num_of_vfs) { DPS_LOG_INFO("INIT initializing VF port", DP_LOG_PORTID(port_id), DP_LOG_IFNAME(ifname)); - port = dp_port_init_interface(port_id, &dev_info, DP_PORT_VF); + port = dp_port_init_interface(port_id, &dev_info, DP_PORT_INIT_VF); if (!port) return DP_ERROR; snprintf(port->port_name, sizeof(port->port_name), "%s", vf_pattern); @@ -403,7 +404,7 @@ static int dp_init_port(struct dp_port *port) if (dp_conf_get_nic_type() == DP_CONF_NIC_TYPE_TAP) return DP_OK; - if (port->port_type == DP_PORT_PF) + if (port->is_pf) if (DP_FAILED(dp_port_install_isolated_mode(port->port_id))) return DP_ERROR; @@ -414,7 +415,7 @@ static int dp_init_port(struct dp_port *port) if (DP_FAILED(dp_port_bind_port_hairpins(port))) return DP_ERROR; - if (port->port_type == DP_PORT_VF) + if (!port->is_pf) if (DP_FAILED(dp_install_vf_init_rte_rules(port))) assert(false); // if any flow rule failed, stop process running due to possible hw/driver failure } diff --git a/src/grpc/dp_grpc_impl.c b/src/grpc/dp_grpc_impl.c index 4e2c9303e..5c8c4cb2b 100644 --- a/src/grpc/dp_grpc_impl.c +++ b/src/grpc/dp_grpc_impl.c @@ -954,7 +954,7 @@ static int dp_process_capture_status(struct dp_grpc_responder *responder) return DP_GRPC_ERR_LIMIT_REACHED; } - if (port->port_type == DP_PORT_PF) { + if (port->is_pf) { reply->interfaces[count].type = DP_CAPTURE_IFACE_TYPE_SINGLE_PF; reply->interfaces[count].spec.pf_index = port == dp_get_pf0() ? 
0 : 1; } else { diff --git a/src/nodes/cls_node.c b/src/nodes/cls_node.c index dae874977..816b20d0b 100644 --- a/src/nodes/cls_node.c +++ b/src/nodes/cls_node.c @@ -150,13 +150,12 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod return CLS_NEXT_DROP; if (RTE_ETH_IS_IPV4_HDR(l3_type)) { - if (port->port_type == DP_PORT_PF) + if (port->is_pf) return CLS_NEXT_DROP; #ifdef ENABLE_VIRTSVC if (virtsvc_present) { virtsvc = get_outgoing_virtsvc(ether_hdr); if (virtsvc) { - df->flags.flow_type = DP_FLOW_TYPE_OUTGOING; df->virtsvc = virtsvc; return CLS_NEXT_VIRTSVC; } @@ -168,10 +167,9 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod if (RTE_ETH_IS_IPV6_HDR(l3_type)) { ipv6_hdr = (const struct rte_ipv6_hdr *)(ether_hdr + 1); - if (port->port_type == DP_PORT_PF) { + if (port->is_pf) { if (unlikely(is_ipv6_nd(ipv6_hdr))) return CLS_NEXT_DROP; - df->flags.flow_type = DP_FLOW_TYPE_INCOMING; #ifdef ENABLE_VIRTSVC if (virtsvc_present) { virtsvc = get_incoming_virtsvc(ipv6_hdr); diff --git a/src/nodes/common_node.c b/src/nodes/common_node.c index 33d03cca7..2d87463f8 100644 --- a/src/nodes/common_node.c +++ b/src/nodes/common_node.c @@ -38,7 +38,7 @@ int dp_node_append_vf_tx(struct rte_node_register *node, struct dp_port *port; port = dp_get_port_by_id(port_id); - if (!port || port->port_type != DP_PORT_VF) { + if (!port || port->is_pf) { DPNODE_LOG_ERR(node, "Node requires a valid virtual port to connect to"); return DP_ERROR; } @@ -55,7 +55,7 @@ int dp_node_append_pf_tx(struct rte_node_register *node, struct dp_port *port; port = dp_get_port_by_id(port_id); - if (!port || port->port_type != DP_PORT_PF) { + if (!port || !port->is_pf) { DPNODE_LOG_ERR(node, "Node requires a valid physical port to connect to"); return DP_ERROR; } diff --git a/src/nodes/conntrack_node.c b/src/nodes/conntrack_node.c index 7a7822817..15ab00191 100644 --- a/src/nodes/conntrack_node.c +++ b/src/nodes/conntrack_node.c @@ -26,27 +26,6 @@ static int conntrack_node_init(__rte_unused const struct rte_graph *graph, __rte return DP_OK; } -static __rte_always_inline rte_edge_t dp_find_nxt_graph_node(struct dp_flow *df) -{ - if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) { - switch (df->vnf_type) { - case DP_VNF_TYPE_LB: - return CONNTRACK_NEXT_LB; - case DP_VNF_TYPE_VIP: - case DP_VNF_TYPE_NAT: - return CONNTRACK_NEXT_DNAT; - case DP_VNF_TYPE_LB_ALIAS_PFX: - case DP_VNF_TYPE_INTERFACE_IP: - case DP_VNF_TYPE_ALIAS_PFX: - return CONNTRACK_NEXT_FIREWALL; - default: - return CONNTRACK_NEXT_LB; - } - } - return CONNTRACK_NEXT_DNAT; -} - - static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_node *node, struct rte_mbuf *m) { struct dp_flow *df = dp_get_flow_ptr(m); @@ -73,7 +52,22 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod return CONNTRACK_NEXT_DROP; } - return dp_find_nxt_graph_node(df); + if (!dp_get_port(m)->is_pf) + return CONNTRACK_NEXT_DNAT; + + switch (df->vnf_type) { + case DP_VNF_TYPE_LB: + return CONNTRACK_NEXT_LB; + case DP_VNF_TYPE_VIP: + case DP_VNF_TYPE_NAT: + return CONNTRACK_NEXT_DNAT; + case DP_VNF_TYPE_LB_ALIAS_PFX: + case DP_VNF_TYPE_INTERFACE_IP: + case DP_VNF_TYPE_ALIAS_PFX: + return CONNTRACK_NEXT_FIREWALL; + default: + return CONNTRACK_NEXT_LB; + } } static uint16_t conntrack_node_process(struct rte_graph *graph, diff --git a/src/nodes/firewall_node.c b/src/nodes/firewall_node.c index 3166e14ae..5cfb6114a 100644 --- a/src/nodes/firewall_node.c +++ b/src/nodes/firewall_node.c @@ -43,7 
+43,7 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod // return FIREWALL_NEXT_DROP; } - if (dst_port->port_type == DP_PORT_PF) + if (dst_port->is_pf) return FIREWALL_NEXT_IPIP_ENCAP; return next_tx_index[dst_port->port_id]; diff --git a/src/nodes/ipv4_lookup_node.c b/src/nodes/ipv4_lookup_node.c index 877643912..7720276f2 100644 --- a/src/nodes/ipv4_lookup_node.c +++ b/src/nodes/ipv4_lookup_node.c @@ -24,41 +24,34 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod struct dp_flow *df = dp_get_flow_ptr(m); struct vm_route route; uint32_t route_key = 0; + const struct dp_port *src_port = dp_get_port(m); const struct dp_port *dst_port; // TODO: add broadcast routes when machine is added if (df->l4_type == DP_IP_PROTO_UDP && df->l4_info.trans_port.dst_port == htons(DP_BOOTP_SRV_PORT)) return IPV4_LOOKUP_NEXT_DHCP; - dst_port = dp_get_ip4_dst_port(dp_get_port(m), df->tun_info.dst_vni, df, &route, &route_key); + dst_port = dp_get_ip4_dst_port(src_port, df->tun_info.dst_vni, df, &route, &route_key); if (!dst_port) return IPV4_LOOKUP_NEXT_DROP; - if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) { - if (dst_port->port_type == DP_PORT_PF) + if (dst_port->is_pf) { + if (src_port->is_pf) return IPV4_LOOKUP_NEXT_DROP; + rte_memcpy(df->tun_info.ul_dst_addr6, route.nh_ipv6, sizeof(df->tun_info.ul_dst_addr6)); + dst_port = dp_multipath_get_pf(df->dp_flow_hash); } else { - df->tun_info.dst_vni = route.vni; - if (dst_port->port_type == DP_PORT_PF) { - rte_memcpy(df->tun_info.ul_dst_addr6, route.nh_ipv6, sizeof(df->tun_info.ul_dst_addr6)); - df->flags.flow_type = DP_FLOW_TYPE_OUTGOING; - } + // next hop is known, fill in Ether header + // (PF egress goes through a tunnel that destroys Ether header) + dp_fill_ether_hdr(rte_pktmbuf_mtod(m, struct rte_ether_hdr *), dst_port, RTE_ETHER_TYPE_IPV4); } - df->flags.public_flow = route_key == 0 ? DP_FLOW_SOUTH_NORTH : DP_FLOW_WEST_EAST; - - if (!df->flags.flow_type) - df->flags.flow_type = DP_FLOW_TYPE_LOCAL; - - if (df->flags.flow_type == DP_FLOW_TYPE_OUTGOING) - dst_port = dp_multipath_get_pf(df->dp_flow_hash); - - // next hop is known, fill in Ether header - // (PF egress goes through a tunnel that destroys Ether header) - if (dst_port->port_type == DP_PORT_VF) - dp_fill_ether_hdr(rte_pktmbuf_mtod(m, struct rte_ether_hdr *), dst_port, RTE_ETHER_TYPE_IPV4); + if (!src_port->is_pf) + df->tun_info.dst_vni = route.vni; + df->flags.public_flow = route_key == 0 ? DP_FLOW_SOUTH_NORTH : DP_FLOW_WEST_EAST; df->nxt_hop = dst_port->port_id; // always valid since coming from struct dp_port + return IPV4_LOOKUP_NEXT_NAT; } diff --git a/src/nodes/ipv6_lookup_node.c b/src/nodes/ipv6_lookup_node.c index 21162c01d..c3c8810d8 100644 --- a/src/nodes/ipv6_lookup_node.c +++ b/src/nodes/ipv6_lookup_node.c @@ -22,13 +22,10 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod struct rte_ether_hdr *ether_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)(ether_hdr + 1); struct vm_route route; + const struct dp_port *src_port = dp_get_port(m); const struct dp_port *dst_port; int t_vni; - t_vni = df->flags.flow_type == DP_FLOW_TYPE_INCOMING - ? 
df->tun_info.dst_vni - : 0; - dp_extract_ipv6_header(df, ipv6_hdr); if (DP_FAILED(dp_extract_l4_header(df, ipv6_hdr + 1))) @@ -38,30 +35,30 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod if (df->l4_type == DP_IP_PROTO_UDP && df->l4_info.trans_port.dst_port == htons(DHCPV6_SERVER_PORT)) return IPV6_LOOKUP_NEXT_DHCPV6; - dst_port = dp_get_ip6_dst_port(dp_get_port(m), t_vni, ipv6_hdr, &route); + t_vni = src_port->is_pf ? df->tun_info.dst_vni : 0; + + dst_port = dp_get_ip6_dst_port(src_port, t_vni, ipv6_hdr, &route); if (!dst_port) return IPV6_LOOKUP_NEXT_DROP; - df->nxt_hop = dst_port->port_id; // always valid since coming from struct dp_port - - if (df->flags.flow_type != DP_FLOW_TYPE_INCOMING) - df->tun_info.dst_vni = route.vni; - - if (dst_port->port_type == DP_PORT_PF) { + if (dst_port->is_pf) { + if (src_port->is_pf) + return IPV6_LOOKUP_NEXT_DROP; rte_memcpy(df->tun_info.ul_dst_addr6, route.nh_ipv6, sizeof(df->tun_info.ul_dst_addr6)); - df->flags.flow_type = DP_FLOW_TYPE_OUTGOING; } else { // next hop is known, fill in Ether header // (PF egress goes through a tunnel that destroys Ether header) dp_fill_ether_hdr(ether_hdr, dst_port, RTE_ETHER_TYPE_IPV6); } - if (!df->flags.flow_type) - df->flags.flow_type = DP_FLOW_TYPE_LOCAL; - if (dp_conf_is_offload_enabled()) df->flags.offload_ipv6 = 1; + if (!src_port->is_pf) + df->tun_info.dst_vni = route.vni; + + df->nxt_hop = dst_port->port_id; // always valid since coming from struct dp_port + return IPV6_LOOKUP_NEXT_FIREWALL; } diff --git a/src/nodes/lb_node.c b/src/nodes/lb_node.c index 9d780821a..e562f23af 100644 --- a/src/nodes/lb_node.c +++ b/src/nodes/lb_node.c @@ -19,9 +19,8 @@ DP_NODE_REGISTER_NOINIT(LB, lb, NEXT_NODES); static __rte_always_inline void dp_lb_set_next_hop(struct dp_flow *df, uint16_t port_id) { - df->flags.flow_type = DP_FLOW_TYPE_OUTGOING; // for recirc pkt, it will be changed back to DP_FLOW_TYPE_INCOMING in cls_node.c if (DP_FAILED(dp_get_portid_with_vnf_key(df->tun_info.ul_dst_addr6, DP_VNF_TYPE_LB_ALIAS_PFX))) { - df->nxt_hop = port_id; // needs to validated by the caller! 
+ df->nxt_hop = port_id; // needs to be validated by the caller (but it's always m->port) df->flags.nat = DP_CHG_UL_DST_IP; } else df->flags.nat = DP_LB_RECIRC; diff --git a/src/nodes/packet_relay_node.c b/src/nodes/packet_relay_node.c index c84013ebf..07697a8e9 100644 --- a/src/nodes/packet_relay_node.c +++ b/src/nodes/packet_relay_node.c @@ -34,7 +34,6 @@ static __rte_always_inline rte_edge_t lb_nnat_icmp_reply(struct dp_flow *df, str temp_ip = ipv4_hdr->dst_addr; ipv4_hdr->dst_addr = ipv4_hdr->src_addr; ipv4_hdr->src_addr = temp_ip; - df->flags.flow_type = DP_FLOW_TYPE_OUTGOING; df->nxt_hop = m->port; dp_nat_chg_ip(df, ipv4_hdr, m); memcpy(df->tun_info.ul_dst_addr6, df->tun_info.ul_src_addr6, sizeof(df->tun_info.ul_dst_addr6)); @@ -52,7 +51,6 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod return PACKET_RELAY_NEXT_DROP; if (cntrack->nf_info.nat_type == DP_FLOW_NAT_TYPE_NETWORK_NEIGH) { - df->flags.flow_type = DP_FLOW_TYPE_OUTGOING; df->nxt_hop = m->port; // trick: use src place to store old dst address for offloading rte_memcpy(df->tun_info.ul_src_addr6, df->tun_info.ul_dst_addr6, sizeof(df->tun_info.ul_src_addr6)); diff --git a/src/nodes/virtsvc_node.c b/src/nodes/virtsvc_node.c index e0353062d..205910cf7 100644 --- a/src/nodes/virtsvc_node.c +++ b/src/nodes/virtsvc_node.c @@ -232,7 +232,7 @@ static __rte_always_inline uint16_t virtsvc_reply_next(struct rte_node *node, vf_port_id = conn->vf_port_id; vf_port = dp_get_port_by_id(vf_port_id); - if (!vf_port || !vf_port->attached) + if (!vf_port) return VIRTSVC_NEXT_DROP; dp_fill_ether_hdr(ether_hdr, vf_port, RTE_ETHER_TYPE_IPV4); @@ -247,10 +247,10 @@ static __rte_always_inline rte_edge_t get_next_index(struct rte_node *node, stru if (dp_conf_is_offload_enabled()) DPNODE_LOG_WARNING(node, "Virtual services not supported while offloading"); - if (df->flags.flow_type == DP_FLOW_TYPE_OUTGOING) - return virtsvc_request_next(node, m, df); - else if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) + if (dp_get_port(m)->is_pf) return virtsvc_reply_next(node, m, df); + else + return virtsvc_request_next(node, m, df); return VIRTSVC_NEXT_DROP; } diff --git a/src/rte_flow/dp_rte_flow_capture.c b/src/rte_flow/dp_rte_flow_capture.c index ae65dc4af..da4fc8102 100644 --- a/src/rte_flow/dp_rte_flow_capture.c +++ b/src/rte_flow/dp_rte_flow_capture.c @@ -219,12 +219,10 @@ int dp_enable_pkt_capture(struct dp_port *port) if (DP_FAILED(dp_destroy_default_flow(port))) return DP_GRPC_ERR_RTE_RULE_DEL; - switch (port->port_type) { - case DP_PORT_PF: + if (port->is_pf) { if (DP_FAILED(dp_install_pf_default_flow(port, true))) return DP_GRPC_ERR_RTE_RULE_ADD; - break; - case DP_PORT_VF: + } else { if (DP_FAILED(dp_install_vf_default_jump_flow(port, DP_RTE_FLOW_CAPTURE_GROUP))) return DP_GRPC_ERR_RTE_RULE_ADD; // rollback flow rules if failed on the second one for VF. @@ -239,7 +237,6 @@ int dp_enable_pkt_capture(struct dp_port *port) } return DP_GRPC_ERR_RTE_RULE_ADD; } - break; } port->captured = true; @@ -257,19 +254,15 @@ int dp_disable_pkt_capture(struct dp_port *port) if (DP_FAILED(dp_destroy_default_flow(port))) return DP_GRPC_ERR_RTE_RULE_DEL; - switch (port->port_type) { - case DP_PORT_PF: + if (port->is_pf) { if (DP_FAILED(dp_install_pf_default_flow(port, false))) return DP_GRPC_ERR_RTE_RULE_ADD; - break; - case DP_PORT_VF: + } else { if (DP_FAILED(dp_install_vf_default_jump_flow(port, DP_RTE_FLOW_VNET_GROUP))) { // rollback does not make sense here, but rather to report the error.
because the default operation should be without capturing. DPS_LOG_ERR("Failed to turn capturing off by installing default jump rule to the vnet group on vf", DP_LOG_PORT(port)); return DP_GRPC_ERR_RTE_RULE_ADD; } - - break; } port->captured = false; diff --git a/src/rte_flow/dp_rte_flow_traffic_forward.c b/src/rte_flow/dp_rte_flow_traffic_forward.c index 5e6c9d5c7..4f1664d41 100644 --- a/src/rte_flow/dp_rte_flow_traffic_forward.c +++ b/src/rte_flow/dp_rte_flow_traffic_forward.c @@ -458,7 +458,7 @@ int dp_offload_handle_tunnel_decap_traffic(struct dp_flow *df, if (cross_pf_port) { // move this packet to the right hairpin rx queue of pf, so as to be moved to vf - if (outgoing_port->port_type != DP_PORT_VF) { + if (unlikely(outgoing_port->is_pf)) { DPS_LOG_ERR("Outgoing port not a VF", DP_LOG_PORT(outgoing_port)); dp_destroy_rte_flow_agectx(agectx); // no need to free the above appeared (not allocated) agectx_capture, as the capturing rule is not installed for the cross-pf case @@ -587,8 +587,7 @@ int dp_offload_handle_local_traffic(struct dp_flow *df, static __rte_always_inline int dp_offload_handle_in_network_traffic(struct dp_flow *df, - const struct dp_port *incoming_port, - const struct dp_port *outgoing_port) + const struct dp_port *incoming_port) { // match in-network underlay packets struct rte_flow_item_eth eth_spec; // #1 @@ -609,9 +608,7 @@ int dp_offload_handle_in_network_traffic(struct dp_flow *df, // misc variables needed to create the flow struct flow_age_ctx *agectx; - - df->nxt_hop = (incoming_port == dp_get_pf0() ? dp_get_pf1() : incoming_port)->port_id; - // no need to validate as this can only be PF0/PF1 + const struct dp_port *outgoing_port; // create match pattern based on dp_flow dp_set_eth_flow_item(&pattern[pattern_cnt++], ð_spec, htons(df->tun_info.l3_type)); @@ -632,6 +629,8 @@ int dp_offload_handle_in_network_traffic(struct dp_flow *df, // set proper ethernet addresses // in network traffic has to be set via the other pf port via hairpin + outgoing_port = incoming_port == dp_get_pf0() ? 
dp_get_pf1() : incoming_port; + df->nxt_hop = outgoing_port->port_id; dp_set_src_mac_set_action(&actions[action_cnt++], &set_src_mac, &outgoing_port->vm.info.own_mac); dp_set_dst_mac_set_action(&actions[action_cnt++], &set_dst_mac, &outgoing_port->vm.info.neigh_mac); @@ -676,30 +675,39 @@ int dp_offload_handler(struct rte_mbuf *m, struct dp_flow *df) const struct dp_port *outgoing_port = dp_get_dst_port(df); int ret; - // TODO(plague): think about using enum for flow_type - if (df->flags.flow_type == DP_FLOW_TYPE_LOCAL) { + if (!incoming_port->is_pf && !outgoing_port->is_pf) { + // VF -> VF ret = dp_offload_handle_local_traffic(df, incoming_port, outgoing_port); if (DP_FAILED(ret)) DPS_LOG_ERR("Failed to install local flow rule", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port), DP_LOG_RET(ret)); - } else if (df->flags.flow_type == DP_FLOW_TYPE_INCOMING) { - ret = dp_offload_handle_tunnel_decap_traffic(df, incoming_port, outgoing_port, dp_get_pkt_mark(m)->flags.is_recirc); - if (DP_FAILED(ret)) - DPS_LOG_ERR("Failed to install decap flow rule", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port), DP_LOG_RET(ret)); - } else if (df->flags.flow_type == DP_FLOW_TYPE_OUTGOING) { + } else if (outgoing_port->is_pf) { if (df->conntrack->nf_info.nat_type == DP_FLOW_NAT_TYPE_NETWORK_NEIGH || df->conntrack->nf_info.nat_type == DP_FLOW_LB_TYPE_FORWARD ) { - ret = dp_offload_handle_in_network_traffic(df, incoming_port, outgoing_port); + if (unlikely(!incoming_port->is_pf)) { + DPS_LOG_ERR("Invalid in-network flow", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port)); + return DP_ERROR; + } + // PF -> PF + ret = dp_offload_handle_in_network_traffic(df, incoming_port); if (DP_FAILED(ret)) DPS_LOG_ERR("Failed to install in-network flow rule", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port), DP_LOG_RET(ret)); } else { + if (unlikely(incoming_port->is_pf)) { + DPS_LOG_ERR("Invalid encap flow", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port)); + return DP_ERROR; + } + // VF -> PF ret = dp_offload_handle_tunnel_encap_traffic(df, incoming_port, outgoing_port); if (DP_FAILED(ret)) DPS_LOG_ERR("Failed to install encap flow rule", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port), DP_LOG_RET(ret)); } } else { - DPS_LOG_ERR("Invalid flow type to offload", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port), DP_LOG_VALUE(df->flags.flow_type)); - ret = DP_ERROR; + // PF -> VF + ret = dp_offload_handle_tunnel_decap_traffic(df, incoming_port, outgoing_port, dp_get_pkt_mark(m)->flags.is_recirc); + if (DP_FAILED(ret)) + DPS_LOG_ERR("Failed to install decap flow rule", DP_LOG_PORT(incoming_port), DP_LOG_PORT(outgoing_port), DP_LOG_RET(ret)); } + return ret; }