diff --git a/docs/deployment/capture_offloaded_rx_pkts.md b/docs/deployment/capture_offloaded_rx_pkts.md new file mode 100644 index 000000000..99d2ffe80 --- /dev/null +++ b/docs/deployment/capture_offloaded_rx_pkts.md @@ -0,0 +1,62 @@ +# Feature: capture offloaded rx packets on interfaces +In offloaded mode, packets that are processed by hardware offload rules cannot be seen anymore even on the software path. To increase the visibility of this type of traffic flows, we use special rte flow rules to instrument packet processing on hardware to duplicate and capture these packets on interfaces. + +## What can be achieved and what cannot +Through tedious and complex experiment, the following features are identified and thus currently supported: + +1. Capture offloaded packets on the RX side of a VF (packets that are sent from VMs). +2. Capture offloaded packets on the RX side of PF0 (IPinIP packets that are transmitted to PF0 from the wire). + +Due to the constraint of Mellanox HW or driver, the following features currently are not supported: + +1. Capture offloaded packets on the TX side of interfaces. +2. Capture offloaded packets on the RX side of PF1. PF1 is currently not in switchdev mode, thus the used special rte flow rule does not work for it. +3. The configured UDP src port is not really respected by HW, and UDP dst port is respected instead. + + +## Capture and understand offloaded rx packets +Capturing must be started via dpservice-cli before the first packets of new flows on an interface. The target interfaces, especially VFs, need to be started first, and in total, 16 interfaces can be specified as part of the cmdline parameters. Again, as capturing on PF1 is currently not supported by HW, please only specify `--pf=0`. 
+ + +``` +./bin/dpservice-cli capture start --sink-node-ip= --udp-src-port= --udp-dst-port= --vf= --pf=0 +``` + +for example: +``` +./bin/dpservice-cli capture start --sink-node-ip=abcd:efgh:1234:4321::1 --udp-src-port=3000 --udp-dst-port=3010 --vf=vm-1,vm-2 --pf=0 +``` + +The captured packets will be transmitted back in an encapped format to the interface (via router) of your selected sink machine, either the hypervisor where dp-service is running or a remote host. These packets are visible on physical interfaces using a regular tcpdump tool. For example, these packets can be dumped to a pcap file using a command: + +``` +sudo tcpdump -ni any udp dst port 3010 -w test.pcap +``` + +The generated test.pcap file can be opened using Wireshark(graphic). As captured packets are encaped as UDP payload, this file can be firstly modified by removing the first 62 bytes of all packets. + +``` +editcap -C 62 -F pcap test.pcap test_no_udp.pcap +``` + +The resulted test_no_udp.pcap file can be recognized by wireshark. + +The following command is used to stop capturing on all configured interfaces. Note that, to start capturing on a new set of interfaces, this stopping command has to be called first. +``` +/bin/dpservice-cli capture stop +``` + +or before you start capturing, it is also recommended to check the operation status of this capturing feature by using: +``` +/bin/dpservice-cli capture status +``` +The returned values incude this feature's operation status, as well as the configuration information using the "capture start" subcommand. + +## How offloaded packets are captured +Offloaded packets are captured by using special rte flow rules, especially the one that enables packet sampling on the RX side of an interface. The captured packets are encapsulated by prepending extra headers. Despite the fact that captured Ethernet frames are treated as UDP payload, it is flexible to use other customized headers as well. 
The format of encapsulation is as follows: + +``` +| Outer Ether header | Outer IPv6 header | UDP header | Captured Ether frame | +``` + +[Figure1](../sys_design/pkt_capture_flow_rules_VF.drawio.png) and [Figure2](../sys_design/pkt_capture_flow_rules_PF.drawio.png) illustrate the organization of flow rules for VF and PF. The differences between handling VF and PF are empirical. diff --git a/docs/sys_design/pkt_capture_flow_rules_PF.drawio.png b/docs/sys_design/pkt_capture_flow_rules_PF.drawio.png new file mode 100644 index 000000000..1b7e9ba3a Binary files /dev/null and b/docs/sys_design/pkt_capture_flow_rules_PF.drawio.png differ diff --git a/docs/sys_design/pkt_capture_flow_rules_VF.drawio.png b/docs/sys_design/pkt_capture_flow_rules_VF.drawio.png new file mode 100644 index 000000000..bf43fa20b Binary files /dev/null and b/docs/sys_design/pkt_capture_flow_rules_VF.drawio.png differ diff --git a/include/dp_cntrack.h b/include/dp_cntrack.h index 4aa3f890a..e8fbfbdf9 100644 --- a/include/dp_cntrack.h +++ b/include/dp_cntrack.h @@ -9,8 +9,6 @@ extern "C" { #endif -#define DP_IS_CAPTURED_HW_PKT 5 - void dp_cntrack_init(void); int dp_cntrack_handle(struct rte_mbuf *m, struct dp_flow *df); diff --git a/include/dp_error.h b/include/dp_error.h index cfbc0fd81..b94b2acd4 100644 --- a/include/dp_error.h +++ b/include/dp_error.h @@ -38,6 +38,11 @@ const char *dp_strerror_verbose(int error); ERR(ITERATOR, 207) \ ERR(OUT_OF_MEMORY, 208) \ ERR(LIMIT_REACHED, 209) \ + ERR(ALREADY_ACTIVE, 210) \ + ERR(NOT_ACTIVE, 211) \ + ERR(ROLLBACK, 212) \ + ERR(RTE_RULE_ADD, 213) \ + ERR(RTE_RULE_DEL, 214) \ /* Specific errors */ \ ERR(ROUTE_EXISTS, 301) \ ERR(ROUTE_NOT_FOUND, 302) \ @@ -62,6 +67,7 @@ const char *dp_strerror_verbose(int error); ERR(NO_LB, 422) \ ERR(NO_DROP_SUPPORT, 441) \ + + #define _DP_GRPC_ERROR_ENUM(NAME, NUMBER) \ DP_GRPC_ERR_##NAME = _DP_GRPC_ERRCODES - NUMBER, enum dp_grpc_error { diff --git a/include/dp_flow.h b/include/dp_flow.h index 2bb6feb82..02c9cd527 100644 
--- a/include/dp_flow.h +++ b/include/dp_flow.h @@ -16,7 +16,7 @@ extern "C" { // arbitrary big number #define FLOW_MAX 850000 -#define DP_FLOW_VAL_AGE_CTX_CAPACITY 5 +#define DP_FLOW_VAL_AGE_CTX_CAPACITY 6 #define DP_FLOW_DEFAULT_TIMEOUT 30 /* 30 seconds */ #define DP_FLOW_TCP_EXTENDED_TIMEOUT (60 * 60 * 24) /* 1 day */ diff --git a/include/dp_log.h b/include/dp_log.h index 6e497f12a..90cec004a 100644 --- a/include/dp_log.h +++ b/include/dp_log.h @@ -49,6 +49,7 @@ extern "C" { #define DP_LOG_IFNAME(VALUE) _DP_LOG_STR("interface_name", VALUE) #define DP_LOG_LCORE(VALUE) _DP_LOG_UINT("lcore_id", VALUE) #define DP_LOG_RTE_GROUP(VALUE) _DP_LOG_UINT("rte_group", VALUE) +#define DP_LOG_PORT_TYPE(VALUE) _DP_LOG_UINT("port_type", VALUE) // networking stack #define DP_LOG_IPV4(VALUE) _DP_LOG_IPV4("ipv4", VALUE) #define DP_LOG_IPV6(VALUE) _DP_LOG_IPV6("ipv6", VALUE) @@ -69,6 +70,7 @@ extern "C" { #define DP_LOG_GRPCRET(VALUE) _DP_LOG_INT("grpc_error", VALUE), _DP_LOG_STR("grpc_message", dp_grpc_strerror(VALUE)) #define DP_LOG_GRPCREQUEST(VALUE) _DP_LOG_INT("grpc_request", VALUE) #define DP_LOG_IFACE(VALUE) _DP_LOG_STR("interface_id", VALUE) +#define DP_LOG_IFACE_INDEX(VALUE) _DP_LOG_INT("interface_index", VALUE) #define DP_LOG_TVNI(VALUE) _DP_LOG_UINT("t_vni", VALUE) #define DP_LOG_PCI(VALUE) _DP_LOG_STR("pci", VALUE) #define DP_LOG_PXE_SRV(VALUE) _DP_LOG_STR("pxe_server", VALUE) diff --git a/include/dp_mbuf_dyn.h b/include/dp_mbuf_dyn.h index d61b49ec5..7981134ce 100644 --- a/include/dp_mbuf_dyn.h +++ b/include/dp_mbuf_dyn.h @@ -26,7 +26,6 @@ struct dp_flow { uint16_t offload_ipv6 : 1; // tmp solution to set if we should offload ipv6 pkts uint16_t dir : 2; // store the direction of each packet uint16_t offload_decision: 2; // store the offload status of each packet - uint16_t offload_mark: 1; // store the offload mark of each packet } flags; uint16_t l3_type; //layer-3 for inner packets. 
it can be crafted or extracted from raw frames union { diff --git a/include/dp_port.h b/include/dp_port.h index 0b987db7e..ebbfcafbb 100644 --- a/include/dp_port.h +++ b/include/dp_port.h @@ -37,7 +37,9 @@ struct dp_port { uint8_t peer_pf_hairpin_tx_rx_queue_offset; uint16_t peer_pf_port_id; enum dp_vf_port_attach_status attach_status; - struct rte_flow *default_flow; + struct rte_flow *default_jump_flow; + struct rte_flow *default_capture_flow; + bool captured; }; struct dp_ports { diff --git a/include/grpc/dp_async_grpc.h b/include/grpc/dp_async_grpc.h index b30e988ce..9b65e2f54 100644 --- a/include/grpc/dp_async_grpc.h +++ b/include/grpc/dp_async_grpc.h @@ -183,4 +183,8 @@ CREATE_CALLCLASS(ListFirewallRules, MultiReplyCall); CREATE_CALLCLASS(CheckVniInUse, SingleReplyCall); CREATE_CALLCLASS(ResetVni, SingleReplyCall); +CREATE_CALLCLASS(CaptureStart, SingleReplyCall); +CREATE_CALLCLASS(CaptureStop, SingleReplyCall); +CREATE_CALLCLASS(CaptureStatus, SingleReplyCall); + #endif diff --git a/include/grpc/dp_grpc_api.h b/include/grpc/dp_grpc_api.h index 5a5710358..23b1cb98d 100644 --- a/include/grpc/dp_grpc_api.h +++ b/include/grpc/dp_grpc_api.h @@ -4,6 +4,7 @@ #include #include "dp_util.h" #include "dp_firewall.h" +#include "monitoring/dp_monitoring.h" #ifdef __cplusplus extern "C" { @@ -54,6 +55,9 @@ enum dpgrpc_request_type { DP_REQ_TYPE_ListFirewallRules, DP_REQ_TYPE_CheckVniInUse, DP_REQ_TYPE_ResetVni, + DP_REQ_TYPE_CaptureStart, + DP_REQ_TYPE_CaptureStop, + DP_REQ_TYPE_CaptureStatus, }; // in sync with dpdk proto! 
@@ -63,6 +67,11 @@ enum dpgrpc_vni_type { DP_VNI_BOTH, }; +enum dpgrpc_capture_iface_type { + DP_CAPTURE_IFACE_TYPE_SINGLE_PF, + DP_CAPTURE_IFACE_TYPE_SINGLE_VF, +}; + struct dpgrpc_iface { char iface_id[VM_IFACE_ID_MAX_LEN]; uint32_t ip4_addr; @@ -160,6 +169,27 @@ struct dpgrpc_versions { char app[DP_GRPC_VERSION_MAX_LEN]; }; +struct dpgrpc_capture_interface { + enum dpgrpc_capture_iface_type type; + union { + char iface_id[VM_IFACE_ID_MAX_LEN]; + uint8_t pf_index; + } spec; +}; + +struct dpgrpc_capture { + uint8_t dst_addr6[DP_VNF_IPV6_ADDR_SIZE]; + uint8_t interface_count; + uint32_t udp_src_port; + uint32_t udp_dst_port; + struct dpgrpc_capture_interface interfaces[DP_CAPTURE_MAX_PORT_NUM]; + bool is_active; +}; + +struct dpgrpc_capture_stop { + uint16_t port_cnt; +}; + struct dpgrpc_request { uint16_t type; // enum dpgrpc_request_type union { @@ -198,6 +228,7 @@ struct dpgrpc_request { struct dpgrpc_vni vni_in_use; struct dpgrpc_vni vni_reset; struct dpgrpc_versions get_version; + struct dpgrpc_capture capture_start; }; }; @@ -235,6 +266,8 @@ struct dpgrpc_reply { struct dpgrpc_fwrule_info fwrule; struct dpgrpc_vni_in_use vni_in_use; struct dpgrpc_versions versions; + struct dpgrpc_capture_stop capture_stop; + struct dpgrpc_capture capture_get; }; }; diff --git a/include/grpc/dp_grpc_conv.h b/include/grpc/dp_grpc_conv.h index c1cba1a07..2ca1affce 100644 --- a/include/grpc/dp_grpc_conv.h +++ b/include/grpc/dp_grpc_conv.h @@ -24,6 +24,9 @@ namespace GrpcConv bool GrpcToDpFwallDirection(const TrafficDirection& grpc_dir, enum dp_fwall_direction *dp_dir); bool GrpcToDpFwallPort(int32_t grpc_port, uint32_t *dp_port); + bool GrpcToDpCaptureInterfaceType(const CaptureInterfaceType & grpc_type, enum dpgrpc_capture_iface_type *dp_capture_iface_type); + CaptureInterfaceType CaptureInterfaceTypeToGrpc(enum dpgrpc_capture_iface_type dp_capture_iface_type); + const char *Ipv4ToStr(uint32_t ipv4); uint32_t Ipv4PrefixLenToMask(uint32_t prefix_length); diff --git 
a/include/monitoring/dp_event.h b/include/monitoring/dp_event.h index e137f3df7..9014612c2 100644 --- a/include/monitoring/dp_event.h +++ b/include/monitoring/dp_event.h @@ -16,12 +16,8 @@ int dp_link_status_change_event_callback(uint16_t port_id, void dp_process_event_link_msg(struct rte_mbuf *m); int dp_send_event_flow_aging_msg(void); -int dp_send_event_hardware_capture_start_msg(void); -int dp_send_event_hardware_capture_stop_msg(void); void dp_process_event_flow_aging_msg(struct rte_mbuf *m); -void dp_process_event_hardware_capture_start_msg(struct rte_mbuf *m); -void dp_process_event_hardware_capture_stop_msg(struct rte_mbuf *m); #ifdef __cplusplus } diff --git a/include/monitoring/dp_graphtrace.h b/include/monitoring/dp_graphtrace.h index 4d3e1ae2b..d4e62335d 100644 --- a/include/monitoring/dp_graphtrace.h +++ b/include/monitoring/dp_graphtrace.h @@ -19,8 +19,7 @@ enum dp_graphtrace_loglevel { int dp_graphtrace_init(void); void dp_graphtrace_free(void); -void _dp_graphtrace_send(enum dp_graphtrace_pkt_type type, - const struct rte_node *node, +void _dp_graphtrace_send(const struct rte_node *node, const struct rte_node *next_node, void **objs, uint16_t nb_objs, uint16_t dst_port_id); @@ -53,53 +52,42 @@ void _dp_graphtrace_send(enum dp_graphtrace_pkt_type type, extern int _dp_graphtrace_flags; extern bool _dp_graphtrace_enabled; -extern bool _dp_graphtrace_hw_enabled; static __rte_always_inline void dp_graphtrace_next(const struct rte_node *node, void *obj, rte_edge_t next_index) { if (_dp_graphtrace_enabled && (_dp_graphtrace_flags & DP_GRAPHTRACE_FLAG_NODES)) - _dp_graphtrace_send(DP_GRAPHTRACE_PKT_TYPE_SOFTWARE, node, node->nodes[next_index], &obj, 1, -1); + _dp_graphtrace_send(node, node->nodes[next_index], &obj, 1, -1); _dp_graphtrace_log_next(node, obj, next_index); } static __rte_always_inline void dp_graphtrace_next_burst(const struct rte_node *node, void **objs, uint16_t nb_objs, rte_edge_t next_index) { if (_dp_graphtrace_enabled && 
(_dp_graphtrace_flags & DP_GRAPHTRACE_FLAG_NODES)) - _dp_graphtrace_send(DP_GRAPHTRACE_PKT_TYPE_SOFTWARE, node, node->nodes[next_index], objs, nb_objs, -1); + _dp_graphtrace_send(node, node->nodes[next_index], objs, nb_objs, -1); _dp_graphtrace_log_next_burst(node, objs, nb_objs, next_index); } static __rte_always_inline void dp_graphtrace_rx_burst(const struct rte_node *node, void **objs, uint16_t nb_objs) { if (_dp_graphtrace_enabled) - _dp_graphtrace_send(DP_GRAPHTRACE_PKT_TYPE_SOFTWARE, NULL, node, objs, nb_objs, -1); + _dp_graphtrace_send(NULL, node, objs, nb_objs, -1); _dp_graphtrace_log_rx_burst(node, objs, nb_objs); } static __rte_always_inline void dp_graphtrace_tx_burst(const struct rte_node *node, void **objs, uint16_t nb_objs, uint16_t port_id) { if (_dp_graphtrace_enabled) - _dp_graphtrace_send(DP_GRAPHTRACE_PKT_TYPE_SOFTWARE, node, NULL, objs, nb_objs, port_id); + _dp_graphtrace_send(node, NULL, objs, nb_objs, port_id); _dp_graphtrace_log_tx_burst(node, objs, nb_objs, port_id); } static __rte_always_inline void dp_graphtrace_drop_burst(const struct rte_node *node, void **objs, uint16_t nb_objs) { if (_dp_graphtrace_enabled && (_dp_graphtrace_flags & DP_GRAPHTRACE_FLAG_DROPS)) - _dp_graphtrace_send(DP_GRAPHTRACE_PKT_TYPE_SOFTWARE, node, NULL, objs, nb_objs, -1); + _dp_graphtrace_send(node, NULL, objs, nb_objs, -1); _dp_graphtrace_log_drop_burst(node, objs, nb_objs); } -static __rte_always_inline void dp_graphtrace_capture_offload_pkt(void *obj) -{ - if (_dp_graphtrace_hw_enabled) - _dp_graphtrace_send(DP_GRAPHTRACE_PKT_TYPE_OFFLOAD, NULL, NULL, &obj, 1, -1); -} - -static __rte_always_inline bool dp_is_graphtrace_hw_enabled(void) -{ - return _dp_graphtrace_enabled; -} #ifdef __cplusplus } diff --git a/include/monitoring/dp_graphtrace_shared.h b/include/monitoring/dp_graphtrace_shared.h index 13ef43686..6f8d189ac 100644 --- a/include/monitoring/dp_graphtrace_shared.h +++ b/include/monitoring/dp_graphtrace_shared.h @@ -30,11 +30,6 @@ enum 
dp_graphtrace_action { DP_GRAPHTRACE_ACTION_STOP, }; -enum dp_graphtrace_pkt_type { - DP_GRAPHTRACE_PKT_TYPE_SOFTWARE, - DP_GRAPHTRACE_PKT_TYPE_OFFLOAD, -}; - struct dp_graphtrace { struct rte_mempool *mempool; struct rte_ring *ringbuf; @@ -47,7 +42,6 @@ struct dp_graphtrace_params { }; struct dp_graphtrace_pktinfo { - enum dp_graphtrace_pkt_type pkt_type; uint32_t pktid; const struct rte_node *node; const struct rte_node *next_node; @@ -57,7 +51,6 @@ struct dp_graphtrace_pktinfo { struct dp_graphtrace_params_start { bool drops; bool nodes; - bool hw; }; struct dp_graphtrace_mp_request { diff --git a/include/monitoring/dp_monitoring.h b/include/monitoring/dp_monitoring.h index ae477d43d..ac17831d6 100644 --- a/include/monitoring/dp_monitoring.h +++ b/include/monitoring/dp_monitoring.h @@ -8,12 +8,11 @@ extern "C" { #endif +#define DP_CAPTURE_MAX_PORT_NUM 16 + enum dp_event_type { - DP_EVENT_TYPE_UNKNOWN, DP_EVENT_TYPE_LINK_STATUS, DP_EVENT_TYPE_FLOW_AGING, - DP_EVENT_TYPE_HARDWARE_CAPTURE_START, - DP_EVENT_TYPE_HARDWARE_CAPTURE_STOP, }; struct dp_event_msg_head { @@ -32,8 +31,22 @@ struct dp_event_msg { } event_entry; }; +struct dp_capture_hdr_config { + uint8_t capture_node_ipv6_addr[16]; + uint32_t capture_udp_src_port; + uint32_t capture_udp_dst_port; +}; + void dp_process_event_msg(struct rte_mbuf *m); + +void dp_set_capture_hdr_config(uint8_t *addr, uint32_t udp_src_port, uint32_t udp_dst_port); +const struct dp_capture_hdr_config *dp_get_capture_hdr_config(void); + +void dp_set_capture_enabled(bool enabled); + +bool dp_is_capture_enabled(void); + #ifdef __cplusplus } #endif diff --git a/include/rte_flow/dp_rte_flow.h b/include/rte_flow/dp_rte_flow.h index c455ab5ac..4f77a3ae9 100644 --- a/include/rte_flow/dp_rte_flow.h +++ b/include/rte_flow/dp_rte_flow.h @@ -33,7 +33,7 @@ extern "C" enum { DP_RTE_FLOW_DEFAULT_GROUP, - DP_RTE_FLOW_MONITORING_GROUP, + DP_RTE_FLOW_CAPTURE_GROUP, DP_RTE_FLOW_VNET_GROUP, }; diff --git a/include/rte_flow/dp_rte_flow_capture.h 
b/include/rte_flow/dp_rte_flow_capture.h new file mode 100644 index 000000000..8dde6c10b --- /dev/null +++ b/include/rte_flow/dp_rte_flow_capture.h @@ -0,0 +1,32 @@ +#ifndef __INCLUDE_DP_RTE_FLOW_CAPTURE_H__ +#define __INCLUDE_DP_RTE_FLOW_CAPTURE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include "dp_port.h" + + +int dp_install_jump_rule_in_default_group(uint16_t port_id, uint32_t dst_group); + +int dp_enable_port_offload_pkt_capture(uint16_t port_id); +int dp_disable_port_offload_pkt_capture(uint16_t port_id); + +int dp_disable_pkt_capture_on_all_ifaces(void); + +int dp_destroy_default_flow(struct dp_port *port); + +void dp_configure_pkt_capture_action(uint8_t *encaped_mirror_hdr, + struct rte_flow_action_raw_encap *encap_action, + struct rte_flow_action_port_id *port_id_action, + struct rte_flow_action *sub_action); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rte_flow/dp_rte_flow_helpers.h b/include/rte_flow/dp_rte_flow_helpers.h index 3f295a916..c8ac1b5b5 100644 --- a/include/rte_flow/dp_rte_flow_helpers.h +++ b/include/rte_flow/dp_rte_flow_helpers.h @@ -67,6 +67,9 @@ static const struct rte_flow_item_ipv4 dp_flow_item_ipv4_src_dst_mask = { static const struct rte_flow_item_udp dp_flow_item_udp_src_mask = { .hdr.src_port = 0xffff, }; +static const struct rte_flow_item_udp dp_flow_item_udp_dst_mask = { + .hdr.dst_port = 0xffff, +}; static const struct rte_flow_item_udp dp_flow_item_udp_src_dst_mask = { .hdr.src_port = 0xffff, .hdr.dst_port = 0xffff, @@ -226,6 +229,18 @@ void dp_set_udp_src_flow_item(struct rte_flow_item *item, item->last = NULL; } +static __rte_always_inline +void dp_set_udp_dst_flow_item(struct rte_flow_item *item, + struct rte_flow_item_udp *udp_spec, + rte_be16_t dst_port) +{ + udp_spec->hdr.dst_port = dst_port; + item->type = RTE_FLOW_ITEM_TYPE_UDP; + item->spec = udp_spec; + item->mask = &dp_flow_item_udp_dst_mask; + item->last = NULL; +} + static __rte_always_inline void 
dp_set_udp_src_dst_flow_item(struct rte_flow_item *item, struct rte_flow_item_udp *udp_spec, diff --git a/include/rte_flow/dp_rte_flow_init.h b/include/rte_flow/dp_rte_flow_init.h index 0b56e2225..dad2f7255 100644 --- a/include/rte_flow/dp_rte_flow_init.h +++ b/include/rte_flow/dp_rte_flow_init.h @@ -6,16 +6,12 @@ extern "C" { #endif #include +#include +#include +#include "dp_port.h" int dp_install_isolated_mode_ipip(int port_id, uint8_t proto_id); -int dp_install_jump_rule_in_default_group(uint16_t port_id, uint32_t group_id); -int dp_install_default_rule_in_monitoring_group(uint16_t port_id); -int dp_install_default_capture_rule_in_vnet_group(uint16_t port_id); - -int dp_turn_on_vf_offload_tracing(void); -int dp_turn_off_vf_offload_tracing(void); - #ifdef ENABLE_VIRTSVC int dp_install_isolated_mode_virtsvc(int port_id, uint8_t proto_id, const uint8_t svc_ipv6[16], uint16_t svc_port); #endif diff --git a/proto/dpdk.proto b/proto/dpdk.proto index 30fa3ac16..61b27a75a 100644 --- a/proto/dpdk.proto +++ b/proto/dpdk.proto @@ -39,6 +39,10 @@ enum VniType { VNI_BOTH = 2; } +enum CaptureInterfaceType { + SINGLE_PF = 0; + SINGLE_VF = 1; +} //// STRUCTURES message Empty { @@ -526,6 +530,46 @@ message DeleteFirewallRuleResponse { Status status = 1; } +message CapturedInterface { + CaptureInterfaceType interface_type = 1; + bytes filter = 2; + oneof spec { + bytes vf_name = 3; + uint32 pf_index = 4; + } +} + +message CaptureConfig { + IpAddress sink_node_ip = 1; + uint32 udp_src_port = 2; + uint32 udp_dst_port = 3; + repeated CapturedInterface interfaces = 4; +} + +message CaptureStartRequest { + CaptureConfig capture_config = 1; +} + +message CaptureStartResponse { + Status status = 1; +} + +message CaptureStopRequest { +} + +message CaptureStopResponse { + Status status = 1; + uint32 stopped_interface_cnt = 2; +} + +message CaptureStatusRequest { +} + +message CaptureStatusResponse { + Status status = 1; + bool is_active = 2; + CaptureConfig capture_config = 3; +} service 
DPDKonmetal { //// INITIALIZATION @@ -614,4 +658,9 @@ service DPDKonmetal { rpc CreateFirewallRule(CreateFirewallRuleRequest) returns (CreateFirewallRuleResponse) {} rpc GetFirewallRule(GetFirewallRuleRequest) returns (GetFirewallRuleResponse) {} rpc DeleteFirewallRule(DeleteFirewallRuleRequest) returns (DeleteFirewallRuleResponse) {} + + //// PACKET CAPTURE + rpc CaptureStart(CaptureStartRequest) returns (CaptureStartResponse) {} + rpc CaptureStop(CaptureStopRequest) returns (CaptureStopResponse) {} + rpc CaptureStatus(CaptureStatusRequest) returns (CaptureStatusResponse) {} } diff --git a/src/dp_cntrack.c b/src/dp_cntrack.c index 93e2e8e9d..3c5ec2a4b 100644 --- a/src/dp_cntrack.c +++ b/src/dp_cntrack.c @@ -75,17 +75,6 @@ static __rte_always_inline void dp_cntrack_tcp_state(struct flow_value *flow_val } -static __rte_always_inline bool dp_capture_offloaded_pkts(struct rte_mbuf *m, struct flow_value *flow_val, struct dp_flow *df) -{ - if (!offload_mode_enabled || - flow_val->offload_flags.orig == DP_FLOW_NON_OFFLOAD || flow_val->offload_flags.reply == DP_FLOW_NON_OFFLOAD) - return false; - - dp_graphtrace_capture_offload_pkt(m); - df->flags.offload_mark = DP_PKT_OFFLOAD_MARK; - return true; - -} static __rte_always_inline void dp_cntrack_init_flow_offload_flags(struct flow_value *flow_val, struct dp_flow *df) { @@ -265,8 +254,6 @@ static __rte_always_inline int dp_get_flow_val(struct rte_mbuf *m, struct dp_flo ) { // flow is the same as it was for the previous packet *p_flow_val = cached_flow_val; - if (dp_capture_offloaded_pkts(m, *p_flow_val, df)) - return DP_IS_CAPTURED_HW_PKT; dp_set_pkt_flow_direction(curr_key, cached_flow_val, df); dp_set_flow_offload_flag(m, cached_flow_val, df); return DP_OK; @@ -289,9 +276,6 @@ static __rte_always_inline int dp_get_flow_val(struct rte_mbuf *m, struct dp_flo return DP_OK; } - if (dp_capture_offloaded_pkts(m, *p_flow_val, df)) - return DP_IS_CAPTURED_HW_PKT; - // already established flow found 
dp_set_pkt_flow_direction(curr_key, *p_flow_val, df); dp_set_flow_offload_flag(m, *p_flow_val, df); @@ -306,8 +290,8 @@ int dp_cntrack_handle(struct rte_mbuf *m, struct dp_flow *df) int ret; ret = dp_get_flow_val(m, df, &flow_val); - if (DP_FAILED(ret) || ret == DP_IS_CAPTURED_HW_PKT) - return ret; // it is not really an error when ret == DP_IS_CAPTURED_HW_PKT, but we need to stop processing this pkt + if (DP_FAILED(ret)) + return ret; flow_val->timestamp = rte_rdtsc(); diff --git a/src/dp_port.c b/src/dp_port.c index 46281f3b1..90ce313d4 100644 --- a/src/dp_port.c +++ b/src/dp_port.c @@ -13,6 +13,7 @@ #include "nodes/rx_node.h" #include "rte_flow/dp_rte_flow_init.h" #include "rte_flow/dp_rte_flow.h" +#include "rte_flow/dp_rte_flow_capture.h" #include "monitoring/dp_graphtrace.h" static const struct rte_eth_conf port_conf_default = { @@ -180,6 +181,7 @@ static int dp_port_init_ethdev(uint16_t port_id, struct rte_eth_dev_info *dev_in nr_hairpin_queues = port_type == DP_PORT_VF ? DP_NR_VF_HAIRPIN_RX_TX_QUEUES : (DP_NR_PF_HAIRPIN_RX_TX_QUEUES + DP_NR_VF_HAIRPIN_RX_TX_QUEUES * dp_layer->num_of_vfs); + ret = rte_eth_dev_configure(port_id, DP_NR_STD_RX_QUEUES + nr_hairpin_queues, DP_NR_STD_TX_QUEUES + nr_hairpin_queues, @@ -454,58 +456,16 @@ static int dp_port_bind_port_hairpins(struct dp_port *port) return DP_OK; } -static int dp_vf_init_monitoring_rule_rollback(uint32_t port_id) -{ - struct dp_port *port = dp_port_get_vf(port_id); - struct rte_flow_error error; - - if (DP_FAILED(rte_flow_destroy(port->port_id, port->default_flow, &error))) { - DPS_LOG_ERR("Failed to destroy default flow while rollback from vf init monitoring rule installation", \ - DP_LOG_PORTID(port->port_id), DP_LOG_FLOW_ERROR(error.message)); - return DP_ERROR; - } - - if (DP_FAILED(dp_install_jump_rule_in_default_group(port->port_id, DP_RTE_FLOW_VNET_GROUP))) { - DPS_LOG_ERR("Failed to install default jump flow rule while rollback from vf init monitoring rule installation", \ - 
DP_LOG_PORTID(port->port_id)); - return DP_ERROR; - } - - return DP_OK; -} - static int dp_install_vf_init_rte_rules(uint32_t port_id) { int ret; - // at least one rule must be there, otherwise new packets cannot be delivered to software path - // same as the isolation rule on pf - if (dp_is_graphtrace_hw_enabled()) - ret = dp_install_jump_rule_in_default_group(port_id, DP_RTE_FLOW_MONITORING_GROUP); - else - ret = dp_install_jump_rule_in_default_group(port_id, DP_RTE_FLOW_VNET_GROUP); - + ret = dp_install_jump_rule_in_default_group(port_id, DP_RTE_FLOW_VNET_GROUP); if (DP_FAILED(ret)) { DPS_LOG_ERR("Cannot install default jump rule", DP_LOG_PORTID(port_id), DP_LOG_RET(ret)); return DP_ERROR; } - // this rule must be there, otherwise new packets cannot be delivered to software path, making communication failure - ret = dp_install_default_capture_rule_in_vnet_group(port_id); - if (DP_FAILED(ret)) { - DPS_LOG_ERR("Cannot install default capture rule in vnet group", DP_LOG_PORTID(port_id), DP_LOG_RET(ret)); - return DP_ERROR; - } - - ret = dp_install_default_rule_in_monitoring_group(port_id); - if (DP_FAILED(ret)) { - DPS_LOG_WARNING("Cannot install default rule in monitoring group", DP_LOG_PORTID(port_id), DP_LOG_RET(ret)); - if (DP_FAILED(dp_vf_init_monitoring_rule_rollback(port_id))) { - DPS_LOG_ERR("Cannot rollback from the monitoring rule installation on vf", DP_LOG_PORTID(port_id)); - return DP_ERROR; - } - } - return DP_OK; } @@ -560,6 +520,7 @@ int dp_port_start(uint16_t port_id) port->link_status = RTE_ETH_LINK_UP; port->allocated = true; + return DP_OK; } @@ -571,6 +532,9 @@ int dp_port_stop(uint16_t port_id) if (!port) return DP_ERROR; + if (DP_FAILED(dp_destroy_default_flow(port))) + return DP_ERROR; + if (DP_FAILED(dp_stop_eth_port(port_id))) return DP_ERROR; diff --git a/src/grpc/dp_async_grpc.cpp b/src/grpc/dp_async_grpc.cpp index 5f424176c..76843b623 100644 --- a/src/grpc/dp_async_grpc.cpp +++ b/src/grpc/dp_async_grpc.cpp @@ -541,6 +541,112 @@ void 
CreateNatCall::ParseReply(struct dpgrpc_reply* reply) reply_.set_underlay_route(strbuf); } +const char* CaptureStopCall::FillRequest(__rte_unused struct dpgrpc_request* request) +{ + DPGRPC_LOG_INFO("Stopping packet capture"); + + return NULL; +} +void CaptureStopCall::ParseReply(struct dpgrpc_reply* reply) +{ + reply_.set_stopped_interface_cnt((uint32_t)reply->capture_stop.port_cnt); +} + +const char* CaptureStatusCall::FillRequest(__rte_unused struct dpgrpc_request* request) +{ + DPGRPC_LOG_INFO("Getting packet capturing operation's status"); + + return NULL; +} +void CaptureStatusCall::ParseReply(struct dpgrpc_reply* reply) +{ + const struct dpgrpc_capture &capture_get = reply->capture_get; + CaptureConfig *capture_config = new CaptureConfig(); + CapturedInterface *grpc_iface; + char strbuf[INET6_ADDRSTRLEN]; + IpAddress *sink_ip; + + if (!capture_get.is_active) { + reply_.set_is_active(false); + } else { + reply_.set_is_active(true); + capture_config->set_udp_src_port(capture_get.udp_src_port); + capture_config->set_udp_dst_port(capture_get.udp_dst_port); + + sink_ip = new IpAddress(); + inet_ntop(AF_INET6, capture_get.dst_addr6, strbuf, sizeof(strbuf)); + sink_ip->set_address(strbuf); + sink_ip->set_ipver(IpVersion::IPV6); + capture_config->set_allocated_sink_node_ip(sink_ip); + + for (int i = 0; i < capture_get.interface_count; ++i) { + grpc_iface = capture_config->add_interfaces(); + switch (capture_get.interfaces[i].type) { + case DP_CAPTURE_IFACE_TYPE_SINGLE_PF: + grpc_iface->set_interface_type(CaptureInterfaceType::SINGLE_PF); + grpc_iface->set_pf_index(capture_get.interfaces[i].spec.pf_index); + break; + case DP_CAPTURE_IFACE_TYPE_SINGLE_VF: + grpc_iface->set_interface_type(CaptureInterfaceType::SINGLE_VF); + grpc_iface->set_vf_name(capture_get.interfaces[i].spec.iface_id); + break; + } + } + reply_.set_allocated_capture_config(capture_config); + } +} + +const char* CaptureStartCall::FillRequest(struct dpgrpc_request* request) +{ + + 
DPGRPC_LOG_INFO("Starting packet capture", + DP_LOG_IPV6STR(request_.capture_config().sink_node_ip().address().c_str()), + DP_LOG_PORT(request_.capture_config().udp_src_port()), + DP_LOG_PORT(request_.capture_config().udp_dst_port())); + + if (request_.capture_config().udp_src_port() > UINT16_MAX) + return "Invalid udp_src_port"; + if (request_.capture_config().udp_dst_port() > UINT16_MAX) + return "Invalid udp_dst_port"; + if (!GrpcConv::StrToIpv6(request_.capture_config().sink_node_ip().address(), request->capture_start.dst_addr6)) + return "Invalid sink_node_ip"; + + request->capture_start.udp_src_port = request_.capture_config().udp_src_port(); + request->capture_start.udp_dst_port = request_.capture_config().udp_dst_port(); + + if (request_.capture_config().interfaces_size() > DP_CAPTURE_MAX_PORT_NUM) + return "Too many interfaces to be captured"; + + request->capture_start.interface_count = 0; + for (int i = 0; i < request_.capture_config().interfaces_size(); ++i) { + if (!GrpcConv::GrpcToDpCaptureInterfaceType(request_.capture_config().interfaces(i).interface_type(), &request->capture_start.interfaces[i].type)) { + return "Invalid interfaces.interface_type"; + } + + switch (request->capture_start.interfaces[i].type) { + case DP_CAPTURE_IFACE_TYPE_SINGLE_VF: + DPGRPC_LOG_INFO("Set packet capture interface vf", + DP_LOG_PORT_TYPE(request_.capture_config().interfaces(i).interface_type()), + DP_LOG_IFACE(request_.capture_config().interfaces(i).vf_name().c_str())); + if (SNPRINTF_FAILED(request->capture_start.interfaces[i].spec.iface_id, request_.capture_config().interfaces(i).vf_name())) + return "Invalid interface_id"; + break; + case DP_CAPTURE_IFACE_TYPE_SINGLE_PF: + DPGRPC_LOG_INFO("Set packet capture interface pf", + DP_LOG_PORT_TYPE(request_.capture_config().interfaces(i).interface_type()), + DP_LOG_IFACE_INDEX(request_.capture_config().interfaces(i).pf_index())); + request->capture_start.interfaces[i].spec.pf_index = 
request_.capture_config().interfaces(i).pf_index(); + break; + } + + request->capture_start.interface_count++; + } + return NULL; +} +void CaptureStartCall::ParseReply(__rte_unused struct dpgrpc_reply* reply) +{ +} + const char* GetNatCall::FillRequest(struct dpgrpc_request* request) { DPGRPC_LOG_INFO("Getting NAT IP", diff --git a/src/grpc/dp_grpc_conv.cpp b/src/grpc/dp_grpc_conv.cpp index c2f4713c1..da89f52b8 100644 --- a/src/grpc/dp_grpc_conv.cpp +++ b/src/grpc/dp_grpc_conv.cpp @@ -110,6 +110,34 @@ bool GrpcToDpFwallPort(int32_t grpc_port, uint32_t *dp_port) return true; } +bool GrpcToDpCaptureInterfaceType(const CaptureInterfaceType& grpc_type, enum dpgrpc_capture_iface_type *dp_capture_iface_type) +{ + switch (grpc_type) { + case CaptureInterfaceType::SINGLE_PF: + *dp_capture_iface_type = DP_CAPTURE_IFACE_TYPE_SINGLE_PF; + return true; + case CaptureInterfaceType::SINGLE_VF: + *dp_capture_iface_type = DP_CAPTURE_IFACE_TYPE_SINGLE_VF; + return true; + default: + return false; + } +} + +bool DpCaptureInterfaceTypeToGrpc(CaptureInterfaceType& grpc_type, enum dpgrpc_capture_iface_type dp_capture_iface_type) +{ + switch (dp_capture_iface_type) { + case DP_CAPTURE_IFACE_TYPE_SINGLE_PF: + grpc_type = CaptureInterfaceType::SINGLE_PF; + return true; + case DP_CAPTURE_IFACE_TYPE_SINGLE_VF: + grpc_type = CaptureInterfaceType::SINGLE_VF; + return true; + default: + return false; + } +} + const char *Ipv4ToStr(uint32_t ipv4) { struct in_addr addr = { diff --git a/src/grpc/dp_grpc_impl.c b/src/grpc/dp_grpc_impl.c index 6f75a43e8..e6de0c96f 100644 --- a/src/grpc/dp_grpc_impl.c +++ b/src/grpc/dp_grpc_impl.c @@ -16,6 +16,8 @@ #include "dpdk_layer.h" #include "grpc/dp_grpc_api.h" #include "grpc/dp_grpc_responder.h" +#include "monitoring/dp_monitoring.h" +#include "rte_flow/dp_rte_flow_capture.h" static uint32_t pfx_counter = 0; @@ -882,6 +884,110 @@ static int dp_process_get_version(struct dp_grpc_responder *responder) return DP_GRPC_OK; } +static int 
dp_process_capture_start(struct dp_grpc_responder *responder) +{ + struct dpgrpc_capture *request = &responder->request.capture_start; + int port_id = -1; + int status = DP_GRPC_OK; + + if (!dp_conf_is_offload_enabled()) + return DP_GRPC_ERR_NOT_ACTIVE; + + if (dp_is_capture_enabled()) + return DP_GRPC_ERR_ALREADY_ACTIVE; + + dp_set_capture_hdr_config(request->dst_addr6, request->udp_src_port, request->udp_dst_port); + + for (int i = 0; i < request->interface_count; ++i) { + switch (request->interfaces[i].type) { + case DP_CAPTURE_IFACE_TYPE_SINGLE_VF: + port_id = dp_get_portid_with_vm_handle(request->interfaces[i].spec.iface_id); + break; + case DP_CAPTURE_IFACE_TYPE_SINGLE_PF: + if (request->interfaces[i].spec.pf_index >= DP_MAX_PF_PORTS) + return DP_GRPC_ERR_NOT_FOUND; + port_id = request->interfaces[i].spec.pf_index == 0 ? dp_port_get_pf0_id() : dp_port_get_pf1_id(); + break; + } + + if (DP_FAILED(port_id)) { + DPS_LOG_WARNING("Got invalid port id when initializing capturing", DP_LOG_PORTID(port_id)); + status = DP_GRPC_ERR_NOT_FOUND; + break; + } + + status = dp_enable_port_offload_pkt_capture(port_id); + if (DP_FAILED(status)) // stop continuing to turn on offload capture on other interfaces, if capturing init failed on any port. abort and rollback. 
+ break; + } + + // try to turn off capture on all interfaces if any of them failed to turn on + if (DP_FAILED(status)) { + if (DP_FAILED(dp_disable_pkt_capture_on_all_ifaces())) + status = DP_GRPC_ERR_ROLLBACK; + } else + dp_set_capture_enabled(true); + + return status; +} + +static int dp_process_capture_stop(struct dp_grpc_responder *responder) +{ + struct dpgrpc_capture_stop *reply = dp_grpc_single_reply(responder); + int ret; + + if (!dp_is_capture_enabled()) + return DP_GRPC_ERR_NOT_ACTIVE; + + ret = dp_disable_pkt_capture_on_all_ifaces(); + if (DP_FAILED(ret)) { + DPS_LOG_ERR("Failed to stop packet capture on all interfaces"); // it is problematic that we cannot rollback here + return ret; + } + + reply->port_cnt = ret; + dp_set_capture_enabled(false); + return DP_GRPC_OK; +} + +static int dp_process_capture_get_status(struct dp_grpc_responder *responder) +{ + struct dpgrpc_capture *reply = dp_grpc_single_reply(responder); + struct dp_ports *ports = get_dp_ports(); + const struct dp_capture_hdr_config *capture_hdr_config = dp_get_capture_hdr_config(); + int count = 0; + + reply->is_active = dp_is_capture_enabled(); + + if (reply->is_active) { + DP_FOREACH_PORT(ports, port) { + if (port->allocated && port->captured) { + if (port->port_type == DP_PORT_PF) { + reply->interfaces[count].type = DP_CAPTURE_IFACE_TYPE_SINGLE_PF; + reply->interfaces[count].spec.pf_index = port->port_id == dp_port_get_pf0_id() ? 
0 : 1; + } else { + reply->interfaces[count].type = DP_CAPTURE_IFACE_TYPE_SINGLE_VF; + memcpy(reply->interfaces[count].spec.iface_id, dp_get_vm_machineid(port->port_id), sizeof(port->port_name)); + } + count++; + } + // it shouldnot never happen, but just in case + if (count >= DP_CAPTURE_MAX_PORT_NUM) { + DPS_LOG_ERR("Unexpected too many interfaces are captured"); + return DP_GRPC_ERR_LIMIT_REACHED; + } + } + } + + memcpy(reply->dst_addr6, capture_hdr_config->capture_node_ipv6_addr, sizeof(reply->dst_addr6)); + reply->udp_src_port = capture_hdr_config->capture_udp_src_port; + reply->udp_dst_port = capture_hdr_config->capture_udp_dst_port; + reply->interface_count = count; + + return DP_GRPC_OK; +} + + void dp_process_request(struct rte_mbuf *m) { struct dp_grpc_responder responder; @@ -1002,6 +1108,15 @@ void dp_process_request(struct rte_mbuf *m) case DP_REQ_TYPE_ResetVni: ret = dp_process_reset_vni(&responder); break; + case DP_REQ_TYPE_CaptureStart: + ret = dp_process_capture_start(&responder); + break; + case DP_REQ_TYPE_CaptureStop: + ret = dp_process_capture_stop(&responder); + break; + case DP_REQ_TYPE_CaptureStatus: + ret = dp_process_capture_get_status(&responder); + break; // DP_REQ_TYPE_CheckInitialized is handled by the gRPC thread default: ret = DP_GRPC_ERR_BAD_REQUEST; @@ -1009,7 +1124,7 @@ void dp_process_request(struct rte_mbuf *m) } if (DP_FAILED(ret)) { - // as gRPC errors are explicitely defined due to API reasons + // as gRPC errors are explicitly defined due to API reasons // extract the proper value from the standard (negative) retvals ret = dp_errcode_to_grpc_errcode(ret); DPGRPC_LOG_WARNING("Failed request", DP_LOG_GRPCREQUEST(responder.request.type), DP_LOG_GRPCRET(ret)); diff --git a/src/grpc/dp_grpc_service.cpp b/src/grpc/dp_grpc_service.cpp index a3cda6bfa..bba37a624 100644 --- a/src/grpc/dp_grpc_service.cpp +++ b/src/grpc/dp_grpc_service.cpp @@ -115,6 +115,9 @@ void GRPCService::HandleRpcs() new CheckVniInUseCall(); new ResetVniCall(); 
new GetVersionCall(); + new CaptureStartCall(); + new CaptureStopCall(); + new CaptureStatusCall(); while (cq_->Next(&tag, &ok) && ok) { call = static_cast(tag); diff --git a/src/meson.build b/src/meson.build index 160142205..a48dee806 100644 --- a/src/meson.build +++ b/src/meson.build @@ -34,6 +34,7 @@ dp_sources = [ 'rte_flow/dp_rte_flow.c', 'rte_flow/dp_rte_flow_init.c', 'rte_flow/dp_rte_flow_traffic_forward.c', + 'rte_flow/dp_rte_flow_capture.c', 'dp_argparse.c', 'dp_conf.c', 'dp_error.c', diff --git a/src/monitoring/dp_event.c b/src/monitoring/dp_event.c index a1c6f9aa2..5f2a6de84 100644 --- a/src/monitoring/dp_event.c +++ b/src/monitoring/dp_event.c @@ -81,18 +81,6 @@ void dp_process_event_link_msg(struct rte_mbuf *m) DPS_LOG_WARNING("Cannot set link status", DP_LOG_PORTID(port_id), DP_LOG_VALUE(status)); } -void dp_process_event_hardware_capture_start_msg(__rte_unused struct rte_mbuf *m) -{ - if (DP_FAILED(dp_turn_on_vf_offload_tracing())) - DPS_LOG_WARNING("Cannot turn on offload tracing"); -} - -void dp_process_event_hardware_capture_stop_msg(__rte_unused struct rte_mbuf *m) -{ - if (DP_FAILED(dp_turn_off_vf_offload_tracing())) - DPS_LOG_WARNING("Cannot turn off offload tracing"); -} - // Flow-aging message - sent periodically to age-out conntracked flows int dp_send_event_flow_aging_msg(void) @@ -105,27 +93,6 @@ int dp_send_event_flow_aging_msg(void) return dp_send_event_msg(&flow_aging_msg); } -int dp_send_event_hardware_capture_start_msg(void) -{ - struct dp_event_msg graphtrace_start_msg = { - .msg_head = { - .type = DP_EVENT_TYPE_HARDWARE_CAPTURE_START, - }, - }; - return dp_send_event_msg(&graphtrace_start_msg); -} - -int dp_send_event_hardware_capture_stop_msg(void) -{ - struct dp_event_msg graphtrace_start_msg = { - .msg_head = { - .type = DP_EVENT_TYPE_HARDWARE_CAPTURE_STOP, - }, - }; - return dp_send_event_msg(&graphtrace_start_msg); -} - - void dp_process_event_flow_aging_msg(__rte_unused struct rte_mbuf *m) { if (dp_conf_is_offload_enabled()) { 
diff --git a/src/monitoring/dp_graphtrace.c b/src/monitoring/dp_graphtrace.c index 7f0354848..a12e92818 100644 --- a/src/monitoring/dp_graphtrace.c +++ b/src/monitoring/dp_graphtrace.c @@ -22,10 +22,8 @@ static enum dp_graphtrace_loglevel graphtrace_loglevel; int _dp_graphtrace_flags; bool _dp_graphtrace_enabled = false; -bool _dp_graphtrace_hw_enabled = false; static struct dp_graphtrace graphtrace; -static bool offload_enabled; static bool nodename_filtered; static regex_t nodename_re; static bool bpf_filtered; @@ -62,8 +60,6 @@ static int dp_graphtrace_init_memory(void) return DP_ERROR; } - offload_enabled = dp_conf_is_offload_enabled(); - return DP_OK; } @@ -80,7 +76,6 @@ static void dp_graphtrace_free_memory(void) static int dp_handle_graphtrace_start(const struct dp_graphtrace_mp_request *request) { struct dp_graphtrace_params *filters = (struct dp_graphtrace_params *)graphtrace.filters->addr; - int ret; // there are additional parameters in shared memory (cannot fit into the request) if (!DP_IS_NUL_TERMINATED(filters->node_regex) @@ -102,30 +97,6 @@ static int dp_handle_graphtrace_start(const struct dp_graphtrace_mp_request *req } } - // not making the error code better since 'start.hw' branch will be removed anyway - if (request->params.start.hw) { - if (!offload_enabled) { - if (nodename_filtered) - regfree(&nodename_re); - if (bpf_filtered) - dp_free_bpf(&bpf); - return -EPERM; - } - - ret = dp_send_event_hardware_capture_start_msg(); - if (DP_FAILED(ret)) { - DPS_LOG_ERR("Cannot send hardware capture start message"); - if (nodename_filtered) - regfree(&nodename_re); - if (bpf_filtered) - dp_free_bpf(&bpf); - return ret; - } - - _dp_graphtrace_hw_enabled = true; - DPS_LOG_INFO("Offloaded packet tracing enabled"); - } - _dp_graphtrace_flags = 0; if (request->params.start.drops) _dp_graphtrace_flags |= DP_GRAPHTRACE_FLAG_DROPS; @@ -147,14 +118,7 @@ static int dp_handle_graphtrace_stop(void) dp_free_bpf(&bpf); DPS_LOG_INFO("Graphtrace disabled"); } - if 
(_dp_graphtrace_hw_enabled) { - if (DP_FAILED(dp_send_event_hardware_capture_stop_msg())) { - DPS_LOG_ERR("Cannot send hardware capture stop message"); - return DP_ERROR; - } - _dp_graphtrace_hw_enabled = false; - DPS_LOG_INFO("Offloaded packet tracing disabled"); - } + return DP_OK; } @@ -244,8 +208,7 @@ bool dp_is_node_match(regex_t *re, const struct rte_node *node, const struct rte return false; } -void _dp_graphtrace_send(enum dp_graphtrace_pkt_type type, - const struct rte_node *node, +void _dp_graphtrace_send(const struct rte_node *node, const struct rte_node *next_node, void **objs, uint16_t nb_objs, uint16_t dst_port_id) @@ -267,10 +230,10 @@ void _dp_graphtrace_send(enum dp_graphtrace_pkt_type type, // this prevent unnecessary copying and immediate freeing after enqueue() fails break; } + dups[nb_dups++] = dup; pktinfo = dp_get_graphtrace_pktinfo(dup); pktinfo->pktid = dp_get_pkt_mark(objs[i])->id; - pktinfo->pkt_type = type; pktinfo->node = node; pktinfo->next_node = next_node; pktinfo->dst_port_id = dst_port_id; diff --git a/src/monitoring/dp_monitoring.c b/src/monitoring/dp_monitoring.c index 30d0a0e13..01cdcb186 100644 --- a/src/monitoring/dp_monitoring.c +++ b/src/monitoring/dp_monitoring.c @@ -3,6 +3,10 @@ #include "monitoring/dp_event.h" +static struct dp_capture_hdr_config capture_hdr_config = {0}; +static bool capture_enabled = false; + + void dp_process_event_msg(struct rte_mbuf *m) { struct dp_event_msg *event_msg = rte_pktmbuf_mtod(m, struct dp_event_msg *); @@ -14,15 +18,29 @@ void dp_process_event_msg(struct rte_mbuf *m) case DP_EVENT_TYPE_FLOW_AGING: dp_process_event_flow_aging_msg(m); break; - case DP_EVENT_TYPE_HARDWARE_CAPTURE_START: - dp_process_event_hardware_capture_start_msg(m); - break; - case DP_EVENT_TYPE_HARDWARE_CAPTURE_STOP: - dp_process_event_hardware_capture_stop_msg(m); - break; - default: - DPS_LOG_WARNING("Unknown monitoring status message type", DP_LOG_VALUE(event_msg->msg_head.type)); } rte_pktmbuf_free(m); } + +void 
dp_set_capture_hdr_config(uint8_t *addr, uint32_t udp_src_port, uint32_t udp_dst_port) +{ + rte_memcpy(capture_hdr_config.capture_node_ipv6_addr, addr, sizeof(capture_hdr_config.capture_node_ipv6_addr)); + capture_hdr_config.capture_udp_src_port = udp_src_port; + capture_hdr_config.capture_udp_dst_port = udp_dst_port; +} + +const struct dp_capture_hdr_config *dp_get_capture_hdr_config(void) +{ + return &capture_hdr_config; +} + +void dp_set_capture_enabled(bool enabled) +{ + capture_enabled = enabled; +} + +bool dp_is_capture_enabled(void) +{ + return capture_enabled; +} diff --git a/src/nodes/conntrack_node.c b/src/nodes/conntrack_node.c index fe6929f93..7a7822817 100644 --- a/src/nodes/conntrack_node.c +++ b/src/nodes/conntrack_node.c @@ -51,7 +51,6 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod { struct dp_flow *df = dp_get_flow_ptr(m); struct rte_ipv4_hdr *ipv4_hdr = dp_get_ipv4_hdr(m); - int ret; dp_extract_ipv4_header(df, ipv4_hdr); @@ -68,8 +67,7 @@ static __rte_always_inline rte_edge_t get_next_index(__rte_unused struct rte_nod || df->l4_type == IPPROTO_UDP || df->l4_type == IPPROTO_ICMP ) { - ret = dp_cntrack_handle(m, df); - if (DP_FAILED(ret) || (ret == DP_IS_CAPTURED_HW_PKT)) + if (DP_FAILED(dp_cntrack_handle(m, df))) return CONNTRACK_NEXT_DROP; } else { return CONNTRACK_NEXT_DROP; diff --git a/src/nodes/rx_node.c b/src/nodes/rx_node.c index 2b59306d8..041ee0f1f 100644 --- a/src/nodes/rx_node.c +++ b/src/nodes/rx_node.c @@ -78,7 +78,6 @@ static int rx_node_init(const struct rte_graph *graph, struct rte_node *node) // save pointer to this node's context for enabling/disabling node_contexts[port_id] = ctx; - ctx->port_id = port_id; ctx->queue_id = graph->id; ctx->enabled = false; diff --git a/src/rte_flow/dp_rte_flow_capture.c b/src/rte_flow/dp_rte_flow_capture.c new file mode 100644 index 000000000..a32d40bd1 --- /dev/null +++ b/src/rte_flow/dp_rte_flow_capture.c @@ -0,0 +1,319 @@ +#include 
"rte_flow/dp_rte_flow_capture.h" + +#include "dp_error.h" +#include "dp_log.h" +#include "rte_flow/dp_rte_flow_helpers.h" +#include "dp_conf.h" +#include "monitoring/dp_monitoring.h" + +#define DP_RTE_FLOW_CAPTURE_PKT_HDR_SIZE (sizeof(struct rte_ether_hdr) \ + + sizeof(struct rte_ipv6_hdr) \ + + sizeof(struct rte_udp_hdr)) + +// this attribute value is used to install a flow rule in the default group of a VF to switch between the capturing group and vnet group +static const struct rte_flow_attr dp_flow_attr_default_jump_ingress = { + .group = DP_RTE_FLOW_DEFAULT_GROUP, + .priority = 0, + .ingress = 0, + .egress = 0, + .transfer = 1, +}; + +// this attribute value is used to install the flow capturing rule into the capturing group +// transfer flag is set to allow the port action +static const struct rte_flow_attr dp_flow_attr_default_capture_ingress = { + .group = DP_RTE_FLOW_CAPTURE_GROUP, + .priority = 0, + .ingress = 0, + .egress = 0, + .transfer = 1, +}; + +int dp_install_jump_rule_in_default_group(uint16_t port_id, uint32_t dst_group) +{ + struct rte_flow_item pattern[2]; // first is a NULL ethernet header matching, second is the end + int pattern_cnt = 0; + + // jump action from default group to capturing group + struct rte_flow_action_jump jump_action; // #1 + struct rte_flow_action action[2]; // + end + int action_cnt = 0; + + struct rte_flow *flow; + struct dp_port *port; + + port = dp_port_get(port_id); + if (!port) + return DP_ERROR; + + // all ethernet packets + dp_set_eth_match_all_item(&pattern[pattern_cnt++]); + dp_set_end_flow_item(&pattern[pattern_cnt++]); + + // create actions that jump from the default group + // create jump action + dp_set_jump_group_action(&action[action_cnt++], &jump_action, dst_group); + + // end actions + dp_set_end_action(&action[action_cnt++]); + + // validate and install flow rule + flow = dp_install_rte_flow(port_id, &dp_flow_attr_default_jump_ingress, pattern, action); + if (!flow) + return DP_ERROR; + + 
port->default_jump_flow = flow; + + DPS_LOG_DEBUG("Installed the default jumping flow rule that destinated to group", DP_LOG_PORTID(port_id), DP_LOG_RTE_GROUP(dst_group)); + return DP_OK; +} + +void dp_configure_pkt_capture_action(uint8_t *encaped_mirror_hdr, + struct rte_flow_action_raw_encap *encap_action, + struct rte_flow_action_port_id *port_id_action, + struct rte_flow_action *sub_action) +{ + struct rte_ether_hdr *encap_eth_hdr = (struct rte_ether_hdr *)encaped_mirror_hdr; + struct rte_ipv6_hdr *new_ipv6_hdr = (struct rte_ipv6_hdr *)(&encaped_mirror_hdr[sizeof(struct rte_ether_hdr)]); + struct rte_udp_hdr *udp_hdr = (struct rte_udp_hdr *)(&encaped_mirror_hdr[sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv6_hdr)]); + int sub_action_cnt = 0; + uint16_t outgoing_port_id = dp_port_get_pf0_id(); + const struct dp_capture_hdr_config *capture_hdr_config = dp_get_capture_hdr_config(); + + rte_ether_addr_copy(dp_get_neigh_mac(outgoing_port_id), &encap_eth_hdr->dst_addr); + rte_ether_addr_copy(dp_get_mac(outgoing_port_id), &encap_eth_hdr->src_addr); + encap_eth_hdr->ether_type = htons(RTE_ETHER_TYPE_IPV6); + + rte_memcpy(new_ipv6_hdr->src_addr, dp_conf_get_underlay_ip(), sizeof(new_ipv6_hdr->src_addr)); + rte_memcpy(new_ipv6_hdr->dst_addr, capture_hdr_config->capture_node_ipv6_addr, sizeof(new_ipv6_hdr->dst_addr)); + new_ipv6_hdr->vtc_flow = htonl(DP_IP6_VTC_FLOW); + new_ipv6_hdr->payload_len = 0; + new_ipv6_hdr->proto = DP_IP_PROTO_UDP; + new_ipv6_hdr->hop_limits = DP_IP6_HOP_LIMIT; + + + udp_hdr->dst_port = htons(capture_hdr_config->capture_udp_dst_port); + udp_hdr->src_port = htons(capture_hdr_config->capture_udp_src_port); + udp_hdr->dgram_cksum = 0; + + dp_set_raw_encap_action(&sub_action[sub_action_cnt++], encap_action, encaped_mirror_hdr, DP_RTE_FLOW_CAPTURE_PKT_HDR_SIZE); + dp_set_send_to_port_action(&sub_action[sub_action_cnt++], port_id_action, outgoing_port_id); // must be a pf port here + dp_set_end_action(&sub_action[sub_action_cnt++]); +} + + 
+static int dp_install_default_rule_in_capture_group(uint16_t port_id, bool capture_on) +{ + + struct rte_flow_item pattern[2]; // first is a NULL ethernet header matching, second is the end + int pattern_cnt = 0; + + struct rte_flow_action_sample sample_action; // #1 + struct rte_flow_action_jump jump_action; // #2 + struct rte_flow_action action[3]; // + end + int action_cnt = 0; + + struct rte_flow_action_raw_encap encap_action; // #1 + struct rte_flow_action_port_id port_id_action; // #2 + struct rte_flow_action sub_action[3]; // + end + + struct rte_flow *flow; + struct dp_port *port; + uint8_t raw_encap_hdr[DP_RTE_FLOW_CAPTURE_PKT_HDR_SIZE]; + + port = dp_port_get(port_id); + if (!port) + return DP_ERROR; + + // all ethernet packets + dp_set_eth_match_all_item(&pattern[pattern_cnt++]); + dp_set_end_flow_item(&pattern[pattern_cnt++]); + + // create actions + // create sampling action + if (capture_on) { + dp_configure_pkt_capture_action(raw_encap_hdr, &encap_action, &port_id_action, sub_action); + dp_set_sample_action(&action[action_cnt++], &sample_action, 1, sub_action); // sampling with a ratio less than 1 is not allowed in the eSwitch domain + } + + // create jump group action + dp_set_jump_group_action(&action[action_cnt++], &jump_action, DP_RTE_FLOW_VNET_GROUP); // jump to group DP_RTE_FLOW_VNET_GROUP + + // end actions + dp_set_end_action(&action[action_cnt++]); + + // validate and install flow rule + flow = dp_install_rte_flow(port_id, &dp_flow_attr_default_capture_ingress, pattern, action); + if (!flow) { + DPS_LOG_WARNING("Failed to install default monitoring flow rule", DP_LOG_PORTID(port_id)); + return DP_ERROR; + } + + port->default_capture_flow = flow; + + DPS_LOG_DEBUG("Installed the default monitoring flow rule", DP_LOG_PORTID(port_id)); + return DP_OK; +} + + +int dp_destroy_default_flow(struct dp_port *port) +{ + struct rte_flow_error error; + int ret; + + if (port->default_jump_flow) { + ret = rte_flow_destroy(port->port_id, 
port->default_jump_flow, &error); + if (DP_FAILED(ret)) { + DPS_LOG_WARNING("Failed to destroy default jump flow", DP_LOG_PORTID(port->port_id), DP_LOG_RET(ret)); + return DP_ERROR; + } + } + + if (port->default_capture_flow) { + ret = rte_flow_destroy(port->port_id, port->default_capture_flow, &error); + if (DP_FAILED(ret)) { + DPS_LOG_WARNING("Failed to destroy default capture flow", DP_LOG_PORTID(port->port_id), DP_LOG_RET(ret)); + return DP_ERROR; + } + } + + return DP_OK; +} + +static int dp_install_pf_default_flow(struct dp_port *port, bool capture_on) +{ + int ret; + + ret = dp_install_default_rule_in_capture_group(port->port_id, capture_on); + if (DP_FAILED(ret)) { + DPS_LOG_WARNING("Failed to install default flow", DP_LOG_PORTID(port->port_id), DP_LOG_RET(ret)); + return DP_ERROR; + } + + return DP_OK; +} + +static int dp_install_vf_default_jump_flow(struct dp_port *port, uint32_t dst_group) +{ + int ret; + + ret = dp_install_jump_rule_in_default_group(port->port_id, dst_group); + if (DP_FAILED(ret)) { + DPS_LOG_WARNING("Failed to install default jump flow", DP_LOG_PORTID(port->port_id), DP_LOG_RET(ret)); + return DP_ERROR; + } + + return DP_OK; +} + +static int dp_install_vf_default_capture_flow(struct dp_port *port) +{ + int ret; + + ret = dp_install_default_rule_in_capture_group(port->port_id, true); + if (DP_FAILED(ret)) { + DPS_LOG_WARNING("Failed to install default capture flow", DP_LOG_PORTID(port->port_id), DP_LOG_RET(ret)); + return DP_ERROR; + } + + return DP_OK; +} + +static int dp_turn_on_offload_pkt_capture(struct dp_port *port) +{ + if (!port || !port->allocated) + return DP_GRPC_ERR_NO_VM; + + if (port->captured) + return DP_GRPC_ERR_ALREADY_ACTIVE; + + if (DP_FAILED(dp_destroy_default_flow(port))) + return DP_GRPC_ERR_RTE_RULE_DEL; + + switch (port->port_type) { + case DP_PORT_PF: + if (DP_FAILED(dp_install_pf_default_flow(port, true))) + return DP_GRPC_ERR_RTE_RULE_ADD; + break; + case DP_PORT_VF: + if 
(DP_FAILED(dp_install_vf_default_jump_flow(port, DP_RTE_FLOW_CAPTURE_GROUP))) + return DP_GRPC_ERR_RTE_RULE_ADD; + // rollback flow rules if failed on the second one for VF. + if (DP_FAILED(dp_install_vf_default_capture_flow(port))) { + if (DP_FAILED(dp_destroy_default_flow(port))) { + DPS_LOG_ERR("Failed to recover from turning capturing on by destroying previously installed default rule", DP_LOG_PORTID(port->port_id)); + return DP_GRPC_ERR_ROLLBACK; + } + if (DP_FAILED(dp_install_vf_default_jump_flow(port, DP_RTE_FLOW_VNET_GROUP))) { + DPS_LOG_ERR("Failed to recover from turning capturing on by installing default jump rule to the vnet group", DP_LOG_PORTID(port->port_id)); + return DP_GRPC_ERR_ROLLBACK; + } + return DP_GRPC_ERR_RTE_RULE_ADD; + } + break; + } + + port->captured = true; + return DP_GRPC_OK; +} + +static int dp_turn_off_offload_pkt_capture(struct dp_port *port) +{ + if (!port || !port->allocated) + return DP_GRPC_ERR_NO_VM; + + if (!port->captured) + return DP_GRPC_ERR_NOT_ACTIVE; + + if (DP_FAILED(dp_destroy_default_flow(port))) + return DP_GRPC_ERR_RTE_RULE_DEL; + + switch (port->port_type) { + case DP_PORT_PF: + if (DP_FAILED(dp_install_pf_default_flow(port, false))) + return DP_GRPC_ERR_RTE_RULE_ADD; + break; + case DP_PORT_VF: + if (DP_FAILED(dp_install_vf_default_jump_flow(port, DP_RTE_FLOW_VNET_GROUP))) { + // rollback does not make sense here, but rather to report the error. because the default operation should be without capturing. 
+ DPS_LOG_ERR("Failed to turn capturing off by installing default jump rule to the vnet group on vf", DP_LOG_PORTID(port->port_id)); + return DP_GRPC_ERR_RTE_RULE_ADD; + } + + break; + } + + port->captured = false; + return DP_OK; +} + +int dp_enable_port_offload_pkt_capture(uint16_t port_id) +{ + struct dp_port *port = dp_port_get(port_id); + + return dp_turn_on_offload_pkt_capture(port); +} + +int dp_disable_port_offload_pkt_capture(uint16_t port_id) +{ + struct dp_port *port = dp_port_get(port_id); + + return dp_turn_off_offload_pkt_capture(port); +} + +int dp_disable_pkt_capture_on_all_ifaces(void) +{ + struct dp_ports *ports = get_dp_ports(); + int count = 0; + int ret; + + DP_FOREACH_PORT(ports, port) { + if (port->allocated && port->captured) { + ret = dp_turn_off_offload_pkt_capture(port); + if (DP_FAILED(ret)) + return ret; + count++; + } + } + return count; +} diff --git a/src/rte_flow/dp_rte_flow_init.c b/src/rte_flow/dp_rte_flow_init.c index 976137e5d..e16f052c8 100644 --- a/src/rte_flow/dp_rte_flow_init.c +++ b/src/rte_flow/dp_rte_flow_init.c @@ -3,6 +3,12 @@ #include "dp_error.h" #include "dp_log.h" #include "rte_flow/dp_rte_flow_helpers.h" +#include "dp_conf.h" +#include "monitoring/dp_monitoring.h" + +#define DP_RTE_FLOW_CAPTURE_PKT_HDR_SIZE (sizeof(struct rte_ether_hdr) \ + + sizeof(struct rte_ipv6_hdr) \ + + sizeof(struct rte_udp_hdr)) static const struct rte_flow_attr dp_flow_attr_prio_ingress = { .group = 0, @@ -12,29 +18,6 @@ static const struct rte_flow_attr dp_flow_attr_prio_ingress = { .transfer = 0, }; -static const struct rte_flow_attr dp_flow_attr_default_jump_ingress = { - .group = DP_RTE_FLOW_DEFAULT_GROUP, - .priority = 1, - .ingress = 0, - .egress = 0, - .transfer = 1, -}; - -static const struct rte_flow_attr dp_flow_attr_default_monitoring_ingress = { - .group = DP_RTE_FLOW_MONITORING_GROUP, - .priority = 3, - .ingress = 0, - .egress = 0, - .transfer = 1, -}; - -static const struct rte_flow_attr dp_flow_attr_default_capture_ingress = 
{ - .group = DP_RTE_FLOW_VNET_GROUP, - .priority = 3, - .ingress = 1, - .egress = 0, - .transfer = 0, -}; int dp_install_isolated_mode_ipip(int port_id, uint8_t proto_id) { @@ -62,149 +45,6 @@ int dp_install_isolated_mode_ipip(int port_id, uint8_t proto_id) return DP_OK; } -int dp_install_jump_rule_in_default_group(uint16_t port_id, uint32_t dst_group) -{ - struct rte_flow_item pattern[2]; // first is a NULL ethernet header matching, second is the end - int pattern_cnt = 0; - - // jump action from default group to monitoring group - struct rte_flow_action_jump jump_action; // #1 - struct rte_flow_action action[2]; // + end - int action_cnt = 0; - - struct rte_flow *flow; - struct dp_port *port = dp_port_get_vf(port_id); - - // all ethernet packets - dp_set_eth_match_all_item(&pattern[pattern_cnt++]); - dp_set_end_flow_item(&pattern[pattern_cnt++]); - - // create actions that jump from the default group - // create jump action - dp_set_jump_group_action(&action[action_cnt++], &jump_action, dst_group); - - // end actions - dp_set_end_action(&action[action_cnt++]); - - // validate and install flow rule - flow = dp_install_rte_flow(port_id, &dp_flow_attr_default_jump_ingress, pattern, action); - - if (!flow) - return DP_ERROR; - - port->default_flow = flow; - - DPS_LOG_DEBUG("Installed the default jumping flow rule that destinated to group", DP_LOG_PORTID(port_id), DP_LOG_RTE_GROUP(dst_group)); - return DP_OK; -} - -int dp_install_default_rule_in_monitoring_group(uint16_t port_id) -{ - - struct rte_flow_item pattern[2]; // first is a NULL ethernet header matching, second is the end - int pattern_cnt = 0; - - struct rte_flow_action_sample sample_action; // 1 - struct rte_flow_action_jump jump_action; // 2 - struct rte_flow_action action[3]; // + end - int action_cnt = 0; - - struct rte_flow_action sub_action[1]; - int sub_action_cnt = 0; - - struct rte_flow *flow; - - // all ethernet packets - dp_set_eth_match_all_item(&pattern[pattern_cnt++]); - 
dp_set_end_flow_item(&pattern[pattern_cnt++]); - - // create actions - // create sampling action - dp_set_end_action(&sub_action[sub_action_cnt++]); - dp_set_sample_action(&action[action_cnt++], &sample_action, 1, sub_action); // mirror all packets, without explicite sub sample action - - // create jump group action - dp_set_jump_group_action(&action[action_cnt++], &jump_action, DP_RTE_FLOW_VNET_GROUP); // jump to group DP_RTE_FLOW_VNET_GROUP - - // end actions - dp_set_end_action(&action[action_cnt++]); - - // validate and install flow rule - flow = dp_install_rte_flow(port_id, &dp_flow_attr_default_monitoring_ingress, pattern, action); - - if (!flow) - return DP_ERROR; - - DPS_LOG_DEBUG("Installed the default monitoring flow rule", DP_LOG_PORTID(port_id)); - return DP_OK; - -} - -int dp_install_default_capture_rule_in_vnet_group(uint16_t port_id) -{ - - struct rte_flow_item pattern[2]; // first is a NULL ethernet header matching, second is the end - int pattern_cnt = 0; - - struct rte_flow_action_queue queue_action; // 1 - struct rte_flow_action action[2]; // + end - int action_cnt = 0; - - // all ethernet packets - dp_set_eth_match_all_item(&pattern[pattern_cnt++]); - dp_set_end_flow_item(&pattern[pattern_cnt++]); - - // create actions - // create flow action -- queue, send to default software handling queue - dp_set_redirect_queue_action(&action[action_cnt++], &queue_action, 0); - // create flow action -- end - dp_set_end_action(&action[action_cnt++]); - - if (!dp_install_rte_flow(port_id, &dp_flow_attr_default_capture_ingress, pattern, action)) - return DP_ERROR; - - DPS_LOG_DEBUG("Installed the default capture flow rule", DP_LOG_PORTID(port_id)); - return DP_OK; -} - -static int dp_change_all_vf_default_jump_rte_flow_group(uint32_t dst_group) -{ - struct dp_ports *ports = get_dp_ports(); - struct rte_flow_error error; - int ret; - - DP_FOREACH_PORT(ports, port) { - if (port->port_type == DP_PORT_VF && port->allocated) { - if (port->default_flow) { - ret = 
rte_flow_destroy(port->port_id, port->default_flow, &error); - - if (DP_FAILED(ret)) { - DPS_LOG_WARNING("Failed to destroy default flow", DP_LOG_PORTID(port->port_id), DP_LOG_RET(ret)); - continue; - } - } - - if (DP_FAILED(dp_install_jump_rule_in_default_group(port->port_id, dst_group))) { - DPS_LOG_WARNING("Failed to install default jump flow", DP_LOG_PORTID(port->port_id)); - continue; - } - } - } - - return DP_OK; -} - -int dp_turn_on_vf_offload_tracing(void) -{ - return dp_change_all_vf_default_jump_rte_flow_group(DP_RTE_FLOW_MONITORING_GROUP); -} - -int dp_turn_off_vf_offload_tracing(void) -{ - return dp_change_all_vf_default_jump_rte_flow_group(DP_RTE_FLOW_VNET_GROUP); -} - - #ifdef ENABLE_VIRTSVC int dp_install_isolated_mode_virtsvc(int port_id, uint8_t proto_id, const uint8_t svc_ipv6[16], rte_be16_t svc_port) { diff --git a/src/rte_flow/dp_rte_flow_traffic_forward.c b/src/rte_flow/dp_rte_flow_traffic_forward.c index f10dbdddb..780a91b16 100644 --- a/src/rte_flow/dp_rte_flow_traffic_forward.c +++ b/src/rte_flow/dp_rte_flow_traffic_forward.c @@ -9,6 +9,7 @@ #define DP_IPIP_ENCAP_HEADER_SIZE (sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv6_hdr)) +// this attribute value is used by pf to install a rule to move hairpin packets to the right rx hairpin queue static const struct rte_flow_attr dp_flow_pf_attr_ingress = { .group = DP_RTE_FLOW_DEFAULT_GROUP, .priority = 0, @@ -17,14 +18,16 @@ static const struct rte_flow_attr dp_flow_pf_attr_ingress = { .transfer = 0, }; +// this attribute value is used by vf to install a rule to move hairpin packets to the right rx hairpin queue static const struct rte_flow_attr dp_flow_vf_attr_ingress = { - .group = DP_RTE_FLOW_VNET_GROUP, + .group = DP_RTE_FLOW_DEFAULT_GROUP, .priority = 0, .ingress = 1, .egress = 0, .transfer = 0, }; +// this attribute value is used during the encap operation to install a encap/decap rule on pf to process pkts arriving to tx hairpin queue static const struct rte_flow_attr 
dp_flow_attr_egress = { .group = DP_RTE_FLOW_DEFAULT_GROUP, .priority = 0, @@ -33,7 +36,9 @@ static const struct rte_flow_attr dp_flow_attr_egress = { .transfer = 0, }; -static const struct rte_flow_attr dp_flow_pf_attr_transfer = { +// this attribute value is used during the decap operation on pf to install a redirecting rule +// to point a specific flow to either capturing rule or vnet rule +static const struct rte_flow_attr dp_flow_pf_attr_transfer_capture = { .group = DP_RTE_FLOW_DEFAULT_GROUP, .priority = 0, .ingress = 0, @@ -41,7 +46,8 @@ static const struct rte_flow_attr dp_flow_pf_attr_transfer = { .transfer = 1, }; -static const struct rte_flow_attr dp_flow_vf_attr_transfer = { +// this attribute value is used during the decap/decap operation to install a decap/encap rule to transfer pkts +static const struct rte_flow_attr dp_flow_attr_transfer_multi_stage = { .group = DP_RTE_FLOW_VNET_GROUP, .priority = 0, .ingress = 0, @@ -49,6 +55,16 @@ static const struct rte_flow_attr dp_flow_vf_attr_transfer = { .transfer = 1, }; +// this attribute value is used during the decap/encap operation to install a decap/encap rule to transfer pkts +static const struct rte_flow_attr dp_flow_attr_transfer_single_stage = { + .group = DP_RTE_FLOW_DEFAULT_GROUP, + .priority = 0, + .ingress = 0, + .egress = 0, + .transfer = 1, +}; + + static __rte_always_inline struct flow_age_ctx *allocate_agectx(void) { struct flow_age_ctx *agectx; @@ -201,9 +217,16 @@ static __rte_always_inline int dp_offload_handle_tunnel_encap_traffic(struct rte const struct rte_flow_attr *attr; uint16_t t_port_id; bool cross_pf_port; + struct dp_port *incoming_port; cross_pf_port = df->nxt_hop != dp_port_get_pf0_id(); + incoming_port = dp_port_get(m->port); + if (!incoming_port) { + DPS_LOG_ERR("Port not registered in service", DP_LOG_PORTID(m->port)); + return DP_ERROR; + } + // Match vf packets (and possibly modified vf packets embedded with vni info) if (cross_pf_port) { 
dp_set_eth_flow_item(&hairpin_pattern[hairpin_pattern_cnt++], &hairpin_eth_spec, htons(df->l3_type)); @@ -230,7 +253,6 @@ static __rte_always_inline int dp_offload_handle_tunnel_encap_traffic(struct rte if (cross_pf_port) hairpin_pattern[hairpin_pattern_cnt++] = pattern[pattern_cnt-1]; - /* First, install a flow rule to modify mac address to embed vni info and move packet to hairpin rxq */ if (cross_pf_port) { // set proper ethernet address @@ -254,7 +276,7 @@ static __rte_always_inline int dp_offload_handle_tunnel_encap_traffic(struct rte DPS_LOG_ERR("Failed to install hairpin queue flow rule on VF", DP_LOG_PORTID(m->port)); return DP_ERROR; } - DPS_LOG_DEBUG("Installed hairpin queue flow rule", DP_LOG_PORTID(m->port)); + DPS_LOG_DEBUG("Installed a flow rule to move pkts to hairpin rx queue", DP_LOG_PORTID(m->port)); } // replace source ip if vip-nat/network-nat is enabled @@ -293,7 +315,10 @@ static __rte_always_inline int dp_offload_handle_tunnel_encap_traffic(struct rte attr = &dp_flow_attr_egress; t_port_id = dp_port_get_pf1_id(); } else { - attr = &dp_flow_vf_attr_transfer; + if (incoming_port->captured) + attr = &dp_flow_attr_transfer_multi_stage; + else + attr = &dp_flow_attr_transfer_single_stage; t_port_id = m->port; } if (DP_FAILED(dp_install_rte_flow_with_indirect(t_port_id, attr, @@ -306,7 +331,11 @@ static __rte_always_inline int dp_offload_handle_tunnel_encap_traffic(struct rte return DP_ERROR; } - DPS_LOG_DEBUG("Installed encap flow rule on PF", DP_LOG_PORTID(t_port_id)); + if (cross_pf_port) + DPS_LOG_DEBUG("Installed cross pf encap flow rules", DP_LOG_PORTID(m->port)); + else + DPS_LOG_DEBUG("Installed encap flow rule on VF", DP_LOG_PORTID(m->port)); + return DP_OK; } @@ -331,13 +360,19 @@ static __rte_always_inline int dp_offload_handle_tunnel_decap_traffic(struct rte struct rte_flow_action actions[7]; // + end int action_cnt = 0; + struct rte_flow_action_jump jump_action; // #1 + struct rte_flow_action_age flow_age_capture; // #2 + struct 
rte_flow_action special_moni_action[3]; // + end + int special_moni_action_cnt = 0; + // misc variables needed to create the flow - struct flow_age_ctx *agectx; - struct rte_flow_action *age_action; + struct flow_age_ctx *agectx, *agectx_capture = NULL; + struct rte_flow_action *age_action, *age_action_capture; struct dp_port *port; struct rte_ether_hdr new_eth_hdr; rte_be32_t actual_ol_ipv4_addr; bool cross_pf_port; + const struct rte_flow_attr *attr = &dp_flow_attr_transfer_single_stage; cross_pf_port = m->port != dp_port_get_pf0_id(); if (cross_pf_port) @@ -374,6 +409,31 @@ static __rte_always_inline int dp_offload_handle_tunnel_decap_traffic(struct rte dp_set_end_flow_item(&pattern[pattern_cnt++]); + // create one action to redirect flow packets to the capturing group. + if (!cross_pf_port && dp_port_get(m->port)->captured) { + agectx_capture = allocate_agectx(); + if (!agectx_capture) + return DP_ERROR; + + attr = &dp_flow_attr_transfer_multi_stage; + + age_action_capture = &special_moni_action[special_moni_action_cnt++]; + dp_set_flow_age_action(age_action_capture, &flow_age_capture, df->conntrack->timeout_value, agectx_capture); + + dp_set_jump_group_action(&special_moni_action[special_moni_action_cnt++], &jump_action, DP_RTE_FLOW_CAPTURE_GROUP); + + dp_set_end_action(&special_moni_action[special_moni_action_cnt++]); + + if (DP_FAILED(dp_install_rte_flow_with_indirect(m->port, &dp_flow_pf_attr_transfer_capture, + pattern, special_moni_action, age_action_capture, df, agectx_capture))) { + dp_destroy_rte_flow_agectx(agectx_capture); + return DP_ERROR; + } + + DPS_LOG_DEBUG("Installed capturing flow rule on PF", DP_LOG_PORTID(m->port)); + } + + // remove the IPIP header and replace it with a standard Ethernet header dp_set_raw_decap_action(&actions[action_cnt++], &raw_decap, NULL, DP_IPIP_ENCAP_HEADER_SIZE); dp_set_raw_encap_action(&actions[action_cnt++], &raw_encap, (uint8_t *)&new_eth_hdr, sizeof(new_eth_hdr)); @@ -389,8 +449,12 @@ static __rte_always_inline 
int dp_offload_handle_tunnel_decap_traffic(struct rte // make flow aging work agectx = allocate_agectx(); - if (!agectx) + if (!agectx) { + if (agectx_capture) + if (DP_FAILED(dp_destroy_rte_flow_agectx(agectx_capture))) + DPS_LOG_ERR("Failed to rollback by removing installed capturing rule on PF", DP_LOG_PORTID(m->port)); return DP_ERROR; + } age_action = &actions[action_cnt++]; dp_set_flow_age_action(age_action, &flow_age, df->conntrack->timeout_value, agectx); @@ -401,6 +465,7 @@ static __rte_always_inline int dp_offload_handle_tunnel_decap_traffic(struct rte if (!port) { DPS_LOG_ERR("Port not registered in service", DP_LOG_PORTID(df->nxt_hop)); dp_destroy_rte_flow_agectx(agectx); + // no need to free the above appeared (not allocated) agectx_capture, as the capturing rule is not installed for the cross-pf case return DP_ERROR; } // pf's rx hairpin queue for vf starts from index 2. (0: normal rxq, 1: hairpin rxq for another pf.) @@ -412,17 +477,22 @@ static __rte_always_inline int dp_offload_handle_tunnel_decap_traffic(struct rte dp_set_end_action(&actions[action_cnt++]); if (DP_FAILED(dp_install_rte_flow_with_indirect(m->port, - cross_pf_port - ? &dp_flow_pf_attr_ingress - : &dp_flow_pf_attr_transfer, + cross_pf_port ? 
&dp_flow_pf_attr_ingress : attr, pattern, actions, age_action, df, agectx)) ) { dp_destroy_rte_flow_agectx(agectx); + if (agectx_capture) + if (DP_FAILED(dp_destroy_rte_flow_agectx(agectx_capture))) + DPS_LOG_ERR("Failed to rollback by removing installed capturing rule on PF", DP_LOG_PORTID(m->port)); return DP_ERROR; } - DPS_LOG_DEBUG("Installed normal decap flow rule on PF", DP_LOG_PORTID(m->port)); + if (cross_pf_port) + DPS_LOG_DEBUG("Installed flow rules to handle hairpin pkts on both PF and VF", DP_LOG_PORTID(m->port), DP_LOG_PORTID(df->nxt_hop)); + else + DPS_LOG_DEBUG("Installed normal decap flow rule on PF", DP_LOG_PORTID(m->port)); + return DP_OK; } @@ -448,6 +518,14 @@ static __rte_always_inline int dp_offload_handle_local_traffic(struct rte_mbuf * struct flow_age_ctx *agectx; struct rte_flow_action *age_action; rte_be32_t actual_ol_ipv4_dst_addr; + struct dp_port *incoming_port; + const struct rte_flow_attr *attr; + + incoming_port = dp_port_get(m->port); + if (!incoming_port) { + DPS_LOG_ERR("Port not registered in service", DP_LOG_PORTID(m->port)); + return DP_ERROR; + } // create local flow match pattern dp_set_eth_flow_item(&pattern[pattern_cnt++], ð_spec, htons(df->l3_type)); @@ -496,7 +574,12 @@ static __rte_always_inline int dp_offload_handle_local_traffic(struct rte_mbuf * dp_set_end_action(&actions[action_cnt++]); - if (DP_FAILED(dp_install_rte_flow_with_indirect(m->port, &dp_flow_pf_attr_transfer, + if (incoming_port->captured) + attr = &dp_flow_attr_transfer_multi_stage; + else + attr = &dp_flow_attr_transfer_single_stage; + + if (DP_FAILED(dp_install_rte_flow_with_indirect(m->port, attr, pattern, actions, age_action, df, agectx)) ) { diff --git a/tools/dump/dp_conf.json b/tools/dump/dp_conf.json index 97984f167..7fd33c408 100644 --- a/tools/dump/dp_conf.json +++ b/tools/dump/dp_conf.json @@ -20,13 +20,6 @@ "help": "show only packets matching a pcap-style FILTER", "arg": "FILTER" }, - { - "lgopt": "hw", - "help": "capture offloaded packets 
(only outgoing VF->PF packets supported)", - "var": "offload_enabled", - "type": "bool", - "default": "false" - }, { "lgopt": "pcap", "help": "write packets into a PCAP file", diff --git a/tools/dump/main.c b/tools/dump/main.c index cda5cea5b..d58032ab5 100644 --- a/tools/dump/main.c +++ b/tools/dump/main.c @@ -111,31 +111,23 @@ static void print_packet(__rte_unused struct dp_pcap *context, struct rte_mbuf * dp_graphtrace_sprint(pkt, printbuf, sizeof(printbuf)); - if (pktinfo->pkt_type == DP_GRAPHTRACE_PKT_TYPE_OFFLOAD) { + arrow = "->"; + if (pktinfo->node) { + node = pktinfo->node->name; + } else { + arrow = ">>"; snprintf(node_buf, sizeof(node_buf), "PORT %u", pkt->port); - node = "Offloaded"; - arrow = "at"; - next_node = node_buf; + node = node_buf; + } + if (pktinfo->next_node) { + next_node = pktinfo->next_node->name; } else { - assert(pktinfo->pkt_type == DP_GRAPHTRACE_PKT_TYPE_SOFTWARE); - arrow = "->"; - if (pktinfo->node) { - node = pktinfo->node->name; - } else { - arrow = ">>"; - snprintf(node_buf, sizeof(node_buf), "PORT %u", pkt->port); - node = node_buf; - } - if (pktinfo->next_node) { - next_node = pktinfo->next_node->name; + arrow = ">>"; + if (pktinfo->dst_port_id == (uint16_t)-1) { + next_node = "DROP"; } else { - arrow = ">>"; - if (pktinfo->dst_port_id == (uint16_t)-1) { - next_node = "DROP"; - } else { - snprintf(next_node_buf, sizeof(next_node_buf), "PORT %u", pktinfo->dst_port_id); - next_node = next_node_buf; - } + snprintf(next_node_buf, sizeof(next_node_buf), "PORT %u", pktinfo->dst_port_id); + next_node = next_node_buf; } } @@ -215,7 +207,6 @@ static int dp_graphtrace_start(struct dp_graphtrace *graphtrace) .action = DP_GRAPHTRACE_ACTION_START, .params.start.drops = dp_conf_is_showing_drops(), .params.start.nodes = showing_nodes, - .params.start.hw = dp_conf_is_offload_enabled(), }; struct dp_graphtrace_params *filters = (struct dp_graphtrace_params *)graphtrace->filters->addr; diff --git a/tools/dump/opts.c b/tools/dump/opts.c index 
fe26e1bb9..9ba604e27 100644 --- a/tools/dump/opts.c +++ b/tools/dump/opts.c @@ -18,7 +18,6 @@ _OPT_SHOPT_MAX = 255, OPT_DROPS, OPT_NODES, OPT_FILTER, - OPT_HW, OPT_PCAP, OPT_STOP, }; @@ -31,14 +30,12 @@ static const struct option dp_conf_longopts[] = { { "drops", 0, 0, OPT_DROPS }, { "nodes", 1, 0, OPT_NODES }, { "filter", 1, 0, OPT_FILTER }, - { "hw", 0, 0, OPT_HW }, { "pcap", 1, 0, OPT_PCAP }, { "stop", 0, 0, OPT_STOP }, { NULL, 0, 0, 0 } }; static bool showing_drops = false; -static bool offload_enabled = false; static bool stop_mode = false; bool dp_conf_is_showing_drops(void) @@ -46,11 +43,6 @@ bool dp_conf_is_showing_drops(void) return showing_drops; } -bool dp_conf_is_offload_enabled(void) -{ - return offload_enabled; -} - bool dp_conf_is_stop_mode(void) { return stop_mode; @@ -89,8 +81,6 @@ static int dp_conf_parse_arg(int opt, const char *arg) return dp_argparse_opt_nodes(arg); case OPT_FILTER: return dp_argparse_opt_filter(arg); - case OPT_HW: - return dp_argparse_store_true(&offload_enabled); case OPT_PCAP: return dp_argparse_opt_pcap(arg); case OPT_STOP: