diff --git a/lib/cnet/eth/eth_tx.c b/lib/cnet/eth/eth_tx.c
index be92757f..615d60e0 100644
--- a/lib/cnet/eth/eth_tx.c
+++ b/lib/cnet/eth/eth_tx.c
@@ -17,9 +17,39 @@
 #include
 #include "eth_tx_priv.h" // for eth_tx_node_ctx_t, ETH_TX_NEXT_MAX
 #include "chnl_callback_priv.h"
+#include "cnet_pcb.h"
 
 static struct eth_tx_node_main eth_tx_main;
 
+/**
+ * Select the next edge for a transmitted mbuf based on its owning PCB.
+ *
+ * @param mbuf
+ *   Packet buffer whose userptr is read as a struct pcb_entry pointer.
+ * @return
+ *   ETH_TX_NEXT_PKT_CALLBACK for UDP, and for TCP when data_len exceeds the summed
+ *   l2/l3/l4 header lengths; -1 when userptr is NULL, for TCP without such extra
+ *   bytes, and for every other protocol.
+ */
+static inline int
+eth_tx_node_pktmbuf_has_userdata(pktmbuf_t *mbuf)
+{
+    struct pcb_entry *pcb = mbuf->userptr;
+
+    if (unlikely(!pcb))
+        return -1;
+
+    if (pcb->ip_proto == IPPROTO_UDP)
+        return ETH_TX_NEXT_PKT_CALLBACK;
+
+    if (pcb->ip_proto == IPPROTO_TCP)
+        return mbuf->data_len > (mbuf->l2_len + mbuf->l3_len + mbuf->l4_len)
+                   ? ETH_TX_NEXT_PKT_CALLBACK
+                   : -1;
+
+    return -1;
+}
+
 static uint16_t
 eth_tx_node_process(struct cne_graph *graph, struct cne_node *node, void **objs, uint16_t nb_objs)
 {
@@ -27,7 +57,15 @@ eth_tx_node_process(struct cne_graph *graph, struct cne_node *node, void **objs,
     uint16_t port = ctx->port; /* Get TX port id */
     uint16_t count = nb_objs;
 
-    CNE_SET_USED(graph);
+    pktmbuf_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
+    int next0, next1, next2, next3;
+    void **to_next, **from;
+    uint16_t n_left_from;
+    uint16_t held = 0;
+
+    pkts = (pktmbuf_t **)objs;
+    from = objs;
+    n_left_from = nb_objs;
 
     if (nb_objs) {
         do {
@@ -41,7 +79,101 @@ eth_tx_node_process(struct cne_graph *graph, struct cne_node *node, void **objs,
 
         struct cne_node *next = __cne_node_next_node_get(node, ETH_TX_NEXT_PKT_CALLBACK);
         chnl_callback_node_set_source(next, CHNL_CALLBACK_SOURCE_ETH_TX);
-        cne_node_next_stream_move(graph, node, ETH_TX_NEXT_PKT_CALLBACK);
+
+        if (n_left_from >= 4) {
+            for (int i = 0; i < 4; i++)
+                cne_prefetch0(pkts[i]);
+        }
+
+        /*
+         * Collect the packets that need a callback directly in the next node's stream,
+         * requesting room for all 'count' pointers up front. This replaces a scratch
+         * array whose calloc() result was never checked before being written through.
+         */
+        to_next = cne_node_next_stream_get(graph, node, ETH_TX_NEXT_PKT_CALLBACK, count);
+        while (n_left_from >= 4) {
+            /* Prefetch next-next mbufs */
+            if (likely(n_left_from > 11)) {
+                cne_prefetch0(pkts[8]);
+                cne_prefetch0(pkts[9]);
+                cne_prefetch0(pkts[10]);
+                cne_prefetch0(pkts[11]);
+            }
+
+            /* Prefetch next mbuf data */
+            if (likely(n_left_from > 7)) {
+                cne_prefetch0(pkts[4]);
+                cne_prefetch0(pkts[5]);
+                cne_prefetch0(pkts[6]);
+                cne_prefetch0(pkts[7]);
+            }
+
+            mbuf0 = pkts[0];
+            mbuf1 = pkts[1];
+            mbuf2 = pkts[2];
+            mbuf3 = pkts[3];
+
+            pkts += 4;
+            n_left_from -= 4;
+
+            next0 = eth_tx_node_pktmbuf_has_userdata(mbuf0);
+            next1 = eth_tx_node_pktmbuf_has_userdata(mbuf1);
+            next2 = eth_tx_node_pktmbuf_has_userdata(mbuf2);
+            next3 = eth_tx_node_pktmbuf_has_userdata(mbuf3);
+
+            int fix_spec = (ETH_TX_NEXT_PKT_CALLBACK ^ next0) | (ETH_TX_NEXT_PKT_CALLBACK ^ next1) |
+                           (ETH_TX_NEXT_PKT_CALLBACK ^ next2) | (ETH_TX_NEXT_PKT_CALLBACK ^ next3);
+
+            if (unlikely(fix_spec)) {
+                /* Next0 */
+                if (next0 >= 0)
+                    to_next[held++] = from[0];
+
+                /* Next1 */
+                if (next1 >= 0)
+                    to_next[held++] = from[1];
+
+                /* Next2 */
+                if (next2 >= 0)
+                    to_next[held++] = from[2];
+
+                /* Next3 */
+                if (next3 >= 0)
+                    to_next[held++] = from[3];
+            } else {
+                to_next[held] = from[0];
+                to_next[held + 1] = from[1];
+                to_next[held + 2] = from[2];
+                to_next[held + 3] = from[3];
+                held += 4;
+            }
+
+            from += 4;
+        }
+
+        if (likely(n_left_from > 0))
+            cne_prefetch0(pkts[0]);
+
+        while (n_left_from > 0) {
+            /* pkts[1] is only a valid entry while at least two packets remain */
+            if (likely(n_left_from > 1))
+                cne_prefetch0(pkts[1]);
+
+            mbuf0 = pkts[0];
+
+            pkts += 1;
+            n_left_from -= 1;
+
+            next0 = eth_tx_node_pktmbuf_has_userdata(mbuf0);
+
+            if (next0 >= 0)
+                to_next[held++] = from[0];
+
+            from += 1;
+        }
+
+        /* Publish only the entries that were kept; the rest are not forwarded */
+        cne_node_next_stream_put(graph, node, ETH_TX_NEXT_PKT_CALLBACK, held);
     }
 
     return count;