From 922315b2180589e04b188546c1524608d4795043 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Wed, 24 Jul 2024 14:33:12 +0800 Subject: [PATCH 1/3] lldp: add supports for lldp protocol Signed-off-by: ywc689 --- conf/dpvs.bond.conf.sample | 1 + conf/dpvs.conf.items | 1 + conf/dpvs.conf.sample | 1 + conf/dpvs.conf.single-bond.sample | 1 + conf/dpvs.conf.single-nic.sample | 1 + include/conf/common.h | 38 + include/conf/lldp.h | 41 + include/conf/netif.h | 3 + include/conf/sockopts.h | 3 + include/ctrl.h | 1 + include/lldp.h | 118 ++ include/mbuf.h | 1 + include/netif.h | 9 +- include/timer.h | 1 + src/common.c | 216 ++++ src/ctrl.c | 2 +- src/global_conf.c | 27 +- src/inet.c | 5 + src/ip_tunnel.c | 1 + src/lldp.c | 1894 +++++++++++++++++++++++++++++ src/main.c | 1 + src/mbuf.c | 5 + src/netif.c | 35 +- src/vlan.c | 1 + tools/dpip/Makefile | 2 +- tools/dpip/dpip.c | 2 +- tools/dpip/link.c | 26 +- tools/dpip/lldp.c | 128 ++ 28 files changed, 2556 insertions(+), 9 deletions(-) create mode 100644 include/conf/lldp.h create mode 100644 include/lldp.h create mode 100644 src/lldp.c create mode 100644 tools/dpip/lldp.c diff --git a/conf/dpvs.bond.conf.sample b/conf/dpvs.bond.conf.sample index d8cc1e5e8..d35d10328 100644 --- a/conf/dpvs.bond.conf.sample +++ b/conf/dpvs.bond.conf.sample @@ -17,6 +17,7 @@ global_defs { ! log_async_mode off ! kni on ! pdump off + lldp on } ! netif config diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index 5b5f9b35e..448b40044 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -19,6 +19,7 @@ global_defs { log_async_pool_size 16383 <16383, 1023-unlimited> pdump off kni on + lldp on } ! netif config diff --git a/conf/dpvs.conf.sample b/conf/dpvs.conf.sample index 14b0846d5..002aab56f 100644 --- a/conf/dpvs.conf.sample +++ b/conf/dpvs.conf.sample @@ -17,6 +17,7 @@ global_defs { ! log_async_mode on ! kni on ! pdump off + lldp on } ! 
netif config diff --git a/conf/dpvs.conf.single-bond.sample b/conf/dpvs.conf.single-bond.sample index 3fdfbfd33..b0c1c375a 100644 --- a/conf/dpvs.conf.single-bond.sample +++ b/conf/dpvs.conf.single-bond.sample @@ -16,6 +16,7 @@ global_defs { ! log_file /var/log/dpvs.log ! log_async_mode on ! kni on + lldp on } ! netif config diff --git a/conf/dpvs.conf.single-nic.sample b/conf/dpvs.conf.single-nic.sample index 3717ed07b..bb9ce994e 100644 --- a/conf/dpvs.conf.single-nic.sample +++ b/conf/dpvs.conf.single-nic.sample @@ -16,6 +16,7 @@ global_defs { ! log_file /var/log/dpvs.log ! log_async_mode on ! kni on + lldp on } ! netif config diff --git a/include/conf/common.h b/include/conf/common.h index 7472ad8f1..14a048374 100644 --- a/include/conf/common.h +++ b/include/conf/common.h @@ -22,6 +22,7 @@ #include #include #include +#include #include typedef uint32_t sockoptid_t; @@ -142,6 +143,7 @@ int linux_get_link_status(const char *ifname, int *if_flags, char *if_flags_str, int linux_set_if_mac(const char *ifname, const unsigned char mac[ETH_ALEN]); int linux_hw_mc_add(const char *ifname, const uint8_t hwma[ETH_ALEN]); int linux_hw_mc_del(const char *ifname, const uint8_t hwma[ETH_ALEN]); +int linux_ifname2index(const char *ifname); /* read "n" bytes from a descriptor */ ssize_t readn(int fd, void *vptr, size_t n); @@ -166,4 +168,40 @@ static inline char *strlwr(char *str) { return str; } +/* convert hexadecimal string to binary sequence, return the converted binary length + * note: buflen should be half in size of len at least */ +int hexstr2binary(const char *hexstr, size_t len, uint8_t *buf, size_t buflen); + +/* convert binary sequence to hexadecimal string, return the converted string length + * note: buflen should be twice in size of len at least */ +int binary2hexstr(const uint8_t *hex, size_t len, char *buf, size_t buflen); + +/* convert binary sequence to printable or hexadecimal string, return the converted string length + * note: buflen should be triple in size 
of len in the worst case */ +int binary2print(const uint8_t *hex, size_t len, char *buf, size_t buflen); + +/* get prefix from network mask */ +int mask2prefix(const struct sockaddr *addr); + +/* get host addresses and corresponding interfaces + * + * Loopback addresses, ipv6 link local addresses, and addresses on linked-down + * or not-running interface are ignored. If multiple addresses matched, return + * the address of the least prefix length. + * + * Params: + * @ifname: preferred interface where to get host address, can be NULL + * @result4: store ipv4 address found, can be NULL + * @result6: store ipv6 address found, can be NULL + * @ifname4: interface name of ipv4 address, can be NULL + * @ifname6: interface name of ipv6 address, can be NULL + * Return: + * 1: only ipv4 address found + * 2: only ipv6 address found + * 3: both ipv4 and ipv6 address found + * dpvs error code: error occurred + * */ +int get_host_addr(const char *ifname, struct sockaddr_storage *result4, + struct sockaddr_storage *result6, char *ifname4, char *ifname6); + #endif /* __DPVS_COMMON_H__ */ diff --git a/include/conf/lldp.h b/include/conf/lldp.h new file mode 100644 index 000000000..24f8b7831 --- /dev/null +++ b/include/conf/lldp.h @@ -0,0 +1,41 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __DPVS_LLDP_CONF_H__ +#define __DPVS_LLDP_CONF_H__ + +#include +#include "conf/sockopts.h" + +#define LLDP_MESSAGE_LEN 4096 + +#define DPVS_LLDP_NODE_LOCAL 0 +#define DPVS_LLDP_NODE_NEIGH 1 +#define DPVS_LLDP_NODE_MAX 2 + + +struct lldp_param { + uint16_t node; /* DPVS_LLDP_NODE_xxx */ + char ifname[IFNAMSIZ]; +}; + +struct lldp_message { + struct lldp_param param; + char message[LLDP_MESSAGE_LEN]; +}; + +#endif /* __DPVS_LLDP_CONF_H__ */ diff --git a/include/conf/netif.h b/include/conf/netif.h index 7ef7def4d..da11e402a 100644 --- a/include/conf/netif.h +++ b/include/conf/netif.h @@ -111,6 +111,7 @@ typedef struct netif_nic_basic_get uint16_t ol_tx_ip_csum:1; uint16_t ol_tx_tcp_csum:1; uint16_t ol_tx_udp_csum:1; + uint16_t lldp:1; } netif_nic_basic_get_t; /* nic statistics specified by port_id */ @@ -247,6 +248,8 @@ typedef struct netif_nic_set { uint16_t tc_egress_off:1; uint16_t tc_ingress_on:1; uint16_t tc_ingress_off:1; + uint16_t lldp_on:1; + uint16_t lldp_off:1; } netif_nic_set_t; typedef struct netif_bond_set { diff --git a/include/conf/sockopts.h b/include/conf/sockopts.h index 83d02ccc9..8539f9e9d 100644 --- a/include/conf/sockopts.h +++ b/include/conf/sockopts.h @@ -102,6 +102,9 @@ DPVSMSG(SOCKOPT_NETIF_GET_MADDR)\ DPVSMSG(SOCKOPT_NETIF_GET_MAX) \ \ + DPVSMSG(SOCKOPT_SET_LLDP_TODO) \ + DPVSMSG(SOCKOPT_GET_LLDP_SHOW) \ + \ DPVSMSG(SOCKOPT_SET_NEIGH_ADD) \ DPVSMSG(SOCKOPT_SET_NEIGH_DEL) \ DPVSMSG(SOCKOPT_GET_NEIGH_SHOW) \ diff --git a/include/ctrl.h b/include/ctrl.h index 888113f89..754e38b9d 100644 --- a/include/ctrl.h +++ b/include/ctrl.h @@ -201,6 +201,7 @@ int msg_dump(const struct dpvs_msg *msg, char *buf, int len); #define MSG_TYPE_IPV6_STATS 16 #define MSG_TYPE_ROUTE6 17 #define MSG_TYPE_NEIGH_GET 18 +#define MSG_TYPE_LLDP_RECV 19 #define MSG_TYPE_IFA_GET 22 #define MSG_TYPE_IFA_SET 23 #define MSG_TYPE_IFA_SYNC 24 diff --git a/include/lldp.h b/include/lldp.h new file mode 100644 index 000000000..e4f828a24 --- /dev/null +++ 
b/include/lldp.h @@ -0,0 +1,118 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __DPVS_LLDP_H__ +#define __DPVS_LLDP_H__ + +#define DPVS_LLDP_TYPE_MAX 128 + +/* IEEE 802.1AB Clause 9: TLV Types */ +enum { + LLDP_TYPE_END = 0, + LLDP_TYPE_CHASSIS_ID = 1, + LLDP_TYPE_PORT_ID = 2, + LLDP_TYPE_TTL = 3, + LLDP_TYPE_PORT_DESC = 4, + LLDP_TYPE_SYS_NAME = 5, + LLDP_TYPE_SYS_DESC = 6, + LLDP_TYPE_SYS_CAP = 7, + LLDP_TYPE_MNG_ADDR = 8, + LLDP_TYPE_ORG = 127, +}; +#define LLDP_TYPE_VALID(t) (((t) >= 0) && ((t) < DPVS_LLDP_TYPE_MAX)) + +/* IEEE 802.1AB Clause 9.5.2: Chassis subtypes */ +enum { + LLDP_CHASSIS_ID_RESERVED = 0, + LLDP_CHASSIS_ID_CHASSIS_COMPONENT = 1, + LLDP_CHASSIS_ID_INTERFACE_ALIAS = 2, + LLDP_CHASSIS_ID_PORT_COMPONENT = 3, + LLDP_CHASSIS_ID_MAC_ADDRESS = 4, + LLDP_CHASSIS_ID_NETWORK_ADDRESS = 5, + LLDP_CHASSIS_ID_INTERFACE_NAME = 6, + LLDP_CHASSIS_ID_LOCALLY_ASSIGNED = 7, +}; +#define LLDP_CHASSIS_ID_VALID(t) (((t) > 0) && ((t) <= 7)) + +/* IEEE 802.1AB Clause 9.5.3: Port subtype */ +enum { + LLDP_PORT_ID_RESERVED = 0, + LLDP_PORT_ID_INTERFACE_ALIAS = 1, + LLDP_PORT_ID_PORT_COMPONENT = 2, + LLDP_PORT_ID_MAC_ADDRESS = 3, + LLDP_PORT_ID_NETWORK_ADDRESS = 4, + LLDP_PORT_ID_INTERFACE_NAME = 5, + LLDP_PORT_ID_AGENT_CIRCUIT_ID = 6, + LLDP_PORT_ID_LOCALLY_ASSIGNED = 7, +}; +#define LLDP_PORT_ID_VALID(t) (((t) > 0) && ((t) <= 7)) + +/* + * 
IETF RFC 3232: + * http://www.iana.org/assignments/ianaaddressfamilynumbers-mib + */ +enum { + LLDP_ADDR_OTHER = 0, + LLDP_ADDR_IPV4 = 1, + LLDP_ADDR_IPV6 = 2, + LLDP_ADDR_NSAP = 3, + LLDP_ADDR_HDLC = 4, + LLDP_ADDR_BBN1822 = 5, + LLDP_ADDR_ALL802 = 6, + LLDP_ADDR_E163 = 7, + LLDP_ADDR_E164 = 8, + LLDP_ADDR_F69 = 9, + LLDP_ADDR_X121 = 10, + LLDP_ADDR_IPX = 11, + LLDP_ADDR_APPLETALK = 12, + LLDP_ADDR_DECNETIV = 13, + LLDP_ADDR_BANYANVINES = 14, + LLDP_ADDR_E164WITHNSAP = 15, + LLDP_ADDR_DNS = 16, + LLDP_ADDR_DISTINGUISHEDNAME = 17, + LLDP_ADDR_ASNUMBER = 18, + LLDP_ADDR_XTPOVERIPV4 = 19, + LLDP_ADDR_XTPOVERIPV6 = 20, + LLDP_ADDR_XTPNATIVEMODEXTP = 21, + LLDP_ADDR_FIBRECHANNELWWPN = 22, + LLDP_ADDR_FIBRECHANNELWWNN = 23, + LLDP_ADDR_GWID = 24, + LLDP_ADDR_AFI = 25, + LLDP_ADDR_RESERVED = 65535, +}; + +/* IEEE 802.1AB: Annex E, Table E.1: Organizationally Specific TLVs */ +enum { + LLDP_ORG_SPEC_PVID = 1, + LLDP_ORG_SPEC_PPVID = 2, + LLDP_ORG_SPEC_VLAN_NAME = 3, + LLDP_ORG_SPEC_PROTO_ID = 4, + LLDP_ORG_SPEC_VID_USAGE = 5, + LLDP_ORG_SPEC_MGMT_VID = 6, + LLDP_ORG_SPEC_LINK_AGGR = 7, +}; +#define LLDP_ORG_SPEC_VALID(t) (((t) > 0) && ((t) <= 7)) + +void dpvs_lldp_enable(void); +void dpvs_lldp_disable(void); +bool dpvs_lldp_is_enabled(void); + +int dpvs_lldp_init(void); +int dpvs_lldp_term(void); + +#endif diff --git a/include/mbuf.h b/include/mbuf.h index a8ccde221..7fb013a48 100644 --- a/include/mbuf.h +++ b/include/mbuf.h @@ -61,6 +61,7 @@ typedef void * mbuf_userdata_field_route_t; typedef enum { MBUF_FIELD_PROTO = 0, MBUF_FIELD_ROUTE, + MBUF_FIELD_ORIGIN_PORT, } mbuf_usedata_field_t; /** diff --git a/include/netif.h b/include/netif.h index 4347eb999..af3adeb7a 100644 --- a/include/netif.h +++ b/include/netif.h @@ -48,6 +48,7 @@ enum { NETIF_PORT_FLAG_TC_EGRESS = (0x1<<10), NETIF_PORT_FLAG_TC_INGRESS = (0x1<<11), NETIF_PORT_FLAG_NO_ARP = (0x1<<12), + NETIF_PORT_FLAG_LLDP = (0x1<<13), }; /* max tx/rx queue number for each nic */ @@ -262,11 +263,15 @@ int 
netif_unregister_pkt(struct pkt_type *pt); /**************************** port API ******************************/ struct netif_port* netif_port_get(portid_t id); +/* get netif by name, fail return NULL */ +struct netif_port* netif_port_get_by_name(const char *name); +bool is_physical_port(portid_t pid); +bool is_bond_port(portid_t pid); +void netif_physical_port_range(portid_t *start, portid_t *end); +void netif_bond_port_range(portid_t *start, portid_t *end); /* port_conf can be NULL for default port configure */ int netif_print_port_conf(const struct rte_eth_conf *port_conf, char *buf, int *len); int netif_print_port_queue_conf(portid_t pid, char *buf, int *len); -/* get netif by name, fail return NULL */ -struct netif_port* netif_port_get_by_name(const char *name); // function only for init or termination // int netif_port_conf_get(struct netif_port *port, struct rte_eth_conf *eth_conf); int netif_port_conf_set(struct netif_port *port, const struct rte_eth_conf *conf); diff --git a/include/timer.h b/include/timer.h index ba1bb6563..a00f09fb4 100644 --- a/include/timer.h +++ b/include/timer.h @@ -17,6 +17,7 @@ */ #ifndef __DPVS_TIMER_H__ #define __DPVS_TIMER_H__ +#include #include #include "list.h" diff --git a/src/common.c b/src/common.c index c10505cf4..8452ef806 100644 --- a/src/common.c +++ b/src/common.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -246,6 +247,26 @@ int linux_hw_mc_del(const char *ifname, const uint8_t hwma[ETH_ALEN]) return linux_hw_mc_mod(ifname, hwma, false); } +int linux_ifname2index(const char *ifname) +{ + int sockfd; + struct ifreq ifr; + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) + return -1; + + memset(&ifr, 0, sizeof(struct ifreq)); + strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1); + if (ioctl(sockfd, SIOCGIFINDEX, &ifr) < 0) { + close(sockfd); + return -1; + } + close(sockfd); + + return ifr.ifr_ifindex; +} + ssize_t readn(int fd, void *vptr, size_t n) { size_t nleft; @@ 
-320,3 +341,198 @@ ssize_t sendn(int fd, const void *vptr, size_t n, int flags) return (n); } +static uint8_t hex_char2num(char hex) +{ + if (hex >= '0' && hex <= '9') + return hex - '0'; + if (hex >= 'A' && hex <= 'F') + return hex - 'A' + 10; + if (hex >= 'a' && hex <= 'f') + return hex - 'a' + 10; + return 255; +} + +int hexstr2binary(const char *hexstr, size_t len, uint8_t *buf, size_t buflen) +{ + int i, j; + + for (i = 0, j = 0; i + 1 < len && j < buflen; i += 2, j++) + buf[j] = (hex_char2num(hexstr[i]) << 4) | hex_char2num(hexstr[i+1]); + + return j; +} + +#define num2hexchar(b) (((b) > 9) ? ((b) - 0xa + 'A') : ((b) + '0')) +int binary2hexstr(const uint8_t *hex, size_t len, char *buf, size_t buflen) +{ + size_t i, j; + + for (i = 0, j = 0; i < len && j + 1 < buflen; i++, j += 2) { + buf[j] = num2hexchar((hex[i] & 0xf0) >> 4); + buf[j+1] = num2hexchar(hex[i] & 0x0f); + } + + return j; +} + +int binary2print(const uint8_t *hex, size_t len, char *buf, size_t buflen) +{ + size_t i, j; + + for (i = 0, j = 0; i < len && j < buflen; i++) { + if (isprint(hex[i])) { + buf[j++] = hex[i]; + if (j >= buflen) + break; + } else { + if (j + 2 >= buflen) + break; + buf[j] = '\\'; + buf[j+1] = num2hexchar((hex[i] & 0xf0) >> 4); + buf[j+2] = num2hexchar(hex[i] & 0x0f); + j += 2; + } + } + + return j; +} + +static int is_link_local(struct sockaddr *addr) +{ + unsigned char *addrbytes; + if (addr->sa_family == AF_INET6) { + addrbytes = (unsigned char *)(&((struct sockaddr_in6 *)addr)->sin6_addr); + return (addrbytes[0] == 0xFE) && ((addrbytes[1] & 0xC0) == 0x80); /* fe80::/10 */ + } + return 0; +} + +int mask2prefix(const struct sockaddr *addr) +{ + int i, j; + int pfxlen, addrlen; + unsigned char *mask; + + if (!addr) + return -1; + + if (addr->sa_family == AF_INET) { + mask = (unsigned char *)&((struct sockaddr_in *)addr)->sin_addr; + addrlen = 4; + } else if (addr->sa_family == AF_INET6) { + mask = (unsigned char *)&((struct sockaddr_in6 *)addr)->sin6_addr; + addrlen = 16; + 
} else { + return -1; + } + + pfxlen = 0; + for (i = 0; i < addrlen; i++) { + for (j = 7; j >= 0; j--) { + if (mask[i] & (1U << j)) + ++pfxlen; + else + return pfxlen; + } + } + return pfxlen; +} + +int get_host_addr(const char *ifname, struct sockaddr_storage *result4, + struct sockaddr_storage *result6, char *ifname4, char *ifname6) +{ + struct ifaddrs *ifa_head, *ifa; + int found_v4 = 0, found_v6 = 0; + int pfxlen, pfxlen_v4 = 0, pfxlen_v6 = 0; + + if (getifaddrs(&ifa_head) == -1) + return -1; + + /* addresses on ifname take precedence */ + if (ifname) { + for (ifa = ifa_head; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL) + continue; + if (ifa->ifa_flags & IFF_LOOPBACK || + !(ifa->ifa_flags & IFF_UP) || + !(ifa->ifa_flags & IFF_RUNNING)) + continue; + if (is_link_local(ifa->ifa_addr)) + continue; + if (strcmp(ifname, ifa->ifa_name) == 0) { + pfxlen = mask2prefix(ifa->ifa_netmask); + if (ifa->ifa_addr->sa_family == AF_INET) { + if (!pfxlen_v4 || (pfxlen > 0 && pfxlen < pfxlen_v4)) { + if (result4) + memcpy(result4, ifa->ifa_addr, sizeof(struct sockaddr_in)); + if (ifname4) { + strncpy(ifname4, ifa->ifa_name, IFNAMSIZ-1); + ifname4[IFNAMSIZ-1] = '\0'; + } + found_v4 = 1; + pfxlen_v4 = pfxlen > 0 ? pfxlen : 32; + } + } else if (ifa->ifa_addr->sa_family == AF_INET6) { + if (!pfxlen_v6 || (pfxlen > 0 && pfxlen < pfxlen_v6)) { + if (result6) + memcpy(result6, ifa->ifa_addr, sizeof(struct sockaddr_in6)); + if (ifname6) { + strncpy(ifname6, ifa->ifa_name, IFNAMSIZ-1); + ifname6[IFNAMSIZ-1] = '\0'; + } + found_v6 = 1; + pfxlen_v6 = pfxlen > 0 ? 
pfxlen : 128; + } + } + } + } + } + + /* try to find address on other interfaces */ + if (!found_v4 || !found_v6) { + for (ifa = ifa_head; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL) + continue; + if (ifa->ifa_flags & IFF_LOOPBACK || + !(ifa->ifa_flags & IFF_UP) || + !(ifa->ifa_flags & IFF_RUNNING)) + continue; + if (is_link_local(ifa->ifa_addr)) + continue; + pfxlen = mask2prefix(ifa->ifa_netmask); + if (ifa->ifa_addr->sa_family == AF_INET) { + if (!pfxlen_v4 || (pfxlen > 0 && pfxlen < pfxlen_v4)) { + if (result4) + memcpy(result4, ifa->ifa_addr, sizeof(struct sockaddr_in)); + if (ifname4) { + strncpy(ifname4, ifa->ifa_name, IFNAMSIZ-1); + ifname4[IFNAMSIZ-1] = '\0'; + } + found_v4 = 1; + pfxlen_v4 = pfxlen > 0 ? pfxlen : 32; + } + } else if (ifa->ifa_addr->sa_family == AF_INET6) { + if (!pfxlen_v6 || (pfxlen > 0 && pfxlen < pfxlen_v6)) { + if (result6) + memcpy(result6, ifa->ifa_addr, sizeof(struct sockaddr_in6)); + if (ifname6) { + strncpy(ifname6, ifa->ifa_name, IFNAMSIZ-1); + ifname6[IFNAMSIZ-1] = '\0'; + } + found_v6 = 1; + pfxlen_v6 = pfxlen > 0 ? 
pfxlen : 128; + } + } + } + } + + freeifaddrs(ifa_head); + + if (found_v4 && found_v6) + return 3; + if (found_v4) + return 1; + if (found_v6) + return 2; + return 0; +} diff --git a/src/ctrl.c b/src/ctrl.c index 303f43b2f..2b23377a9 100644 --- a/src/ctrl.c +++ b/src/ctrl.c @@ -496,7 +496,7 @@ int msg_send(struct dpvs_msg *msg, lcoreid_t cid, uint32_t flags, struct dpvs_ms RTE_LOG(WARNING, MSGMGR, "%s:msg@%p, msg ring of lcore %d quota exceeded\n", __func__, msg, cid); } else if (unlikely(-ENOBUFS == res)) { - RTE_LOG(ERR, MSGMGR, "%s:msg@%p, msg ring of lcore %d is full\n", __func__, msg, res); + RTE_LOG(ERR, MSGMGR, "%s:msg@%p, msg ring of lcore %d is full\n", __func__, msg, cid); add_msg_flags(msg, DPVS_MSG_F_STATE_DROP); rte_atomic16_dec(&msg->refcnt); /* not enqueued, free manually */ return EDPVS_DPDKAPIFAIL; diff --git a/src/global_conf.c b/src/global_conf.c index 9935d8f79..4a28edc79 100644 --- a/src/global_conf.c +++ b/src/global_conf.c @@ -20,6 +20,7 @@ #include "global_conf.h" #include "global_data.h" #include "log.h" +#include "lldp.h" bool g_dpvs_pdump = false; @@ -103,6 +104,13 @@ static int set_log_file(const char *log_file) return EDPVS_OK; } +static void global_defs_handler(vector_t tokens) +{ + // initialize config to default value + g_dpvs_log_tslen = 0; + dpvs_lldp_disable(); +} + static void log_level_handler(vector_t tokens) { char *log_level = set_value(tokens); @@ -189,6 +197,22 @@ static void kni_handler(vector_t tokens) FREE_PTR(str); } +static void lldp_handler(vector_t tokens) +{ + char *str = set_value(tokens); + assert(str); + if (strcasecmp(str, "on") == 0) + dpvs_lldp_enable(); + else if (strcasecmp(str, "off") == 0) + dpvs_lldp_disable(); + else + RTE_LOG(WARNING, CFG_FILE, "invalid lldp config: %s\n", str); + + RTE_LOG(INFO, CFG_FILE, "lldp = %s\n", dpvs_lldp_is_enabled() ? 
"on" : "off"); + + FREE_PTR(str); +} + #ifdef CONFIG_DPVS_PDUMP static void pdump_handler(vector_t tokens) { @@ -209,13 +233,14 @@ static void pdump_handler(vector_t tokens) void install_global_keywords(void) { - install_keyword_root("global_defs", NULL); + install_keyword_root("global_defs", global_defs_handler); install_keyword("log_level", log_level_handler, KW_TYPE_NORMAL); install_keyword("log_file", log_file_handler, KW_TYPE_NORMAL); install_keyword("log_async_mode", log_async_mode_handler, KW_TYPE_INIT); install_keyword("log_with_timestamp", log_with_timestamp_handler, KW_TYPE_NORMAL); install_keyword("log_async_pool_size", log_async_pool_size_handler, KW_TYPE_INIT); install_keyword("kni", kni_handler, KW_TYPE_INIT); + install_keyword("lldp", lldp_handler, KW_TYPE_NORMAL); #ifdef CONFIG_DPVS_PDUMP install_keyword("pdump", pdump_handler, KW_TYPE_INIT); #endif diff --git a/src/inet.c b/src/inet.c index d5ebcdea1..430babf3c 100644 --- a/src/inet.c +++ b/src/inet.c @@ -28,6 +28,7 @@ #include "icmp.h" #include "icmp6.h" #include "inetaddr.h" +#include "lldp.h" #define INET #define RTE_LOGTYPE_INET RTE_LOGTYPE_USER1 @@ -99,6 +100,8 @@ int inet_init(void) return err; if ((err = inet_addr_init()) != 0) return err; + if ((err = dpvs_lldp_init()) != 0) + return err; return EDPVS_OK; } @@ -107,6 +110,8 @@ int inet_term(void) { int err; + if ((err = dpvs_lldp_term()) != 0) + return err; if ((err = inet_addr_term()) != 0) return err; if ((err = icmpv6_term()) != 0) diff --git a/src/ip_tunnel.c b/src/ip_tunnel.c index 8b6cd9668..3e1e3483b 100644 --- a/src/ip_tunnel.c +++ b/src/ip_tunnel.c @@ -204,6 +204,7 @@ static struct netif_port *tunnel_create(struct ip_tunnel_tab *tab, dev->flag &= ~NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD; dev->flag &= ~NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD; dev->flag &= ~NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD; + dev->flag &= ~NETIF_PORT_FLAG_LLDP; err = netif_port_register(dev); if (err != EDPVS_OK) { diff --git a/src/lldp.c b/src/lldp.c new file mode 100644 
index 000000000..7edc31a3e --- /dev/null +++ b/src/lldp.c @@ -0,0 +1,1894 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Jul 2024, yuwenchao@qiyi.com, Initial + */ + +#include +#include +#include "list.h" +#include "timer.h" +#include "ctrl.h" +#include "netif.h" +#include "netif_addr.h" +#include "lldp.h" +#include "conf/lldp.h" + +#define RTE_LOGTYPE_LLDP RTE_LOGTYPE_USER1 + +#define DPVS_LLDP_PDU_MAX 1500 +#define DPVS_LLDP_TTL_DEFAULT 120 +#define DPVS_LLDP_TX_INTERVAL 30 +#define DPVS_LLDP_UPDATE_INTERVAL 600 + +#define DPVS_LLDP_TL_TYPE(tl) ((rte_be_to_cpu_16(tl) & 0xfe00) >> 9) +#define DPVS_LLDP_TL_LEN(tl) ((rte_be_to_cpu_16(tl) & 0x01ff)) +#define DPVS_LLDP_TL(type, len) (rte_cpu_to_be_16((((type) & 0x7f) << 9) | ((len) & 0x1ff))) + +#define lldp_type_equal(t1, t2) (((t1).type == (t2).type) && ((t1).subtype == (t2).subtype)) + +/* helper macro used in lldp_type_ops::dump + * @buf: target string buffer, must be an array + * @pos: start position for this snprintf, must be an initialized integer variable + * */ +#define lldp_dump_snprintf(buf, pos, fmt, ...) 
\ + do { \ + int res = snprintf(&(buf)[pos], sizeof(buf) - pos, fmt, ##__VA_ARGS__); \ + if (unlikely(res < 0)) \ + return EDPVS_IO; \ + (pos) += res; \ + if ((pos) >= sizeof(buf)) \ + return EDPVS_NOROOM; \ + } while (0) + +/* helper macro used in lldp_type_ops::dump + * @buf: target string buffer, must be an array + * @pos: start position for this snprintf, must be an initialized integer variable + * @s: non-null-terminated string (use lldp_dump_snprintf for null-terminated string) + * @n: length of s + * @ends: ending string appended into buf + * */ +#define lldp_dump_strcpy(buf, pos, s, n, ends) \ + do { \ + int i, endslen = strlen(ends); \ + if (unlikely((endslen + (n)) >= (sizeof(buf) - (pos)))) \ + return EDPVS_NOROOM; \ + rte_memcpy(&(buf)[pos], s, n); \ + (pos) += (n); \ + for (i = 0; i < endslen; i++) \ + (buf)[(pos)++] = ends[i]; \ + (buf)[pos] = '\0'; \ + } while (0) + +const struct rte_ether_addr LLDP_ETHER_ADDR_DST = { + .addr_bytes = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x0E} +}; + +/* + * LLDP is processed only on master lcore, all data structures are free of lock + */ + +typedef struct { + uint8_t type; + uint32_t subtype; +} lldp_type_t; + +struct lldp_port { + struct netif_port *dev; + struct list_head head; /* lldp_entry list head, sorted by lldp type */ + struct list_head node; + struct dpvs_timer timer; + uint32_t timeout; + uint16_t entries; + uint16_t neigh; /* DPVS_LLDP_NODE_xxx */ +}; + +struct lldp_entry { + struct list_head node; + struct lldp_port *port; + uint8_t stale; + lldp_type_t type; + uint16_t len; /* host endian */ + + /* lldp pdu */ + uint16_t typelen; /* network endian */ + char value[0]; +}; + +struct lldp_type_ops { + uint8_t type; + + /* + * Parse LLDP type and subtype from LLDP PDU + * @params + * llpdu: lldp pdu + * type: where to store the parsed type id, must not be NULL + * len: where to store the parse data len for the type, can be NULL + * @return + * DPVS error code num + * */ + int (*parse_type)(const char *llpdu, 
lldp_type_t *type, uint16_t *len); + + /* + * Generate LLDP PDU, and store it to buf + * @params + * dev: physical netif port + * subtype: subtype of the LLDP PDU + * buf: lldp pdu buffer + * len: buffer size + * @return + * the lldp pdu length on success or buffer not big enough + * dpvs negative error code on error + * */ + int (*local_lldp)(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len); + + /* + * Translate LLDP PDU, store the translated message into buf. + * @params + * e: lldp entry + * buf: message buffer + * len: message buffer size + * @return (similar to "snprintf") + * the message length on success or buffer not big enough + * negative error code on error + * the returned message always terminates with '\0'. + * */ + int (*dump)(const struct lldp_entry *e, char *buf, size_t len); + + /* + * Actions to take after lldp pdu changed (add, update) + * @params: + * entry: the newly added entry + * @return + * dpvs error code + * */ + int (*on_change)(const struct lldp_entry *entry); +}; + +static int lldp_enable = 0; +static struct dpvs_timer lldp_xmit_timer; +static struct dpvs_timer lldp_update_timer; + +static char lldp_sn[256]; +static struct utsname lldp_uname; + +static struct list_head lldp_ports[DPVS_LLDP_NODE_MAX]; +static struct lldp_type_ops *lldp_types[DPVS_LLDP_TYPE_MAX] = { NULL }; + +static int lldp_xmit_start(void); +static int lldp_xmit_stop(void); + +void dpvs_lldp_enable(void) +{ + int err; + + if (lldp_enable) + return; + + if (dpvs_state_get() == DPVS_STATE_NORMAL) { + if ((err = lldp_xmit_start()) != EDPVS_OK) { + RTE_LOG(ERR, LLDP, "%s: fail to enable lldp -- %s\n", + __func__, dpvs_strerror(err)); + return; + } + } + + lldp_enable = 1; +} + +void dpvs_lldp_disable(void) +{ + int err; + + if (!lldp_enable) + return; + + if (dpvs_state_get() == DPVS_STATE_NORMAL) { + if ((err = lldp_xmit_stop()) != EDPVS_OK) { + RTE_LOG(ERR, LLDP, "%s: fail to disable lldp -- %s\n", + __func__, dpvs_strerror(err)); + return; + 
} + } + + lldp_enable = 0; +} + +bool dpvs_lldp_is_enabled(void) +{ + return !!lldp_enable; +} + +static int lldp_serail_number_init(void) +{ + FILE *fp; + char *ptr; + + fp = fopen("/sys/class/dmi/id/product_serial", "r"); + if (!fp) { + RTE_LOG(WARNING, LLDP, "%s: fail to open serial number file\n", __func__); + snprintf(lldp_sn, sizeof(lldp_sn), "%s", "Unknown"); + return EDPVS_SYSCALL; + } + + if (!fgets(lldp_sn, sizeof(lldp_sn), fp)) { + RTE_LOG(WARNING, LLDP, "%s: fail to read serial number file\n", __func__); + snprintf(lldp_sn, sizeof(lldp_sn), "%s", "Unknown"); + return EDPVS_IO; + } + + /* remove the trailing LF character */ + ptr = strrchr(lldp_sn, '\n'); + if (ptr) + *ptr = '\0'; + + return EDPVS_OK; +} + +static inline int lldp_type_cmp(lldp_type_t *t1, lldp_type_t *t2) +{ + if (t1->type < t2->type) + return -1; + if (t1->type > t2->type) + return 1; + if (t1->subtype < t2->subtype) + return -1; + if (t1->subtype > t2->subtype) + return 1; + return 0; +} + +static int lldp_type_register(struct lldp_type_ops *ops) +{ + if (!ops || ops->type >= DPVS_LLDP_TYPE_MAX) + return EDPVS_INVAL; + + if (lldp_types[ops->type] != NULL) + return EDPVS_EXIST; + + if (!ops->parse_type || !ops->dump) + return EDPVS_INVAL; + + lldp_types[ops->type] = ops; + return EDPVS_OK; +} + +static int lldp_type_unregister(struct lldp_type_ops *ops) +{ + if (!ops || ops->type >= DPVS_LLDP_TYPE_MAX) + return EDPVS_INVAL; + + if (!lldp_types[ops->type]) + return EDPVS_NOTEXIST; + + lldp_types[ops->type] = NULL; + return EDPVS_OK; +} + +static struct lldp_type_ops *lldp_type_get(lldp_type_t type) +{ + if (type.type >= DPVS_LLDP_TYPE_MAX) + return NULL; + return lldp_types[type.type]; +} + +static int lldp_parse_type_default(const char *lldpdu, lldp_type_t *type, uint16_t *len) +{ + assert(NULL != type); + + type->type = DPVS_LLDP_TL_TYPE((uint16_t)(*lldpdu)); + type->subtype = 0; + if (!LLDP_TYPE_VALID(type->type)) { + type->type = 0; + return EDPVS_INVAL; + } + if (len) + *len = 
DPVS_LLDP_TL_LEN(*((uint16_t *)lldpdu)); + + return EDPVS_OK; +} + +static int lldp_local_pdu_end(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + uint16_t *typelen = (uint16_t *)buf; + + if (len >= 2) + *typelen = DPVS_LLDP_TL(LLDP_TYPE_END, 0); + else + memset(buf, 0, len); + return 2; +} + +static int lldp_dump_end(const struct lldp_entry *e, char *buf, size_t len) +{ + return snprintf(buf, len, "%s\n", "End of LLDPDU TLV"); +} + +static int lldp_parse_type_chassis_id(const char *lldpdu, lldp_type_t *type, uint16_t *len) +{ + assert(type != NULL); + + type->type = DPVS_LLDP_TL_TYPE((uint16_t)(*lldpdu)); + if (!LLDP_TYPE_VALID(type->type)) { + type->type = 0; + return EDPVS_INVAL; + } + + type->subtype = *(lldpdu + 2); + if (!LLDP_CHASSIS_ID_VALID(type->subtype)) { + type->subtype = 0; + return EDPVS_INVAL; + } + + if (len) + *len = DPVS_LLDP_TL_LEN(*((uint16_t *)lldpdu)); + + return EDPVS_OK; +} + +static int lldp_local_pdu_chassis_id(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + if (len >= 2 + 7) { + *((uint16_t *)buf) = DPVS_LLDP_TL(LLDP_TYPE_CHASSIS_ID, 7); + buf[2] = LLDP_CHASSIS_ID_MAC_ADDRESS; + rte_memcpy(&buf[3], &dev->addr, 6); + } else { + memset(buf, 0, len); + } + return 2 + 7; +} + +static int lldp_dump_chassis_id(const struct lldp_entry *e, char *buf, size_t len) +{ + const uint8_t *ptr = (const uint8_t *)e->value; /* Chassis ID Type */ + int pos = 0; + char tbuf[512], ipbuf[64]; + + lldp_dump_snprintf(tbuf, pos, "%s (%d)\n", "Chassis ID TLV", e->type.type); + + assert(e->type.subtype == *ptr); + ++ptr; /* Chassis ID Data */ + switch (e->type.subtype) { + case LLDP_CHASSIS_ID_CHASSIS_COMPONENT: + lldp_dump_snprintf(tbuf, pos, "%s", "\tChassis Component: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + case LLDP_CHASSIS_ID_INTERFACE_ALIAS: + lldp_dump_snprintf(tbuf, pos, "%s", "\tInterface Alias: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + case 
LLDP_CHASSIS_ID_PORT_COMPONENT: + lldp_dump_snprintf(tbuf, pos, "%s", "\tPort Component: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + case LLDP_CHASSIS_ID_MAC_ADDRESS: + if (unlikely(e->len < 7)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tMAC: %02x:%02x:%02x:%02x:%02x:%02x\n", + ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5]); + break; + case LLDP_CHASSIS_ID_NETWORK_ADDRESS: + switch (*ptr) { + case LLDP_ADDR_IPV4: + if (unlikely(e->len < 6)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tIPv4: %s\n", inet_ntop(AF_INET, ptr + 1, + ipbuf, sizeof(ipbuf)) ?: "Unknown"); + break; + case LLDP_ADDR_IPV6: + if (unlikely(e->len < 18)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tIPv6: %s\n", inet_ntop(AF_INET6, ptr + 1, + ipbuf, sizeof(ipbuf)) ?: "Unknown"); + break; + default: + if (unlikely(e->len <= 2)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tNetwork Address Type %d:", *ptr); + pos += binary2hexstr(ptr + 1, e->len - 2, &tbuf[pos], sizeof(tbuf) - pos); + if (unlikely(pos >= sizeof(tbuf))) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + break; + } + break; + case LLDP_CHASSIS_ID_INTERFACE_NAME: + lldp_dump_snprintf(tbuf, pos, "%s", "\tInterface Name: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + case LLDP_CHASSIS_ID_LOCALLY_ASSIGNED: + lldp_dump_snprintf(tbuf, pos, "%s", "\tLocal: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + default: + lldp_dump_snprintf(tbuf, pos, "\t%s: ", "Bad Chassis ID"); + pos += binary2print(ptr, e->len - 1, &tbuf[pos], sizeof(tbuf) - pos); + if (unlikely(pos >= sizeof(tbuf))) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + break; + } + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 1] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + return pos; +} + +static int lldp_parse_type_port_id(const char *lldpdu, lldp_type_t *type, uint16_t 
*len) +{ + assert(type != NULL); + + type->type = DPVS_LLDP_TL_TYPE((uint16_t)(*lldpdu)); + if (!LLDP_TYPE_VALID(type->type)) { + type->type = 0; + return EDPVS_INVAL; + } + + type->subtype = *(lldpdu + 2); + if (!LLDP_PORT_ID_VALID(type->subtype)) { + type->subtype = 0; + return EDPVS_INVAL; + } + + if (len) + *len = DPVS_LLDP_TL_LEN(*((uint16_t *)lldpdu)); + + return EDPVS_OK; +} + +static int lldp_local_pdu_port_id(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + size_t datalen = strlen(dev->name); + + assert(datalen < IFNAMSIZ); + + if (len >= 2 + 1 + datalen) { + *((uint16_t *)buf) = DPVS_LLDP_TL(LLDP_TYPE_PORT_ID, 1 + datalen); + buf[2] = LLDP_PORT_ID_INTERFACE_NAME; + rte_memcpy(&buf[3], &dev->name, datalen); + } else { + memset(buf, 0, len); + } + + return 2 + 1 + datalen; +} + +static int lldp_dump_port_id(const struct lldp_entry *e, char *buf, size_t len) +{ + const uint8_t *ptr = (const uint8_t *)e->value; /* Port ID Subtype */ + int pos = 0; + char tbuf[512], ipbuf[64]; + + lldp_dump_snprintf(tbuf, pos, "%s (%d)\n", "Port ID TLV", e->type.type); + assert(e->type.subtype == *ptr); + + ++ptr; /* Port ID Data */ + switch (e->type.subtype) { + case LLDP_PORT_ID_INTERFACE_ALIAS: + lldp_dump_snprintf(tbuf, pos, "%s", "\tInterface Alias: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + case LLDP_PORT_ID_PORT_COMPONENT: + lldp_dump_snprintf(tbuf, pos, "%s", "\tPort Component: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + case LLDP_PORT_ID_MAC_ADDRESS: + if (unlikely(e->len < 7)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tMAC: %02x:%02x:%02x:%02x:%02x:%02x\n", + ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5]); + break; + case LLDP_PORT_ID_NETWORK_ADDRESS: + switch (*ptr) { + case LLDP_ADDR_IPV4: + if (unlikely(e->len < 6)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tIPv4: %s\n", inet_ntop(AF_INET, ptr + 1, + ipbuf, sizeof(ipbuf)) ?: "Unknown"); + break; + case 
LLDP_ADDR_IPV6: + if (unlikely(e->len < 18)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tIPv6: %s\n", inet_ntop(AF_INET6, ptr + 1, + ipbuf, sizeof(ipbuf)) ?: "Unknown"); + break; + default: + if (unlikely(e->len <= 2)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tNetwork Address Type %d:", *ptr); + pos += binary2hexstr(ptr + 1, e->len - 2, &tbuf[pos], sizeof(tbuf) - pos); + if (unlikely(pos >= sizeof(tbuf))) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + break; + } + break; + case LLDP_PORT_ID_INTERFACE_NAME: + lldp_dump_snprintf(tbuf, pos, "%s", "\tInterface Name: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len - 1, "\n"); + break; + case LLDP_PORT_ID_AGENT_CIRCUIT_ID: + lldp_dump_snprintf(tbuf, pos, "\t%s: ", "Agent Circuit ID"); + pos += binary2hexstr(ptr, e->len - 1, &tbuf[pos], sizeof(tbuf) - pos); + if (unlikely(pos >= sizeof(tbuf))) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + break; + case LLDP_PORT_ID_LOCALLY_ASSIGNED: + lldp_dump_snprintf(tbuf, pos, "%s", "\tLocal: "); + lldp_dump_strcpy(tbuf, pos, ptr, e->len -1, "\n"); + break; + default: + lldp_dump_snprintf(tbuf, pos, "\t%s: ", "Bad Port ID"); + pos += binary2print(ptr, e->len - 1, &tbuf[pos], sizeof(tbuf) - pos); + if (unlikely(pos >= sizeof(tbuf))) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + break; + } + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 1] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + return pos; +} + +static int lldp_local_pdu_ttl(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + uint16_t *data; + + if (len >= 4) { + data = (uint16_t *)buf; + *data++ = DPVS_LLDP_TL(LLDP_TYPE_TTL, 2); + *data = rte_cpu_to_be_16(DPVS_LLDP_TTL_DEFAULT); + } else { + memset(buf, 0, len); + } + + return 4; +} + +static int lldp_dump_ttl(const struct lldp_entry *e, char *buf, size_t len) +{ + uint16_t *ttl = (uint16_t *)e->value; + return 
snprintf(buf, len, "Time to Live TLV (%d)\n\t%d\n", e->type.type, rte_be_to_cpu_16(*ttl)); +} + +static int lldp_on_change_ttl(const struct lldp_entry *e) +{ + struct lldp_port *port = e->port; + uint16_t ttl; + + /* Lifespan of local lldp caches is not decided by ttl. Actually, they are + * updated periodically in every DPVS_LLDP_UPDATE_INTERVAL second. If not updated + * in 3 * DPVS_LLDP_UPDATE_INTERVAL seconds, they are expired and removed. + * */ + if (port->neigh == DPVS_LLDP_NODE_LOCAL) + return EDPVS_OK; + + ttl = rte_be_to_cpu_16(*((uint16_t *)e->value)); + if (ttl != port->timeout) { + RTE_LOG(INFO, LLDP, "%s: update neigh lldp ttl %u -> %u\n", __func__, port->timeout, ttl); + port->timeout = ttl; + } + + return EDPVS_OK; +} + +static int lldp_local_pdu_port_desc(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + size_t desc_len; + char desc[128]; + + desc_len = snprintf(desc, sizeof(desc), "DPVS Server Port: Interface %s, Index %d, Kni %s", + dev->name, dev->id, dev->kni.kni ? 
dev->kni.name : "None"); + if (2 + desc_len <= len) { + *((uint16_t *)buf) = DPVS_LLDP_TL(LLDP_TYPE_PORT_DESC, desc_len); + rte_memcpy(&buf[2], desc, desc_len); + } else { + memset(buf, 0, len); + } + + return 2 + desc_len; +} + +static int lldp_dump_port_desc(const struct lldp_entry *e, char *buf, size_t len) +{ + int pos = 0; + char tbuf[1024]; + + lldp_dump_snprintf(tbuf, pos, "Port Description TLV (%d)\n\t", e->type.type); + if (likely(e->len > 0)) + lldp_dump_strcpy(tbuf, pos, e->value, e->len, "\n"); + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 1] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + + return pos; +} + +static int lldp_local_pdu_sys_name(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + size_t host_len; + char hostname[HOST_NAME_MAX + 1]; + + if (unlikely(gethostname(hostname, sizeof(hostname)) != 0)) + snprintf(hostname, sizeof(hostname), "%s", "Unknown"); + + host_len = strlen(hostname); + if (2 + host_len <= len) { + *((uint16_t *)buf) = DPVS_LLDP_TL(LLDP_TYPE_SYS_NAME, host_len); + rte_memcpy(&buf[2], hostname, host_len); + } else { + memset(buf, 0, len); + } + + return 2 + host_len; +} + +static int lldp_dump_sys_name(const struct lldp_entry *e, char *buf, size_t len) +{ + int pos = 0; + char tbuf[1024]; + + lldp_dump_snprintf(tbuf, pos, "System Name TLV (%d)\n\t", e->type.type); + if (likely(e->len > 0)) + lldp_dump_strcpy(tbuf, pos, e->value, e->len, "\n"); + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 1] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + + return pos; +} + +static int lldp_local_pdu_sys_desc(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + int rc; + + rc = snprintf(buf + 2, len - 2, "%s %s %s %s %s, Serail Number %s", + lldp_uname.sysname, lldp_uname.nodename, lldp_uname.release, + lldp_uname.version, lldp_uname.machine, lldp_sn); + if (unlikely(rc < 0)) + return EDPVS_IO; + 
*((uint16_t *)buf) = DPVS_LLDP_TL(LLDP_TYPE_SYS_DESC, rc); + + return rc; +} + +static int lldp_dump_sys_desc(const struct lldp_entry *e, char *buf, size_t len) +{ + int pos = 0; + char tbuf[1024]; + + lldp_dump_snprintf(tbuf, pos, "System Description TLV (%d)\n\t", e->type.type); + if (likely(e->len > 0)) + lldp_dump_strcpy(tbuf, pos, e->value, e->len, "\n"); + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 1] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + return pos; +} + +static const char *lldp_bit2sys_cap(uint16_t capacities, uint8_t bitpos) +{ + switch (capacities & (1UL << bitpos)) { + case 0x0001: + return "Other"; + case 0x0002: + return "Repeater"; + case 0x0004: + return "Bridge"; + case 0x0008: + return "WLAN Access Point"; + case 0x0010: + return "Router"; + case 0x0020: + return "Telephone"; + case 0x0040: + return "DOCSIS cable device"; + case 0x0080: + return "Station Only"; + case 0x0100: + return "Client"; + case 0x0200: + return "ISDN Terminal Adapter"; + case 0x0400: + return "Cryptographic Device"; + case 0x0800: + return "Voice Gateway"; + case 0x1000: + return "LAN Endpoint"; + case 0x2000: + case 0x4000: + case 0x8000: + return "Reserved"; + default: + return ""; + } + return ""; +} + +static int lldp_local_pdu_sys_cap(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + if (len >= 2 + 4) { + *((uint16_t *)&buf[0]) = DPVS_LLDP_TL(LLDP_TYPE_SYS_CAP, 4); + *((uint16_t *)&buf[2]) = rte_cpu_to_be_16(0x80); /* Capacity: Station Only */ + *((uint16_t *)&buf[4]) = rte_cpu_to_be_16(0x80); /* Enabled: Station Only */ + } + + return 2 + 4; +} + +static int lldp_dump_sys_cap(const struct lldp_entry *e, char *buf, size_t len) +{ + uint8_t i, first; + uint16_t capacities, enables; + int pos = 0; + char tbuf[256]; + + if (e->len != 4) + return EDPVS_INVPKT; + capacities = rte_be_to_cpu_16(*((uint16_t *)&e->value[0])); + enables = rte_be_to_cpu_16(*((uint16_t *)&e->value[2])); + + 
lldp_dump_snprintf(tbuf, pos, "System Capabilities TLV (%d)\n", e->type.type); + + first = 1; + for (i = 0; i < 16; i++) { + if (!(capacities & (1UL << i))) + continue; + if (first) { + lldp_dump_snprintf(tbuf, pos, "\tSystem capabilities: %s", + lldp_bit2sys_cap(capacities, i)); + first = 0; + } else { + lldp_dump_snprintf(tbuf, pos, ", %s", lldp_bit2sys_cap(capacities, i)); + } + } + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + + first = 1; + for (i = 0; i < 16; i++) { + if (!(enables & (1UL << i))) + continue; + if (first) { + lldp_dump_snprintf(tbuf, pos, "\tEnabled capabilities: %s", + lldp_bit2sys_cap(enables, i)); + first = 0; + } else { + lldp_dump_snprintf(tbuf, pos, ", %s", lldp_bit2sys_cap(enables, i)); + } + } + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 11] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + return pos; +} + +static int lldp_parse_type_mng_addr(const char *lldpdu, lldp_type_t *type, uint16_t *len) +{ + assert(NULL != type); + + type->type = DPVS_LLDP_TL_TYPE((uint16_t)(*lldpdu)); + if (!LLDP_TYPE_VALID(type->type)) { + type->type = 0; + return EDPVS_INVAL; + } + type->subtype = *((uint8_t *)(lldpdu + 3)); + + if (len) + *len = DPVS_LLDP_TL_LEN(*((uint16_t *)lldpdu)); + + return EDPVS_OK; +} + +static int lldp_local_pdu_mng_addr(const struct netif_port *dev, uint32_t subtype, char *buf, size_t len) +{ + int rc; + uint8_t tbuf[512]; + uint8_t *ptr; + struct sockaddr_storage addr; + char ifname[IFNAMSIZ]; + + ptr = tbuf + 2; + *(ptr + 1) = subtype; + switch (subtype) { + case LLDP_ADDR_ALL802: + *ptr = 7; + rte_memcpy(ptr + 2, &dev->addr, 6); + ptr += 8; + break; + case LLDP_ADDR_IPV4: + *ptr = 5; + rc = get_host_addr(dev->kni.kni ? 
dev->kni.name : NULL, &addr, NULL, ifname, NULL); + if (rc < 0) + return rc; + if (rc & 0x1) + rte_memcpy(ptr + 2, &((struct sockaddr_in *)&addr)->sin_addr.s_addr, 4); + else + ifname[0] = '\0'; + ptr += 6; + break; + case LLDP_ADDR_IPV6: + *ptr = 17; + rc = get_host_addr(dev->kni.kni ? dev->kni.name : NULL, NULL, &addr, NULL, ifname); + if (rc < 0) + return rc; + if (rc &0x2) + rte_memcpy(ptr + 2, &((struct sockaddr_in6 *)&addr)->sin6_addr, 16); + else + ifname[0] = '\0'; + ptr += 18; + break; + default: + return EDPVS_NOTSUPP; + } + + if (subtype == LLDP_ADDR_ALL802) { + *ptr++ = 2; /* Interface Subtype: Ifindex */ + *((uint32_t *)ptr) = rte_cpu_to_be_32(dev->id); + } else if (ifname[0]) { + *ptr++ = 2; /* Interface Subtype: Ifindex */ + rc = linux_ifname2index(ifname); + if (rc < 0) + return EDPVS_SYSCALL; + *((uint32_t *)ptr) = rte_cpu_to_be_32(rc); + } else { + *ptr++ = 1; /* Interface Subtype: Unknown */ + *((uint32_t *)ptr) = 0; + } + + ptr += 4; /* OID String Length */ + *ptr++ = 0; + + *((uint16_t *)tbuf) = DPVS_LLDP_TL(LLDP_TYPE_MNG_ADDR, ptr - tbuf - 2); + + if (ptr - tbuf > len) + rte_memcpy(buf, tbuf, len); + else + rte_memcpy(buf, tbuf, ptr - tbuf); + return ptr - tbuf; +} + +static int lldp_dump_mng_addr(const struct lldp_entry *e, char *buf, size_t len) +{ + const uint8_t *ptr = (const uint8_t *)e->value; /* Address Length */ + uint8_t addrlen, intf_subtype, oidlen; + int pos = 0; + char tbuf[1024], ipbuf[64]; + + lldp_dump_snprintf(tbuf, pos, "%s (%d)\n", "Management Address TLV", e->type.type); + addrlen = *ptr; + ++ptr; /* Address Subtype */ + assert(e->type.subtype == *ptr); + + ++ptr; /* Management Address */ + switch (e->type.subtype) { + case LLDP_ADDR_ALL802: + if (unlikely(addrlen < 7)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tMAC: %02x:%02x:%02x:%02x:%02x:%02x\n", + ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5]); + break; + case LLDP_ADDR_IPV4: + if (unlikely(addrlen < 5)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, 
pos, "\tIPv4: %s\n", + inet_ntop(AF_INET, ptr, ipbuf, sizeof(ipbuf)) ?: "Unknown"); + break; + case LLDP_ADDR_IPV6: + if (unlikely(addrlen < 17)) + return EDPVS_INVPKT; + lldp_dump_snprintf(tbuf, pos, "\tIPv6: %s\n", + inet_ntop(AF_INET6, ptr, ipbuf, sizeof(ipbuf)) ?: "Unknown"); + break; + default: + lldp_dump_snprintf(tbuf, pos, "\tNetwork Address Type(%d): ", e->type.subtype); + pos += binary2hexstr(ptr, addrlen - 1, &tbuf[pos], sizeof(tbuf) - pos); + if (unlikely(pos >= sizeof(tbuf))) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + break; + } + + ptr = ptr + addrlen - 1; /* Interface Subtype */ + intf_subtype = *ptr; + switch (intf_subtype) { + case 1: + lldp_dump_snprintf(tbuf, pos, "\tUnknown interface subtype(%d): ", intf_subtype); + break; + case 2: + lldp_dump_snprintf(tbuf, pos, "%s", "\tIfindex: "); + break; + case 3: + lldp_dump_snprintf(tbuf, pos, "%s", "\tSystem port number: "); + break; + default: + lldp_dump_snprintf(tbuf, pos, "\tUnsupported interface subtype(%d): ", intf_subtype); + break; + } + ++ptr; /* Interface */ + lldp_dump_snprintf(tbuf, pos, "%d\n", rte_be_to_cpu_32(*((uint32_t *)ptr))); + + ptr += 4; /* OID String Length */ + oidlen = *ptr; + + ++ptr; /* OID String */ + if (oidlen > 128) + lldp_dump_snprintf(tbuf, pos, "\tOID: Invalid length = %d\n", oidlen); + else if (oidlen > 0) { + lldp_dump_snprintf(tbuf, pos, "%s", "\tOID: "); + pos += binary2hexstr((const uint8_t *)ptr, oidlen, &tbuf[pos], sizeof(tbuf) - pos); + if (pos >= sizeof(tbuf)) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + } + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 1] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + return pos; +} + +static int lldp_parse_type_org(const char *lldpdu, lldp_type_t *type, uint16_t *len) +{ + assert(type != NULL); + + type->type = DPVS_LLDP_TL_TYPE((uint16_t)(*lldpdu)); + if (!LLDP_TYPE_VALID(type->type)) { + type->type = 0; + return EDPVS_INVAL; 
+ } + + /* subtype := ((24-bit Orgnization Unique Code) << 8) | (8-bit Subtype) */ + type->subtype = rte_be_to_cpu_32(*((uint32_t *)&lldpdu[2])); + + if (len) + *len = DPVS_LLDP_TL_LEN(*((uint16_t *)lldpdu)); + + return EDPVS_OK; +} + +static int lldp_dump_org_specific(const struct lldp_entry *e, char *buf, size_t len) +{ + // TODO: Implement Organizationally Specific TLVs + + const unsigned char *ptr = (unsigned char *)e->value; + int pos = 0; + char tbuf[1024]; + + if (e->len < 4) + return EDPVS_INVPKT; + + lldp_dump_snprintf(tbuf, pos, "Organizationally Specific TLV (%d): Code %02x:%02x:%02x, " + "Subtype %02d\n\t", e->type.type, ptr[0], ptr[1], ptr[2], ptr[3]); + pos += binary2hexstr((const uint8_t *)(&ptr[4]), e->len - 4, &tbuf[pos], sizeof(tbuf) - pos); + if (pos >= sizeof(tbuf)) + return EDPVS_NOROOM; + lldp_dump_snprintf(tbuf, pos, "%c", '\n'); + + if (pos >= len) { + rte_memcpy(buf, tbuf, len - 1); + buf[len - 1] = '\0'; + } else { + rte_memcpy(buf, tbuf, pos); + buf[pos] = '\0'; + } + return pos; +} + +static struct lldp_port *lldp_port_get(portid_t pid, uint16_t neigh) +{ + struct lldp_port *lp; + + if (unlikely(neigh >= DPVS_LLDP_NODE_MAX)) + return NULL; + + list_for_each_entry(lp, &lldp_ports[neigh], node) { + if (lp->dev->id == pid) { + assert(lp->neigh == neigh); + return lp; + } + } + return NULL; +} + +static void lldp_port_hash(struct lldp_port *port) +{ + struct lldp_port *entry, *next = NULL; + + assert(port->neigh < DPVS_LLDP_NODE_MAX); + + list_for_each_entry(entry, &lldp_ports[port->neigh], node) { + if (entry->dev->id >= port->dev->id) { + next = entry; + break; + } + } + + if (NULL != next) + list_add_tail(&port->node, &next->node); + else + list_add_tail(&port->node, &lldp_ports[port->neigh]); +} + +static inline void lldp_port_unhash(struct lldp_port *port) +{ + list_del_init(&port->node); +} + +static int lldp_entry_del(struct lldp_entry *entry); +static int lldp_port_del(struct lldp_port *port, bool in_timer) +{ + int err; + struct 
lldp_entry *entry, *next; + + lldp_port_unhash(port); + + list_for_each_entry_safe(entry, next, &port->head, node) { + err = lldp_entry_del(entry); + if (err != EDPVS_OK) + RTE_LOG(WARNING, LLDP, "%s: fail to del lldp %s entry, port %s type %d:%d error %s\n", + __func__, port->neigh ? "neigh" : "local", port->dev->name, + entry->type.type, entry->type.subtype, dpvs_strerror(err)); + } + assert(port->entries == 0); + + if (in_timer) + err = dpvs_timer_cancel_nolock(&port->timer, true); + else + err = dpvs_timer_cancel(&port->timer, true); + if (err != EDPVS_OK) + RTE_LOG(WARNING, LLDP, "%s: fail to cancel lldp port timer, port %s error %s\n", + __func__, port->dev->name, dpvs_strerror(err)); + + rte_free(port); + return EDPVS_OK; +} + +static int lldp_port_timeout(void *arg) +{ + struct lldp_port *port = arg; + + RTE_LOG(DEBUG, LLDP,"%s: %s lldp cache on %s expired\n", __func__, + port->neigh == DPVS_LLDP_NODE_LOCAL ? "local" : "neighbor", + port->dev->name); + + lldp_port_del(port, true); + return DTIMER_STOP; +} + +static int lldp_port_add(struct netif_port *dev, uint16_t neigh, uint16_t timeout, bool in_timer) +{ + int err; + struct lldp_port *lp; + struct timeval to = { .tv_sec = timeout }; + + if (neigh >= DPVS_LLDP_NODE_MAX) + return EDPVS_INVAL; + + if (lldp_port_get(dev->id, neigh)) + return EDPVS_EXIST; + + lp = rte_zmalloc("lldp_port", sizeof(*lp), RTE_CACHE_LINE_SIZE); + if (unlikely(!lp)) + return EDPVS_NOMEM; + + lp->dev = dev; + lp->neigh = neigh; + lp->timeout = timeout ?: DPVS_LLDP_TTL_DEFAULT; + INIT_LIST_HEAD(&lp->head); + + lldp_port_hash(lp); + + dpvs_time_rand_delay(&to, 1000000); + if (in_timer) + err = dpvs_timer_sched_nolock(&lp->timer, &to, lldp_port_timeout, lp, true); + else + err = dpvs_timer_sched(&lp->timer, &to, lldp_port_timeout, lp, true); + if (err != EDPVS_OK) { + lldp_port_unhash(lp); + rte_free(lp); + return err; + } + + return EDPVS_OK; +} + +static struct lldp_entry *lldp_entry_get(const struct lldp_port *port, lldp_type_t 
type) +{ + struct lldp_entry *e; + + if (unlikely(NULL == port)) + return NULL; + + list_for_each_entry(e, &port->head, node) { + if (lldp_type_equal(e->type, type)) + return e; + } + return NULL; +} + +static void lldp_entry_hash(struct lldp_entry *e, struct lldp_port *port) +{ + struct lldp_entry *entry, *next = NULL; + + /* put LLDP_TYPE_END node at tail */ + if (unlikely(!e->type.type)) { + list_add_tail(&e->node, &port->head); + ++port->entries; + return; + } + + list_for_each_entry(entry, &port->head, node) { + if (!entry->type.type || lldp_type_cmp(&entry->type, &e->type) >= 0) { + next = entry; + break; + } + } + + if (NULL != next) + list_add_tail(&e->node, &next->node); + else + list_add_tail(&e->node, &port->head); + ++port->entries; +} + +static inline void lldp_entry_unhash(struct lldp_entry *e) +{ + list_del_init(&e->node); + --e->port->entries; +} + +static int lldp_entry_del(struct lldp_entry *entry) +{ + lldp_entry_unhash(entry); + rte_free(entry); + return EDPVS_OK; +} + +static int lldp_entry_add(struct lldp_port *port, char *lldpdu) +{ + int err; + lldp_type_t type; + uint16_t len; + struct lldp_entry *entry; + struct lldp_type_ops *ops; + + type.type = DPVS_LLDP_TL_TYPE((uint16_t)(*lldpdu)); + ops = lldp_type_get(type); + if (!ops) + return EDPVS_NOTSUPP; + err = ops->parse_type(lldpdu, &type, &len); + if (EDPVS_OK != err) + return err; + assert(len <= DPVS_LLDP_PDU_MAX); + + entry = lldp_entry_get(port, type); + if (entry) { + /* do update */ + if (entry->len >= len) { + entry->len = len; + entry->stale = 0; + rte_memcpy(&entry->typelen, lldpdu, len + 2); + if (ops->on_change) + return ops->on_change(entry); + return EDPVS_OK; + } + lldp_entry_del(entry); + } + + entry = rte_zmalloc("lldp_entry", sizeof(struct lldp_entry) + len + 2, RTE_CACHE_LINE_SIZE); + if (unlikely(!entry)) + return EDPVS_NOMEM; + entry->type = type; + entry->len = len; + entry->port = port; + rte_memcpy(&entry->typelen, lldpdu, len + 2); + + lldp_entry_hash(entry, port); 
+ + if (ops->on_change) + return ops->on_change(entry); + return EDPVS_OK; +} + +static int lldp_dump_pdu(const struct lldp_port *port, char *buf, size_t buflen) +{ + int rc; + size_t room; + char *ptr; + struct lldp_entry *e; + struct lldp_type_ops *ops; + + ptr = buf; + room = buflen; + list_for_each_entry(e, &port->head, node) { + if (room <= 0) + return EDPVS_NOROOM; + ops = lldp_type_get(e->type); + if (unlikely(!ops)) + return EDPVS_NOTSUPP; + if (ops->dump) { + rc = ops->dump(e, ptr, room); + if (unlikely(rc < 0)) + return rc; + if (unlikely(rc > room)) + return EDPVS_NOROOM; + ptr += rc; + room -= rc; + } + } + + return EDPVS_OK; +} + +static int lldp_pdu_local_update(struct netif_port *dev, bool in_timer) +{ + int i, rc; + struct lldp_port *port; + struct lldp_type_ops *ops; + char buf[DPVS_LLDP_PDU_MAX]; + + static lldp_type_t local_lldp_types[] = { + { LLDP_TYPE_CHASSIS_ID, LLDP_CHASSIS_ID_MAC_ADDRESS }, + { LLDP_TYPE_PORT_ID, LLDP_PORT_ID_INTERFACE_NAME }, + { LLDP_TYPE_TTL, 0 }, + { LLDP_TYPE_PORT_DESC, 0 }, + { LLDP_TYPE_SYS_NAME, 0 }, + { LLDP_TYPE_SYS_DESC, 0 }, + { LLDP_TYPE_SYS_CAP, 0 }, + { LLDP_TYPE_MNG_ADDR, 1 }, /* ipv4 */ + { LLDP_TYPE_MNG_ADDR, 2 }, /* ipv6 */ + { LLDP_TYPE_END, 0 }, + }; + + port = lldp_port_get(dev->id, DPVS_LLDP_NODE_LOCAL); + if (!port) { + /* timeout of 3*DPVS_LLDP_UPDATE_INTERVA ensures local lldp caches persist */ + rc = lldp_port_add(dev, DPVS_LLDP_NODE_LOCAL, 3 * DPVS_LLDP_UPDATE_INTERVAL, in_timer); + if (unlikely(EDPVS_OK != rc)) + return rc; + port = lldp_port_get(dev->id, DPVS_LLDP_NODE_LOCAL); + assert(port != NULL); + } + + for (i = 0; i < NELEMS(local_lldp_types); i++) { + ops = lldp_type_get(local_lldp_types[i]); + if (!ops || !ops->local_lldp) + continue; + rc = ops->local_lldp(dev, local_lldp_types[i].subtype, buf, sizeof(buf)); + if (unlikely(rc < 0)) { + RTE_LOG(INFO, LLDP, "%s: fail to generate local lldp pdu, type %d.%d," + " err %s\n", __func__, local_lldp_types[i].type, + local_lldp_types[i].subtype, 
dpvs_strerror(rc)); + continue; + } + if (unlikely(rc > sizeof(buf))) + return EDPVS_NOROOM; + rc = lldp_entry_add(port, buf); + if (EDPVS_OK != rc) + return rc; + } + + if (in_timer) + dpvs_timer_reset_nolock(&port->timer, true); + else + dpvs_timer_reset(&port->timer, true); + + return EDPVS_OK; +} + +static int lldp_pdu_neigh_update(struct netif_port *dev, const struct rte_mbuf *mbuf, bool in_timer) +{ + int err; + char *ptr; + size_t totlen; + uint16_t typelen; + uint16_t len; + uint8_t type; + bool check_stale = false; + struct lldp_port *port; + struct lldp_entry *entry, *next; + struct timeval timeout; + + port = lldp_port_get(dev->id, DPVS_LLDP_NODE_NEIGH); + if (!port) { + err = lldp_port_add(dev, DPVS_LLDP_NODE_NEIGH, DPVS_LLDP_TTL_DEFAULT, in_timer); + if (unlikely(EDPVS_OK != err)) + return err; + port = lldp_port_get(dev->id, DPVS_LLDP_NODE_NEIGH); + assert(port != NULL); + } else { + check_stale = true; + list_for_each_entry(entry, &port->head, node) + entry->stale = 1; + } + + totlen = mbuf->data_len; + ptr = rte_pktmbuf_mtod(mbuf, char *); + while (totlen > 0) { + typelen = *((uint16_t*)ptr); + type = DPVS_LLDP_TL_TYPE(typelen); + len = DPVS_LLDP_TL_LEN(typelen) + 2; + err = lldp_entry_add(port, ptr); + if (unlikely(EDPVS_OK != err && EDPVS_NOTSUPP != err)) + return err; + totlen -= len; + ptr += len; + if (LLDP_TYPE_END == type) + break; + } + + if (check_stale) { + list_for_each_entry_safe(entry, next, &port->head, node) { + if (entry->stale) + lldp_entry_del(entry); + } + } + + timeout.tv_sec = port->timeout; + dpvs_time_rand_delay(&timeout, 1000000); + if (in_timer) + err = dpvs_timer_update_nolock(&port->timer, &timeout, true); + else + err = dpvs_timer_update(&port->timer, &timeout, true); + return err; +} + +static int lldp_local_update_all(void *arg) +{ + int err; + portid_t i, start, end; + struct netif_port *dev; + + RTE_LOG(DEBUG, LLDP, "%s: updating local lldp cache\n", __func__); + + netif_physical_port_range(&start, &end); + for (i = 
start; i < end; i++) { + dev = netif_port_get(i); + assert(dev != NULL); + if (!(dev->flag & NETIF_PORT_FLAG_LLDP)) + continue; + err = lldp_pdu_local_update(dev, true); + if (EDPVS_OK != err) + RTE_LOG(WARNING, LLDP, "%s: fail to update local lldp cache on port %s: %s\n", + __func__, dev->name, dpvs_strerror(err)); + } + + return DTIMER_OK; +} + +static int lldp_xmit(struct netif_port *dev, bool in_timer) +{ + int err; + char *ptr; + struct rte_mbuf *mbuf; + struct lldp_port *port; + struct lldp_entry *entry; + struct rte_ether_hdr *ehdr; + + port = lldp_port_get(dev->id, DPVS_LLDP_NODE_LOCAL); + if (!port || port->entries <= 0) { + err = lldp_pdu_local_update(dev, in_timer); // FIXME: update lldp cache asynchronously + if (EDPVS_OK != err) { + RTE_LOG(ERR, LLDP, "%s: lldp_pdu_local_update failed: %s\n", + __func__, dpvs_strerror(err)); + return err; + } + port = lldp_port_get(dev->id, DPVS_LLDP_NODE_LOCAL); + if (unlikely(!port)) + return EDPVS_NOTEXIST; + if (port->entries <= 0) + return EDPVS_OK; + } + + mbuf = rte_pktmbuf_alloc(dev->mbuf_pool); + if (unlikely(!mbuf)) + return EDPVS_NOMEM; + mbuf_userdata_reset(mbuf); + + list_for_each_entry(entry, &port->head, node) { + ptr = rte_pktmbuf_append(mbuf, entry->len + 2); + if (unlikely(!ptr)) + return EDPVS_NOROOM; + rte_memcpy(ptr, &entry->typelen, entry->len + 2); + } + + ehdr = (struct rte_ether_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(*ehdr)); + if (unlikely(!ptr)) + return EDPVS_NOROOM; + rte_memcpy(&ehdr->d_addr, &LLDP_ETHER_ADDR_DST, sizeof(ehdr->d_addr)); + rte_memcpy(&ehdr->s_addr, &dev->addr, sizeof(ehdr->s_addr)); + ehdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_LLDP); + + if (dev->type == PORT_TYPE_BOND_SLAVE) { + // FIXME: + // How to send LLDP packet on a specified slave port? I found no solutions to it via + // DPDK API. Maybe changes should be made to bond PMD driver to solve the problem. 
+ // So I save the slave port id in mbuf, and hope bond PMD driver may consider it when + // distributing mbufs to slave ports. + // + // Store the slave port id into mbuf->port? + // No! mbuf->port is reset to the bond master's port id in the forthcoming transmit process. + // Use mbuf->hash.txadapter.reserved2 instead. Hope no conflictions. Remember to reset it to + // RTE_MBUF_PORT_INVALID in rte_pktmbuf_alloc. + // + mbuf->hash.txadapter.reserved2 = dev->id; + //MBUF_USERDATA(mbuf, portid_t, MBUF_FIELD_ORIGIN_PORT) = port->id; + dev = dev->bond->slave.master; + } + + return netif_xmit(mbuf, dev); + +} + +static int lldp_xmit_all(void *arg) +{ + int err; + portid_t i, start, end; + struct netif_port *dev; + + netif_physical_port_range(&start, &end); + for (i = start; i < end; i++) { + dev = netif_port_get(i); + assert(dev != NULL); + if (!(dev->flag & NETIF_PORT_FLAG_LLDP)) + continue; + err = lldp_xmit(dev, true); + if (EDPVS_OK != err) + RTE_LOG(WARNING, LLDP, "%s: fail to xmit lldp frame on port %s: %s\n", + __func__, dev->name, dpvs_strerror(err)); + } + + return DTIMER_OK; +} + +static int lldp_ether_addr_filter(bool add) +{ + int err; + portid_t i, start, end; + struct netif_port *dev; + + netif_physical_port_range(&start, &end); + for (i = start; i < end; i++) { + dev = netif_port_get(i); + assert(dev != NULL); + if (add) + err = netif_mc_add(dev, &LLDP_ETHER_ADDR_DST); + else + err = netif_mc_del(dev, &LLDP_ETHER_ADDR_DST); + if (err != EDPVS_OK) + return err; + } + + return EDPVS_OK; +} + +static int lldp_xmit_start(void) +{ + int err; + struct timeval timeout1 = { .tv_sec = DPVS_LLDP_TX_INTERVAL }; + struct timeval timeout2 = { .tv_sec = DPVS_LLDP_UPDATE_INTERVAL }; + + assert(rte_lcore_id() == rte_get_main_lcore()); + + err = lldp_ether_addr_filter(true); + if (EDPVS_OK != err && EDPVS_EXIST != err) { + RTE_LOG(WARNING, LLDP, "%s: failed to add lldp multicast ether address -- %s\n", + __func__, dpvs_strerror(err)); + return err; + } + + 
dpvs_time_rand_delay(&timeout1, 1000000); + err = dpvs_timer_sched_period(&lldp_xmit_timer, &timeout1, lldp_xmit_all, NULL, true); + if (EDPVS_OK != err) { + RTE_LOG(WARNING, LLDP, "%s: failed to schedule lldp_xmit_timer -- %s\n", + __func__, dpvs_strerror(err)); + lldp_ether_addr_filter(false); + return err; + } + + dpvs_time_rand_delay(&timeout2, 1000000); + err = dpvs_timer_sched_period(&lldp_update_timer, &timeout2, lldp_local_update_all, NULL, true); + if (EDPVS_OK != err) { + RTE_LOG(WARNING, LLDP, "%s: failed to schedule lldp_update_timer -- %s\n", + __func__, dpvs_strerror(err)); + dpvs_timer_cancel(&lldp_xmit_timer, true); + lldp_ether_addr_filter(false); + return err; + } + + return EDPVS_OK; +} + +static int lldp_xmit_stop(void) +{ + int err; + + assert(rte_lcore_id() == rte_get_main_lcore()); + + err = lldp_ether_addr_filter(false); + if (EDPVS_OK != err && EDPVS_NOTEXIST != err) { + RTE_LOG(WARNING, LLDP, "%s: failed to del lldp multicast ether address -- %s\n", + __func__, dpvs_strerror(err)); + return err; + } + + err = dpvs_timer_cancel(&lldp_xmit_timer, true); + if (EDPVS_OK != err) { + RTE_LOG(ERR, LLDP, "%s: failed to cancel lldp_xmit_timer -- %s\n", + __func__, dpvs_strerror(err)); + return err; + } + + err = dpvs_timer_cancel(&lldp_update_timer, true); + if (EDPVS_OK != err) { + RTE_LOG(ERR, LLDP, "%s: failed to cancel lldp_update_timer -- %s\n", + __func__, dpvs_strerror(err)); + return err; + } + + return EDPVS_OK; +} + +static int lldp_rcv(struct rte_mbuf *mbuf, struct netif_port *dev) +{ + int err; + portid_t pid; + static uint32_t seq = 0; + struct dpvs_msg *msg; + + if (!lldp_enable) + return EDPVS_KNICONTINUE; + + if (is_bond_port(dev->id)) { + pid = MBUF_USERDATA(mbuf, portid_t, MBUF_FIELD_ORIGIN_PORT); + dev = netif_port_get(pid); + if (unlikely(NULL == dev)) { + RTE_LOG(WARNING, LLDP, "%s: fail to find lldp physical device of port id %d\n", + __func__, pid); + rte_pktmbuf_free(mbuf); + return EDPVS_RESOURCE; + } + } + if (!(dev->flag 
& NETIF_PORT_FLAG_LLDP)) + return EDPVS_KNICONTINUE; + + /* redirect lldp mbuf to master lcore */ + msg = msg_make(MSG_TYPE_LLDP_RECV, seq++, DPVS_MSG_UNICAST, + rte_lcore_id(), sizeof(void *), &mbuf); + if (unlikely(NULL == msg)) { + rte_pktmbuf_free(mbuf); + return EDPVS_NOMEM; + } + + err = msg_send(msg, rte_get_main_lcore(), DPVS_MSG_F_ASYNC, NULL); + if (unlikely(EDPVS_OK != err)) { + RTE_LOG(WARNING, LLDP, "%s: fail to send mbuf to master lcore!\n", __func__); + rte_pktmbuf_free(mbuf); + } + msg_destroy(&msg); + return err; +} + +static int lldp_rcv_msg_cb(struct dpvs_msg *msg) +{ + int err; + portid_t pid, start, end; + struct netif_port *dev; + struct rte_mbuf *mbuf; + + mbuf = *(struct rte_mbuf **)(msg->data); + + pid = mbuf->port; + netif_bond_port_range(&start, &end); + if (pid < end && pid >= start) + pid = MBUF_USERDATA(mbuf, portid_t, MBUF_FIELD_ORIGIN_PORT); + + dev = netif_port_get(pid); + if (unlikely(NULL == dev)) { + RTE_LOG(WARNING, LLDP, "%s: fail to find lldp physical device of port id %d\n", + __func__, pid); + rte_pktmbuf_free(mbuf); + return EDPVS_RESOURCE; + } + + err = lldp_pdu_neigh_update(dev, mbuf, false); + rte_pktmbuf_free(mbuf); /* always consume the mbuf */ + return err; +} + +static int lldp_rcv_msg_register(void) +{ + lcoreid_t master_cid = rte_get_main_lcore(); + struct dpvs_msg_type mt = { + .type = MSG_TYPE_LLDP_RECV, + .mode = DPVS_MSG_UNICAST, + .prio = MSG_PRIO_LOW, + .cid = master_cid, + .unicast_msg_cb = lldp_rcv_msg_cb, + }; + + return msg_type_register(&mt); +} + +static int lldp_rcv_msg_unregister(void) +{ + lcoreid_t master_cid = rte_get_main_lcore(); + struct dpvs_msg_type mt = { + .type = MSG_TYPE_LLDP_RECV, + .mode = DPVS_MSG_UNICAST, + .prio = MSG_PRIO_LOW, + .cid = master_cid, + .unicast_msg_cb = lldp_rcv_msg_cb, + }; + + return msg_type_unregister(&mt); +} + +static int lldp_sockopt_set(sockoptid_t opt, const void *conf, size_t size) +{ + // TODO + return EDPVS_NOTSUPP; +} + +static int 
lldp_sockopt_get(sockoptid_t opt, const void *conf, size_t size, + void **out, size_t *outsize) +{ + const struct lldp_param *param = conf; + struct lldp_message *message; + struct netif_port *dev; + struct lldp_port *port; + int err; + + if (!out || !outsize) /* reject NULL before the first write through them */ + return EDPVS_INVAL; + *outsize = 0; /* no reply payload unless the dump below succeeds */ + *out = NULL; + if (!conf || size < sizeof(*param)) + return EDPVS_INVAL; + if (opt != SOCKOPT_GET_LLDP_SHOW) + return EDPVS_NOTSUPP; + + dev = netif_port_get_by_name(param->ifname); + if (!dev) { + RTE_LOG(WARNING, LLDP, "%s: no such device\n", __func__); + return EDPVS_NODEV; + } + + if (param->node >= DPVS_LLDP_NODE_MAX) { + RTE_LOG(WARNING, LLDP, "%s: invalid node type %d, only supports type " + "local(%d) and neigh(%d)\n", __func__, param->node, + DPVS_LLDP_NODE_LOCAL, DPVS_LLDP_NODE_NEIGH); + return EDPVS_INVAL; + } + + port = lldp_port_get(dev->id, param->node); + if (!port) { + RTE_LOG(INFO, LLDP, "%s: %s lldp port on %s not found!\n", __func__, + param->node == DPVS_LLDP_NODE_NEIGH ? "neighbor" : "local", dev->name); + return EDPVS_NOTEXIST; + } + + message = rte_calloc(NULL, 1, sizeof(*message), 0); + if (!message) + return EDPVS_NOMEM; + rte_memcpy(&message->param, param, sizeof(*param)); /* echo the request back in the reply */ + err = lldp_dump_pdu(port, message->message, sizeof(message->message)); + if (EDPVS_OK != err) { + RTE_LOG(WARNING, LLDP, "%s: lldp_dump_pdu failed -- %s\n", + __func__, dpvs_strerror(err)); + rte_free(message); + return err; + } + + *out = message; /* handed to the caller; not freed here */ + *outsize = sizeof(*message); + return EDPVS_OK; +} + +static struct dpvs_sockopts lldp_sockopts = { + .version = SOCKOPT_VERSION, + .set_opt_min = SOCKOPT_SET_LLDP_TODO, + .set_opt_max = SOCKOPT_SET_LLDP_TODO, + .set = lldp_sockopt_set, + .get_opt_min = SOCKOPT_GET_LLDP_SHOW, + .get_opt_max = SOCKOPT_GET_LLDP_SHOW, + .get = lldp_sockopt_get, +}; + +static struct lldp_type_ops lldp_ops[] = { + { + .type = LLDP_TYPE_END, + .parse_type = lldp_parse_type_default, + .local_lldp = lldp_local_pdu_end, + .dump = lldp_dump_end, + }, + { + .type =
LLDP_TYPE_CHASSIS_ID, + .parse_type = lldp_parse_type_chassis_id, + .local_lldp = lldp_local_pdu_chassis_id, + .dump = lldp_dump_chassis_id, + }, + { + .type = LLDP_TYPE_PORT_ID, + .parse_type = lldp_parse_type_port_id, + .local_lldp = lldp_local_pdu_port_id, + .dump = lldp_dump_port_id, + }, + { + .type = LLDP_TYPE_TTL, + .parse_type = lldp_parse_type_default, + .local_lldp = lldp_local_pdu_ttl, + .dump = lldp_dump_ttl, + .on_change = lldp_on_change_ttl, + }, + { + .type = LLDP_TYPE_PORT_DESC, + .parse_type = lldp_parse_type_default, + .local_lldp = lldp_local_pdu_port_desc, + .dump = lldp_dump_port_desc, + }, + { + .type = LLDP_TYPE_SYS_NAME, + .parse_type = lldp_parse_type_default, + .local_lldp = lldp_local_pdu_sys_name, + .dump = lldp_dump_sys_name, + }, + { + .type = LLDP_TYPE_SYS_DESC, + .parse_type = lldp_parse_type_default, + .local_lldp = lldp_local_pdu_sys_desc, + .dump = lldp_dump_sys_desc, + }, + { + .type = LLDP_TYPE_SYS_CAP, + .parse_type = lldp_parse_type_default, + .local_lldp = lldp_local_pdu_sys_cap, + .dump = lldp_dump_sys_cap, + }, + { + .type = LLDP_TYPE_MNG_ADDR, + .parse_type = lldp_parse_type_mng_addr, + .local_lldp = lldp_local_pdu_mng_addr, + .dump = lldp_dump_mng_addr, + }, + { + .type = LLDP_TYPE_ORG, + .parse_type = lldp_parse_type_org, + .local_lldp = NULL, + .dump = lldp_dump_org_specific, + } +}; + +static struct pkt_type dpvs_lldp_pkt_type = { + //.type = rte_cpu_to_be_16(RTE_ETHER_TYPE_LLDP), + .func = lldp_rcv, + .port = NULL, +}; + +int dpvs_lldp_init(void) +{ + int i, err; + + lldp_serail_number_init(); + + if (unlikely(uname(&lldp_uname) < 0)) + return EDPVS_SYSCALL; + + for (i = 0; i < DPVS_LLDP_NODE_MAX; i++) + INIT_LIST_HEAD(&lldp_ports[i]); + + for (i = 0; i < NELEMS(lldp_ops); i++) { + err = lldp_type_register(&lldp_ops[i]); + assert(EDPVS_OK == err); + } + + err = lldp_rcv_msg_register(); + if (EDPVS_OK != err) + goto unreg_lldp_ops; + + err = sockopt_register(&lldp_sockopts); + if (EDPVS_OK != err) + goto unreg_msg; + + 
dpvs_lldp_pkt_type.type = rte_cpu_to_be_16(RTE_ETHER_TYPE_LLDP); + err = netif_register_pkt(&dpvs_lldp_pkt_type); + if (EDPVS_OK != err) + goto unreg_sockopt; + + if (lldp_enable) { + err = lldp_xmit_start(); + if (EDPVS_OK != err) + goto unreg_pkttype; + } + + return EDPVS_OK; + +unreg_pkttype: + netif_unregister_pkt(&dpvs_lldp_pkt_type); +unreg_sockopt: + sockopt_unregister(&lldp_sockopts); +unreg_msg: + lldp_rcv_msg_unregister(); +unreg_lldp_ops: + for (i = 0; i < NELEMS(lldp_ops); i++) + lldp_type_unregister(&lldp_ops[i]); + return err; +} + +int dpvs_lldp_term(void) +{ + int i, err; + + if (lldp_enable) + lldp_xmit_stop(); /* result ignored: teardown continues regardless */ + + dpvs_lldp_pkt_type.type = rte_cpu_to_be_16(RTE_ETHER_TYPE_LLDP); + err = netif_unregister_pkt(&dpvs_lldp_pkt_type); + if (EDPVS_OK != err) + RTE_LOG(WARNING, LLDP, "%s: fail to unregister lldp packet type\n", __func__); + + err = sockopt_unregister(&lldp_sockopts); + if (EDPVS_OK != err) + RTE_LOG(WARNING, LLDP, "%s: fail to unregister lldp sockopt\n", __func__); + err = lldp_rcv_msg_unregister(); + if (EDPVS_OK != err) + RTE_LOG(WARNING, LLDP, "%s: fail to unregister lldp msg\n", __func__); + + for (i = 0; i < NELEMS(lldp_ops); i++) { + err = lldp_type_unregister(&lldp_ops[i]); + if (EDPVS_OK != err) + RTE_LOG(WARNING, LLDP, "%s: lldp_type_unregister(%d) failed\n", __func__, i); + } + + return EDPVS_OK; /* failures above are only logged */ +} diff --git a/src/main.c b/src/main.c index 8f8067c0f..5cf291a24 100644 --- a/src/main.c +++ b/src/main.c @@ -311,6 +311,7 @@ int main(int argc, char *argv[]) gettimeofday(&tv, NULL); srandom(tv.tv_sec ^ tv.tv_usec ^ getpid()); + srand48(tv.tv_sec ^ tv.tv_usec ^ getpid()); rte_srand((uint64_t)(tv.tv_sec ^ tv.tv_usec ^ getpid())); sys_start_time(); diff --git a/src/mbuf.c b/src/mbuf.c index 7d10d54d3..8c913eb36 100644 --- a/src/mbuf.c +++ b/src/mbuf.c @@ -209,6 +209,11 @@ int mbuf_init(void) .size = sizeof(mbuf_userdata_field_route_t), .align = 8, }, + [ MBUF_FIELD_ORIGIN_PORT ] = { + .name = "origin_port", + .size = sizeof(portid_t), +
.align = 2, + }, }; for (i = 0; i < NELEMS(rte_mbuf_userdata_fields); i++) { diff --git a/src/netif.c b/src/netif.c index a40252f21..3fd24aa0b 100644 --- a/src/netif.c +++ b/src/netif.c @@ -161,16 +161,32 @@ static struct list_head port_ntab[NETIF_PORT_TABLE_BUCKETS]; /* hashed by name * /* function declarations */ static void kni_lcore_loop(void *dummy); -static inline bool is_physical_port(portid_t pid) +bool is_physical_port(portid_t pid) { return pid >= phy_pid_base && pid < phy_pid_end; } -static inline bool is_bond_port(portid_t pid) +bool is_bond_port(portid_t pid) { return pid >= bond_pid_base && pid < bond_pid_end; } +void netif_physical_port_range(portid_t *start, portid_t *end) +{ + if (start) + *start = phy_pid_base; + if (end) + *end = phy_pid_end; +} + +void netif_bond_port_range(portid_t *start, portid_t *end) +{ + if (start) + *start = bond_pid_base; + if (end) + *end = bond_pid_end; +} + bool is_lcore_id_valid(lcoreid_t cid) { if (unlikely(cid >= DPVS_MAX_LCORE)) @@ -2536,6 +2552,10 @@ void lcore_process_packets(struct rte_mbuf **mbufs, lcoreid_t cid, uint16_t coun lcore_stats[cid].dropped++; continue; } + + /* some protocols like LLDP may still like the originated port */ + MBUF_USERDATA(mbuf, portid_t, MBUF_FIELD_ORIGIN_PORT) = mbuf->port; + if (dev->type == PORT_TYPE_BOND_SLAVE) { dev = dev->bond->slave.master; mbuf->port = dev->id; @@ -3448,6 +3468,10 @@ static inline void setup_dev_of_flags(struct netif_port *port) } if (port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) port->flag |= NETIF_PORT_FLAG_RX_IP_CSUM_OFFLOAD; + + /* enable lldp on physical port */ + if (is_physical_port(port->id)) + port->flag |= NETIF_PORT_FLAG_LLDP; } /* TODO: refactor it with netif_alloc */ @@ -4832,6 +4856,8 @@ static int get_port_basic(struct netif_port *port, void **out, size_t *out_len) get->ol_tx_tcp_csum = 1; if (port->flag & NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD) get->ol_tx_udp_csum = 1; + if (port->flag & NETIF_PORT_FLAG_LLDP) + get->lldp = 1; *out 
= get; *out_len = sizeof(netif_nic_basic_get_t); @@ -5317,6 +5343,11 @@ static int set_port(struct netif_port *port, const netif_nic_set_t *port_cfg) else if (port_cfg->tc_ingress_off) port->flag &= (~NETIF_PORT_FLAG_TC_INGRESS); + if (port_cfg->lldp_on) + port->flag |= NETIF_PORT_FLAG_LLDP; + else if (port_cfg->lldp_off) + port->flag &= (~NETIF_PORT_FLAG_LLDP); + return EDPVS_OK; } diff --git a/src/vlan.c b/src/vlan.c index 1b06aea3b..6d657e449 100644 --- a/src/vlan.c +++ b/src/vlan.c @@ -241,6 +241,7 @@ int vlan_add_dev(struct netif_port *real_dev, const char *ifname, dev->flag &= ~NETIF_PORT_FLAG_TX_IP_CSUM_OFFLOAD; dev->flag &= ~NETIF_PORT_FLAG_TX_TCP_CSUM_OFFLOAD; dev->flag &= ~NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD; + dev->flag &= ~NETIF_PORT_FLAG_LLDP; dev->type = PORT_TYPE_VLAN; rte_ether_addr_copy(&real_dev->addr, &dev->addr); diff --git a/tools/dpip/Makefile b/tools/dpip/Makefile index 4dc648e38..3ed2aaf15 100644 --- a/tools/dpip/Makefile +++ b/tools/dpip/Makefile @@ -40,7 +40,7 @@ DEFS = -D DPVS_MAX_LCORE=64 -D DPIP_VERSION=\"$(VERSION_STRING)\" CFLAGS += $(DEFS) OBJS = ipset.o dpip.o utils.o route.o addr.o neigh.o link.o vlan.o maddr.o \ - qsch.o cls.o tunnel.o ipset.o ipv6.o iftraf.o eal_mem.o flow.o \ + qsch.o cls.o tunnel.o ipset.o ipv6.o iftraf.o eal_mem.o flow.o lldp.o \ ../../src/common.o ../keepalived/keepalived/check/sockopt.o all: $(TARGET) diff --git a/tools/dpip/dpip.c b/tools/dpip/dpip.c index a3b31f1c6..1249e0b3b 100644 --- a/tools/dpip/dpip.c +++ b/tools/dpip/dpip.c @@ -35,7 +35,7 @@ static void usage(void) " "DPIP_NAME" [OPTIONS] OBJECT { COMMAND | help }\n" "Parameters:\n" " OBJECT := { link | addr | route | neigh | vlan | tunnel | qsch | cls |\n" - " ipv6 | iftraf | eal-mem | ipset | flow | maddr }\n" + " ipv6 | iftraf | eal-mem | ipset | flow | maddr | lldp }\n" " COMMAND := { create | destroy | add | del | show (list) | set (change) |\n" " replace | flush | test | enable | disable }\n" "Options:\n" diff --git a/tools/dpip/link.c 
b/tools/dpip/link.c index fd3d5bd84..acaab26b5 100644 --- a/tools/dpip/link.c +++ b/tools/dpip/link.c @@ -127,7 +127,7 @@ static void link_help(void) " dpip link set DEV-NAME ITEM VALUE\n" " ---supported items---\n" - " promisc [on|off], forward2kni [on|off], link [up|down],\n" + " promisc [on|off], forward2kni [on|off], link [up|down], lldp [on|off],\n" " allmulticast [on|off], tc-egress [on|off], tc-ingress [on|off], addr, \n" " bond-[mode|slave|primary|xmit-policy|monitor-interval|link-up-prop|" "link-down-prop]\n" @@ -260,6 +260,9 @@ static int dump_nic_basic(char *name, int namelen) if (get.tc_ingress) printf("tc-ingress "); + if (get.lldp) + printf("lldp "); + printf("\n"); printf(" addr %s ", get.addr); @@ -958,6 +961,25 @@ static int link_nic_set_tc_ingress(const char *name, const char *value) return dpvs_setsockopt(SOCKOPT_NETIF_SET_PORT, &cfg, sizeof(netif_nic_set_t)); } +static int link_nic_set_lldp(const char *name, const char *value) +{ + netif_nic_set_t cfg = {}; + assert(value); + + snprintf(cfg.pname, sizeof(cfg.pname), "%s", name); + + if (strcmp(value, "on") == 0) /* only "on" and "off" are accepted */ + cfg.lldp_on = 1; + else if(strcmp(value, "off") == 0) + cfg.lldp_off = 1; + else { + fprintf(stderr, "invalid arguement value for 'lldp'\n"); + return EDPVS_INVAL; + } + + return dpvs_setsockopt(SOCKOPT_NETIF_SET_PORT, &cfg, sizeof(netif_nic_set_t)); +} + static int link_bond_add_bond_slave(const char *name, const char *value) { netif_bond_set_t cfg; @@ -1193,6 +1215,8 @@ static int link_set(struct link_param *param) link_nic_set_tc_egress(param->dev_name, param->value); else if (strcmp(param->item, "tc-ingress") == 0) link_nic_set_tc_ingress(param->dev_name, param->value); + else if (strcmp(param->item, "lldp") == 0) + link_nic_set_lldp(param->dev_name, param->value); else { fprintf(stderr, "invalid parameter name '%s'\n", param->item); return EDPVS_INVAL; diff --git a/tools/dpip/lldp.c b/tools/dpip/lldp.c new file mode 100644 index 000000000..aed52c91c --- /dev/null +++
b/tools/dpip/lldp.c @@ -0,0 +1,128 @@ +/* + * DPVS is a software load balancer (Virtual Server) based on DPDK. + * + * Copyright (C) 2021 iQIYI (www.iqiyi.com). + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include "dpip.h" +#include "sockopt.h" +#include "conf/lldp.h" + +static void lldp_help(void) +{ + fprintf(stderr, + "Usage:\n" + " dpip lldp show TYPE dev NAME\n" + " TYPE := [ local | neigh ]\n" + " NAME := interface name\n" + "Examples:\n" + " dpip lldp show local dev dpdk0\n" + " dpip lldp show dev dpdk1 neigh\n"); +} + +static int lldp_parse(struct dpip_obj *obj, struct dpip_conf *conf) +{ + struct lldp_param *param = obj->param; + + memset(param, 0, sizeof(*param)); + + while (conf->argc > 0) { + if (strcmp(conf->argv[0], "dev") == 0) { + NEXTARG_CHECK(conf, conf->argv[0]); + snprintf(param->ifname, sizeof(param->ifname), "%s", conf->argv[0]); + } else { + if (strcmp(conf->argv[0], "local") == 0) { + param->node = DPVS_LLDP_NODE_LOCAL; + } else if (strcmp(conf->argv[0], "neigh") == 0) { + param->node = DPVS_LLDP_NODE_NEIGH; + } else { + fprintf(stderr, "too many arguments\n"); + return EDPVS_INVAL; + } + } + NEXTARG(conf); + } + + return EDPVS_OK; +} + +static int lldp_check(const struct dpip_obj *obj, dpip_cmd_t cmd) +{ + const struct lldp_param *param = obj->param; + + /* sanity check */ + switch (cmd) { + case DPIP_CMD_SHOW: + if (strlen(param->ifname) == 0) { + fprintf(stderr, "missing device name\n"); + return EDPVS_INVAL; + } + return EDPVS_OK; + 
default: + return EDPVS_NOTSUPP; + } + return EDPVS_OK; +} + +static int lldp_do_cmd(struct dpip_obj *obj, dpip_cmd_t cmd, struct dpip_conf *conf) +{ + const struct lldp_param *param = obj->param; + struct lldp_message *message; + size_t size; + int err; + + switch (cmd) { + case DPIP_CMD_SHOW: + err = dpvs_getsockopt(SOCKOPT_GET_LLDP_SHOW, param, sizeof(*param), + (void **)&message, &size); + if (err != EDPVS_OK) + return err; + + if (size < sizeof(*message)) { /* reply too short to hold a struct lldp_message */ + fprintf(stderr, "corrupted response\n"); + dpvs_sockopt_msg_free(message); + return EDPVS_INVAL; + } + printf("-*-*-*- %s LLDP Message on Port %s -*-*-*-\n", + message->param.node == DPVS_LLDP_NODE_NEIGH ? "Neighbour" : "Local", + message->param.ifname); + printf("%s", message->message); /* dumped text is data, never a format string */ + dpvs_sockopt_msg_free(message); + return EDPVS_OK; + default: + return EDPVS_NOTSUPP; + } +} + +static struct lldp_param lldp_param; + +static struct dpip_obj dpip_lldp = { + .name = "lldp", + .param = &lldp_param, + .help = lldp_help, + .parse = lldp_parse, + .check = lldp_check, + .do_cmd = lldp_do_cmd, +}; + +static void __init lldp_init(void) +{ + dpip_register_obj(&dpip_lldp); +} + +static void __exit lldp_exit(void) +{ + dpip_unregister_obj(&dpip_lldp); +} From 9baec6d6c91e073b4279c2c90945785c3eaa173f Mon Sep 17 00:00:00 2001 From: ywc689 Date: Tue, 30 Jul 2024 15:58:09 +0800 Subject: [PATCH 2/3] patch: lldp bonding xmit patch for dpdk Signed-off-by: ywc689 --- ...ends-packets-with-user-specified-sal.patch | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 patch/dpdk-stable-20.11.1/0007-bonding-device-sends-packets-with-user-specified-sal.patch diff --git a/patch/dpdk-stable-20.11.1/0007-bonding-device-sends-packets-with-user-specified-sal.patch b/patch/dpdk-stable-20.11.1/0007-bonding-device-sends-packets-with-user-specified-sal.patch new file mode 100644 index 000000000..d7e4e0c6d --- /dev/null +++ b/patch/dpdk-stable-20.11.1/0007-bonding-device-sends-packets-with-user-specified-sal.patch @@
-0,0 +1,91 @@ +From 7024d80414e914a54c301dbcc9bb4cf6fb5f927b Mon Sep 17 00:00:00 2001 +From: yuwenchao +Date: Tue, 30 Jul 2024 15:39:28 +0800 +Subject: [PATCH] bonding device sends packets with user specified salve port + +The outgoing slave port is specified in mbuf field "hash.txadapter.reserved2". +Support the following 3 bonding mode: +- mode 0: round robin +- mode 2: balance +- mode 4: 8023ad + +Signed-off-by: yuwenchao +--- + drivers/net/bonding/rte_eth_bond_pmd.c | 26 ++++++++++++++++++++++++-- + lib/librte_mbuf/rte_mbuf.h | 2 ++ + 2 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c +index 42e436c..a35422c 100644 +--- a/drivers/net/bonding/rte_eth_bond_pmd.c ++++ b/drivers/net/bonding/rte_eth_bond_pmd.c +@@ -573,6 +573,22 @@ struct client_stats_t { + return nb_recv_pkts; + } + ++static inline int ++bond_ethdev_populate_slave_by_user(const struct rte_mbuf *mbuf, const uint16_t *slaves, ++ int num_slave) ++{ ++ uint16_t i, pid = mbuf->hash.txadapter.reserved2; ++ ++ if (likely(pid == RTE_MBUF_PORT_INVALID)) ++ return -1; ++ ++ for (i = 0; i < num_slave; i++) { ++ if (slaves[i] == pid) ++ return i; ++ } ++ return -1; ++} ++ + static uint16_t + bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +@@ -605,7 +621,9 @@ struct client_stats_t { + + /* Populate slaves mbuf with which packets are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { +- cslave_idx = (slave_idx + i) % num_of_slaves; ++ cslave_idx = bond_ethdev_populate_slave_by_user(bufs[i], slaves, num_of_slaves); ++ if (likely(cslave_idx < 0)) ++ cslave_idx = (slave_idx + i) % num_of_slaves; + slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; + } + +@@ -1162,7 +1180,11 @@ struct bwg_slave { + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. 
*/ +- uint16_t slave_idx = bufs_slave_port_idxs[i]; ++ int slave_idx; ++ ++ slave_idx = bond_ethdev_populate_slave_by_user(bufs[i], slave_port_ids, slave_count); ++ if (likely(slave_idx < 0)) ++ slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; + } +diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h +index c4c9ebf..130b99d 100644 +--- a/lib/librte_mbuf/rte_mbuf.h ++++ b/lib/librte_mbuf/rte_mbuf.h +@@ -589,6 +589,7 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp) + + if (rte_mempool_get(mp, (void **)&m) < 0) + return NULL; ++ m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; + __rte_mbuf_raw_sanity_check(m); + return m; + } +@@ -867,6 +868,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) + m->vlan_tci_outer = 0; + m->nb_segs = 1; + m->port = RTE_MBUF_PORT_INVALID; ++ m->hash.txadapter.reserved2 = RTE_MBUF_PORT_INVALID; + + m->ol_flags &= EXT_ATTACHED_MBUF; + m->packet_type = 0; +-- +1.8.3.1 + From 72bd6d2c34f9bef30c11ce801e695429984f45b3 Mon Sep 17 00:00:00 2001 From: ywc689 Date: Mon, 5 Aug 2024 11:35:32 +0800 Subject: [PATCH 3/3] lldp: fix strict-aliasing errors when compiling with O3 Signed-off-by: ywc689 --- src/lldp.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/lldp.c b/src/lldp.c index 7edc31a3e..26c4953cb 100644 --- a/src/lldp.c +++ b/src/lldp.c @@ -580,6 +580,7 @@ static int lldp_on_change_ttl(const struct lldp_entry *e) { struct lldp_port *port = e->port; uint16_t ttl; + const void *ptr; /* Lifespan of local lldp caches is not decided by ttl. Actually, they are * updated periodically in every DPVS_LLDP_UPDATE_INTERVAL second. 
If not updated @@ -588,7 +589,8 @@ static int lldp_on_change_ttl(const struct lldp_entry *e) if (port->neigh == DPVS_LLDP_NODE_LOCAL) return EDPVS_OK; - ttl = rte_be_to_cpu_16(*((uint16_t *)e->value)); + ptr = &e->value[0]; + ttl = rte_be_to_cpu_16(*((uint16_t *)ptr)); if (ttl != port->timeout) { RTE_LOG(INFO, LLDP, "%s: update neigh lldp ttl %u -> %u\n", __func__, port->timeout, ttl); port->timeout = ttl; @@ -762,11 +764,14 @@ static int lldp_dump_sys_cap(const struct lldp_entry *e, char *buf, size_t len) uint16_t capacities, enables; int pos = 0; char tbuf[256]; + const void *ptr; if (e->len != 4) return EDPVS_INVPKT; - capacities = rte_be_to_cpu_16(*((uint16_t *)&e->value[0])); - enables = rte_be_to_cpu_16(*((uint16_t *)&e->value[2])); + ptr = &e->value[0]; + capacities = rte_be_to_cpu_16(*((uint16_t *)ptr)); + ptr = &e->value[2]; + enables = rte_be_to_cpu_16(*((uint16_t *)ptr)); lldp_dump_snprintf(tbuf, pos, "System Capabilities TLV (%d)\n", e->type.type); @@ -832,6 +837,7 @@ static int lldp_local_pdu_mng_addr(const struct netif_port *dev, uint32_t subtyp uint8_t *ptr; struct sockaddr_storage addr; char ifname[IFNAMSIZ]; + uint16_t typlen; ptr = tbuf + 2; *(ptr + 1) = subtype; @@ -884,7 +890,8 @@ static int lldp_local_pdu_mng_addr(const struct netif_port *dev, uint32_t subtyp ptr += 4; /* OID String Length */ *ptr++ = 0; - *((uint16_t *)tbuf) = DPVS_LLDP_TL(LLDP_TYPE_MNG_ADDR, ptr - tbuf - 2); + typlen = DPVS_LLDP_TL(LLDP_TYPE_MNG_ADDR, ptr - tbuf - 2); + rte_memcpy(tbuf, &typlen, 2); if (ptr - tbuf > len) rte_memcpy(buf, tbuf, len); @@ -1625,8 +1632,9 @@ static int lldp_rcv_msg_cb(struct dpvs_msg *msg) portid_t pid, start, end; struct netif_port *dev; struct rte_mbuf *mbuf; + void *msgdata = msg->data; - mbuf = *(struct rte_mbuf **)(msg->data); + mbuf = *(struct rte_mbuf **)msgdata; pid = mbuf->port; netif_bond_port_range(&start, &end);