From cae7fd47dfe15e54710e3e550340876382a6db19 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 13 Jun 2024 16:07:54 +0200 Subject: [PATCH 1/5] mlxsw: port: Edit maximum MTU value Currently the mlxsw driver supports a maximum MTU of up to 10000 bytes. This value is not accurate: we can support up to 10K (10 * 1024 = 10240) bytes. Change the value to the maximum MTU supported by firmware. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/666f51681234aeef09d771833ccb6e94bd323c88.1718275854.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/port.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index ac4d4ea515978..aa309615eff3f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -6,7 +6,7 @@ #include -#define MLXSW_PORT_MAX_MTU 10000 +#define MLXSW_PORT_MAX_MTU (10 * 1024) #define MLXSW_PORT_DEFAULT_VID 1 From d361536fc2dfd22e65a84f4f2a13b2b8f4f0739b Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 13 Jun 2024 16:07:55 +0200 Subject: [PATCH 2/5] mlxsw: Adjust MTU value to hardware check An Ethernet frame consists of an Ethernet header, a payload and an FCS. The MTU value which is set by the user is the size of the payload, which means that when the user sets the MTU to X, the total frame size will be larger due to the addition of the Ethernet header and FCS. Spectrum ASICs take the Ethernet header and FCS into account as part of the packet size for the MTU check. Adjust the MTU value when the user sets the MTU, in order to configure the MTU size which is required by hardware. The Tx header length which was used by the driver is not relevant for this calculation; instead, take into account the Ethernet header (with VLAN extension) and FCS. 
Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/f3203c2477bb8ed18b1e79642fa3e3713e1e55bb.1718275854.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/port.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index aa309615eff3f..0a73b1a4526e0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -7,6 +7,7 @@ #include #define MLXSW_PORT_MAX_MTU (10 * 1024) +#define MLXSW_PORT_ETH_FRAME_HDR (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) #define MLXSW_PORT_DEFAULT_VID 1 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 030ed71f945d6..879daa18cccab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -425,7 +425,7 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pmtu_pl[MLXSW_REG_PMTU_LEN]; - mtu += MLXSW_TXHDR_LEN + ETH_HLEN; + mtu += MLXSW_PORT_ETH_FRAME_HDR; if (mtu > mlxsw_sp_port->max_mtu) return -EINVAL; From 753aacfc032d3c20083cf7944d393aca08606feb Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 13 Jun 2024 16:07:56 +0200 Subject: [PATCH 3/5] mlxsw: spectrum: Set more accurate values for netdevice min/max MTU Currently, the driver uses ETH_MAX_MTU as the maximum MTU of netdevices. Instead, use the accurate value which is supported by the driver. Subtract the Ethernet headers which are taken into account by hardware for MTU checking, as described in the previous patch. Set the minimum MTU to ETH_MIN_MTU, as zero MTU is not really supported. With this change: a. The stack will do the MTU checking, so we can remove it from the driver. b. 
User space will be able to query the actual MTU limits. Before this patch: $ ip -j -d link show dev swp1 | jq | grep mtu "mtu": 1500, "min_mtu": 0, "max_mtu": 65535, With this patch: $ ip -j -d link show dev swp1 | jq | grep mtu "mtu": 1500, "min_mtu": 68, "max_mtu": 10218, Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/be8232e38c196ecb607f82c5e000ea427ce22abb.1718275854.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 879daa18cccab..c5856f4d6b8be 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -426,8 +426,6 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) char pmtu_pl[MLXSW_REG_PMTU_LEN]; mtu += MLXSW_PORT_ETH_FRAME_HDR; - if (mtu > mlxsw_sp_port->max_mtu) - return -EINVAL; mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); @@ -1697,8 +1695,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; - dev->min_mtu = 0; - dev->max_mtu = ETH_MAX_MTU; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = MLXSW_PORT_MAX_MTU - MLXSW_PORT_ETH_FRAME_HDR; /* Each packet needs to have a Tx header (metadata) on top all other * headers. From 3e7856545d369a6dcb3f93b21a9672b69f918650 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 13 Jun 2024 16:07:57 +0200 Subject: [PATCH 4/5] mlxsw: Use the same maximum MTU value throughout the driver Currently, the driver uses two different values for maximum MTU, one is stored in mlxsw_port->dev->max_mtu and the second is stored in mlxsw_port->max_mtu. 
The second one is set to a value which is queried from firmware. This value was never tested and, unfortunately, is not really supported. That means that with the existing code, the user can set the MTU to a value X which is not really supported by firmware and which is bigger than the buffer size which is allocated in PCI. To make the driver consistent, use only mlxsw_port->dev->max_mtu as the maximum MTU value. For the buffers headroom, add the Ethernet frame headers, which are not included in mlxsw_port->dev->max_mtu. Remove mlxsw_port->max_mtu. Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/89fa6f804386b918d337e736e14ac291bb947483.1718275854.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 23 ------------------- .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 - .../mellanox/mlxsw/spectrum_buffers.c | 8 +++++-- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c5856f4d6b8be..f064789f32405 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -405,21 +405,6 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_port->dev->dev_addr); } -static int mlxsw_sp_port_max_mtu_get(struct mlxsw_sp_port *mlxsw_sp_port, int *p_max_mtu) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char pmtu_pl[MLXSW_REG_PMTU_LEN]; - int err; - - mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); - if (err) - return err; - - *p_max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl); - return 0; -} - static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -1725,13 +1710,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, 
goto err_max_speed_get; } - err = mlxsw_sp_port_max_mtu_get(mlxsw_sp_port, &mlxsw_sp_port->max_mtu); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to get maximum MTU\n", - mlxsw_sp_port->local_port); - goto err_port_max_mtu_get; - } - err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n", @@ -1875,7 +1853,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: -err_port_max_mtu_get: err_max_speed_get: err_port_speed_by_width_set: err_port_system_port_mapping_set: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 3beb5d0847ab7..bb0586b45c8d7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -359,7 +359,6 @@ struct mlxsw_sp_port { u16 egr_types; struct mlxsw_sp_ptp_port_stats stats; } ptp; - int max_mtu; u32 max_speed; struct mlxsw_sp_hdroom *hdroom; u64 module_overheat_initial_val; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index c9f1c79f3f9d0..1b9ed393fbd47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -399,11 +399,13 @@ void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_hdroom *hdroom) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + unsigned int max_mtu = mlxsw_sp_port->dev->max_mtu; u16 reserve_cells; int i; + max_mtu += MLXSW_PORT_ETH_FRAME_HDR; /* Internal buffer. 
*/ - reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, mlxsw_sp_port->max_mtu, + reserve_cells = mlxsw_sp_hdroom_int_buf_size_get(mlxsw_sp, max_mtu, mlxsw_sp_port->max_speed); reserve_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, reserve_cells); hdroom->int_buf.reserve_cells = reserve_cells; @@ -613,7 +615,9 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_hdroom_bufs_reset_sizes(mlxsw_sp_port, &hdroom); /* Buffer 9 is used for control traffic. */ - size9 = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, mlxsw_sp_port->max_mtu); + size9 = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, + mlxsw_sp_port->dev->max_mtu + + MLXSW_PORT_ETH_FRAME_HDR); hdroom.bufs.buf[9].size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size9); return __mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom, true); From 4be3dcc9bf04d0957235b45ba18983f1f751a3a1 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 13 Jun 2024 16:07:58 +0200 Subject: [PATCH 5/5] selftests: forwarding: Add test for minimum and maximum MTU Add cases to check minimum and maximum MTU which are exposed via "ip -d link show". Test configuration and traffic. Use VLAN devices as usually VLAN header (4 bytes) is not included in the MTU, and drivers should configure hardware correctly to send maximum MTU payload size in VLAN tagged packets. 
$ ./min_max_mtu.sh TEST: ping [ OK ] TEST: ping6 [ OK ] TEST: Test maximum MTU configuration [ OK ] TEST: Test traffic, packet size is maximum MTU [ OK ] TEST: Test minimum MTU configuration [ OK ] TEST: Test traffic, packet size is minimum MTU [ OK ] Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/89de8be8989db7a97f3b39e3c9da695673e78d2e.1718275854.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/Makefile | 1 + .../selftests/net/forwarding/min_max_mtu.sh | 283 ++++++++++++++++++ 2 files changed, 284 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/min_max_mtu.sh diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 99576d7ecbf63..224346426ef22 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -39,6 +39,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ ipip_hier_gre.sh \ lib_sh_test.sh \ local_termination.sh \ + min_max_mtu.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ diff --git a/tools/testing/selftests/net/forwarding/min_max_mtu.sh b/tools/testing/selftests/net/forwarding/min_max_mtu.sh new file mode 100755 index 0000000000000..97bb8b221bed8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/min_max_mtu.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 | +# | | +# | $h1.10 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# | | | +# | $h1 + | +# | | | +# +------------------|-+ +# | +# +------------------|-+ +# | SW | | +# | $swp1 + | +# | | | +# | $swp1.10 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + max_mtu_config_test + max_mtu_traffic_test + min_mtu_config_test + min_mtu_traffic_test +" + +NUM_NETIFS=2 +source lib.sh + 
+h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.2/24 2001:db8:1::2/64 +} + +h1_destroy() +{ + vlan_destroy $h1 10 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $h1 +} + +switch_create() +{ + ip li set dev $swp1 up + vlan_create $swp1 10 "" 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_destroy() +{ + ip li set dev $swp1 down + vlan_destroy $swp1 10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + + switch_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.1 +} + +ping_ipv6() +{ + ping6_test $h1.10 2001:db8:1::1 +} + +min_max_mtu_get_if() +{ + local dev=$1; shift + local min_max=$1; shift + + ip -d -j link show $dev | jq ".[].$min_max" +} + +ensure_compatible_min_max_mtu() +{ + local min_max=$1; shift + + local mtu=$(min_max_mtu_get_if ${NETIFS[p1]} $min_max) + local i + + for ((i = 2; i <= NUM_NETIFS; ++i)); do + local current_mtu=$(min_max_mtu_get_if ${NETIFS[p$i]} $min_max) + + if [ $current_mtu -ne $mtu ]; then + return 1 + fi + done +} + +mtu_set_if() +{ + local dev=$1; shift + local mtu=$1; shift + local should_fail=${1:-0}; shift + + mtu_set $dev $mtu 2>/dev/null + check_err_fail $should_fail $? "Set MTU $mtu for $dev" +} + +mtu_set_all_if() +{ + local mtu=$1; shift + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_set_if ${NETIFS[p$i]} $mtu + mtu_set_if ${NETIFS[p$i]}.10 $mtu + done +} + +mtu_restore_all_if() +{ + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_restore ${NETIFS[p$i]}.10 + mtu_restore ${NETIFS[p$i]} + done +} + +mtu_test_ping4() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 20 bytes for IP header + local ping_headers_len=$((20 + 8)) + local pkt_size=$((mtu - ping_headers_len)) + + ping_do $h1.10 192.0.2.1 "-s $pkt_size -M do" + check_err_fail $should_fail $? 
"Ping, packet size: $pkt_size" +} + +mtu_test_ping6() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 40 bytes for IPv6 header + local ping6_headers_len=$((40 + 8)) + local pkt_size=$((mtu - ping6_headers_len)) + + ping6_do $h1.10 2001:db8:1::1 "-s $pkt_size -M do" + check_err_fail $should_fail $? "Ping6, packet size: $pkt_size" +} + +max_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local max_mtu=$(min_max_mtu_get_if $dev "max_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $max_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((max_mtu + 1)) $should_fail + mtu_restore $dev + done + + log_test "Test maximum MTU configuration" +} + +max_mtu_traffic_test() +{ + local should_fail + local max_mtu + + RET=0 + + if ! ensure_compatible_min_max_mtu "max_mtu"; then + log_test_xfail "Topology has incompatible maximum MTU values" + return + fi + + max_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "max_mtu") + + should_fail=0 + mtu_set_all_if $max_mtu + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + should_fail=1 + mtu_set_all_if $((max_mtu - 1)) + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + log_test "Test traffic, packet size is maximum MTU" +} + +min_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local min_mtu=$(min_max_mtu_get_if $dev "min_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $min_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((min_mtu - 1)) $should_fail + mtu_restore $dev + done + + log_test "Test minimum MTU configuration" +} + +min_mtu_traffic_test() +{ + local should_fail=0 + local min_mtu + + RET=0 + + if ! 
ensure_compatible_min_max_mtu "min_mtu"; then + log_test_xfail "Topology has incompatible minimum MTU values" + return + fi + + min_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "min_mtu") + mtu_set_all_if $min_mtu + mtu_test_ping4 $min_mtu $should_fail + # Do not test minimum MTU with IPv6, as IPv6 requires higher MTU. + + mtu_restore_all_if + + log_test "Test traffic, packet size is minimum MTU" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS