Skip to content

Commit

Permalink
Merge pull request #28 from L4STeam/AccECN-2023
Browse files Browse the repository at this point in the history
Acc ecn 2023
  • Loading branch information
koen0607 authored Mar 5, 2024
2 parents 804cadc + 64441b4 commit 4579ffb
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
runs-on: ubuntu-20.04
needs: build
permissions: write-all
if: ${{ github.ref == 'refs/heads/testing' || github.ref == 'refs/heads/ratebase'}}
if: ${{ github.ref == 'refs/heads/testing' || github.ref == 'refs/heads/ratebase' || github.ref == 'refs/heads/AccECN-2023'}}
steps:
- name: Get artifact
uses: actions/download-artifact@v3
Expand Down
6 changes: 6 additions & 0 deletions Documentation/networking/ip-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,12 @@ tcp_ecn_option - INTEGER

Default: 2

tcp_ecn_option_beacon - INTEGER
Control how many times per RTT the Accurate ECN (AccECN) option is
sent. It takes effect only when tcp_ecn_option is set to 2.

Default: 3 (the AccECN option is sent at least 3 times per RTT)

tcp_ecn_fallback - BOOLEAN
If the kernel detects that ECN connection misbehaves, enable fall
back to non-ECN. Currently, this knob implements the fallback
Expand Down
3 changes: 3 additions & 0 deletions include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ struct tcp_sock {
syn_ect_snt:2, /* AccECN ECT memory, only */
syn_ect_rcv:2, /* ... needed during 3WHS + first seqno */
ecn_fail:1; /* ECN reflector detected path mangling */
u8 accecn_no_respond:1, /* AccECN no response on feedback */
accecn_no_options:1, /* AccECN no options send out */
first_data_ack:1; /* Check for first data ack */
u8 saw_accecn_opt:2, /* An AccECN option was seen */
fast_ack_mode:2, /* which fast ack mode ? */
unused:4;
Expand Down
4 changes: 3 additions & 1 deletion include/net/request_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct request_sock {
u16 mss;
u8 num_retrans; /* number of retransmits */
u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */
u8 num_timeout:7; /* number of timeouts */
u8 num_timeout:7, /* number of timeouts */
is_rtx:1; /* set when the SYN/ACK is being retransmitted */
u32 ts_recent;
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
Expand Down Expand Up @@ -105,6 +106,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
req->num_timeout = 0;
req->is_rtx = 0;
req->num_retrans = 0;
req->sk = NULL;
refcount_set(&req->rsk_refcnt, 0);
Expand Down
1 change: 0 additions & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_ACCECN_MAXSIZE (TCPOLEN_ACCECN_BASE + \
TCPOLEN_ACCECN_PERFIELD * \
TCP_ACCECN_NUMFIELDS)
#define TCP_ACCECN_BEACON_FREQ_SHIFT 2 /* Send option at least 2^2 times per RTT */
#define TCP_ACCECN_SAFETY_SHIFT 1 /* SAFETY_FACTOR in accecn draft */

/* tp->saw_accecn_opt states */
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/inet_connection_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,7 @@ static void syn_ack_recalc(struct request_sock *req,

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
req->is_rtx = 1;
int err = req->rsk_ops->rtx_syn_ack(parent, req);

if (!err)
Expand Down
3 changes: 3 additions & 0 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3034,6 +3034,9 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->delivered_ce = 0;
tp->saw_accecn_opt = 0;
tp->ecn_fail = 0;
tp->accecn_no_respond = 0;
tp->accecn_no_options = 0;
tp->first_data_ack = 0;
tcp_accecn_init_counters(tp);
tp->prev_ecnfield = 0;
tp->accecn_opt_tstamp = 0;
Expand Down
63 changes: 57 additions & 6 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,11 +444,31 @@ static void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
break;
case 0x1:
case 0x5:
if (tcp_ca_no_fallback_rfc3168(sk))
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
else if (tcp_ecn_mode_pending(tp))
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
else
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
break;
/* [CY] 3.1.2. Backward Compatibility - If a TCP Client has sent a SYN requesting AccECN feedback with (AE,CWR,ECE) =
* (1,1,1) then receives a SYN/ACK with the currently reserved combination (AE,CWR,ECE) = (1,0,1) but it does not
* have logic specific to such a combination, the Client MUST enable AccECN mode as if the SYN/ACK confirmed that the
* Server supported AccECN and as if it fed back that the IP-ECN field on the SYN had arrived unchanged.
*/
case 0x5:
if (tcp_ecn_mode_pending(tp)) {
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
if (tp->rx_opt.accecn &&
tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
tp->saw_accecn_opt = tcp_accecn_option_init(skb,
tp->rx_opt.accecn);
tp->accecn_opt_demand = 2;
}
if (INET_ECN_is_ce(ip_dsfield)) {
tp->received_ce++;
tp->received_ce_pending++;
}
}
break;
default:
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
Expand Down Expand Up @@ -575,7 +595,7 @@ static bool tcp_accecn_process_option(struct tcp_sock *tp,
bool order1, res;
unsigned int i;

if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL)
if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL || tp->accecn_no_respond)
return false;

if (!(flag & FLAG_SLOWPATH) || !tp->rx_opt.accecn) {
Expand Down Expand Up @@ -683,6 +703,22 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
if (flag & FLAG_SYN_ACKED)
return 0;

/* [CY] 3.2.2.4. Testing for Zeroing of the ACE Field - If AccECN has been successfully negotiated, the Data Sender
* MAY check the value of the ACE counter in the first feedback packet (with or without data) that arrives after the
* 3-way handshake. If the value of this ACE field is found to be zero (0b000), for the remainder of the half-
* connection the Data Sender ought to send non-ECN-capable packets and it is advised not to respond to any feedback
* of CE markings.
*/
if (!tp->first_data_ack) {
tp->first_data_ack = 1;
if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) {
tp->ecn_fail = 1;
INET_ECN_dontxmit(sk);
tp->accecn_no_respond = 1;
return 0;
}
}

if (tp->received_ce_pending >= TCP_ACCECN_ACE_MAX_DELTA)
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;

Expand Down Expand Up @@ -4873,8 +4909,18 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
* DSACK state and change the txhash to re-route speculatively.
*/
if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
sk_rethink_txhash(sk))
sk_rethink_txhash(sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
/* [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - If a middlebox is dropping
* packets with options it does not recognize, a host that is sending little or no data but mostly pure
* ACKs will not inherently detect such losses. Such a host MAY detect loss of ACKs carrying the AccECN
* Option by detecting whether the acknowledged data always reappears as a retransmission. In such cases,
* the host SHOULD disable the sending of the AccECN Option for this half-connection.
*/
if (tcp_ecn_mode_accecn(tcp_sk(sk)))
tcp_sk(sk)->accecn_no_options = 1;

}
}

static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
Expand Down Expand Up @@ -6215,6 +6261,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (th->syn) {
if (tcp_ecn_mode_accecn(tp)) {
send_accecn_reflector = true;
/* [CY] 3.1.5. Implications of AccECN Mode - A host in AccECN mode that is feeding back the IP-ECN
* field on a SYN or SYN/ACK: MUST feed back the IP-ECN field on the latest valid SYN or acceptable
* SYN/ACK to arrive.
*/
tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
if (tp->rx_opt.accecn &&
tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
tp->saw_accecn_opt = tcp_accecn_option_init(skb,
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -3142,7 +3142,7 @@ static int __net_init tcp_sk_init(struct net *net)

net->ipv4.sysctl_tcp_ecn = 2;
net->ipv4.sysctl_tcp_ecn_option = 2;
net->ipv4.sysctl_tcp_ecn_option_beacon = 1;
net->ipv4.sysctl_tcp_ecn_option_beacon = 3;
net->ipv4.sysctl_tcp_ecn_fallback = 1;
net->ipv4.sysctl_tcp_ecn_unsafe_cep = 0;

Expand Down
30 changes: 28 additions & 2 deletions net/ipv4/tcp_minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,13 @@ void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,

switch (ace) {
case 0x0:
/* [CY] 3.2.2.1. ACE Field on the ACK of the SYN/ACK - If the Server is in AccECN mode and in SYN-RCVD
* state, and if it receives a value of zero on a pure ACK with SYN=0 and no SACK blocks, for the rest
* of the connection the Server MUST NOT set ECT on outgoing packets and MUST NOT respond to AccECN
* feedback. Nonetheless, as a Data Receiver it MUST NOT disable AccECN feedback.
*/
tp->ecn_fail = 1;
tp->accecn_no_respond = 1;
break;
case 0x7:
case 0x5:
Expand All @@ -432,6 +438,10 @@ static void tcp_ecn_openreq_child(struct sock *sk,
const struct tcp_request_sock *treq = tcp_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);

/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
* any packet for the rest of the connection, if it has received or sent at least one valid
* SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake.
*/
if (treq->accecn_ok) {
const struct tcphdr *th = (const struct tcphdr *)skb->data;
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
Expand Down Expand Up @@ -694,9 +704,24 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*/
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
&tcp_rsk(req)->last_oow_ack_time) &&
&tcp_rsk(req)->last_oow_ack_time)) {

if (tcp_rsk(req)->accecn_ok) {
/* [CY] 3.1.5 Implications of AccECN Mode - A host in AccECN mode that is feeding back the IP-ECN
* field on a SYN or SYN/ACK: MUST feed back the IP-ECN field on the latest valid SYN or acceptable
* SYN/ACK to arrive.
*/
tcp_rsk(req)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) {
/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
* any packet for the rest of the connection, if it has received or sent at least one valid
* SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake
*/
tcp_sk(sk)->ecn_fail = 1;
}
}

!inet_rtx_syn_ack(sk, req)) {
if (!inet_rtx_syn_ack(sk, req)) {
unsigned long expires = jiffies;

expires += min(TCP_TIMEOUT_INIT << req->num_timeout,
Expand All @@ -705,6 +730,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
mod_timer_pending(&req->rsk_timer, expires);
else
req->rsk_timer.expires = expires;
}
}
return NULL;
}
Expand Down
60 changes: 45 additions & 15 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,12 +378,27 @@ static void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect)
}

static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
tcp_ecn_make_synack(struct sock *sk, const struct request_sock *req, struct tcphdr *th)
{
if (tcp_rsk(req)->accecn_ok)
tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
else if (inet_rsk(req)->ecn_ok)
th->ece = 1;
if (!req->is_rtx || req->num_timeout < 1) {
if (tcp_rsk(req)->accecn_ok)
tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
else if (inet_rsk(req)->ecn_ok)
th->ece = 1;
} else if (tcp_rsk(req)->accecn_ok) {
/* [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - If this retransmission times out,
* to expedite connection setup, the TCP Server SHOULD retransmit the SYN/ACK with (AE,CWR,ECE) = (0,0,0) and
* no AccECN Option, but it remains in AccECN feedback mode
*/
th->ae = 0;
th->cwr = 0;
th->ece = 0;
/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on any packet for
* the rest of the connection, if it has received or sent at least one valid SYN or Acceptable SYN/ACK with
* (AE,CWR,ECE) = (0,0,0) during the handshake.
*/
tcp_sk(sk)->ecn_fail = 1;
}
}

static void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb,
Expand Down Expand Up @@ -922,8 +937,12 @@ static bool tcp_accecn_option_beacon_check(const struct sock *sk)
if (!sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon)
return false;

return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) >=
(tp->srtt_us >> (3 + TCP_ACCECN_BEACON_FREQ_SHIFT));
/* [CY] A beacon is due once the time since the last AccECN option is at least
* srtt[us] / sysctl_tcp_ecn_option_beacon.
* The following text has since been removed from the AccECN draft: “6. Summary: Protocol Properties - However, it
* has to send a full-sized AccECN Option at least three times per RTT, which the Data Sender can rely on as a
* regular beacon or checkpoint.”
*/
return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon >=
(tp->srtt_us >> 3);
}

/* Compute TCP options for SYN packets. This is not the final
Expand Down Expand Up @@ -1086,8 +1105,11 @@ static unsigned int tcp_synack_options(const struct sock *sk,

smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);

/* [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - TCP Server SHOULD retransmit the
* SYN/ACK, but with no AccECN Option
*/
if (treq->accecn_ok && sock_net(sk)->ipv4.sysctl_tcp_ecn_option &&
req->num_timeout < 1 && (remaining >= TCPOLEN_ACCECN_BASE)) {
!req->is_rtx && (remaining >= TCPOLEN_ACCECN_BASE)) {
opts->ecn_bytes = synack_ecn_bytes;
remaining -= tcp_options_fit_accecn(opts, 0, remaining,
tcp_synack_options_combine_saving(opts));
Expand Down Expand Up @@ -1167,7 +1189,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb

if (tcp_ecn_mode_accecn(tp) &&
sock_net(sk)->ipv4.sysctl_tcp_ecn_option &&
(tp->saw_accecn_opt && tp->saw_accecn_opt != TCP_ACCECN_OPT_FAIL)) {
(tp->saw_accecn_opt && tp->saw_accecn_opt != TCP_ACCECN_OPT_FAIL && !tp->accecn_no_options)) {
if (sock_net(sk)->ipv4.sysctl_tcp_ecn_option >= 2 ||
tp->accecn_opt_demand ||
tcp_accecn_option_beacon_check(sk)) {
Expand Down Expand Up @@ -3431,12 +3453,20 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
tcp_retrans_try_collapse(sk, skb, avail_wnd);
}

/* RFC3168, section 6.1.1.1. ECN fallback
* As AccECN uses the same SYN flags (+ AE), this check covers both
* cases.
/* [CY] 3.1.4.1. Retransmitted SYNs - If the sender of an AccECN SYN (the TCP Client) times out before receiving the SYN/ACK,
* it SHOULD attempt to negotiate the use of AccECN at least one more time by continuing to set all three TCP ECN flags
* (AE,CWR,ECE) = (1,1,1) on the first retransmitted SYN (using the usual retransmission time-outs). If this first
* retransmission also fails to be acknowledged, in deployment scenarios where AccECN path traversal might be problematic, the
* TCP Client SHOULD send subsequent retransmissions of the SYN with the three TCP-ECN flags cleared (AE,CWR,ECE) = (0,0,0).
*/
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
tcp_ecn_clear_syn(sk, skb);
if (!tcp_ecn_mode_pending(tp) || icsk->icsk_retransmits > 1) {
/* RFC3168, section 6.1.1.1. ECN fallback
* As AccECN uses the same SYN flags (+ AE), this check covers both
* cases.
*/
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
tcp_ecn_clear_syn(sk, skb);
}

/* Update global and local TCP statistics. */
segs = tcp_skb_pcount(skb);
Expand Down Expand Up @@ -3822,7 +3852,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
tcp_ecn_make_synack(req, th);
tcp_ecn_make_synack((struct sock *)sk, req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
skb->mark = ireq->ir_mark;
Expand Down

0 comments on commit 4579ffb

Please sign in to comment.