Skip to content

Commit

Permalink
Revert "Remove lower limit of reconnection attempts."
Browse files Browse the repository at this point in the history
This reverts commit 28dc7bc.
  • Loading branch information
EvgeniiMekhanik committed Oct 1, 2024
1 parent 81ef977 commit 5f14829
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 34 deletions.
27 changes: 1 addition & 26 deletions fw/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,42 +254,17 @@ tfw_srv_conn_queue_full(TfwSrvConn *srv_conn)
return READ_ONCE(srv_conn->qsize) >= sg->max_qsize;
}

/*
* Timeout between connect attempts is increased with each unsuccessful
* attempt. Length of the timeout for each attempt is chosen to follow
* a variant of exponential backoff delay algorithm.
*
* It's essential that the new connection is established and the failed
* connection is restored ASAP, so the min retry interval is set to 1.
* The next step is good for a cyclic reconnect, e.g. if an upstream
* is configured to reset a connection periodically. The next steps are
* almost a pure backoff algo starting from 100ms, which is a good RTT
* for a fast 10Gbps link. The timeout is not increased after 1 second
* as it has moderate overhead, and it's still good in response time.
*/
static const unsigned long tfw_srv_tmo_vals[] = { 1, 10, 100, 250, 500, 1000 };
/*
* The number of reconnection attempts during increasing timeout (quick
* reconnect) stage.
* This number is not included in the total count of reconnection attempts.
*/
static const unsigned int tfw_srv_tmo_nr = ARRAY_SIZE(tfw_srv_tmo_vals);
/*
* max_recns can be the maximum value for the data type to mean
* the unlimited number of attempts, which is the value that should
* never be reached. UINT_MAX seconds is more than 136 years. It's
* safe to assume that it's not reached in a single run of Tempesta.
*/
#define TFW_SRV_MAX_RECONNECT (UINT_MAX - ARRAY_SIZE(tfw_srv_tmo_vals))

static inline bool
tfw_srv_conn_need_resched(TfwSrvConn *srv_conn)
{
TfwSrvGroup *sg = ((TfwServer *)srv_conn->peer)->sg;
unsigned int recns = READ_ONCE(srv_conn->recns);
/* Rescheduling cannot happen during the quick reconnect stage. */
BUG_ON(recns < tfw_srv_tmo_nr);
return (recns - tfw_srv_tmo_nr >= sg->max_recns);
return READ_ONCE(srv_conn->recns) >= sg->max_recns;
}

/*
Expand Down
30 changes: 22 additions & 8 deletions fw/sock_srv.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,20 @@
* soon as the last client releases the server connection.
*/



/*
* Timeout between connect attempts is increased with each unsuccessful
* attempt. Length of the timeout for each attempt is chosen to follow
* a variant of exponential backoff delay algorithm.
*
* It's essential that the new connection is established and the failed
* connection is restored ASAP, so the min retry interval is set to 1.
* The next step is good for a cyclic reconnect, e.g. if an upstream
* is configured to reset a connection periodically. The next steps are
* almost a pure backoff algo starting from 100ms, which is a good RTT
* for a fast 10Gbps link. The timeout is not increased after 1 second
* as it has moderate overhead, and it's still good in response time.
*/
static const unsigned long tfw_srv_tmo_vals[] = { 1, 10, 100, 250, 500, 1000 };

#define srv_warn(check, addr, fmt, ...) \
T_WARN_MOD_ADDR(sock_srv, check, addr, TFW_WITH_PORT, fmt, \
Expand Down Expand Up @@ -155,22 +167,23 @@ tfw_sock_srv_connect_try_later(TfwSrvConn *srv_conn)
{
unsigned long timeout;

if (srv_conn->recns < tfw_srv_tmo_nr) {
if (srv_conn->recns < ARRAY_SIZE(tfw_srv_tmo_vals)) {
if (srv_conn->recns)
T_DBG_ADDR("Cannot establish connection",
&srv_conn->peer->addr, TFW_WITH_PORT);
timeout = tfw_srv_tmo_vals[srv_conn->recns];
} else {
if (srv_conn->recns == tfw_srv_tmo_nr || !(srv_conn->recns % 60))
if (srv_conn->recns == ARRAY_SIZE(tfw_srv_tmo_vals)
|| !(srv_conn->recns % 60))
{
srv_warn("cannot establish connection",
&srv_conn->peer->addr,
": %u tries, keep trying...\n",
srv_conn->recns - tfw_srv_tmo_nr + 1);
srv_conn->recns);
}

tfw_connection_repair((TfwConn *)srv_conn);
timeout = tfw_srv_tmo_vals[tfw_srv_tmo_nr - 1];
timeout = tfw_srv_tmo_vals[ARRAY_SIZE(tfw_srv_tmo_vals) - 1];
}
srv_conn->recns++;

Expand Down Expand Up @@ -1271,7 +1284,8 @@ tfw_cfgop_conn_retries(TfwCfgSpec *cs, TfwCfgEntry *ce, unsigned int *recns)

if((r = tfw_cfgop_intval(cs, ce, recns)))
return r;
*recns = *recns ? *recns + tfw_srv_tmo_nr : TFW_SRV_MAX_RECONNECT;
*recns = *recns ? max_t(int, *recns, ARRAY_SIZE(tfw_srv_tmo_vals))
: UINT_MAX;

return 0;
}
Expand Down Expand Up @@ -2304,7 +2318,7 @@ static TfwCfgSpec tfw_srv_group_specs[] = {
.deflt = "10",
.handler = tfw_cfgop_in_conn_retries,
.spec_ext = &(TfwCfgSpecInt) {
.range = { 0, TFW_SRV_MAX_RECONNECT},
.range = { 0, INT_MAX },
},
.allow_none = true,
.allow_repeat = false,
Expand Down

0 comments on commit 5f14829

Please sign in to comment.