Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tcp_bpf: improve ingress redirection performance with message corking #8541

Open
wants to merge 4 commits into
base: bpf-next_base
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 35 additions & 13 deletions include/linux/skmsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#define MAX_MSG_FRAGS MAX_SKB_FRAGS
#define NR_MSG_FRAG_IDS (MAX_MSG_FRAGS + 1)
/* GSO size for TCP BPF backlog processing */
#define TCP_BPF_GSO_SIZE 65536

enum __sk_action {
__SK_DROP = 0,
Expand Down Expand Up @@ -85,8 +87,10 @@ struct sk_psock {
struct sock *sk_redir;
u32 apply_bytes;
u32 cork_bytes;
u32 eval;
bool redir_ingress; /* undefined if sk_redir is null */
u32 backlog_since_notify;
unsigned int eval : 8;
unsigned int redir_ingress : 1; /* undefined if sk_redir is null */
unsigned int backlog_work_delayed : 1;
struct sk_msg *cork;
struct sk_psock_progs progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
Expand All @@ -97,6 +101,9 @@ struct sk_psock {
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
spinlock_t ingress_lock;
struct list_head backlog_msg;
/* spin_lock for backlog_msg and backlog_since_notify */
spinlock_t backlog_msg_lock;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
Expand All @@ -117,12 +124,15 @@ struct sk_psock {
struct mutex work_mutex;
struct sk_psock_work_state work_state;
struct delayed_work work;
struct delayed_work backlog_work;
struct sock *sk_pair;
struct rcu_work rwork;
};

int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce);
struct sk_msg *sk_msg_alloc(gfp_t gfp);
bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce);
int sk_msg_expand(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce);
int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
u32 off, u32 len);
void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
Expand All @@ -143,6 +153,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
bool sk_msg_is_readable(struct sock *sk);

extern struct kmem_cache *sk_msg_cachep;

static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
WARN_ON(i == msg->sg.end && bytes);
Expand Down Expand Up @@ -319,6 +331,13 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}

/*
 * Free a struct sk_msg that was allocated from sk_msg_cachep (see
 * sk_msg_alloc(gfp_t) declared above), releasing any skb still attached
 * to it.
 *
 * NOTE(review): consume_skb() tolerates a NULL argument, so the guard is
 * presumably just an optimization to skip the call when no skb is
 * attached — confirm against kernel conventions.
 */
static inline void kfree_sk_msg(struct sk_msg *msg)
{
	if (msg->skb)
		consume_skb(msg->skb);
	kmem_cache_free(sk_msg_cachep, msg);
}

static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
Expand All @@ -330,7 +349,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock,
ret = true;
} else {
sk_msg_free(psock->sk, msg);
kfree(msg);
kfree_sk_msg(msg);
ret = false;
}
spin_unlock_bh(&psock->ingress_lock);
Expand Down Expand Up @@ -378,13 +397,6 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
return psock ? list_empty(&psock->ingress_msg) : true;
}

static inline void kfree_sk_msg(struct sk_msg *msg)
{
if (msg->skb)
consume_skb(msg->skb);
kfree(msg);
}

static inline void sk_psock_report_error(struct sk_psock *psock, int err)
{
struct sock *sk = psock->sk;
Expand All @@ -393,9 +405,19 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
sk_error_report(sk);
}

void sk_psock_backlog_msg(struct sk_psock *psock);
struct sk_psock *sk_psock_init(struct sock *sk, int node);
void sk_psock_stop(struct sk_psock *psock);

/*
 * Schedule the psock's ingress-backlog worker (psock->backlog_work),
 * either immediately (@delayed == false) or after a 1-jiffy delay
 * (@delayed == true).
 *
 * Does nothing unless SK_PSOCK_TX_ENABLED is still set — presumably so
 * no new work is queued once the psock is being torn down; TODO confirm
 * against sk_psock_stop().
 *
 * psock->backlog_work_delayed records whether the pending run was
 * requested with a delay; the consumer of that flag (likely
 * sk_psock_backlog_msg() in skmsg.c) is not visible here — verify there.
 */
static inline void sk_psock_run_backlog_work(struct sk_psock *psock,
					     bool delayed)
{
	if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
		return;
	psock->backlog_work_delayed = delayed;
	schedule_delayed_work(&psock->backlog_work, delayed ? 1 : 0);
}

#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
Expand Down Expand Up @@ -441,7 +463,7 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
{
if (psock->cork) {
sk_msg_free(psock->sk, psock->cork);
kfree(psock->cork);
kfree_sk_msg(psock->cork);
psock->cork = NULL;
}
}
Expand Down
Loading
Loading