Skip to content

Commit

Permalink
Add SO_ZEROCOPY and MSG_ZEROCOPY support for UDP and TCP
Browse files Browse the repository at this point in the history
  • Loading branch information
davidBar-On committed Jun 23, 2024
1 parent 8f5a87e commit 6f0585b
Show file tree
Hide file tree
Showing 14 changed files with 453 additions and 16 deletions.
14 changes: 14 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,20 @@ if test "x$iperf3_cv_header_tcp_info_snd_wnd" = "xyes"; then
AC_DEFINE([HAVE_TCP_INFO_SND_WND], [1], [Have tcpi_snd_wnd field in tcp_info.])
fi

# Check for MSG_ZEROCOPY (mostly on Linux)
AC_CACHE_CHECK([MSG_ZEROCOPY send option],
[iperf3_cv_header_msg_zerocopy],
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>]],
[[int foo = MSG_ZEROCOPY;]])],
iperf3_cv_header_msg_zerocopy=yes,
iperf3_cv_header_msg_zerocopy=no))
if test "x$iperf3_cv_header_msg_zerocopy" = "xyes"; then
AC_DEFINE([HAVE_MSG_ZEROCOPY], [1], [Have MSG_ZEROCOPY send option.])
fi

# Check if we need -lrt for clock_gettime
AC_SEARCH_LIBS(clock_gettime, [rt posix4])
# Check for clock_gettime support
Expand Down
45 changes: 44 additions & 1 deletion src/iperf.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,37 @@ typedef atomic_uint_fast64_t atomic_iperf_size_t;
typedef unsigned int uint
#endif // __vxworks or __VXWORKS__

#if defined(HAVE_MSG_ZEROCOPY) && defined(HAVE_POLL_H)
#define SUPPORTED_MSG_ZEROCOPY 1

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif

// FIXME: supposed to be in <errqueue.h>?

#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
#endif

#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED 1
#endif

/*
 * Extended socket error record, declared locally for builds where the
 * system headers do not provide it.
 * NOTE(review): presumably mirrors the kernel definition from
 * <linux/errqueue.h>; confirm it cannot clash if that header is ever included.
 */
struct sock_extended_err {
    uint32_t ee_errno;   /* errno value describing the error */
    uint8_t  ee_origin;  /* origin of the error report */
    uint8_t  ee_type;    /* error type */
    uint8_t  ee_code;    /* error code */
    uint8_t  ee_pad;     /* padding byte */
    uint32_t ee_info;    /* additional information */
    uint32_t ee_data;    /* other data */
    /* More data may follow this fixed-size header */
};

#endif /* HAVE_MSG_ZEROCOPY && HAVE_POLL_H */

struct iperf_interval_results
{
atomic_iperf_size_t bytes_transferred; /* bytes transferred in this interval */
Expand Down Expand Up @@ -230,6 +261,14 @@ struct iperf_stream
int (*rcv2) (struct iperf_stream * stream);
int (*snd2) (struct iperf_stream * stream);

#if defined(SUPPORTED_MSG_ZEROCOPY)
/* used when sending using MSG_ZEROCOPY */
long completions;
long expected_completions;
int zerocopied;
uint32_t next_completion;
#endif /* SUPPORTED_MSG_ZEROCOPY */

// struct iperf_stream *next;
SLIST_ENTRY(iperf_stream) streams;

Expand Down Expand Up @@ -331,7 +370,7 @@ struct iperf_test
int verbose; /* -V option - verbose mode */
int json_output; /* -J option - JSON output */
int json_stream; /* --json-stream */
int zerocopy; /* -Z option - use sendfile */
int zerocopy; /* -Z option - use sendfile or MSG_ZEROCOPY for TCP, MSG_ZEROCOPY for UDP */
int debug; /* -d option - enable debug */
enum debug_level debug_level; /* -d option option - level of debug messages to show */
int get_server_output; /* --get-server-output */
Expand Down Expand Up @@ -459,4 +498,8 @@ extern int gerror; /* error value from getaddrinfo(3), for use in internal error
/* In Reverse mode, maximum number of packets to wait for "accept" response - to handle out of order packets */
#define MAX_REVERSE_OUT_OF_ORDER_PACKETS 2

/* Zerocopy method in use - sendfile() or MSG_ZEROCOPY (for UDP only MSG_ZEROCOPY is supported) */
#define ZEROCOPY_SENDFILE 1
#define ZEROCOPY_MSG_ZEROCOPY 2

#endif /* !__IPERF_H */
49 changes: 46 additions & 3 deletions src/iperf_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@
#include <Windows.h>
#endif /* HAVE_SETPROCESSAFFINITYMASK */

#include "net.h"
#include "iperf.h"
#include "net.h"
#include "iperf_api.h"
#include "iperf_udp.h"
#include "iperf_tcp.h"
Expand Down Expand Up @@ -702,7 +702,7 @@ iperf_has_zerocopy( void )
/*
 * Set the zerocopy flag of a test.
 *
 * ipt      - test whose `zerocopy` field is written
 * zerocopy - requested setting; zero always stores 0
 *
 * The value left in ipt->zerocopy comes from the last assignment below: it is
 * 1 when `zerocopy` is non-zero and either the test protocol is UDP or
 * has_sendfile() returns non-zero, and 0 otherwise.
 *
 * NOTE(review): the first assignment is overwritten immediately by the second
 * one; it looks like the pre-change line left in place - confirm and drop it.
 * NOTE(review): the stored value is a boolean 0/1, so it can never be
 * ZEROCOPY_MSG_ZEROCOPY (2), even for UDP - verify this is intended.
 */
void
iperf_set_test_zerocopy(struct iperf_test *ipt, int zerocopy)
{
ipt->zerocopy = (zerocopy && has_sendfile());
/* For UDP the result does not depend on has_sendfile(). */
ipt->zerocopy = (zerocopy && (ipt->protocol->id == Pudp ? 1 : has_sendfile()));
}

void
Expand Down Expand Up @@ -1104,7 +1104,11 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
#if defined(HAVE_FLOWLABEL)
{"flowlabel", required_argument, NULL, 'L'},
#endif /* HAVE_FLOWLABEL */
#if defined(SUPPORTED_MSG_ZEROCOPY)
{"zerocopy", optional_argument, NULL, 'Z'},
#else
{"zerocopy", no_argument, NULL, 'Z'},
#endif /* SUPPORTED_MSG_ZEROCOPY */
{"omit", required_argument, NULL, 'O'},
{"file", required_argument, NULL, 'F'},
{"repeating-payload", no_argument, NULL, OPT_REPEATING_PAYLOAD},
Expand Down Expand Up @@ -1467,11 +1471,22 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
TAILQ_INSERT_TAIL(&test->xbind_addrs, xbe, link);
break;
case 'Z':
#if defined(SUPPORTED_MSG_ZEROCOPY)
if (optarg && strcmp(optarg, "")) {
if (!strcmp(optarg, "z"))
test->zerocopy = ZEROCOPY_MSG_ZEROCOPY;
else {
i_errno = IENOSENDFILE;
return -1;
}
} else
#endif /* SUPPORTED_MSG_ZEROCOPY */
if (!has_sendfile()) {
i_errno = IENOSENDFILE;
return -1;
} else {
test->zerocopy = ZEROCOPY_SENDFILE;
}
test->zerocopy = 1;
client_flag = 1;
break;
case OPT_REPEATING_PAYLOAD:
Expand Down Expand Up @@ -1744,6 +1759,28 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv)
return -1;
}

#if defined(SUPPORTED_MSG_ZEROCOPY)
// UDP supports "zero copy" only using MSG_ZEROCOPY
if (test->protocol->id == Pudp && test->zerocopy)
test->zerocopy = ZEROCOPY_MSG_ZEROCOPY;
// Zero copy for TCP uses sendfile()
if (test->zerocopy && test->protocol->id != Pudp && !has_sendfile()) {
i_errno = IENOSENDFILE;
return -1;
}
// Using MSG_ZEROCOPY is not supported when disk file is used
if (test->diskfile_name != (char*) 0 && test->zerocopy == ZEROCOPY_MSG_ZEROCOPY) {
i_errno = IEDISKFILEZEROCOPY;
return -1;
}
#else
// Zero copy is supported only by TCP
if (test->zerocopy && test->protocol->id != Ptcp) {
i_errno = IENOSENDFILE;
return -1;
}
#endif /* SUPPORTED_MSG_ZEROCOPY */

if (blksize == 0) {
if (test->protocol->id == Pudp)
blksize = 0; /* try to dynamically determine from MSS */
Expand Down Expand Up @@ -4464,6 +4501,12 @@ iperf_new_stream(struct iperf_test *test, int s, int sender)
sp->snd = test->protocol->send;
sp->rcv = test->protocol->recv;

#if defined(SUPPORTED_MSG_ZEROCOPY)
// Note: sp->next_completion is not initialized to 1, since the first
// SO_EE_ORIGIN_ZEROCOPY message value is zero (and not 1 as expected).
sp->zerocopied = -1;
#endif /* SUPPORTED_MSG_ZEROCOPY */

if (test->diskfile_name != (char*) 0) {
sp->diskfile_fd = open(test->diskfile_name, sender ? O_RDONLY : (O_WRONLY|O_CREAT|O_TRUNC), S_IRUSR|S_IWUSR);
if (sp->diskfile_fd == -1) {
Expand Down
1 change: 1 addition & 0 deletions src/iperf_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@ enum {
IESNDTIMEOUT = 33, // Illegal message send timeout
IEUDPFILETRANSFER = 34, // Cannot transfer file using UDP
IESERVERAUTHUSERS = 35, // Cannot access authorized users file
IEDISKFILEZEROCOPY = 36, // Sending disk file using MSG_ZEROCOPY is not supported
/* Test errors */
IENEWTEST = 100, // Unable to create a new test (check perror)
IEINITTEST = 101, // Test initialization failed (check perror)
Expand Down
7 changes: 6 additions & 1 deletion src/iperf_client_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ iperf_connect(struct iperf_test *test)
/* Create and connect the control channel */
if (test->ctrl_sck < 0)
// Create the control channel using an ephemeral port
test->ctrl_sck = netdial(test->settings->domain, Ptcp, test->bind_address, test->bind_dev, 0, test->server_hostname, test->server_port, test->settings->connect_timeout);
test->ctrl_sck = netdial(test->settings->domain, Ptcp, test->bind_address, test->bind_dev, 0, test->server_hostname, test->server_port, test->settings->connect_timeout, 0);
if (test->ctrl_sck < 0) {
i_errno = IECONNECT;
return -1;
Expand Down Expand Up @@ -720,6 +720,11 @@ iperf_run_client(struct iperf_test * test)
SLIST_FOREACH(sp, &test->streams, streams) {
if (sp->sender) {
int rc;
#if defined(SUPPORTED_MSG_ZEROCOPY)
if (sp->test->zerocopy == ZEROCOPY_MSG_ZEROCOPY) {
wait_zerocopy_buffer_available(sp); /* Wait until last message is sent */
}
#endif /* SUPPORTED_MSG_ZEROCOPY */
sp->done = 1;
rc = pthread_cancel(sp->thr);
if (rc != 0 && rc != ESRCH) {
Expand Down
7 changes: 7 additions & 0 deletions src/iperf_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,14 @@ iperf_strerror(int int_errno)
snprintf(errstr, len, "TCP MSS too large (maximum = %d bytes)", MAX_MSS);
break;
case IENOSENDFILE:
#if defined(SUPPORTED_MSG_ZEROCOPY)
snprintf(errstr, len, "invalid zerocopy option value or this OS does not support sendfile");
#else
snprintf(errstr, len, "this OS does not support sendfile");
#endif /* SUPPORTED_MSG_ZEROCOPY */
break;
case IEDISKFILEZEROCOPY:
snprintf(errstr, len, "Sending disk file using MSG_ZEROCOPY is not supported");
break;
case IEOMIT:
snprintf(errstr, len, "bogus value for --omit");
Expand Down
7 changes: 6 additions & 1 deletion src/iperf_locale.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,12 @@ const char usage_longstr[] = "Usage: iperf3 [-s|-c host] [options]\n"
#if defined(HAVE_FLOWLABEL)
" -L, --flowlabel N set the IPv6 flow label (only supported on Linux)\n"
#endif /* HAVE_FLOWLABEL */
" -Z, --zerocopy use a 'zero copy' method of sending data\n"
#if defined(HAVE_MSG_ZEROCOPY) && defined(HAVE_POLL_H)
" -Z, --zerocopy[=z] for UDP use MSG_ZEROCOPY 'zero copy' method for sending data;\n"
" for TCP, use sendfile() uless '=z' is set for using MSG_ZEROCOPY\n"
#else
" -Z, --zerocopy use `sendfile()` for 'zero copy' send of TCP data\n"
#endif /* SUPPORTED_MSG_ZEROCOPY */
" -O, --omit N perform pre-test for N seconds and omit the pre-test statistics\n"
" -T, --title str prefix every output line with this string\n"
" --extra-data str data string to include in client and server JSON\n"
Expand Down
5 changes: 5 additions & 0 deletions src/iperf_server_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,11 @@ cleanup_server(struct iperf_test *test)
int i_errno_save = i_errno;
SLIST_FOREACH(sp, &test->streams, streams) {
int rc;
#if defined(SUPPORTED_MSG_ZEROCOPY)
if (sp->sender && sp->test->zerocopy == ZEROCOPY_MSG_ZEROCOPY) {
wait_zerocopy_buffer_available(sp); /* Wait until last message is sent */
}
#endif /* SUPPORTED_MSG_ZEROCOPY */
sp->done = 1;
rc = pthread_cancel(sp->thr);
if (rc != 0 && rc != ESRCH) {
Expand Down
29 changes: 28 additions & 1 deletion src/iperf_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ iperf_tcp_send(struct iperf_stream *sp)
if (!sp->pending_size)
sp->pending_size = sp->settings->blksize;

#if defined(SUPPORTED_MSG_ZEROCOPY)
if (sp->test->zerocopy == ZEROCOPY_MSG_ZEROCOPY) {
/* Wait until it is safe to rewrite the sending buffer */
r = wait_zerocopy_buffer_available(sp);
if (r < 0) {
if (sp->test->debug_level >= DEBUG_LEVEL_INFO)
printf("Waining for TCP MSG_ZEROCOPY buffer to become available failed, errno=%s\n", strerror(errno));
return r;
}
r = Nsend_sp(sp, sp->buffer, sp->pending_size, Ptcp, MSG_ZEROCOPY);
} else
#endif /* SUPPORTED_MSG_ZEROCOPY */
if (sp->test->zerocopy)
r = Nsendfile(sp->buffer_fd, sp->socket, sp->buffer, sp->pending_size);
else
Expand Down Expand Up @@ -120,12 +132,27 @@ iperf_tcp_accept(struct iperf_test * test)
char cookie[COOKIE_SIZE] = {0};
socklen_t len;
struct sockaddr_storage addr;
#if defined(SUPPORTED_MSG_ZEROCOPY)
int opt;
#endif /* SUPPORTED_MSG_ZEROCOPY */

len = sizeof(addr);
if ((s = accept(test->listener, (struct sockaddr *) &addr, &len)) < 0) {
i_errno = IESTREAMCONNECT;
return -1;
}

#if defined(SUPPORTED_MSG_ZEROCOPY)
/* Setting should be done before the socket is connected */
if (test->zerocopy == ZEROCOPY_MSG_ZEROCOPY) {
opt = 1;
if (setsockopt(s, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)) < 0) {
i_errno = IESTREAMACCEPT;
return -1;
}
}
#endif /* SUPPORTED_MSG_ZEROCOPY */

#if defined(HAVE_SO_MAX_PACING_RATE)
/* If fq socket pacing is specified, enable it. */

Expand Down Expand Up @@ -380,7 +407,7 @@ iperf_tcp_connect(struct iperf_test *test)
int saved_errno;
int rcvbuf_actual, sndbuf_actual;

s = create_socket(test->settings->domain, SOCK_STREAM, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, &server_res);
s = create_socket(test->settings->domain, SOCK_STREAM, test->bind_address, test->bind_dev, test->bind_port, test->server_hostname, test->server_port, &server_res, test->zerocopy);
if (s < 0) {
i_errno = IESTREAMCONNECT;
return -1;
Expand Down
10 changes: 10 additions & 0 deletions src/iperf_time.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ iperf_time_in_usecs(struct iperf_time *time)
return time->secs * 1000000LL + time->usecs;
}

uint64_t
iperf_time_now_in_usecs()
{
struct iperf_time time;

iperf_time_now(&time);
return iperf_time_in_usecs(&time);
}


double
iperf_time_in_secs(struct iperf_time *time)
{
Expand Down
2 changes: 2 additions & 0 deletions src/iperf_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct iperf_time {

int iperf_time_now(struct iperf_time *time1);

uint64_t iperf_time_now_in_usecs();

void iperf_time_add_usecs(struct iperf_time *time1, uint64_t usecs);

int iperf_time_compare(struct iperf_time *time1, struct iperf_time *time2);
Expand Down
Loading

0 comments on commit 6f0585b

Please sign in to comment.