Skip to content

Commit

Permalink
prov/util: Change uffd stop routine to use pipe
Browse files Browse the repository at this point in the history
Without this change, the following segfault can happen.

0  0x00001555546a66de in malloc () from /lib64/libc.so.6
1  0x00001555555351f8 in _dl_new_object () from /lib64/ld-linux-x86-64.so.2
2  0x000015555552f413 in _dl_map_object_from_fd () from /lib64/ld-linux-x86-64.so.2
3  0x000015555553252a in _dl_map_object () from /lib64/ld-linux-x86-64.so.2
4  0x000015555553d700 in dl_open_worker () from /lib64/ld-linux-x86-64.so.2
5  0x000015555476002d in _dl_catch_exception () from /lib64/libc.so.6
6  0x000015555553d28b in _dl_open () from /lib64/ld-linux-x86-64.so.2
7  0x000015555475f37d in do_dlopen () from /lib64/libc.so.6
8  0x000015555476002d in _dl_catch_exception () from /lib64/libc.so.6
9  0x00001555547600bf in _dl_catch_error () from /lib64/libc.so.6
10 0x000015555475f497 in dlerror_run () from /lib64/libc.so.6
11 0x000015555475f567 in __libc_dlopen_mode () from /lib64/libc.so.6
12 0x0000155555409fbb in pthread_cancel_init () from /lib64/libpthread.so.0
13 0x00001555554062b7 in pthread_cancel () from /lib64/libpthread.so.0
14 0x00000000005b66eb in ofi_uffd_stop (monitor=0x70f3e0 <uffd>) at prov/util/src/util_mem_monitor.c:887
15 0x00000000005b4c25 in ofi_monitors_update (monitors=0x7fffffffe070) at prov/util/src/util_mem_monitor.c:154
16 0x00000000005b5828 in ofi_monitors_del_cache (cache=0x74a428) at prov/util/src/util_mem_monitor.c:485
17 0x00000000005b9956 in ofi_mr_cache_cleanup (cache=0x74a428) at prov/util/src/util_mr_cache.c:502
18 0x00000000005e8cb1 in cxip_iomm_fini (dom=0x746250) at prov/cxi/src/cxip_iomm.c:368
19 0x0000000000617494 in cxip_domain_disable (dom=0x746250) at prov/cxi/src/cxip_dom.c:540
20 0x000000000061755f in cxip_dom_close (fid=0x746250) at prov/cxi/src/cxip_dom.c:568
21 0x000000000054da04 in fi_close (fid=0x746250) at ./include/rdma/fabric.h:632
22 0x0000000000570df9 in av_auth_key_test_rx_ep_fini () at prov/cxi/test/auth_key.c:2495
23 0x0000000000575efa in data_transfer_av_auth_key_av_user_id_source_err_auth_key_user_id_impl () at prov/cxi/test/auth_key.c:2802
24 0x0000155555017f50 in criterion_internal_test_main (fn=0x5751ad <data_transfer_av_auth_key_av_user_id_source_err_auth_key_user_id_impl>) at ../src/core/test.c:94
25 0x00000000005751a5 in data_transfer_av_auth_key_av_user_id_source_err_auth_key_user_id_jmp () at prov/cxi/test/auth_key.c:2754
26 0x0000155555016823 in run_test_child () at ../src/core/runner_coroutine.c:230
27 0x00001555550268b1 in bxfi_main () at ../subprojects/boxfort/src/sandbox.c:57
28 0x000015555463e24d in __libc_start_main () from /lib64/libc.so.6
29 0x0000000000405bea in _start () at ../sysdeps/x86_64/start.S:120

Signed-off-by: Mike Uttormark <[email protected]>
  • Loading branch information
muttormark authored and iziemba committed Oct 23, 2024
1 parent 708610d commit 6d8cba9
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 16 deletions.
1 change: 1 addition & 0 deletions include/ofi_mr.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ struct ofi_uffd {
struct ofi_mem_monitor monitor;
pthread_t thread;
int fd;
int exit_pipe[2];
};

extern struct ofi_mem_monitor *uffd_monitor;
Expand Down
94 changes: 78 additions & 16 deletions prov/util/src/util_mem_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ static struct ofi_uffd uffd = {
.monitor.start = ofi_uffd_start,
.monitor.stop = ofi_uffd_stop,
.monitor.name = "uffd",
.fd = -1,
.exit_pipe = { -1, -1 },
};
struct ofi_mem_monitor *uffd_monitor = &uffd.monitor;

Expand Down Expand Up @@ -594,14 +596,17 @@ static void ofi_uffd_pagefault_handler(struct uffd_msg *msg);
static void *ofi_uffd_handler(void *arg)
{
struct uffd_msg msg;
struct pollfd fds;
struct pollfd fds[2];
int ret;

fds.fd = uffd.fd;
fds.events = POLLIN;
fds[0].fd = uffd.fd;
fds[0].events = POLLIN;
fds[1].fd = uffd.exit_pipe[0];
fds[1].events = POLLIN;

for (;;) {
ret = poll(&fds, 1, -1);
if (ret != 1)
ret = poll(fds, 2, -1);
if (ret < 0 || fds[1].revents)
break;

pthread_rwlock_rdlock(&mm_list_rwlock);
Expand Down Expand Up @@ -832,24 +837,45 @@ static bool ofi_uffd_valid(struct ofi_mem_monitor *monitor,
return true;
}

static void ofi_uffd_close_fd(struct ofi_uffd *monitor)
{
close(monitor->fd);
monitor->fd = -1;
}

static void ofi_uffd_close_pipe(struct ofi_uffd *monitor)
{
close(monitor->exit_pipe[0]);
close(monitor->exit_pipe[1]);
monitor->exit_pipe[0] = -1;
monitor->exit_pipe[1] = -1;
}

static int ofi_uffd_start(struct ofi_mem_monitor *monitor)
{
struct uffdio_api api;
int ret;

uffd.monitor.subscribe = ofi_uffd_subscribe;
uffd.monitor.unsubscribe = ofi_uffd_unsubscribe;
uffd.monitor.valid = ofi_uffd_valid;
if (uffd.fd >= 0)
return 0;

if (!num_page_sizes)
return -FI_ENODATA;

ret = pipe(uffd.exit_pipe);
if (ret) {
FI_WARN(&core_prov, FI_LOG_MR,
"uffd/pipe: %s\n", strerror(errno));
return -errno;
}

uffd.fd = syscall(__NR_userfaultfd,
O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
if (uffd.fd < 0) {
FI_WARN(&core_prov, FI_LOG_MR,
"syscall/userfaultfd %s\n", strerror(errno));
return -errno;
ret = -errno;
goto close_pipe;
}

api.api = UFFD_API;
Expand All @@ -860,34 +886,70 @@ static int ofi_uffd_start(struct ofi_mem_monitor *monitor)
FI_WARN(&core_prov, FI_LOG_MR,
"ioctl/uffdio: %s\n", strerror(errno));
ret = -errno;
goto closefd;
goto close_fd;
}

if (api.api != UFFD_API) {
FI_WARN(&core_prov, FI_LOG_MR, "uffd features not supported\n");
ret = -FI_ENOSYS;
goto closefd;
goto close_fd;
}

ret = pthread_create(&uffd.thread, NULL, ofi_uffd_handler, &uffd);
if (ret) {
FI_WARN(&core_prov, FI_LOG_MR,
"failed to create handler thread %s\n", strerror(ret));
ret = -ret;
goto closefd;
goto close_fd;
}

uffd.monitor.subscribe = ofi_uffd_subscribe;
uffd.monitor.unsubscribe = ofi_uffd_unsubscribe;
uffd.monitor.valid = ofi_uffd_valid;

FI_INFO(&core_prov, FI_LOG_MR,
"Memory monitor uffd started.\n");

return 0;

closefd:
close(uffd.fd);
close_fd:

ofi_uffd_close_fd(&uffd);

close_pipe:

ofi_uffd_close_pipe(&uffd);

FI_WARN(&core_prov, FI_LOG_MR,
"Memory monitor uffd failed to start: %s.\n",
strerror(-ret));

return ret;
}

static void ofi_uffd_stop(struct ofi_mem_monitor *monitor)
{
pthread_cancel(uffd.thread);
ssize_t num_written;

if (uffd.fd < 0)
return;

/* tell the thread to exit with the exit_pipe */

num_written = write(uffd.exit_pipe[1], "X", 1);
if (num_written != 1) {
FI_WARN(&core_prov, FI_LOG_MR,
"uffd/close: unable to write to exit pipe: %s",
strerror(errno));
}

pthread_join(uffd.thread, NULL);
close(uffd.fd);

ofi_uffd_close_fd(&uffd);
ofi_uffd_close_pipe(&uffd);

FI_INFO(&core_prov, FI_LOG_MR,
"Memory monitor uffd stopped.\n");
}

#else /* HAVE_UFFD_MONITOR */
Expand Down

0 comments on commit 6d8cba9

Please sign in to comment.