Skip to content

Commit

Permalink
spawn-server-nofork: invalid magic (netdata#18831)
Browse files Browse the repository at this point in the history
* debug code to trace the issue

* detect the libsystemd socket and keep it open when forking

* fix the path

* evaluate 2k fds

* increase to 1k pad buffer

* detect it in a different way

* cleanup

* use the same journal direct socket detection

* removed debugging paddding
  • Loading branch information
ktsaou authored Oct 21, 2024
1 parent ac76c5f commit fc1fb32
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 66 deletions.
28 changes: 11 additions & 17 deletions src/libnetdata/log/nd_log-init.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void nd_log_initialize_for_external_plugins(const char *name) {
program_name = name;

for(size_t i = 0; i < _NDLS_MAX ;i++) {
nd_log.sources[i].method = STDERR_FILENO;
nd_log.sources[i].method = NDLM_DEFAULT;
nd_log.sources[i].fd = -1;
nd_log.sources[i].fp = NULL;
}
Expand Down Expand Up @@ -123,12 +123,10 @@ void nd_log_initialize_for_external_plugins(const char *name) {
break;
}

for(size_t i = 0; i < _NDLS_MAX ;i++) {
nd_log.sources[i].method = method;
nd_log.sources[i].format = format;
nd_log.sources[i].fd = -1;
nd_log.sources[i].fp = NULL;
}
nd_log.sources[NDLS_COLLECTORS].method = method;
nd_log.sources[NDLS_COLLECTORS].format = format;
nd_log.sources[NDLS_COLLECTORS].fd = -1;
nd_log.sources[NDLS_COLLECTORS].fp = NULL;

// nd_log(NDLS_COLLECTORS, NDLP_NOTICE, "FINAL_LOG_METHOD: %s", nd_log_id2method(method));
}
Expand Down Expand Up @@ -271,6 +269,10 @@ void nd_log_reopen_log_files(bool log) {
netdata_log_info("Log files re-opened.");
}

int nd_log_systemd_journal_fd(void) {
return nd_log.journal.fd;
}

void nd_log_reopen_log_files_for_spawn_server(const char *name) {
gettid_uncached();

Expand All @@ -284,13 +286,11 @@ void nd_log_reopen_log_files_for_spawn_server(const char *name) {
close(nd_log.journal_direct.fd);
nd_log.journal_direct.fd = -1;
nd_log.journal_direct.initialized = false;
nd_log_journal_direct_init(NULL);
}

for(size_t i = 0; i < _NDLS_MAX ;i++) {
if(i != NDLS_COLLECTORS && i != NDLS_DAEMON) continue;

spinlock_init(&nd_log.sources[i].spinlock);
nd_log.sources[i].method = NDLM_DEFAULT;
nd_log.sources[i].fd = -1;
nd_log.sources[i].fp = NULL;
nd_log.sources[i].pending_msg = NULL;
Expand All @@ -299,16 +299,10 @@ void nd_log_reopen_log_files_for_spawn_server(const char *name) {
#endif
}

for(size_t i = 0; i < _NDLS_MAX ;i++) {
if(i == NDLS_COLLECTORS || i == NDLS_DAEMON) continue;
nd_log.sources[i].method = NDLM_DISABLED;
}

// initialize spinlocks
spinlock_init(&nd_log.std_output.spinlock);
spinlock_init(&nd_log.std_error.spinlock);

nd_log.journal.initialized = false;
nd_log.journal_direct.initialized = false;
nd_log.syslog.initialized = false;
nd_log.eventlog.initialized = false;
nd_log.std_output.initialized = false;
Expand Down
2 changes: 2 additions & 0 deletions src/libnetdata/log/nd_log-internals.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,8 @@ struct nd_log nd_log = {
.overwrite_process_source = 0,
.journal = {
.initialized = false,
.first_msg = false,
.fd = -1,
},
.journal_direct = {
.initialized = false,
Expand Down
4 changes: 3 additions & 1 deletion src/libnetdata/log/nd_log-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,14 @@ struct nd_log {

struct {
bool initialized;
bool first_msg;
int fd; // we don't control this, we just detect it to keep it open
} journal;

struct {
bool initialized;
int fd;
char filename[FILENAME_MAX + 1];
char filename[FILENAME_MAX];
} journal_direct;

struct {
Expand Down
78 changes: 51 additions & 27 deletions src/libnetdata/log/nd_log-to-systemd-journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,36 @@ bool nd_log_journal_systemd_init(void) {
return nd_log.journal.initialized;
}

bool nd_log_journal_socket_available(void) {
if(netdata_configured_host_prefix && *netdata_configured_host_prefix) {
char filename[FILENAME_MAX + 1];
static int nd_log_journal_direct_fd_find_and_open(char *filename, size_t size) {
int fd;

snprintfz(filename, sizeof(filename), "%s%s",
netdata_configured_host_prefix, "/run/systemd/journal/socket");
if(netdata_configured_host_prefix && *netdata_configured_host_prefix) {
journal_construct_path(filename, size, netdata_configured_host_prefix, "netdata");
if (is_path_unix_socket(filename) && (fd = journal_direct_fd(filename)) != -1)
return fd;

if(is_path_unix_socket(filename))
return true;
journal_construct_path(filename, size, netdata_configured_host_prefix, NULL);
if (is_path_unix_socket(filename) && (fd = journal_direct_fd(filename)) != -1)
return fd;
}

return is_path_unix_socket("/run/systemd/journal/socket");
journal_construct_path(filename, size, NULL, "netdata");
if (is_path_unix_socket(filename) && (fd = journal_direct_fd(filename)) != -1)
return fd;

journal_construct_path(filename, size, NULL, NULL);
if (is_path_unix_socket(filename) && (fd = journal_direct_fd(filename)) != -1)
return fd;

return -1;
}

bool nd_log_journal_socket_available(void) {
char filename[FILENAME_MAX];
int fd = nd_log_journal_direct_fd_find_and_open(filename, sizeof(filename));
if(fd == -1) return false;
close(fd);
return true;
}

static void nd_log_journal_direct_set_env(void) {
Expand All @@ -38,25 +56,9 @@ bool nd_log_journal_direct_init(const char *path) {
}

int fd;
char filename[FILENAME_MAX + 1];
if(!is_path_unix_socket(path)) {

journal_construct_path(filename, sizeof(filename), netdata_configured_host_prefix, "netdata");
if (!is_path_unix_socket(filename) || (fd = journal_direct_fd(filename)) == -1) {

journal_construct_path(filename, sizeof(filename), netdata_configured_host_prefix, NULL);
if (!is_path_unix_socket(filename) || (fd = journal_direct_fd(filename)) == -1) {

journal_construct_path(filename, sizeof(filename), NULL, "netdata");
if (!is_path_unix_socket(filename) || (fd = journal_direct_fd(filename)) == -1) {

journal_construct_path(filename, sizeof(filename), NULL, NULL);
if (!is_path_unix_socket(filename) || (fd = journal_direct_fd(filename)) == -1)
return false;
}
}
}
}
char filename[FILENAME_MAX];
if(!is_path_unix_socket(path))
fd = nd_log_journal_direct_fd_find_and_open(filename, sizeof(filename));
else {
snprintfz(filename, sizeof(filename), "%s", path);
fd = journal_direct_fd(filename);
Expand All @@ -74,6 +76,8 @@ bool nd_log_journal_direct_init(const char *path) {
return true;
}

static bool sockets_before[1024];

bool nd_logger_journal_libsystemd(struct log_field *fields __maybe_unused, size_t fields_max __maybe_unused) {
#ifdef HAVE_SYSTEMD

Expand Down Expand Up @@ -154,8 +158,28 @@ bool nd_logger_journal_libsystemd(struct log_field *fields __maybe_unused, size_
}
}

bool detect_systemd_socket = __atomic_load_n(&nd_log.journal.first_msg, __ATOMIC_RELAXED) == false;
if(detect_systemd_socket) {
for(int i = 3 ; (size_t)i < _countof(sockets_before); i++)
sockets_before[i] = fd_is_socket(i);
}

int r = sd_journal_sendv(iov, iov_count);

if(r == 0 && detect_systemd_socket) {
__atomic_store_n(&nd_log.journal.first_msg, true, __ATOMIC_RELAXED);

// this is the first successful libsystemd log
// let's detect its fd number (we need it for the spawn server)

for(int i = 3 ; (size_t)i < _countof(sockets_before); i++) {
if (!sockets_before[i] && fd_is_socket(i)) {
nd_log.journal.fd = i;
break;
}
}
}

// Clean up allocated memory
for (int i = 0; i < iov_count; i++) {
if (iov[i].iov_base != NULL) {
Expand Down
1 change: 1 addition & 0 deletions src/libnetdata/log/nd_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ extern "C" {
#define ND_LOG_DEFAULT_THROTTLE_PERIOD 60

void errno_clear(void);
int nd_log_systemd_journal_fd(void);
void nd_log_set_user_settings(ND_LOG_SOURCES source, const char *setting);
void nd_log_set_facility(const char *facility);
void nd_log_set_priority_level(const char *setting);
Expand Down
75 changes: 54 additions & 21 deletions src/libnetdata/spawn_server/spawn_server_nofork.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,13 @@ static bool spawn_external_command(SPAWN_SERVER *server __maybe_unused, SPAWN_RE
return false;
}

os_close_all_non_std_open_fds_except(rq->fds, 3, CLOSE_RANGE_CLOEXEC);
int fds_to_keep[] = {
rq->fds[0],
rq->fds[1],
rq->fds[2],
nd_log_systemd_journal_fd(),
};
os_close_all_non_std_open_fds_except(fds_to_keep, _countof(fds_to_keep), CLOSE_RANGE_CLOEXEC);

errno_clear();
if (posix_spawn(&rq->pid, rq->argv[0], &file_actions, &attr, (char * const *)rq->argv, (char * const *)rq->envp) != 0) {
Expand Down Expand Up @@ -325,7 +331,14 @@ static bool spawn_server_run_callback(SPAWN_SERVER *server __maybe_unused, SPAWN
os_setproctitle("spawn-callback", server->argc, server->argv);

// close all open file descriptors of the parent, but keep ours
os_close_all_non_std_open_fds_except(rq->fds, 4, 0);
int fds_to_keep[] = {
rq->fds[0],
rq->fds[1],
rq->fds[2],
rq->fds[3],
nd_log_systemd_journal_fd(),
};
os_close_all_non_std_open_fds_except(fds_to_keep, _countof(fds_to_keep), 0);
nd_log_reopen_log_files_for_spawn_server("spawn-callback");

// get the fds from the request
Expand Down Expand Up @@ -1076,7 +1089,12 @@ SPAWN_SERVER* spawn_server_create(SPAWN_SERVER_OPTIONS options, const char *name
os_setproctitle(buf, server->argc, server->argv);

replace_stdio_with_dev_null();
os_close_all_non_std_open_fds_except((int[]){ server->sock, server->pipe[1] }, 2, 0);
int fds_to_keep[] = {
server->sock,
server->pipe[1],
nd_log_systemd_journal_fd(),
};
os_close_all_non_std_open_fds_except(fds_to_keep, _countof(fds_to_keep), 0);
nd_log_reopen_log_files_for_spawn_server(buf);
exit(spawn_server_event_loop(server));
}
Expand Down Expand Up @@ -1125,6 +1143,21 @@ void spawn_server_exec_destroy(SPAWN_INSTANCE *instance) {
freez(instance);
}

static void log_invalid_magic(SPAWN_INSTANCE *instance, struct status_report *sr) {
unsigned char buf[sizeof(*sr) + 1];
memcpy(buf, sr, sizeof(*sr));
buf[sizeof(buf) - 1] = '\0';

for(size_t i = 0; i < sizeof(buf) - 1; i++) {
if (iscntrl(buf[i]) || !isprint(buf[i]))
buf[i] = '_';
}

nd_log(NDLS_COLLECTORS, NDLP_ERR,
"SPAWN PARENT: invalid final status report for child %d, request %zu (invalid magic %#x in response, reads like '%s')",
instance->child_pid, instance->request_id, sr->magic, buf);
}

int spawn_server_exec_wait(SPAWN_SERVER *server __maybe_unused, SPAWN_INSTANCE *instance) {
int rc = -1;

Expand All @@ -1139,24 +1172,24 @@ int spawn_server_exec_wait(SPAWN_SERVER *server __maybe_unused, SPAWN_INSTANCE *
"SPAWN PARENT: failed to read final status report for child %d, request %zu",
instance->child_pid, instance->request_id);

else if(sr.magic != STATUS_REPORT_MAGIC) {
nd_log(NDLS_COLLECTORS, NDLP_ERR,
"SPAWN PARENT: invalid final status report for child %d, request %zu (invalid magic %#x in response)",
instance->child_pid, instance->request_id, sr.magic);
}
else switch(sr.status) {
case STATUS_REPORT_EXITED:
rc = sr.exited.waitpid_status;
break;

case STATUS_REPORT_STARTED:
case STATUS_REPORT_FAILED:
default:
errno = 0;
nd_log(NDLS_COLLECTORS, NDLP_ERR,
"SPAWN PARENT: invalid status report to exec spawn request %zu for pid %d (status = %u)",
instance->request_id, instance->child_pid, sr.status);
break;
else if(sr.magic != STATUS_REPORT_MAGIC)
log_invalid_magic(instance, &sr);
else {
switch (sr.status) {
case STATUS_REPORT_EXITED:
rc = sr.exited.waitpid_status;
break;

case STATUS_REPORT_STARTED:
case STATUS_REPORT_FAILED:
default:
errno = 0;
nd_log(
NDLS_COLLECTORS, NDLP_ERR,
"SPAWN PARENT: invalid status report to exec spawn request %zu for pid %d (status = %u)",
instance->request_id, instance->child_pid, sr.status);
break;
}
}

instance->child_pid = 0;
Expand Down

0 comments on commit fc1fb32

Please sign in to comment.