Skip to content

Commit

Permalink
pals: make eventlog timeout configurable
Browse files Browse the repository at this point in the history
Problem: sometimes 10 seconds is not enough of a timeout for reading
from the eventlog if lustre is hanging.

Make the timeout configurable.
  • Loading branch information
jameshcorbett committed Jan 25, 2025
1 parent 37a697f commit 8ffd9ca
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions src/shell/plugins/cray_pals.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,8 @@ static int create_apinfo (const char *apinfo_path, flux_shell_t *shell)
static int read_future (flux_future_t *fut,
char *buf,
size_t bufsize,
json_int_t *random)
json_int_t *random,
double timeout)
{
json_t *o = NULL;
json_t *context = NULL;
Expand All @@ -501,7 +502,7 @@ static int read_future (flux_future_t *fut,
json_int_t portnum;
int bytes_written;

while (flux_future_wait_for (fut, 10.0) == 0
while (flux_future_wait_for (fut, timeout) == 0
&& flux_job_event_watch_get (fut, &event) == 0) {
if (!(o = eventlog_entry_decode (event))) {
shell_log_errno ("Error decoding eventlog entry");
Expand Down Expand Up @@ -576,13 +577,19 @@ static int get_pals_ports (flux_shell_t *shell, json_int_t jobid)
flux_future_t *fut = NULL;
int rc;
json_int_t random;
double timeout = 10.0;

if (!(h = flux_shell_get_flux (shell))
|| !(fut = flux_job_event_watch (h, (flux_jobid_t)jobid, "eventlog", 0))) {
shell_log_error ("Error creating event_watch future");
return -1;
}
if ((rc = read_future (fut, buf, sizeof (buf), &random)) < 0)
if (flux_shell_getopt_unpack (shell,
"cray-pals",
"{s?F}",
"timeout",
&timeout) < 0
|| (rc = read_future (fut, buf, sizeof (buf), &random, timeout)) < 0)
shell_log_error ("Error reading ports from eventlog");
flux_future_destroy (fut);

Expand Down Expand Up @@ -802,7 +809,7 @@ int flux_plugin_init (flux_plugin_t *p)

shell_debug ("enabled");

// If -o cray-pals.no-edit-env is was speciifed set a flag for later
// If -o cray-pals.no-edit-env is was specified set a flag for later
no_edit_env = 0;
(void)flux_shell_getopt_unpack (shell,
"cray-pals",
Expand Down

0 comments on commit 8ffd9ca

Please sign in to comment.