-
Notifications
You must be signed in to change notification settings - Fork 50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
job-list: support "hostlist" constraint to allow jobs to be filtered by nodes #5656
Changes from all commits
e31b66a
a8baf01
f1de8d0
68165f8
fc2aece
d381bf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,8 @@ | |
MATCH_LESS_THAN = 4, | ||
} match_comparison_t; | ||
|
||
#define MIN_MATCH_HOSTLIST 1024 | ||
|
||
struct timestamp_value { | ||
double t_value; | ||
match_timestamp_type_t t_type; | ||
|
@@ -428,6 +430,91 @@ | |
errp); | ||
} | ||
|
||
static int match_hostlist (struct list_constraint *c, | ||
const struct job *job, | ||
unsigned int *comparisons, | ||
flux_error_t *errp) | ||
{ | ||
struct hostlist *hl = zlistx_first (c->values); | ||
const char *host; | ||
|
||
/* nodelist may not exist if job never ran */ | ||
if (!job->nodelist) | ||
return 0; | ||
if (!job->nodelist_hl) { | ||
/* hack to remove const */ | ||
struct job *jobtmp = (struct job *)job; | ||
if (!(jobtmp->nodelist_hl = hostlist_decode (job->nodelist))) | ||
return 0; | ||
} | ||
host = hostlist_first (hl); | ||
while (host) { | ||
if (inc_check_comparison (c->mctx, comparisons, errp) < 0) | ||
return -1; | ||
if (hostlist_find (job->nodelist_hl, host) >= 0) | ||
return 1; | ||
host = hostlist_next (hl); | ||
} | ||
return 0; | ||
} | ||
|
||
/* zlistx_set_destructor */ | ||
static void wrap_hostlist_destroy (void **item) | ||
{ | ||
if (item) { | ||
struct hostlist *hl = *item; | ||
hostlist_destroy (hl); | ||
(*item) = NULL; | ||
} | ||
} | ||
|
||
static struct list_constraint *create_hostlist_constraint ( | ||
struct match_ctx *mctx, | ||
json_t *values, | ||
flux_error_t *errp) | ||
{ | ||
struct list_constraint *c; | ||
struct hostlist *hl = NULL; | ||
json_t *entry; | ||
size_t index; | ||
|
||
if (!(c = list_constraint_new (mctx, | ||
match_hostlist, | ||
wrap_hostlist_destroy, | ||
errp))) | ||
return NULL; | ||
/* Create a single hostlist if user specifies multiple nodes or | ||
* RFC29 hostlist range */ | ||
if (!(hl = hostlist_create ())) { | ||
errprintf (errp, "failed to create hostlist structure"); | ||
goto error; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
json_array_foreach (values, index, entry) { | ||
if (!json_is_string (entry)) { | ||
errprintf (errp, "host value must be a string"); | ||
goto error; | ||
} | ||
if (hostlist_append (hl, json_string_value (entry)) <= 0) { | ||
errprintf (errp, "host value not in valid Hostlist format"); | ||
goto error; | ||
} | ||
} | ||
if (hostlist_count (hl) > mctx->max_hostlist) { | ||
errprintf (errp, "too many hosts specified"); | ||
goto error; | ||
} | ||
if (!zlistx_add_end (c->values, hl)) { | ||
errprintf (errp, "failed to append hostlist structure"); | ||
hostlist_destroy (hl); | ||
goto error; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
return c; | ||
error: | ||
hostlist_destroy (hl); | ||
list_constraint_destroy (c); | ||
return NULL; | ||
} | ||
|
||
static int match_timestamp (struct list_constraint *c, | ||
const struct job *job, | ||
unsigned int *comparisons, | ||
|
@@ -665,6 +752,8 @@ | |
return create_states_constraint (mctx, values, errp); | ||
else if (streq (op, "results")) | ||
return create_results_constraint (mctx, values, errp); | ||
else if (streq (op, "hostlist")) | ||
return create_hostlist_constraint (mctx, values, errp); | ||
else if (streq (op, "t_submit") | ||
|| streq (op, "t_depend") | ||
|| streq (op, "t_run") | ||
|
@@ -743,6 +832,30 @@ | |
goto error; | ||
} | ||
|
||
if (flux_get_size (mctx->h, &mctx->max_hostlist) < 0) { | ||
flux_log_error (h, "failed to get instance size"); | ||
goto error; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think you mean I forgot a flux_log in this case, but correct!! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, heh. Yeah I must have been blindly checking |
||
} | ||
|
||
/* Notes: | ||
* | ||
* We do not want a hostlist constraint match to DoS this module. | ||
* So we want to configure a "max" amount of hosts that can exist | ||
* within a hostlist constraint. | ||
* | ||
* Under normal operating conditions, the number of brokers should | ||
* represent the most likely maximum. But there are some corner | ||
* cases. For example, the instance gets reconfigured to be | ||
* smaller, which is not an uncommon thing to do towards a | ||
* cluster's end of life and hardware is beginning to die. | ||
* | ||
* So we configure the following compromise. If the number of | ||
* brokers is below our defined minimum MIN_MATCH_HOSTLIST, we'll | ||
* allow max_hostlist to be increased to this number. | ||
*/ | ||
if (mctx->max_hostlist < MIN_MATCH_HOSTLIST) | ||
mctx->max_hostlist = MIN_MATCH_HOSTLIST; | ||
|
||
return mctx; | ||
|
||
error: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
errp->text
not set here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i think this is ok, errp is set in the call to
list_constraint_new()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh yeah, but looks like it is missing in the
EINVAL
path.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, nevermind, I think I was looking at the wrong function in the diff.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nevermind, I think I was looking at the wrong function in the diff.