From 13153322385c5612924803ceb1e8015c8f1584be Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 18 May 2024 10:08:37 -0600 Subject: [PATCH] Check the runtime version of PMIx It has been reported (and confirmed) that building against one version of PMIx and then running with another version will cause PRRTE to segfault. This isn't a universal rule. For example, one can switch v5.0 and master without a problem. However, switching v5.0 and v4.2 is a definite segfault. The root cause of the problem is a change in the layout of the base pmix_object_t definition. This renders all PMIx objects binary incompatible when crossing between the v5 and v4 (and below) series. Changing the v5 definition back to match v4 is an overly complex task. The changes were required to accommodate the new shared memory support that was introduced in v5. So instead, we check the runtime version of PMIx against the build version. If the runtime version is incompatible with the build version, then we print an explanatory error message and error out. 
Signed-off-by: Ralph Castain bot:notacherrypick dd Signed-off-by: Ralph Castain --- src/runtime/prte_init.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/runtime/prte_init.c b/src/runtime/prte_init.c index 994b0cb0df..e417f4b98c 100644 --- a/src/runtime/prte_init.c +++ b/src/runtime/prte_init.c @@ -38,6 +38,9 @@ #ifdef HAVE_SYS_STAT_H # include <sys/stat.h> #endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif #include "src/util/error.h" #include "src/util/error_strings.h" @@ -127,16 +130,53 @@ static bool check_exist(char *path) return false; } +static void print_error(unsigned major, + unsigned minor, + unsigned release) +{ + fprintf(stderr, "************************************************\n"); + fprintf(stderr, "We have detected that the runtime version\n"); + fprintf(stderr, "of the PMIx library we were given is binary\n"); + fprintf(stderr, "incompatible with the version we were built against:\n\n"); + fprintf(stderr, " Runtime: 0x%x%02x%02x\n", major, minor, release); + fprintf(stderr, " Build: 0x%0x\n\n", PMIX_NUMERIC_VERSION); + fprintf(stderr, "Please update your LD_LIBRARY_PATH to point\n"); + fprintf(stderr, "us to the same PMIx version used to build PRRTE.\n"); + fprintf(stderr, "************************************************\n"); +} + int prte_init_minimum(void) { int ret; char *path = NULL; + const char *rvers; + char token[100]; + unsigned int major = 0, minor = 0, release = 0; /* stay defined (0 = unknown, never trips the check below) if the version string fails to parse */ if (min_initialized) { return PRTE_SUCCESS; } min_initialized = true; + /* check to see if the version of PMIx we were given in the + * library path matches the version we were built against. + * Because we are using PMIx internals, we cannot support + * cross version operations from inside of PRRTE. + */ + rvers = PMIx_Get_version(); + ret = sscanf(rvers, "%99s %u.%u.%u", token, &major, &minor, &release); /* %99s keeps the leading name token inside token[100] */ + + /* check the version triplet - we know that version + * 5 and above are not runtime compatible with version + * 4 and below. 
Since PRRTE has a minimum PMIx requirement + * in the v4.x series, we only need to check v4 vs 5 + * and above */ + if ((PMIX_VERSION_MAJOR > 4 && 4 == major) || + (PMIX_VERSION_MAJOR == 4 && 5 <= major)) { + print_error(major, minor, release); + return PRTE_ERR_SILENT; + } + /* carry across the toolname */ pmix_tool_basename = prte_tool_basename;