diff --git a/src/hwloc/hwloc-internal.h b/src/hwloc/hwloc-internal.h index 46fcb7deec..eb238d6f9c 100644 --- a/src/hwloc/hwloc-internal.h +++ b/src/hwloc/hwloc-internal.h @@ -97,6 +97,21 @@ typedef struct { size_t mbs_len; } prte_hwloc_base_memory_segment_t; + +/** + * Struct used to cache topology-level data used + * for repeated lookup - the struct is attached + * to the userdata of the root object of the + * topology + */ +typedef struct { + pmix_object_t super; + bool computed; + unsigned numa_cutoff; +} prte_hwloc_topo_data_t; +PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_hwloc_topo_data_t); + + /* define binding policies */ typedef uint16_t prte_binding_policy_t; #define PRTE_BINDING_POLICY PRTE_UINT16 @@ -226,13 +241,14 @@ PRTE_EXPORT extern prte_hwloc_base_mbfa_t prte_hwloc_base_mbfa; * hwloc_topology_load()). */ PRTE_EXPORT int prte_hwloc_base_get_topology(void); -PRTE_EXPORT hwloc_cpuset_t prte_hwloc_base_setup_summary(hwloc_topology_t topo); /** * Set the hwloc topology to that from the given topo file */ PRTE_EXPORT int prte_hwloc_base_set_topology(char *topofile); +PRTE_EXPORT void prte_hwloc_base_setup_summary(hwloc_topology_t topo); + PRTE_EXPORT hwloc_cpuset_t prte_hwloc_base_generate_cpuset(hwloc_topology_t topo, bool use_hwthread_cpus, char *cpulist); @@ -240,6 +256,13 @@ PRTE_EXPORT hwloc_cpuset_t prte_hwloc_base_filter_cpus(hwloc_topology_t topo); PRTE_EXPORT unsigned int prte_hwloc_base_get_obj_idx(hwloc_topology_t topo, hwloc_obj_t obj); +PRTE_EXPORT unsigned int prte_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, + hwloc_obj_type_t target); + +PRTE_EXPORT hwloc_obj_t prte_hwloc_base_get_obj_by_type(hwloc_topology_t topo, + hwloc_obj_type_t target, + unsigned int instance); + /** * Get the number of pu's under a given hwloc object. 
*/ diff --git a/src/hwloc/hwloc.c b/src/hwloc/hwloc.c index be7854e8a2..fe5dd1df67 100644 --- a/src/hwloc/hwloc.c +++ b/src/hwloc/hwloc.c @@ -653,3 +653,13 @@ int prte_hwloc_base_set_binding_policy(void *jdat, char *spec) } return PRTE_SUCCESS; } + +static void topo_data_const(prte_hwloc_topo_data_t *ptr) +{ + ptr->computed = false; + ptr->numa_cutoff = UINT_MAX; +} +PMIX_CLASS_INSTANCE(prte_hwloc_topo_data_t, + pmix_object_t, + topo_data_const, NULL); + diff --git a/src/hwloc/hwloc_base_util.c b/src/hwloc/hwloc_base_util.c index d0ab6c317e..80cd5187f8 100644 --- a/src/hwloc/hwloc_base_util.c +++ b/src/hwloc/hwloc_base_util.c @@ -176,16 +176,89 @@ hwloc_cpuset_t prte_hwloc_base_generate_cpuset(hwloc_topology_t topo, return avail; } -hwloc_cpuset_t prte_hwloc_base_setup_summary(hwloc_topology_t topo) +void prte_hwloc_base_setup_summary(hwloc_topology_t topo) { - hwloc_cpuset_t avail = NULL; + hwloc_obj_t root; + prte_hwloc_topo_data_t *sum; + unsigned width, w, m, N, last; + hwloc_bitmap_t *numas; + hwloc_obj_t obj; - avail = hwloc_bitmap_alloc(); + /* Historically, CPU packages contained a single cpu die + * and nothing else. NUMA was therefore determined by simply + * looking at the memory bus attached to the socket where + * the package resided - all cpus in the package were + * exclusively "under" that NUMA. Since each socket had a + * unique NUMA, you could easily map by them. + * + * More recently, packages have started to contain multiple + * cpu dies as well as memory and sometimes even fabric die. + * In these cases, the memory bus of the cpu dies in the + * package generally shares any included memory die. This + * complicates the memory situation, leaving NUMA domains + * no longer cleanly delineated by processor (i.e., the + * NUMA domains overlap each other). + * + * Fortunately, the OS index of non-CPU NUMA domains starts + * at 255 and counts downward (at least for GPUs) - while + * the index of CPU NUMA domains starts at 0 and counts + * upward. 
We can therefore separate the two by excluding + * NUMA domains with an OS index above the level where + * they first begin to intersect + */ - /* get the root available cpuset */ - hwloc_bitmap_copy(avail, hwloc_topology_get_allowed_cpuset(topo)); + root = hwloc_get_root_obj(topo); + if (NULL == root->userdata) { + root->userdata = (void *) PMIX_NEW(prte_hwloc_topo_data_t); + } + sum = (prte_hwloc_topo_data_t *) root->userdata; - return avail; + /* only need to do this once */ + if (sum->computed) { + return; + } + sum->computed = true; + + /* compute the CPU NUMA cutoff for this topology */ + width = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE); + if (0 == width) { + sum->numa_cutoff = 0; + return; + } + numas = (hwloc_bitmap_t*)malloc(width * sizeof(hwloc_bitmap_t)); + N = 0; + last = 0; + for (w=0; w < UINT_MAX && N < width; w++) { + /* get the object at this index */ + obj = hwloc_get_numanode_obj_by_os_index(topo, w); + if (NULL == obj) { + continue; + } + /* check for overlap with all preceding numas */ + for (m=0; m < N; m++) { + if (hwloc_bitmap_intersects(obj->cpuset, numas[m])) { + // if it intersects anyone, then we are done + sum->numa_cutoff = last+1; + break; + } + } + if (UINT_MAX != sum->numa_cutoff) { + break; + } else { + last = w; + /* cache this bitmap */ + numas[N] = hwloc_bitmap_alloc(); + hwloc_bitmap_copy(numas[N], obj->cpuset); + ++N; + } + } + if (UINT_MAX == sum->numa_cutoff) { + sum->numa_cutoff = last + 1; + } + for (m=0; m < N; m++) { + hwloc_bitmap_free(numas[m]); + } + free(numas); } /* determine the node-level available cpuset based on @@ -199,12 +272,15 @@ hwloc_cpuset_t prte_hwloc_base_filter_cpus(hwloc_topology_t topo) if (NULL == prte_hwloc_default_cpu_list) { PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base: no cpus specified - using root available cpuset")); - avail = prte_hwloc_base_setup_summary(topo); + avail = hwloc_bitmap_alloc(); + hwloc_bitmap_copy(avail, hwloc_topology_get_allowed_cpuset(topo)); + } 
else { PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base: filtering cpuset")); avail = prte_hwloc_base_generate_cpuset(topo, prte_hwloc_default_use_hwthread_cpus, prte_hwloc_default_cpu_list); } + return avail; } @@ -280,6 +356,8 @@ int prte_hwloc_base_get_topology(void) line size */ fill_cache_line_size(); + // create the summary + prte_hwloc_base_setup_summary(prte_hwloc_topology); return PRTE_SUCCESS; } @@ -453,7 +531,7 @@ unsigned int prte_hwloc_base_get_obj_idx(hwloc_topology_t topo, hwloc_obj_t obj) PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:get_idx")); - nobjs = hwloc_get_nbobjs_by_type(topo, obj->type); + nobjs = prte_hwloc_base_get_nbobjs_by_type(topo, obj->type); PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:get_idx found %u objects of type %s", nobjs, @@ -461,7 +539,7 @@ unsigned int prte_hwloc_base_get_obj_idx(hwloc_topology_t topo, hwloc_obj_t obj) /* find this object */ for (i = 0; i < nobjs; i++) { - ptr = hwloc_get_obj_by_type(topo, obj->type, i); + ptr = prte_hwloc_base_get_obj_by_type(topo, obj->type, i); if (ptr == obj) { return i; } @@ -472,6 +550,73 @@ unsigned int prte_hwloc_base_get_obj_idx(hwloc_topology_t topo, hwloc_obj_t obj) return UINT_MAX; } +unsigned int prte_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, + hwloc_obj_type_t target) +{ + unsigned w, rc; + hwloc_obj_t obj, root; + prte_hwloc_topo_data_t *sum; + + /* if the type is NUMA, then we need to only count the + * CPU NUMAs and ignore the GPU NUMAs as we only deal + * with CPUs at this time */ + if (HWLOC_OBJ_NUMANODE == target) { + + root = hwloc_get_root_obj(topo); + sum = (prte_hwloc_topo_data_t *) root->userdata; + if (NULL == sum) { + return 0; + } + + rc = 0; + for (w=0; w < sum->numa_cutoff; w++) { + obj = hwloc_get_numanode_obj_by_os_index(topo, w); + if (NULL != obj) { + ++rc; + } + } + return rc; + } + rc = hwloc_get_nbobjs_by_type(topo, target); + if (UINT_MAX == rc) { + pmix_output(0, "UNKNOWN HWLOC ERROR"); + return 0; + 
} + return rc; +} + +hwloc_obj_t prte_hwloc_base_get_obj_by_type(hwloc_topology_t topo, + hwloc_obj_type_t target, + unsigned int instance) +{ + unsigned w, cnt; + hwloc_obj_t obj, root; + prte_hwloc_topo_data_t *sum; + + /* if we are looking for NUMA, then ignore all the + * GPU NUMAs */ + if (HWLOC_OBJ_NUMANODE == target) { + root = hwloc_get_root_obj(topo); + sum = (prte_hwloc_topo_data_t *) root->userdata; + if (NULL == sum) { + return NULL; + } + + cnt = 0; + for (w=0; w < sum->numa_cutoff; w++) { + obj = hwloc_get_numanode_obj_by_os_index(topo, w); + if (NULL != obj) { + if (cnt == instance) { + return obj; + } + ++cnt; + } + } + return NULL; + } + return hwloc_get_obj_by_type(topo, target, instance); +} + /* The current slot_list notation only goes to the core level - i.e., the location * is specified as package:core. Thus, the code below assumes that all locations * are to be parsed under that notation. @@ -500,7 +645,7 @@ static int package_to_cpu_set(char *cpus, hwloc_topology_t topo, hwloc_bitmap_t switch (range_cnt) { case 1: /* no range was present, so just one package given */ package_id = atoi(range[0]); - obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, package_id); + obj = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, package_id); /* get the available cpus for this package */ hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); break; @@ -510,7 +655,7 @@ static int package_to_cpu_set(char *cpus, hwloc_topology_t topo, hwloc_bitmap_t upper_range = atoi(range[1]); /* cycle across the range of packages */ for (package_id = lower_range; package_id <= upper_range; package_id++) { - obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, package_id); + obj = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, package_id); /* set the available cpus for this package bits in the bitmask */ hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); } @@ -542,7 +687,7 @@ static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t top 
package_id = atoi(package_core[0]); /* get the object for this package id */ - package = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, package_id); + package = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, package_id); if (NULL == package) { PMIX_ARGV_FREE_COMPAT(package_core); return PRTE_ERR_NOT_FOUND; @@ -552,7 +697,7 @@ static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t top * to find cores on all platforms. Adjust the type here if * required */ - if (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0)) { + if (NULL == prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0)) { obj_type = HWLOC_OBJ_PU; hwthreadcpus = true; } @@ -582,7 +727,7 @@ static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t top /* get the indexed core from this package */ core_id = atoi(list[j]) + npus; /* get that object */ - core = hwloc_get_obj_by_type(topo, obj_type, core_id); + core = prte_hwloc_base_get_obj_by_type(topo, obj_type, core_id); if (NULL == core) { rc = PRTE_ERR_NOT_FOUND; break; @@ -602,7 +747,7 @@ static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t top /* get the indexed core from this package */ core_id = j + npus; /* get that object */ - core = hwloc_get_obj_by_type(topo, obj_type, core_id); + core = prte_hwloc_base_get_obj_by_type(topo, obj_type, core_id); if (NULL == core) { rc = PRTE_ERR_NOT_FOUND; break; @@ -1159,10 +1304,10 @@ void prte_hwloc_get_binding_info(hwloc_const_cpuset_t cpuset, hwloc_bitmap_free(avail); /* get the number of packages in the topology */ - npkgs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); + npkgs = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); avail = hwloc_bitmap_alloc(); - npus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); - ncores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + npus = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); + ncores = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); if (npus == 
ncores && !use_hwthread_cpus) { /* the bits in this bitmap represent cores */ @@ -1174,7 +1319,7 @@ void prte_hwloc_get_binding_info(hwloc_const_cpuset_t cpuset, /* binding happens within a package and not across packages */ for (n = 0; n < npkgs; n++) { - pkg = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, n); + pkg = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, n); /* see if we have any here */ hwloc_bitmap_and(avail, cpuset, pkg->cpuset); @@ -1232,11 +1377,11 @@ char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset, hwloc_bitmap_free(avail); /* get the number of packages in the topology */ - npkgs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); + npkgs = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); avail = hwloc_bitmap_alloc(); - npus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); - ncores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + npus = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); + ncores = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); if (npus == ncores && !use_hwthread_cpus) { /* the bits in this bitmap represent cores */ bits_as_cores = true; @@ -1246,7 +1391,7 @@ char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset, } for (n = 0; n < npkgs; n++) { - pkg = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, n); + pkg = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, n); /* see if we have any here */ hwloc_bitmap_and(avail, cpuset, pkg->cpuset); if (hwloc_bitmap_iszero(avail)) { @@ -1290,13 +1435,13 @@ char *prte_hwloc_base_get_topo_signature(hwloc_topology_t topo) unsigned i; hwloc_bitmap_t complete, allowed; - nnuma = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE); - npackage = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); - nl3 = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_L3CACHE); - nl2 = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_L2CACHE); - nl1 = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_L1CACHE); - ncore = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); - nhwt = 
hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); + nnuma = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE); + npackage = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); + nl3 = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L3CACHE); + nl2 = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L2CACHE); + nl1 = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L1CACHE); + ncore = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + nhwt = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); /* get the root object so we can add the processor architecture */ obj = hwloc_get_root_obj(topo); diff --git a/src/mca/plm/base/plm_base_launch_support.c b/src/mca/plm/base/plm_base_launch_support.c index 9ce9ef0414..7aa3d226a7 100644 --- a/src/mca/plm/base/plm_base_launch_support.c +++ b/src/mca/plm/base/plm_base_launch_support.c @@ -84,28 +84,28 @@ void prte_plm_base_set_slots(prte_node_t *node) { if (0 == strncmp(prte_set_slots, "cores", strlen(prte_set_slots))) { if (NULL != node->topology && NULL != node->topology->topo) { - node->slots = hwloc_get_nbobjs_by_type(node->topology->topo, + node->slots = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_CORE); } } else if (0 == strncmp(prte_set_slots, "sockets", strlen(prte_set_slots))) { if (NULL != node->topology && NULL != node->topology->topo) { - node->slots = hwloc_get_nbobjs_by_type(node->topology->topo, + node->slots = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_SOCKET); if (0 == node->slots) { /* some systems don't report sockets - in this case, * use numanodes */ - node->slots = hwloc_get_nbobjs_by_type(node->topology->topo, + node->slots = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NUMANODE); } } } else if (0 == strncmp(prte_set_slots, "numas", strlen(prte_set_slots))) { if (NULL != node->topology && NULL != node->topology->topo) { - node->slots = hwloc_get_nbobjs_by_type(node->topology->topo, + node->slots = 
prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NUMANODE); } } else if (0 == strncmp(prte_set_slots, "hwthreads", strlen(prte_set_slots))) { if (NULL != node->topology && NULL != node->topology->topo) { - node->slots = hwloc_get_nbobjs_by_type(node->topology->topo, + node->slots = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PU); } } else { @@ -1224,6 +1224,7 @@ void prte_plm_base_daemon_topology(int status, pmix_proc_t *sender, } /* Apply any CPU filters (not preserved by the XML) */ daemon->node->available = prte_hwloc_base_filter_cpus(topo); + prte_hwloc_base_setup_summary(topo); /* process any cached daemons that match this signature */ PMIX_LIST_FOREACH_SAFE(dptr, dnxt, &prte_plm_globals.daemon_cache, prte_proc_t) { @@ -1626,6 +1627,7 @@ void prte_plm_base_daemon_callback(int status, pmix_proc_t *sender, pmix_data_bu hwloc_bitmap_free(daemon->node->available); } daemon->node->available = prte_hwloc_base_filter_cpus(t->topo); + prte_hwloc_base_setup_summary(t->topo); free(sig); break; } @@ -1642,6 +1644,7 @@ void prte_plm_base_daemon_callback(int status, pmix_proc_t *sender, pmix_data_bu if (0 == strcmp(dptr->node->topology->sig, sig)) { dptr->node->topology = t; dptr->node->available = prte_hwloc_base_filter_cpus(topo); + prte_hwloc_base_setup_summary(topo); jdatorted->num_reported++; } else { /* see if this topology has already been requested */ @@ -1704,6 +1707,7 @@ void prte_plm_base_daemon_callback(int status, pmix_proc_t *sender, pmix_data_bu hwloc_bitmap_free(daemon->node->available); } daemon->node->available = prte_hwloc_base_filter_cpus(t->topo); + prte_hwloc_base_setup_summary(t->topo); } } if (!prte_plm_globals.daemon1_has_reported) { diff --git a/src/mca/ras/base/ras_base_allocate.c b/src/mca/ras/base/ras_base_allocate.c index fa750d6935..2f83caa3d7 100644 --- a/src/mca/ras/base/ras_base_allocate.c +++ b/src/mca/ras/base/ras_base_allocate.c @@ -206,8 +206,8 @@ static void display_cpus(prte_topology_t *t, 
return; } - npus = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PU); - ncores = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_CORE); + npus = prte_hwloc_base_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PU); + ncores = prte_hwloc_base_get_nbobjs_by_type(t->topo, HWLOC_OBJ_CORE); if (npus == ncores && !use_hwthread_cpus) { /* the bits in this bitmap represent cores */ bits_as_cores = true; @@ -224,10 +224,10 @@ static void display_cpus(prte_topology_t *t, pmix_output(prte_clean_output, "\n====================== AVAILABLE PROCESSORS [node: %s] ======================\n\n", node); } - npkgs = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PACKAGE); + npkgs = prte_hwloc_base_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PACKAGE); allowed = (hwloc_cpuset_t)hwloc_topology_get_allowed_cpuset(t->topo); for (pkg = 0; pkg < npkgs; pkg++) { - obj = hwloc_get_obj_by_type(t->topo, HWLOC_OBJ_PACKAGE, pkg); + obj = prte_hwloc_base_get_obj_by_type(t->topo, HWLOC_OBJ_PACKAGE, pkg); hwloc_bitmap_and(avail, obj->cpuset, allowed); if (hwloc_bitmap_iszero(avail)) { if (parsable) { diff --git a/src/mca/rmaps/base/rmaps_base_binding.c b/src/mca/rmaps/base/rmaps_base_binding.c index c86b23151c..0a3ac5efdc 100644 --- a/src/mca/rmaps/base/rmaps_base_binding.c +++ b/src/mca/rmaps/base/rmaps_base_binding.c @@ -79,7 +79,7 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc, tgtcpus = target->cpuset; hwloc_bitmap_and(prte_rmaps_base.baseset, options->target, tgtcpus); - nobjs = hwloc_get_nbobjs_by_type(node->topology->topo, options->hwb); + nobjs = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, options->hwb); // check for target object existence if (0 == nobjs) { @@ -94,7 +94,7 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc, } for (n=0; n < nobjs; n++) { - tmp_obj = hwloc_get_obj_by_type(node->topology->topo, options->hwb, n); + tmp_obj = prte_hwloc_base_get_obj_by_type(node->topology->topo, options->hwb, n); tmpcpus = tmp_obj->cpuset; 
hwloc_bitmap_and(prte_rmaps_base.available, node->available, tmpcpus); hwloc_bitmap_and(prte_rmaps_base.available, prte_rmaps_base.available, prte_rmaps_base.baseset); @@ -224,10 +224,10 @@ static int bind_to_cpuset(prte_job_t *jdata, /* sanity check - are all the target cpus in a single * package, or do they span packages? */ - npkgs = hwloc_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE); + npkgs = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE); included = false; for (n=0; n < npkgs; n++) { - pkg = hwloc_get_obj_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, n); + pkg = prte_hwloc_base_get_obj_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, n); rc = hwloc_bitmap_isincluded(tset, pkg->cpuset); if (1 == rc) { included = true; @@ -304,9 +304,9 @@ static int bind_multiple(prte_job_t *jdata, prte_proc_t *proc, * packages, so we need to ensure we set the * available processors to cover whichever package * has enough CPUs to fill the request */ - npkgs = hwloc_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE); + npkgs = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE); for (n=0; n < npkgs; n++) { - pkg = hwloc_get_obj_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, n); + pkg = prte_hwloc_base_get_obj_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, n); hwloc_bitmap_and(prte_rmaps_base.available, prte_rmaps_base.baseset, pkg->cpuset); hwloc_bitmap_and(prte_rmaps_base.available, prte_rmaps_base.available, node->available); ncpus = hwloc_get_nbobjs_inside_cpuset_by_type(node->topology->topo, prte_rmaps_base.available, type); diff --git a/src/mca/rmaps/base/rmaps_base_frame.c b/src/mca/rmaps/base/rmaps_base_frame.c index 48384211d4..4f8a6acd98 100644 --- a/src/mca/rmaps/base/rmaps_base_frame.c +++ b/src/mca/rmaps/base/rmaps_base_frame.c @@ -408,11 +408,11 @@ int prte_rmaps_base_set_default_mapping(prte_job_t *jdata, } } else { /* if package is available, map by that */ - if (NULL != 
hwloc_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_PACKAGE, 0)) { + if (NULL != prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_PACKAGE, 0)) { pmix_output_verbose(5, prte_rmaps_base_framework.framework_output, "mca:rmaps mapping not set by user - using bypackage"); PRTE_SET_MAPPING_POLICY(jdata->map->mapping, PRTE_MAPPING_BYPACKAGE); - } else if (NULL != hwloc_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_NUMANODE, 0)) { + } else if (NULL != prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_NUMANODE, 0)) { /* if NUMA is available, map by that */ pmix_output_verbose(5, prte_rmaps_base_framework.framework_output, "mca:rmaps mapping not set by user - using bynuma"); diff --git a/src/mca/rmaps/base/rmaps_base_map_job.c b/src/mca/rmaps/base/rmaps_base_map_job.c index 1dd998e566..93dd97291a 100644 --- a/src/mca/rmaps/base/rmaps_base_map_job.c +++ b/src/mca/rmaps/base/rmaps_base_map_job.c @@ -449,13 +449,13 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata) } else if (HWLOC_OBJ_PACKAGE == options.maptype) { /* add in #packages for each node */ PMIX_LIST_FOREACH (node, &nodes, prte_node_t) { - app->num_procs += options.pprn * hwloc_get_nbobjs_by_type(node->topology->topo, + app->num_procs += options.pprn * prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE); } } else if (HWLOC_OBJ_NUMANODE== options.maptype) { /* add in #numa for each node */ PMIX_LIST_FOREACH (node, &nodes, prte_node_t) { - app->num_procs += options.pprn * hwloc_get_nbobjs_by_type(node->topology->topo, + app->num_procs += options.pprn * prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NUMANODE); } } else if (HWLOC_OBJ_L1CACHE == options.maptype || @@ -463,19 +463,19 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata) HWLOC_OBJ_L1CACHE == options.maptype) { /* add in #cache for each node */ PMIX_LIST_FOREACH (node, &nodes, prte_node_t) { - app->num_procs += options.pprn * 
hwloc_get_nbobjs_by_type(node->topology->topo, + app->num_procs += options.pprn * prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, options.maptype); } } else if (HWLOC_OBJ_CORE == options.maptype) { /* add in #cores for each node */ PMIX_LIST_FOREACH (node, &nodes, prte_node_t) { - app->num_procs += options.pprn * hwloc_get_nbobjs_by_type(node->topology->topo, + app->num_procs += options.pprn * prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_CORE); } } else if (HWLOC_OBJ_PU == options.maptype) { /* add in #hwt for each node */ PMIX_LIST_FOREACH (node, &nodes, prte_node_t) { - app->num_procs += options.pprn * hwloc_get_nbobjs_by_type(node->topology->topo, + app->num_procs += options.pprn * prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PU); } } diff --git a/src/mca/rmaps/base/rmaps_base_ranking.c b/src/mca/rmaps/base/rmaps_base_ranking.c index a1f1a23adc..8f4c630c78 100644 --- a/src/mca/rmaps/base/rmaps_base_ranking.c +++ b/src/mca/rmaps/base/rmaps_base_ranking.c @@ -196,10 +196,10 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata, continue; } lrank = 0; - nobjs = hwloc_get_nbobjs_by_type(node->topology->topo, + nobjs = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, options->maptype); for (k=0; k < nobjs; k++) { - obj = hwloc_get_obj_by_type(node->topology->topo, + obj = prte_hwloc_base_get_obj_by_type(node->topology->topo, options->maptype, k); for (m=0; m < node->procs->size; m++) { proc = (prte_proc_t*)pmix_pointer_array_get_item(node->procs, m); @@ -247,13 +247,13 @@ int prte_rmaps_base_compute_vpids(prte_job_t *jdata, if (NULL == node) { continue; } - nobjs = hwloc_get_nbobjs_by_type(node->topology->topo, + nobjs = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, options->maptype); lrank = pass * nobjs; /* make a pass across all objects on this node */ for (k=0; k < nobjs && rank < jdata->num_procs; k++) { /* get this object */ - obj = hwloc_get_obj_by_type(node->topology->topo, + obj = 
prte_hwloc_base_get_obj_by_type(node->topology->topo, options->maptype, k); /* find an unranked proc on this object */ for (m=0; m < node->procs->size && rank < jdata->num_procs; m++) { diff --git a/src/mca/rmaps/ppr/rmaps_ppr.c b/src/mca/rmaps/ppr/rmaps_ppr.c index 92c8e4c0f0..8f51636f0a 100644 --- a/src/mca/rmaps/ppr/rmaps_ppr.c +++ b/src/mca/rmaps/ppr/rmaps_ppr.c @@ -236,7 +236,7 @@ static int ppr_mapper(prte_job_t *jdata, } } else { /* get the number of resources on this node */ - nobjs = hwloc_get_nbobjs_by_type(node->topology->topo, + nobjs = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, options->maptype); if (0 == nobjs) { continue; @@ -256,7 +256,7 @@ static int ppr_mapper(prte_job_t *jdata, } /* map the specified number of procs to each such resource on this node */ for (i = 0; i < nobjs && nprocs_mapped < app->num_procs; i++) { - obj = hwloc_get_obj_by_type(node->topology->topo, + obj = prte_hwloc_base_get_obj_by_type(node->topology->topo, options->maptype, i); if (!prte_rmaps_base_check_avail(jdata, app, node, &node_list, obj, options)) { continue; diff --git a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c index e60e597d91..239e81da86 100644 --- a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -627,7 +627,7 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app, /* have to delay checking for availability until we have the object */ /* get the number of objects of this type on this node */ - nobjs = hwloc_get_nbobjs_by_type(node->topology->topo, + nobjs = prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, options->maptype); if (0 == nobjs) { /* this node doesn't have any objects of this type, so @@ -647,7 +647,7 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app, pmix_output_verbose(10, prte_rmaps_base_framework.framework_output, "mca:rmaps:rr: assigning proc to object %d", j); /* get the hwloc object */ - 
obj = hwloc_get_obj_by_type(node->topology->topo, + obj = prte_hwloc_base_get_obj_by_type(node->topology->topo, options->maptype, j); if (NULL == obj) { /* out of objects on this node */ diff --git a/src/runtime/data_type_support/prte_dt_print_fns.c b/src/runtime/data_type_support/prte_dt_print_fns.c index 3783e29ae4..087bc51e7c 100644 --- a/src/runtime/data_type_support/prte_dt_print_fns.c +++ b/src/runtime/data_type_support/prte_dt_print_fns.c @@ -58,8 +58,8 @@ static void display_cpus(prte_topology_t *t, char *tmp1, *tmp2; - npus = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PU); - ncores = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_CORE); + npus = prte_hwloc_base_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PU); + ncores = prte_hwloc_base_get_nbobjs_by_type(t->topo, HWLOC_OBJ_CORE); if (npus == ncores && !use_hwthread_cpus) { /* the bits in this bitmap represent cores */ bits_as_cores = true; @@ -70,10 +70,10 @@ static void display_cpus(prte_topology_t *t, } avail = hwloc_bitmap_alloc(); pmix_asprintf(&tmp1, " \n"); - npkgs = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PACKAGE); + npkgs = prte_hwloc_base_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PACKAGE); allowed = (hwloc_cpuset_t)hwloc_topology_get_allowed_cpuset(t->topo); for (pkg = 0; pkg < npkgs; pkg++) { - obj = hwloc_get_obj_by_type(t->topo, HWLOC_OBJ_PACKAGE, pkg); + obj = prte_hwloc_base_get_obj_by_type(t->topo, HWLOC_OBJ_PACKAGE, pkg); hwloc_bitmap_and(avail, obj->cpuset, allowed); if (hwloc_bitmap_iszero(avail)) { pmix_asprintf(&tmp2, "%s \n", tmp1, pkg, "NONE"); @@ -352,7 +352,7 @@ void prte_proc_print(char **output, prte_job_t *jdata, prte_proc_t *src) mycpus = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(mycpus, src->cpuset); - npus = hwloc_get_nbobjs_by_type(src->node->topology->topo, HWLOC_OBJ_PU); + npus = prte_hwloc_base_get_nbobjs_by_type(src->node->topology->topo, HWLOC_OBJ_PU); /* assuming each "core" xml element will take 20 characters. 
There could be at most npus such elements */ int sz = sizeof(char) * npus * 20; cores = (char*)malloc(sz); diff --git a/src/runtime/prte_globals.c b/src/runtime/prte_globals.c index 85e0c937f9..9e42b6f8d4 100644 --- a/src/runtime/prte_globals.c +++ b/src/runtime/prte_globals.c @@ -911,6 +911,12 @@ static void tcon(prte_topology_t *t) } static void tdes(prte_topology_t *t) { + hwloc_obj_t root; if (NULL != t->topo) { + /* release the summary cached on the root object's userdata before the topology is destroyed */ + root = hwloc_get_root_obj(t->topo); + if (NULL != root->userdata) { + PMIX_RELEASE(root->userdata); + } hwloc_topology_destroy(t->topo); }