Skip to content

Commit

Permalink
Modified PR runtime to use hostnames instead of host IDs to keep trac…
Browse files Browse the repository at this point in the history
…k of SMP (#357)

nodes.

Co-authored-by: Bruce J Palmer <[email protected]>
  • Loading branch information
edoapra and Bruce J Palmer authored Nov 7, 2024
1 parent b2c538e commit 6bc1d51
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 31 deletions.
36 changes: 18 additions & 18 deletions comex/src-mpi-pr/comex.c
Original file line number Diff line number Diff line change
Expand Up @@ -2722,8 +2722,8 @@ int comex_malloc_mem_dev(void *ptrs[], size_t size, comex_group_t group,
reg_entries_local[reg_entries_local_count++] = reg_entries[i];
}
}
else if (g_state.hostid[reg_entries[i].rank]
== g_state.hostid[my_world_rank]) {
else if (!strcmp(g_state.host[reg_entries[i].rank].name,
g_state.host[my_world_rank].name)) {
/* same SMP node, need to mmap */
/* open remote shared memory object */
void *memory = _shm_attach_memdev(reg_entries[i].name,
Expand Down Expand Up @@ -2880,7 +2880,7 @@ void _malloc_semaphore()
if (g_state.rank == i) {
continue; /* skip my own rank */
}
else if (g_state.hostid[g_state.rank] == g_state.hostid[i]) {
else if (!strcmp(g_state.host[g_state.rank].name,g_state.host[i].name)) {
/* same SMP node */
#if ENABLE_UNNAMED_SEM
semaphores[i] = _shm_attach(
Expand Down Expand Up @@ -2949,7 +2949,7 @@ void _free_semaphore()
}
#endif
}
else if (g_state.hostid[g_state.rank] == g_state.hostid[i]) {
else if (!strcmp(g_state.host[g_state.rank].name,g_state.host[i].name)) {
/* same SMP node */
#if ENABLE_UNNAMED_SEM
retval = munmap(semaphores[i], sizeof(sem_t));
Expand Down Expand Up @@ -4648,8 +4648,8 @@ STATIC void _malloc_handler(
fprintf(stderr, "[%d] _malloc_handler found NULL at %d\n", g_state.rank, i);
#endif
}
else if (g_state.hostid[reg_entries[i].rank]
== g_state.hostid[g_state.rank]) {
else if (!strcmp(g_state.host[reg_entries[i].rank].name,
g_state.host[g_state.rank].name)) {
/* same SMP node, need to mmap */
/* attach to remote shared memory object */
void *memory;
Expand Down Expand Up @@ -4741,8 +4741,8 @@ STATIC void _free_handler(header_t *header, char *payload, int proc)
fprintf(stderr, "[%d] _free_handler found NULL at %d\n", g_state.rank, i);
#endif
}
else if (g_state.hostid[rank_ptrs[i].rank]
== g_state.hostid[g_state.rank]) {
else if (!strcmp(g_state.host[rank_ptrs[i].rank].name,
g_state.host[g_state.rank].name)) {
/* same SMP node */
reg_entry_t *reg_entry = NULL;
int retval = 0;
Expand Down Expand Up @@ -4926,7 +4926,7 @@ STATIC int _smallest_world_rank_with_same_hostid(comex_igroup_t *igroup)
int *world_ranks = _get_world_ranks(igroup);

for (i=0; i<igroup->size; ++i) {
if (g_state.hostid[world_ranks[i]] == g_state.hostid[g_state.rank]) {
if (!strcmp(g_state.host[world_ranks[i]].name,g_state.host[g_state.rank].name)) {
/* found same host as me */
if (world_ranks[i] < smallest) {
smallest = world_ranks[i];
Expand All @@ -4949,7 +4949,7 @@ STATIC int _largest_world_rank_with_same_hostid(comex_igroup_t *igroup)
int *world_ranks = _get_world_ranks(igroup);

for (i=0; i<igroup->size; ++i) {
if (g_state.hostid[world_ranks[i]] == g_state.hostid[g_state.rank]) {
if (!strcmp(g_state.host[world_ranks[i]].name,g_state.host[g_state.rank].name)) {
/* found same host as me */
if (world_ranks[i] > largest) {
largest = world_ranks[i];
Expand Down Expand Up @@ -6365,7 +6365,7 @@ STATIC void nb_puts(
if (COMEX_ENABLE_PUT_DATATYPE
&& (!COMEX_ENABLE_PUT_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_PUT_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))
&& (_packed_size(src_stride, count, stride_levels) > COMEX_PUT_DATATYPE_THRESHOLD)) {
nb_puts_datatype(src, src_stride, dst, dst_stride, count, stride_levels, proc, nb);
return;
Expand All @@ -6375,7 +6375,7 @@ STATIC void nb_puts(
if (COMEX_ENABLE_PUT_PACKED
&& (!COMEX_ENABLE_PUT_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_PUT_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])) {
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))) {
nb_puts_packed(src, src_stride, dst, dst_stride, count, stride_levels, proc, nb);
return;
}
Expand Down Expand Up @@ -6634,7 +6634,7 @@ STATIC void nb_gets(
if (COMEX_ENABLE_GET_DATATYPE
&& (!COMEX_ENABLE_GET_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_GET_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))
&& (_packed_size(src_stride, count, stride_levels) > COMEX_GET_DATATYPE_THRESHOLD)) {
nb_gets_datatype(src, src_stride, dst, dst_stride, count, stride_levels, proc, nb);
return;
Expand All @@ -6644,7 +6644,7 @@ STATIC void nb_gets(
if (COMEX_ENABLE_GET_PACKED
&& (!COMEX_ENABLE_GET_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_GET_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])) {
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))) {
nb_gets_packed(src, src_stride, dst, dst_stride, count, stride_levels, proc, nb);
return;
}
Expand Down Expand Up @@ -6910,7 +6910,7 @@ STATIC void nb_accs(
if (COMEX_ENABLE_ACC_PACKED
&& (!COMEX_ENABLE_ACC_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_ACC_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])) {
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))) {
nb_accs_packed(datatype, scale, src, src_stride, dst, dst_stride, count, stride_levels, proc, nb);
return;
}
Expand Down Expand Up @@ -7120,7 +7120,7 @@ STATIC void nb_putv(
if (COMEX_ENABLE_PUT_IOV
&& (!COMEX_ENABLE_PUT_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_PUT_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])) {
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))) {
nb_putv_packed(&iov[i], proc, nb);
}
else {
Expand Down Expand Up @@ -7224,7 +7224,7 @@ STATIC void nb_getv(
if (COMEX_ENABLE_GET_IOV
&& (!COMEX_ENABLE_GET_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_GET_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])) {
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))) {
nb_getv_packed(&iov[i], proc, nb);
}
else {
Expand Down Expand Up @@ -7336,7 +7336,7 @@ STATIC void nb_accv(
if (COMEX_ENABLE_ACC_IOV
&& (!COMEX_ENABLE_ACC_SELF || g_state.rank != proc)
&& (!COMEX_ENABLE_ACC_SMP
|| g_state.hostid[proc] != g_state.hostid[g_state.rank])) {
|| strcmp(g_state.host[proc].name,g_state.host[g_state.rank].name))) {
nb_accv_packed(datatype, scale, &iov[i], proc, nb);
}
else {
Expand Down
45 changes: 33 additions & 12 deletions comex/src-mpi-pr/groups.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,27 @@ static int cmplong(const void *p1, const void *p2)
return *((long*)p1) - *((long*)p2);
}

/**
 * qsort() comparator that orders two host names lexicographically.
 *
 * At most COMEX_MAX_HOST_NAME_LEN bytes are examined and the comparison stops
 * at the first NUL, so bytes following the terminator never influence the
 * result. Bytes are compared as unsigned char (strncmp semantics), which
 * yields the same ordering whether plain char is signed or unsigned on the
 * platform.
 *
 * @param[in] name1 pointer to the first host name (e.g. a host_name_t element)
 * @param[in] name2 pointer to the second host name (e.g. a host_name_t element)
 * @return -1 if name1 sorts before name2, 1 if it sorts after, 0 if equal
 */
static int cmpname(const void *name1, const void *name2)
{
    const char *n1 = (const char*)name1;
    const char *n2 = (const char*)name2;
    int order = strncmp(n1, n2, COMEX_MAX_HOST_NAME_LEN);

    /* strncmp only guarantees the sign; collapse to exactly -1, 0 or 1 */
    return (order > 0) - (order < 0);
}


/**
* Initialize group linked list. Prepopulate with world group.
*/
Expand All @@ -393,7 +414,7 @@ void comex_group_init(MPI_Comm comm)
int size_node = 0;
comex_group_t group = 0;
comex_igroup_t *igroup = NULL;
long *sorted = NULL;
host_name_t *sorted = NULL;
int count = 0;

/* populate g_state */
Expand All @@ -420,10 +441,10 @@ void comex_group_init(MPI_Comm comm)
#endif

/* need to figure out which proc is master on each node */
g_state.hostid = (long*)malloc(sizeof(long)*g_state.size);
g_state.hostid[g_state.rank] = xgethostid();
status = MPI_Allgather(MPI_IN_PLACE, 1, MPI_LONG,
g_state.hostid, 1, MPI_LONG, g_state.comm);
g_state.host = (host_name_t*)malloc(sizeof(host_name_t)*g_state.size);
gethostname(g_state.host[g_state.rank].name,COMEX_MAX_HOST_NAME_LEN);
status = MPI_Allgather(MPI_IN_PLACE, sizeof(host_name_t), MPI_BYTE,
g_state.host, sizeof(host_name_t), MPI_BYTE, g_state.comm);
COMEX_ASSERT(MPI_SUCCESS == status);
/* First create a temporary node communicator and then
* split further into number of groups within the node */
Expand All @@ -432,17 +453,17 @@ void comex_group_init(MPI_Comm comm)
/* create node comm */
/* MPI_Comm_split requires a non-negative color,
* so sort and sanitize */
sorted = (long*)malloc(sizeof(long) * g_state.size);
(void)memcpy(sorted, g_state.hostid, sizeof(long)*g_state.size);
qsort(sorted, g_state.size, sizeof(long), cmplong);
sorted = (long*)malloc(sizeof(host_name_t) * g_state.size);
(void)memcpy(sorted, g_state.host, sizeof(host_name_t)*g_state.size);
qsort(sorted, g_state.size, sizeof(host_name_t), cmpname);
/* count is number of distinct host names that sort lower than
* the host name of this rank */
for (i=0; i<g_state.size-1; ++i) {
if (sorted[i] == g_state.hostid[g_state.rank])
if (!strcmp(sorted[i].name,g_state.host[g_state.rank].name))
{
break;
}
if (sorted[i] != sorted[i+1]) {
if (strcmp(sorted[i].name,sorted[i+1].name)) {
count += 1;
}
}
Expand All @@ -463,7 +484,7 @@ void comex_group_init(MPI_Comm comm)
smallest_rank_with_same_hostid = g_state.rank;
largest_rank_with_same_hostid = g_state.rank;
for (i=0; i<g_state.size; ++i) {
if (g_state.hostid[i] == g_state.hostid[g_state.rank]) {
if (!strcmp(g_state.host[i].name,g_state.host[g_state.rank].name)) {
++size_node;
if (i < smallest_rank_with_same_hostid) {
smallest_rank_with_same_hostid = i;
Expand Down Expand Up @@ -605,7 +626,7 @@ void comex_group_finalize()
}

free(g_state.master);
free(g_state.hostid);
free(g_state.host);
status = MPI_Comm_free(&(g_state.node_comm));
COMEX_ASSERT(MPI_SUCCESS == status);
status = MPI_Group_free(&(g_state.group));
Expand Down
8 changes: 7 additions & 1 deletion comex/src-mpi-pr/groups.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@
#ifndef _COMEX_GROUPS_H_
#define _COMEX_GROUPS_H_

#define COMEX_MAX_HOST_NAME_LEN 256

#include <mpi.h>

#include "comex.h"

/**
 * Fixed-size container for a single host name.
 *
 * Wrapping the character array in a struct gives every entry the same size,
 * so an array of these can be indexed by rank and copied or exchanged as a
 * flat block of sizeof(host_name_t) bytes per entry.
 */
typedef struct {
char name[COMEX_MAX_HOST_NAME_LEN]; /**< host name, at most COMEX_MAX_HOST_NAME_LEN bytes */
} host_name_t;

typedef struct {
MPI_Comm comm; /**< whole comm; all ranks */
MPI_Group group;/**< whole group; all ranks */
int size; /**< comm size */
int rank; /**< comm rank */
int *master; /**< master[size] rank of a given rank's master */
long *hostid; /**< hostid[size] hostid of SMP node for a given rank */
host_name_t *host; /**< host[size] host name of SMP node for a given rank */
MPI_Comm node_comm; /**< node comm; SMP ranks */
int node_size; /**< node comm size */
int node_rank; /**< node comm rank */
Expand Down

0 comments on commit 6bc1d51

Please sign in to comment.