Skip to content

Commit

Permalink
Map Exchange to set up communication for MDLCSR (#22)
Browse files Browse the repository at this point in the history
* add mapping exchange test

* remove map exchange test

* mapping exchange implementation and test

* increase memory and heap size

* change main memory size from 16 GB back to 8 GB

* fix cmake test script path

* fix cmake test script path for pando-rt

* remove comment
  • Loading branch information
ywwu928 authored Apr 9, 2024
1 parent 2a33ec2 commit 9fbe68a
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 94 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ jobs:
echo "IMAGE_VERSION=$(git log --pretty="%h" -1 Dockerfile.dev)" >> $GITHUB_ENV
if [ ${{ matrix.build-type }} == 'LSAN' ]; then
echo "PANDO_BUILD_DOCS=OFF" >> $GITHUB_ENV
echo "PANDO_CONTAINER_ENV=-e=PANDO_PREP_L1SP_HART=16384 -ePANDO_PREP_MAIN_NODE=8589934592" >> $GITHUB_ENV
echo "PANDO_CONTAINER_ENV=-e=PANDO_PREP_L1SP_HART=16384" >> $GITHUB_ENV
fi
if [ ${{ matrix.build-type }} == 'UBSAN' ]; then
echo "PANDO_BUILD_DOCS=OFF" >> $GITHUB_ENV
echo "PANDO_CONTAINER_ENV=-e=PANDO_PREP_L1SP_HART=16384 -ePANDO_PREP_MAIN_NODE=8589934592" >> $GITHUB_ENV
echo "PANDO_CONTAINER_ENV=-e=PANDO_PREP_L1SP_HART=16384" >> $GITHUB_ENV
fi
if [ ${{ matrix.build-type }} == 'Release' ]; then
echo "PANDO_BUILD_DOCS=OFF" >> $GITHUB_ENV
Expand Down
8 changes: 4 additions & 4 deletions cmake/PANDOTesting.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ function(pando_add_driver_test_lib TARGET SOURCEFILE LIBRARY)
else ()
set(HTHREADS "")
if (${GASNet_CONDUIT} STREQUAL "smp")
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/pando-rt/scripts/preprun.sh)
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/scripts/preprun.sh)
elseif (${GASNet_CONDUIT} STREQUAL "mpi")
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/pando-rt/scripts/preprun_mpi.sh)
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/scripts/preprun_mpi.sh)
else ()
message(FATAL_ERROR "No runner script for GASNet conduit ${GASNet_CONDUIT}")
endif ()
Expand Down Expand Up @@ -124,9 +124,9 @@ endfunction()
function(pando_add_bin_test TARGET ARGS INPUTFILE OKFILE)
if (NOT PANDO_RT_BACKEND STREQUAL "DRVX")
if (${GASNet_CONDUIT} STREQUAL "smp")
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/pando-rt/scripts/preprun.sh)
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/scripts/preprun.sh)
elseif (${GASNet_CONDUIT} STREQUAL "mpi")
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/pando-rt/scripts/preprun_mpi.sh)
set(DRIVER_SCRIPT ${pando-lib-galois_SOURCE_DIR}/scripts/preprun_mpi.sh)
else ()
message(FATAL_ERROR "No runner script for GASNet conduit ${GASNet_CONDUIT}")
endif ()
Expand Down
10 changes: 7 additions & 3 deletions include/pando-lib-galois/graphs/dist_local_csr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,10 +487,14 @@ class DistLocalCSR {
}

/** Host Information **/
std::uint64_t getPhysicalHostID(VertexTokenID tid) {
std::uint64_t getVirtualHostID(VertexTokenID tid) {
std::uint64_t virtualHostID = tid % this->numVHosts();
std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocalRef(), get, virtualHostID);
return physicalHost;
return virtualHostID;
}
std::uint64_t getPhysicalHostID(VertexTokenID tid) {
std::uint64_t virtualHostID = this->getVirtualHostID(tid);
std::uint64_t physicalHostID = fmap(virtualToPhysicalMap.getLocalRef(), get, virtualHostID);
return physicalHostID;
}

/** Topology Modifications **/
Expand Down
115 changes: 66 additions & 49 deletions include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

#include <utility>

#include "pando-rt/sync/mutex.hpp"
#include <pando-lib-galois/containers/hashtable.hpp>
#include <pando-lib-galois/containers/host_indexed_map.hpp>
#include <pando-lib-galois/containers/host_local_storage.hpp>
Expand All @@ -17,7 +16,10 @@
#include <pando-lib-galois/graphs/local_csr.hpp>
#include <pando-lib-galois/import/wmd_graph_importer.hpp>
#include <pando-lib-galois/loops/do_all.hpp>
#include <pando-lib-galois/sync/global_barrier.hpp>
#include <pando-lib-galois/sync/simple_lock.hpp>
#include <pando-lib-galois/utility/gptr_monad.hpp>
#include <pando-lib-galois/utility/tuple.hpp>
#include <pando-rt/containers/array.hpp>
#include <pando-rt/containers/vector.hpp>
#include <pando-rt/memory/memory_guard.hpp>
Expand Down Expand Up @@ -132,6 +134,10 @@ class MirrorDistLocalCSR {
VertexTopologyID getMaster() {
return master;
}

bool operator==(const MirrorToMasterMap& a) noexcept {
return a.mirror == mirror && a.master == master;
}
};

/** Vertex Manipulation **/
Expand Down Expand Up @@ -239,19 +245,13 @@ class MirrorDistLocalCSR {
}

/** Host Information **/
std::uint64_t getVirtualHostID(VertexTokenID tid) {
return dlcsr.getVirtualHostID(tid);
}
std::uint64_t getPhysicalHostID(VertexTokenID tid) {
return dlcsr.getPhysicalHostID(tid);
}

/** Sync **/
// TODO(Ying-Wei):
// write a sync function that reduces mirror values and then broadcasts master values
// return a bitmap of modified vertices
//
// template <typename Func>
// pando::Array<bool> sync(Func func, pando::Array<bool>) {
//}

/**
* @brief get vertex local dense ID
*/
Expand Down Expand Up @@ -303,8 +303,6 @@ class MirrorDistLocalCSR {
return dlcsr.getLocalCSR();
}

// TODO(Jeageun):
// write a initialize function that calls initializeAfterGather function of DistLocalCSR dlcsr
template <typename ReadVertexType, typename ReadEdgeType>
pando::Status initializeAfterGather(
galois::HostLocalStorage<pando::Vector<ReadVertexType>> vertexData, std::uint64_t numVertices,
Expand Down Expand Up @@ -390,62 +388,81 @@ class MirrorDistLocalCSR {
numVertices += lift(mirrorList[i], size);
}
PANDO_CHECK(wg.wait());

PANDO_CHECK_RETURN(setupCommunication());

return pando::Status::Success;
}

// TODO(Ying-Wei):
// uses doAll to send remoteMasterToLocalMirrorMap to corresponding remote hosts
// no need to use executeON
// just push to the localMasterToRemoteMirrorOrderedTable vector
// make sure to use the spin lock in pando-rt
/**
* @brief Get the local mutex
   * @brief Exchanges the mirror to master mapping from the mirror side to the master side
*/
pando::GlobalRef<pando::Mutex> getLocalMutex(std::uint64_t host_id) {
return hostMutex[host_id];
}

pando::Status setupCommunication() {
auto dims = pando::getPlaceDims();

    // initialize localMasterToRemoteMirrorTable
PANDO_CHECK_RETURN(localMasterToRemoteMirrorTable.initialize());
for (std::int16_t i = 0; i < dims.node.id; i++) {
pando::GlobalRef<pando::Vector<pando::Vector<MirrorToMasterMap>>>
localMasterToRemoteMirrorMap = localMasterToRemoteMirrorTable[i];
PANDO_CHECK_RETURN(fmap(localMasterToRemoteMirrorMap, initialize, dims.node.id));
for (std::int16_t i = 0; i < dims.node.id; i++) {
pando::GlobalRef<pando::Vector<MirrorToMasterMap>> mapVectorFromHost =
fmap(localMasterToRemoteMirrorMap, get, i);
PANDO_CHECK_RETURN(fmap(mapVectorFromHost, initialize, 0));
}
}

PANDO_CHECK_RETURN(hostMutex.initialize());

PANDO_CHECK_RETURN(galois::doAll(
localMirrorToRemoteMasterOrderedTable, localMasterToRemoteMirrorTable,
+[](galois::HostLocalStorage<pando::Array<MirrorToMasterMap>>
localMirrorToRemoteMasterOrderedTable,
pando::GlobalRef<pando::Vector<EdgeHandle>> localMasterToRemoteMirrorTable) {
PANDO_CHECK(fmap(localMirrorToRemoteMasterOrderedTable, initialize, 0));
pando::Array<MirrorToMasterMap> remoteMasterToLocalMirrorMap =
localMirrorToRemoteMasterOrderedTable.getLocal();
for (MirrorToMasterMap m : remoteMasterToLocalMirrorMap) {
VertexTopologyID masterTopologyID = m.master;
VertexTokenID masterTokenID = getTokenID(masterTopologyID);
std::uint64_t physicalHost = getPhysicalHostID(masterTokenID);
pando::Mutex mutex = getLocalMutex(physicalHost);

// Lock mutex to ensure atomic append to the vector
mutex.lock();
PANDO_CHECK(fmap(localMasterToRemoteMirrorTable, pushBack, m));
mutex.unlock();
auto thisCSR = *this;
auto state = galois::make_tpl(thisCSR, localMasterToRemoteMirrorTable);

// push style
// each host traverses its own localMirrorToRemoteMasterOrderedTable and send out the mapping to
// the corresponding remote host append to the vector of vector where each vector is the mapping
// from a specific host
galois::doAll(
state, localMirrorToRemoteMasterOrderedTable,
+[](decltype(state) state,
pando::GlobalRef<pando::Array<MirrorToMasterMap>> localMirrorToRemoteMasterOrderedMap) {
auto [object, localMasterToRemoteMirrorTable] = state;
for (std::uint64_t i = 0ul; i < lift(localMirrorToRemoteMasterOrderedMap, size); i++) {
MirrorToMasterMap m = fmap(localMirrorToRemoteMasterOrderedMap, get, i);
VertexTopologyID masterTopologyID = m.getMaster();
VertexTokenID masterTokenID = object.getTokenID(masterTopologyID);
std::uint64_t physicalHost = object.getPhysicalHostID(masterTokenID);

pando::GlobalRef<pando::Vector<pando::Vector<MirrorToMasterMap>>>
localMasterToRemoteMirrorMap = localMasterToRemoteMirrorTable[physicalHost];
pando::GlobalRef<pando::Vector<MirrorToMasterMap>> mapVectorFromHost =
fmap(localMasterToRemoteMirrorMap, get, pando::getCurrentPlace().node.id);

PANDO_CHECK(fmap(mapVectorFromHost, pushBack, m));
}
}));
});

return pando::Status::Success;
}

/**
* @brief For testing only
*/
pando::GlobalRef<pando::Array<MirrorToMasterMap>> getLocalMirrorToRemoteMasterOrderedMap(
int16_t hostId) {
return localMirrorToRemoteMasterOrderedTable[hostId];
}
pando::GlobalRef<pando::Vector<pando::Vector<MirrorToMasterMap>>> getLocalMasterToRemoteMirrorMap(
uint64_t hostId) {
return localMasterToRemoteMirrorTable[hostId];
}

private:
DLCSR dlcsr;
uint64_t _mirror_size;
galois::HostLocalStorage<LocalVertexRange> masterRange;
galois::HostLocalStorage<LocalVertexRange> mirrorRange;
galois::HostLocalStorage<pando::Array<MirrorToMasterMap>> localMirrorToRemoteMasterOrderedTable;

// TODO(Ying-Wei):
// generate the following
galois::HostLocalStorage<pando::Mutex> hostMutex;
galois::HostLocalStorage<pando::Vector<EdgeHandle>> localMasterToRemoteMirrorTable;
// galois::GlobalBarrier barrier;
galois::HostLocalStorage<pando::Vector<pando::Vector<MirrorToMasterMap>>>
localMasterToRemoteMirrorTable;
};

static_assert(graph_checker<MirrorDistLocalCSR<std::uint64_t, std::uint64_t>>::value);
Expand Down
4 changes: 2 additions & 2 deletions pando-rt/cmake/PANDOTesting.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ function(pando_add_driver_test TARGET SOURCEFILE)

if (PANDO_RT_BACKEND STREQUAL "PREP")
if (${GASNet_CONDUIT} STREQUAL "smp")
set(RUNNER_SCRIPT ${PROJECT_SOURCE_DIR}/scripts/preprun.sh)
set(RUNNER_SCRIPT ${PROJECT_SOURCE_DIR}/../scripts/preprun.sh)
elseif (${GASNet_CONDUIT} STREQUAL "mpi")
set(RUNNER_SCRIPT ${PROJECT_SOURCE_DIR}/scripts/preprun_mpi.sh)
set(RUNNER_SCRIPT ${PROJECT_SOURCE_DIR}/../scripts/preprun_mpi.sh)
else ()
message(FATAL_ERROR "No runner script for GASNet conduit ${GASNet_CONDUIT}")
endif ()
Expand Down
36 changes: 19 additions & 17 deletions pando-rt/scripts/preprun.sh → scripts/preprun.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2023. University of Texas at Austin. All rights reserved.

#
# SPDX-License-Identifier: MIT
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Expand Down Expand Up @@ -29,32 +32,31 @@ usage: preprun -n <n> prog [program args]

harts=""

while getopts "n:c:t:h" option
do
while getopts "n:c:t:h" option; do
case ${option} in
n) # number of emulated PXNs
nodes=${OPTARG}
;;
c) # number of emulated cores per PXN
cores=${OPTARG}
;;
    t) # number of emulated harts per core
harts=${OPTARG}
;;
h) # help
show_help
exit
;;
n) # number of emulated PXNs
nodes=${OPTARG}
;;
c) # number of emulated cores per PXN
cores=${OPTARG}
;;
  t) # number of emulated harts per core
harts=${OPTARG}
;;
h) # help
show_help
exit
;;
esac
done
shift $(expr $OPTIND - 1 )
shift $(expr $OPTIND - 1)
prog=$@

export GASNET_PSHM_NODES=$nodes
export PANDO_PREP_NUM_CORES=$cores

if [ -n "$harts" ]; then
export PANDO_PREP_NUM_HARTS=$harts
export PANDO_PREP_NUM_HARTS=$harts
fi

exec $prog
40 changes: 23 additions & 17 deletions pando-rt/scripts/preprun_mpi.sh → scripts/preprun_mpi.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2023. University of Texas at Austin. All rights reserved.

#
# SPDX-License-Identifier: MIT
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Expand Down Expand Up @@ -28,32 +31,35 @@ usage: preprun -n <n> prog [program args]
#

harts=""
# 8GB main memory size by default (8589934592 bytes)
main_memory_size="${main_memory_size-8589934592}"

while getopts "n:c:t:h" option
do
while getopts "n:c:t:h" option; do
case ${option} in
n) # number of emulated PXNs
nodes=${OPTARG}
;;
c) # number of emulated cores per PXN
cores=${OPTARG}
;;
    t) # number of emulated harts per core
harts=${OPTARG}
;;
h) # help
show_help
exit
;;
n) # number of emulated PXNs
nodes=${OPTARG}
;;
c) # number of emulated cores per PXN
cores=${OPTARG}
;;
  t) # number of emulated harts per core
harts=${OPTARG}
;;
h) # help
show_help
exit
;;
esac
done
shift $(expr $OPTIND - 1 )
shift $(expr $OPTIND - 1)
prog=$@

export PANDO_PREP_NUM_CORES=$cores

if [ -n "$harts" ]; then
export PANDO_PREP_NUM_HARTS=$harts
export PANDO_PREP_NUM_HARTS=$harts
fi

export PANDO_PREP_MAIN_NODE=$main_memory_size

gasnetrun_mpi -n $nodes $prog
Loading

0 comments on commit 9fbe68a

Please sign in to comment.