From 123f94603391af3640d61a0e5d4b2e9339d12b1d Mon Sep 17 00:00:00 2001 From: jeageun Date: Wed, 3 Apr 2024 10:04:16 -0500 Subject: [PATCH 01/28] Jeageun/mdlcsr construct construct (#8) * Implementation for mirrored local dist CSR * Add decent test to cover all implemented construction functions and pass the test. --- .../graphs/dist_local_csr.hpp | 125 ++++- .../pando-lib-galois/graphs/graph_traits.hpp | 21 + include/pando-lib-galois/graphs/local_csr.hpp | 18 + .../graphs/mirror_dist_local_csr.hpp | 456 ++++++++++++++++++ .../import/ingest_rmat_el.hpp | 10 +- test/graphs/CMakeLists.txt | 2 + test/graphs/test_mirror_dist_local_csr.cpp | 59 +++ test/import/test_wmd_importer.cpp | 136 +++++- 8 files changed, 802 insertions(+), 25 deletions(-) create mode 100644 include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp create mode 100644 test/graphs/test_mirror_dist_local_csr.cpp diff --git a/include/pando-lib-galois/graphs/dist_local_csr.hpp b/include/pando-lib-galois/graphs/dist_local_csr.hpp index d1528df7..c11bf046 100644 --- a/include/pando-lib-galois/graphs/dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/dist_local_csr.hpp @@ -6,6 +6,7 @@ #include +#include #include #include @@ -46,9 +47,13 @@ struct DLCSR_InitializeState { } // namespace internal +template +class MirrorDistLocalCSR; + template class DistLocalCSR { public: + friend MirrorDistLocalCSR; using VertexTokenID = std::uint64_t; using VertexTopologyID = pando::GlobalPtr; using EdgeHandle = pando::GlobalPtr; @@ -115,11 +120,11 @@ class DistLocalCSR { VertexIt& operator--() { auto currNode = static_cast(galois::localityOf(m_pos).node.id); pointer ptr = m_pos - 1; - CSR csrCurr = arrayOfCSRs.get(currNode); + CSR csrCurr = arrayOfCSRs[currNode]; if (csrCurr.vertexEdgeOffsets.begin() <= ptr || currNode == 0) { m_pos = ptr; } else { - csrCurr = arrayOfCSRs.get(currNode - 1); + csrCurr = arrayOfCSRs[currNode - 1]; m_pos = csrCurr.vertexEdgeOffsets.end() - 2; } return *this; @@ -198,12 +203,12 @@ class DistLocalCSR { VertexDataIt& operator++() { auto currNode = static_cast(galois::localityOf(m_pos).node.id); pointer ptr = m_pos + 1; - CSR csrCurr = arrayOfCSRs.get(currNode); + CSR csrCurr = arrayOfCSRs[currNode]; if (csrCurr.vertexData.end() > ptr || currNode == static_cast(pando::getPlaceDims().node.id - 1)) { m_pos = ptr; } else { - csrCurr = arrayOfCSRs.get(currNode + 1); + csrCurr = arrayOfCSRs[currNode + 1]; m_pos = csrCurr.vertexData.begin(); } return *this; @@ -218,11 +223,11 @@ class DistLocalCSR { VertexDataIt& operator--() { auto currNode = static_cast(galois::localityOf(m_pos).node.id); pointer ptr = m_pos - 1; - CSR csrCurr = arrayOfCSRs.get(currNode); + CSR csrCurr = arrayOfCSRs[currNode]; if (csrCurr.vertexData.begin() <= ptr || currNode == 0) { m_pos = *ptr; } else { - csrCurr = arrayOfCSRs.get(currNode - 1); + csrCurr = arrayOfCSRs[currNode - 1]; m_pos = *csrCurr.vertexData.end() - 1; } return *this; @@ -351,6 +356,8 @@ class DistLocalCSR { template friend class DistLocalCSR; + template + friend class MirrorDistLocalCSR; public: constexpr DistLocalCSR() noexcept = default; @@ -392,11 +399,29 @@ class DistLocalCSR { /** Vertex Manipulation **/ VertexTopologyID getTopologyID(VertexTokenID tid) { + std::uint64_t virtualHostID = tid % this->numVHosts(); + std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocalRef(), get, virtualHostID); + auto [ret, found] = fmap(getLocalCSR(), relaxedgetTopologyID, tid); + if (!found) { + return fmap(arrayOfCSRs[physicalHost], getTopologyID, tid); + } else { + return ret; + } + } + +private: + // This function is for mirrored dist local csr, or classes which will directly use it. Don't use + // it externally. getLocalTopologyID with non-existing tokenID will return failure. + VertexTopologyID getLocalTopologyID(VertexTokenID tid) { + return fmap(getLocalCSR(), getTopologyID, tid); + } + VertexTopologyID getGlobalTopologyID(VertexTokenID tid) { std::uint64_t virtualHostID = tid % this->numVHosts(); std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocalRef(), get, virtualHostID); return fmap(arrayOfCSRs[physicalHost], getTopologyID, tid); } +public: VertexTopologyID getTopologyIDFromIndex(std::uint64_t index) { std::uint64_t hostNum = 0; std::uint64_t hostSize; @@ -456,13 +481,19 @@ class DistLocalCSR { } VertexDataRange vertexDataRange() noexcept { return VertexDataRange{arrayOfCSRs, lift(arrayOfCSRs[0], vertexData.begin), - lift(arrayOfCSRs.get(arrayOfCSRs.size() - 1), vertexData.end), - numVertices}; + lift(arrayOfCSRs[arrayOfCSRs.size() - 1], vertexData.end), numVertices}; } EdgeDataRange edgeDataRange(VertexTopologyID vertex) noexcept { return fmap(getCSR(vertex), edgeDataRange, vertex); } + /** Host Information **/ + std::uint64_t getPhysicalHostID(VertexTokenID tid) { + std::uint64_t virtualHostID = tid % this->numVHosts(); + std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocal(), get, virtualHostID); + return physicalHost; + } + /** Topology Modifications **/ VertexTopologyID addVertexTopologyOnly(VertexTokenID token) { return vertices().end(); @@ -642,7 +673,7 @@ class DistLocalCSR { std::uint64_t numVertices = 0; if constexpr (isEdgeList) { for (uint64_t h = 0; h < numHosts; h++) { - PANDO_CHECK(fmap(pHV.get(h), initialize, 0)); + PANDO_CHECK(fmap(pHV[h], initialize, 0)); } struct PHPV { HostIndexedMap>> partEdges; @@ -651,8 +682,8 @@ class DistLocalCSR { PHPV phpv{partEdges, pHV}; galois::doAllEvenlyPartition( phpv, numHosts, +[](PHPV phpv, uint64_t host_id, uint64_t) { - pando::Vector> edgeVec = phpv.partEdges.get(host_id); - pando::GlobalRef> vertexVec = phpv.pHV.get(host_id); + pando::Vector> edgeVec = phpv.partEdges[host_id]; + pando::GlobalRef> vertexVec = phpv.pHV[host_id]; for (pando::Vector vec : edgeVec) { EdgeType e = vec[0]; VertexType v = VertexType(e.src, agile::TYPES::NONE); @@ -661,7 +692,7 @@ class DistLocalCSR { }); for (uint64_t h = 0; h < numHosts; h++) { - numVertices += lift(pHV.get(h), size); + numVertices += lift(pHV[h], size); } } else { numVertices = numVerticesRead; @@ -693,6 +724,62 @@ class DistLocalCSR { return pando::Status::Success; } + /** + * @brief This function creates a mirror list for each host. Currently it implements full + * mirroring + */ + template + HostLocalStorage> getMirrorList( + galois::HostIndexedMap>> partEdges, + HostLocalStorage> V2PM) { + HostLocalStorage> mirrorList; + PANDO_CHECK(mirrorList.initialize()); + auto createMirrors = + +[](galois::HostIndexedMap>> partEdges, + HostLocalStorage> mirrorList, + HostLocalStorage> V2PM, std::uint64_t i, + galois::WaitGroup::HandleType wgh) { + pando::Array mirrors; + + // Populating the mirror list in a set to avoid duplicates + std::unordered_set mirrorMap; + pando::Array localV2PM = V2PM.getLocalRef(); + for (std::uint64_t k = 0; k < lift(partEdges.getLocalRef(), size); k++) { + pando::Vector currentEdge = fmap(partEdges.getLocalRef(), get, k); + for (ReadEdgeType tmp : currentEdge) { + std::uint64_t dstVHost = tmp.dst % localV2PM.size(); + std::uint64_t dstPHost = fmap(localV2PM, get, dstVHost); + if (dstPHost != i) + mirrorMap.insert(tmp.dst); + } + } + PANDO_CHECK(mirrors.initialize(mirrorMap.size())); + + // TODO(Divija): Make this parallel + // Populate the mirror list + uint64_t idx = 0; + for (auto& mirror : mirrorMap) { + mirrors[idx] = mirror; + idx++; + } + + mirrorList.getLocalRef() = mirrors; + wgh.done(); + }; + + std::uint64_t numHosts = static_cast(pando::getPlaceDims().node.id); + galois::WaitGroup wg; + PANDO_CHECK(wg.initialize(numHosts)); + auto wgh = wg.getHandle(); + for (std::uint64_t i = 0; i < numHosts; i++) { + pando::Place place = pando::Place{pando::NodeIndex{static_cast(i)}, + pando::anyPod, pando::anyCore}; + PANDO_CHECK(pando::executeOn(place, createMirrors, partEdges, mirrorList, V2PM, i, wgh)); + } + PANDO_CHECK(wg.wait()); + return mirrorList; + } + /** * @brief This initializer for workflow 4's edge lists */ @@ -814,7 +901,7 @@ class DistLocalCSR { } currentCSR.vertexEdgeOffsets[vertex] = Vertex{¤tCSR.edgeDestinations[currLocalEdge]}; - arrayOfCSRs.get(host) = currentCSR; + arrayOfCSRs[host] = currentCSR; edgesStart = edgesEnd; } edgeCounts.deinitialize(); @@ -822,7 +909,7 @@ class DistLocalCSR { edgesStart = 0; for (uint64_t host = 0; host < hosts; host++) { - CSR currentCSR = arrayOfCSRs.get(host); + CSR currentCSR = arrayOfCSRs[host]; uint64_t lastLocalVertexIndex = verticesPerHost * (host + 1) - 1; if (lastLocalVertexIndex >= numVertices) { @@ -844,7 +931,7 @@ class DistLocalCSR { currEdge = edges[edgesStart + currLocalEdge + 1]; } } - arrayOfCSRs.get(host) = currentCSR; + arrayOfCSRs[host] = currentCSR; edgesStart += currLocalEdge; } @@ -923,7 +1010,7 @@ class DistLocalCSR { galois::onEach( state2, +[](InitializeEdgeState& state, uint64_t thread, uint64_t) { uint64_t host = static_cast(pando::getCurrentNode().id); - CSR currentCSR = state.dlcsr.arrayOfCSRs.get(host); + CSR currentCSR = state.dlcsr.arrayOfCSRs[host]; uint64_t hostOffset; PANDO_CHECK(state.edges.currentHostIndexOffset(hostOffset)); @@ -1020,7 +1107,7 @@ class DistLocalCSR { */ std::uint64_t getVertexLocalIndex(VertexTopologyID vertex) { std::uint64_t hostNum = static_cast(galois::localityOf(vertex).node.id); - return fmap(arrayOfCSRs.get(hostNum), getVertexIndex, vertex); + return fmap(arrayOfCSRs[hostNum], getVertexIndex, vertex); } /** @@ -1028,7 +1115,7 @@ class DistLocalCSR { */ std::uint64_t localSize(std::uint32_t host) noexcept { - return lift(arrayOfCSRs.get(host), size); + return lift(arrayOfCSRs[host], size); } /** @@ -1065,7 +1152,7 @@ class DistLocalCSR { */ pando::GlobalRef getLocalCSR() { std::uint64_t nodeIdx = static_cast(pando::getCurrentPlace().node.id); - return arrayOfCSRs.get(nodeIdx); + return arrayOfCSRs[nodeIdx]; } private: diff --git a/include/pando-lib-galois/graphs/graph_traits.hpp b/include/pando-lib-galois/graphs/graph_traits.hpp index a804be1b..70031eb3 100644 --- a/include/pando-lib-galois/graphs/graph_traits.hpp +++ b/include/pando-lib-galois/graphs/graph_traits.hpp @@ -196,5 +196,26 @@ struct graph_checker { sizeof(addEdgesTopologyOnly(0)) == sizeof(Yes) && sizeof(addEdges(0)) == sizeof(Yes) && sizeof(deleteEdges(0)) == sizeof(Yes); }; + +/** + * @brief this is the graph interface, methods from here should mostly be used + */ +template +struct gluon_graph { + /** Size **/ + std::uint64_t getMasterSize(); + std::uint64_t getMasterSize() const noexcept; + std::uint64_t getMirrorSize(); + std::uint64_t getMirrorSize() const noexcept; + + /** Range **/ + VertexRange getMasterRange(); + VertexRange getMirrorRange(); + + /** Sync **/ + // template pando::Array sync(Func func, pando::Array); +}; } // namespace galois #endif // PANDO_LIB_GALOIS_GRAPHS_GRAPH_TRAITS_HPP_ diff --git a/include/pando-lib-galois/graphs/local_csr.hpp b/include/pando-lib-galois/graphs/local_csr.hpp index f8a0e7b3..87ab0ed2 100644 --- a/include/pando-lib-galois/graphs/local_csr.hpp +++ b/include/pando-lib-galois/graphs/local_csr.hpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -233,10 +235,14 @@ class RefSpan { template class DistLocalCSR; +template +class MirrorDistLocalCSR; + template class LCSR { public: friend DistLocalCSR; + friend MirrorDistLocalCSR; using VertexTokenID = std::uint64_t; using VertexTopologyID = pando::GlobalPtr; using EdgeHandle = pando::GlobalPtr; @@ -409,9 +415,21 @@ class LCSR { } /** Vertex Manipulation **/ +private: + // Use with your own risk. + // It is reasonable only when you could handle the non-existing value outside of this function. + galois::Pair relaxedgetTopologyID(VertexTokenID token) { + pando::GlobalPtr ret; + bool found = tokenToTopology.get(token, ret); + return galois::make_tpl(ret, found); + } + +public: VertexTopologyID getTopologyID(VertexTokenID token) { pando::GlobalPtr ret; if (!tokenToTopology.get(token, ret)) { + std::cout << "In the host " << pando::getCurrentPlace().node.id + << "can't find token:" << token << std::endl; PANDO_ABORT("FAILURE TO FIND TOKENID"); } return ret; diff --git a/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp b/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp new file mode 100644 index 00000000..d1216922 --- /dev/null +++ b/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#ifndef PANDO_LIB_GALOIS_GRAPHS_MIRROR_DIST_LOCAL_CSR_HPP_ +#define PANDO_LIB_GALOIS_GRAPHS_MIRROR_DIST_LOCAL_CSR_HPP_ + +#include + +#include + +#include "pando-rt/sync/mutex.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FREE 1 + +namespace galois { + +namespace internal { + +template +struct MDLCSR_InitializeState { + using CSR = LCSR; + + MDLCSR_InitializeState() = default; + MDLCSR_InitializeState(galois::HostIndexedMap arrayOfCSRs_, + galois::PerThreadVector vertices_, + galois::PerThreadVector edges_, + galois::PerThreadVector edgeCounts_) + : arrayOfCSRs(arrayOfCSRs_), vertices(vertices_), edges(edges_), edgeCounts(edgeCounts_) {} + + galois::HostIndexedMap arrayOfCSRs; + galois::PerThreadVector vertices; + galois::PerThreadVector edges; + galois::PerThreadVector edgeCounts; +}; + +} // namespace internal + +template +class MirrorDistLocalCSR { +public: + using VertexTokenID = std::uint64_t; + using VertexTopologyID = pando::GlobalPtr; + using EdgeHandle = pando::GlobalPtr; + using VertexData = VertexType; + using EdgeData = EdgeType; + using EdgeRange = RefSpan; + using EdgeDataRange = pando::Span; + using CSR = LCSR; + using DLCSR = DistLocalCSR; + using VertexRange = typename DLCSR::VertexRange; + using VertexDataRange = typename DLCSR::VertexDataRange; + using LocalVertexRange = typename CSR::VertexRange; + using LocalVertexDataRange = typename CSR::VertexDataRange; + +private: + template + pando::GlobalRef getCSR(pando::GlobalPtr ptr) { + return dlcsr.getCSR(ptr); + } + + EdgeHandle halfEdgeBegin(VertexTopologyID vertex) { + return dlcsr.halfEdgeBegin(vertex); + } + + EdgeHandle halfEdgeEnd(VertexTopologyID vertex) { + return dlcsr.halfEdgeEnd(vertex); + } + + std::uint64_t numVHosts() { + return dlcsr.numVHosts(); + } + +public: + constexpr MirrorDistLocalCSR() noexcept = default; + constexpr MirrorDistLocalCSR(MirrorDistLocalCSR&&) noexcept = default; + constexpr MirrorDistLocalCSR(const MirrorDistLocalCSR&) noexcept = default; + ~MirrorDistLocalCSR() = default; + + constexpr MirrorDistLocalCSR& operator=(const MirrorDistLocalCSR&) noexcept = default; + constexpr MirrorDistLocalCSR& operator=(MirrorDistLocalCSR&&) noexcept = default; + + /** Official Graph APIS **/ + void deinitialize() { + dlcsr.deinitialize(); + } + + /** size stuff **/ + std::uint64_t size() noexcept { + return dlcsr.size() - _mirror_size; + } + std::uint64_t size() const noexcept { + return dlcsr.size() - _mirror_size; + } + std::uint64_t sizeEdges() noexcept { + return dlcsr.sizeEdges(); + } + std::uint64_t sizeEdges() const noexcept { + return dlcsr.sizeEdges(); + } + std::uint64_t getNumEdges(VertexTopologyID vertex) { + return dlcsr.getNumEdges(vertex); + } + std::uint64_t sizeMirrors() noexcept { + return _mirror_size; + } + std::uint64_t sizeMirrors() const noexcept { + return _mirror_size; + } + + struct MirrorToMasterMap { + MirrorToMasterMap() = default; + MirrorToMasterMap(VertexTopologyID _mirror, VertexTopologyID _master) + : mirror(_mirror), master(_master) {} + VertexTopologyID mirror; + VertexTopologyID master; + VertexTopologyID getMirror() { + return mirror; + } + VertexTopologyID getMaster() { + return master; + } + }; + + /** Vertex Manipulation **/ + VertexTopologyID getTopologyID(VertexTokenID tid) { + return dlcsr.getTopologyID(tid); + } + + VertexTopologyID getLocalTopologyID(VertexTokenID tid) { + return dlcsr.getLocalTopologyID(tid); + } + + VertexTopologyID getGlobalTopologyID(VertexTokenID tid) { + return dlcsr.getGlobalTopologyID(tid); + } + + pando::Array getLocalMirrorToRemoteMasterOrderedTable() { + return localMirrorToRemoteMasterOrderedTable.getLocalRef(); + } + + VertexTopologyID getTopologyIDFromIndex(std::uint64_t index) { + return dlcsr.getTopologyIDFromIndex(index); + } + VertexTokenID getTokenID(VertexTopologyID tid) { + return dlcsr.getTokenID(tid); + } + std::uint64_t getVertexIndex(VertexTopologyID vertex) { + return dlcsr.getVertexIndex(vertex); + } + pando::Place getLocalityVertex(VertexTopologyID vertex) { + return dlcsr.getLocalityVertex(vertex); + } + + /** Edge Manipulation **/ + EdgeHandle mintEdgeHandle(VertexTopologyID vertex, std::uint64_t off) { + return dlcsr.mintEdgeHandle(vertex, off); + } + VertexTopologyID getEdgeDst(EdgeHandle eh) { + return dlcsr.getEdgeDst(eh); + } + + /** Data Manipulations **/ + void setData(VertexTopologyID vertex, VertexData data) { + dlcsr.setData(vertex, data); + } + pando::GlobalRef getData(VertexTopologyID vertex) { + return dlcsr.getData(vertex); + } + void setEdgeData(EdgeHandle eh, EdgeData data) { + dlcsr.setEdgeData(eh, data); + } + pando::GlobalRef getEdgeData(EdgeHandle eh) { + return dlcsr.getEdgeData(eh); + } + + /** Ranges **/ + VertexRange vertices() { + // This will include all mirrored vertices + return dlcsr.vertices(); + } + + EdgeRange edges(pando::GlobalPtr vPtr) { + return dlcsr.edges(vPtr); + } + VertexDataRange vertexDataRange() noexcept { + return dlcsr.vertexDataRange(); + } + EdgeDataRange edgeDataRange(VertexTopologyID vertex) noexcept { + return dlcsr.edgeDataRange(vertex); + } + + /** Topology Modifications **/ + VertexTopologyID addVertexTopologyOnly(VertexTokenID token) { + return dlcsr.addVertexTopologyOnly(token); + } + VertexTopologyID addVertex(VertexTokenID token, VertexData data) { + return dlcsr.addVertex(token, data); + } + pando::Status addEdgesTopologyOnly(VertexTopologyID src, pando::Vector dsts) { + return dlcsr.addEdgesTopologyOnly(src, dsts); + } + pando::Status addEdges(VertexTopologyID src, pando::Vector dsts, + pando::Vector data) { + return dlcsr.addEdges(src, dsts, data); + } + pando::Status deleteEdges(VertexTopologyID src, pando::Vector edges) { + return dlcsr.deleteEdges(src, edges); + } + + /** Gluon Graph APIS **/ + + /** Size **/ + std::uint64_t getMasterSize() noexcept { + return lift(masterRange.getLocalRef(), size); + } + std::uint64_t getMirrorSize() noexcept { + return lift(mirrorRange.getLocalRef(), size); + } + + /** Range **/ + LocalVertexRange getMasterRange() { + return masterRange.getLocalRef(); + } + LocalVertexRange getMirrorRange() { + return mirrorRange.getLocalRef(); + } + + /** Host Information **/ + std::uint64_t getPhysicalHostID(VertexTokenID tid) { + return dlcsr.getPhysicalHostID(tid); + } + + /** Sync **/ + // TODO(Ying-Wei): + // write a sync function that reduces mirror values and then broadcasts master values + // return a bitmap of modified vertices + // + // template + // pando::Array sync(Func func, pando::Array) { + //} + + /** + * @brief get vertex local dense ID + */ + std::uint64_t getVertexLocalIndex(VertexTopologyID vertex) { + return dlcsr.getVertexIndex(vertex); + } + + /** + * @brief gives the number of edges + */ + + std::uint64_t localSize(std::uint32_t host) noexcept { + return dlcsr.localSize(host); + } + + /** + * @brief Sets the value of the edge provided + */ + void setEdgeData(VertexTopologyID vertex, std::uint64_t off, EdgeData data) { + dlcsr.setEdgeData(mintEdgeHandle(vertex, off), data); + } + + /** + * @brief gets the reference to the vertex provided + */ + pando::GlobalRef getEdgeData(VertexTopologyID vertex, std::uint64_t off) { + return dlcsr.getEdgeData(mintEdgeHandle(vertex, off)); + } + + /** + * @brief get the vertex at the end of the edge provided by vertex at the offset from the start + */ + VertexTopologyID getEdgeDst(VertexTopologyID vertex, std::uint64_t off) { + return dlcsr.getEdgeDst(mintEdgeHandle(vertex, off)); + } + + bool isLocal(VertexTopologyID vertex) { + return dlcsr.isLocal(vertex); + } + + bool isOwned(VertexTopologyID vertex) { + return dlcsr.isOwned(vertex); + } + + /** + * @brief Get the local csr + */ + pando::GlobalRef getLocalCSR() { + return dlcsr.getLocalCSR(); + } + + // TODO(Jeageun): + // write a initialize function that calls initializeAfterGather function of DistLocalCSR dlcsr + template + pando::Status initializeAfterGather( + galois::HostIndexedMap> vertexData, std::uint64_t numVertices, + galois::HostIndexedMap>> edgeData, + galois::HostIndexedMap> edgeMap, + galois::HostIndexedMap numEdges, + HostLocalStorage> virtualToPhysical) { + std::uint64_t numHosts = static_cast(pando::getPlaceDims().node.id); + galois::WaitGroup wg; + PANDO_CHECK(wg.initialize(numHosts)); + auto wgh = wg.getHandle(); + _mirror_size = 0; + HostLocalStorage> mirrorList; + mirrorList = this->dlcsr.getMirrorList(edgeData, virtualToPhysical); + PANDO_CHECK(masterRange.initialize()); + PANDO_CHECK(mirrorRange.initialize()); + PANDO_CHECK(localMirrorToRemoteMasterOrderedTable.initialize()); + + auto mirrorAttach = +[](galois::HostIndexedMap> vertexData, + HostLocalStorage> mirrorList, + std::uint64_t i, galois::WaitGroup::HandleType wgh) { + pando::Vector curVertexData = vertexData[i]; + pando::Array curMirrorList = mirrorList[i]; + for (uint64_t j = 0; j < lift(curMirrorList, size); j++) { + ReadVertexType v = ReadVertexType{curMirrorList[j]}; + PANDO_CHECK(fmap(curVertexData, pushBack, v)); + } + vertexData[i] = curVertexData; + wgh.done(); + }; + uint64_t local_mirror_size = 0; + for (std::uint64_t i = 0; i < numHosts; i++) { + pando::Place place = pando::Place{pando::NodeIndex{static_cast(i)}, + pando::anyPod, pando::anyCore}; + PANDO_CHECK(pando::executeOn(place, mirrorAttach, vertexData, mirrorList, i, wgh)); + local_mirror_size = lift(mirrorList[i], size); + numVertices += local_mirror_size; + _mirror_size += local_mirror_size; + } + PANDO_CHECK(wg.wait()); + wgh.add(numHosts); + + this->dlcsr.initializeAfterGather(vertexData, numVertices, edgeData, edgeMap, numEdges, + virtualToPhysical); + + // Generate masterRange, mirrorRange, localMirrorToRemoteMasterOrderedTable + auto generateMetadata = +[](MirrorDistLocalCSR mdlcsr, + DistLocalCSR dlcsr, + HostLocalStorage> mirrorList, + std::uint64_t i, galois::WaitGroup::HandleType wgh) { + pando::Array localMirrorList = mirrorList[i]; + uint64_t mirror_size = lift(localMirrorList, size); + CSR csrCurr = dlcsr.arrayOfCSRs[i]; + + LocalVertexRange _masterRange = mdlcsr.masterRange.getLocalRef(); + _masterRange = LocalVertexRange(lift(csrCurr, vertexEdgeOffsets.begin), + lift(csrCurr, size) - mirror_size); + + LocalVertexRange _mirrorRange = mdlcsr.mirrorRange.getLocalRef(); + _mirrorRange = LocalVertexRange( + lift(csrCurr, vertexEdgeOffsets.begin) + lift(csrCurr, size) - mirror_size, mirror_size); + + pando::Array _localMirrorToRemoteMasterOrderedTable = + mdlcsr.localMirrorToRemoteMasterOrderedTable.getLocalRef(); + fmap(_localMirrorToRemoteMasterOrderedTable, initialize, mirror_size); + for (uint64_t j = 0; j < mirror_size; j++) { + _localMirrorToRemoteMasterOrderedTable[j] = + MirrorToMasterMap(fmap(dlcsr, getLocalTopologyID, localMirrorList[j]), + fmap(dlcsr, getGlobalTopologyID, localMirrorList[j])); + } + mdlcsr.masterRange.getLocalRef() = _masterRange; + mdlcsr.mirrorRange.getLocalRef() = _mirrorRange; + mdlcsr.localMirrorToRemoteMasterOrderedTable.getLocalRef() = + _localMirrorToRemoteMasterOrderedTable; + wgh.done(); + }; + + for (std::uint64_t i = 0; i < numHosts; i++) { + pando::Place place = pando::Place{pando::NodeIndex{static_cast(i)}, + pando::anyPod, pando::anyCore}; + PANDO_CHECK( + pando::executeOn(place, generateMetadata, *this, this->dlcsr, mirrorList, i, wgh)); + numVertices += lift(mirrorList[i], size); + } + PANDO_CHECK(wg.wait()); + return pando::Status::Success; + } + + // TODO(Ying-Wei): + // uses doAll to send remoteMasterToLocalMirrorMap to corresponding remote hosts + // no need to use executeON + // just push to the localMasterToRemoteMirrorOrderedTable vector + // make sure to use the spin lock in pando-rt + /** + * @brief Get the local mutex + */ + pando::GlobalRef getLocalMutex(std::uint64_t host_id) { + return hostMutex[host_id]; + } + + pando::Status setupCommunication() { + PANDO_CHECK_RETURN(localMasterToRemoteMirrorTable.initialize()); + + PANDO_CHECK_RETURN(hostMutex.initialize()); + + PANDO_CHECK_RETURN(galois::doAll( + localMirrorToRemoteMasterOrderedTable, localMasterToRemoteMirrorTable, + +[](galois::HostLocalStorage> + localMirrorToRemoteMasterOrderedTable, + pando::GlobalRef> localMasterToRemoteMirrorTable) { + PANDO_CHECK(fmap(localMirrorToRemoteMasterOrderedTable, initialize, 0)); + pando::Array remoteMasterToLocalMirrorMap = + localMirrorToRemoteMasterOrderedTable.getLocal(); + for (MirrorToMasterMap m : remoteMasterToLocalMirrorMap) { + VertexTopologyID masterTopologyID = m.master; + VertexTokenID masterTokenID = getTokenID(masterTopologyID); + std::uint64_t physicalHost = getPhysicalHostID(masterTokenID); + pando::Mutex mutex = getLocalMutex(physicalHost); + + // Lock mutex to ensure atomic append to the vector + mutex.lock(); + PANDO_CHECK(fmap(localMasterToRemoteMirrorTable, pushBack, m)); + mutex.unlock(); + } + })); + + return pando::Status::Success; + } + +private: + DLCSR dlcsr; + uint64_t _mirror_size; + galois::HostLocalStorage masterRange; + galois::HostLocalStorage mirrorRange; + galois::HostLocalStorage> localMirrorToRemoteMasterOrderedTable; + + // TODO(Ying-Wei): + // generate the following + galois::HostLocalStorage hostMutex; + galois::HostLocalStorage> localMasterToRemoteMirrorTable; + // galois::GlobalBarrier barrier; +}; + +static_assert(graph_checker>::value); +static_assert(graph_checker>::value); + +} // namespace galois + +#endif // PANDO_LIB_GALOIS_GRAPHS_MIRROR_DIST_LOCAL_CSR_HPP_ diff --git a/include/pando-lib-galois/import/ingest_rmat_el.hpp b/include/pando-lib-galois/import/ingest_rmat_el.hpp index e8fd90b9..71fe90a2 100644 --- a/include/pando-lib-galois/import/ingest_rmat_el.hpp +++ b/include/pando-lib-galois/import/ingest_rmat_el.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace galois { @@ -50,10 +51,9 @@ pando::Status generateEdgesPerVirtualHost(pando::GlobalRef -galois::DistLocalCSR initializeELDLCSR(pando::Array filename, - std::uint64_t numVertices, - std::uint64_t vHostsScaleFactor = 8) { +template +ReturnType initializeELDLCSR(pando::Array filename, std::uint64_t numVertices, + std::uint64_t vHostsScaleFactor = 8) { galois::PerThreadVector> localEdges; PANDO_CHECK(localEdges.initialize()); @@ -146,7 +146,7 @@ galois::DistLocalCSR initializeELDLCSR(pando::Array auto [partEdges, renamePerHost] = internal::partitionEdgesParallely(pHV, std::move(localEdges), v2PM); - using Graph = galois::DistLocalCSR; + using Graph = ReturnType; Graph graph; graph.template initializeAfterGather( pHV, numVertices, partEdges, renamePerHost, numEdges, diff --git a/test/graphs/CMakeLists.txt b/test/graphs/CMakeLists.txt index e127d446..e12a09fa 100644 --- a/test/graphs/CMakeLists.txt +++ b/test/graphs/CMakeLists.txt @@ -3,3 +3,5 @@ pando_add_driver_test(test_dist_array_csr test_dist_array_csr.cpp) pando_add_driver_test(test_local_csr test_local_csr.cpp) +pando_add_driver_test(test_dist_local_csr test_dist_local_csr.cpp) +pando_add_driver_test(test_mirror_dist_local_csr test_mirror_dist_local_csr.cpp) diff --git a/test/graphs/test_mirror_dist_local_csr.cpp b/test/graphs/test_mirror_dist_local_csr.cpp new file mode 100644 index 00000000..c50f25cb --- /dev/null +++ b/test/graphs/test_mirror_dist_local_csr.cpp @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#include + +#include + +#include "pando-rt/export.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +pando::Vector> generateFullyConnectedGraph(std::uint64_t SIZE) { + pando::Vector> vec; + EXPECT_EQ(vec.initialize(SIZE), pando::Status::Success); + for (pando::GlobalRef> edges : vec) { + pando::Vector inner; + EXPECT_EQ(inner.initialize(0), pando::Status::Success); + edges = inner; + } + + galois::doAll( + SIZE, vec, +[](std::uint64_t size, pando::GlobalRef> innerRef) { + pando::Vector inner = innerRef; + for (std::uint64_t i = 0; i < size; i++) { + EXPECT_EQ(inner.pushBack(i), pando::Status::Success); + } + innerRef = inner; + }); + return vec; +} + +template +pando::Status deleteVectorVector(pando::Vector> vec) { + auto err = galois::doAll( + vec, +[](pando::GlobalRef> innerRef) { + pando::Vector inner = innerRef; + inner.deinitialize(); + innerRef = inner; + }); + vec.deinitialize(); + return err; +} + +using Graph = galois::MirrorDistLocalCSR; + +TEST(MirrorDistLocalCSR, NumVertices) { + constexpr std::uint64_t SIZE = 10; + Graph graph; + auto vec = generateFullyConnectedGraph(SIZE); + + EXPECT_EQ(deleteVectorVector(vec), pando::Status::Success); +} diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index 72f00847..59c76fbd 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -198,7 +198,8 @@ TEST_P(DLCSRInitEdgeList, initializeEL) { for (uint64_t i = 0; i < elFile.size(); i++) filename[i] = elFile[i]; - Graph graph = galois::initializeELDLCSR(filename, numVertices); + Graph graph = + galois::initializeELDLCSR(filename, numVertices); // Validate std::unordered_map> goldenTable; @@ -270,3 +271,136 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple("/pando/graphs/rmat_571919_seed1_scale16_nV65536_nE909846.el", 65536), std::make_tuple("/pando/graphs/rmat_571919_seed1_scale17_nV131072_nE1864704.el", 131072), std::make_tuple("/pando/graphs/rmat_571919_seed1_scale18_nV262144_nE3806162.el", 262144))); + +class MirrorDLCSRInitEdgeList + : public ::testing::TestWithParam> {}; +TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { + using ET = galois::ELEdge; + using VT = galois::ELVertex; + using Graph = galois::MirrorDistLocalCSR; + galois::HostLocalStorageHeap::HeapInit(); + + const std::string elFile = std::get<0>(GetParam()); + const std::uint64_t numVertices = std::get<1>(GetParam()); + + pando::Array filename; + EXPECT_EQ(pando::Status::Success, filename.initialize(elFile.size())); + for (uint64_t i = 0; i < elFile.size(); i++) + filename[i] = elFile[i]; + + Graph graph = + galois::initializeELDLCSR(filename, numVertices); + + // Validate + std::unordered_map> goldenTable; + getVerticesAndEdgesEL(elFile, numVertices, goldenTable); + EXPECT_EQ(goldenTable.size(), graph.size()); + + // Iterate over vertices + std::uint64_t vid = 0; + + for (typename Graph::VertexTopologyID vert : graph.vertices()) { + EXPECT_EQ(vid, graph.getVertexIndex(vert)); + vid++; + typename Graph::VertexTokenID srcTok = graph.getTokenID(vert); + + EXPECT_LT(srcTok, numVertices); + + typename Graph::VertexData vertexData = graph.getData(vert); + EXPECT_EQ(srcTok, vertexData.id); + + VT dumbVertex = VT{numVertices}; + graph.setData(vert, dumbVertex); + vertexData = graph.getData(vert); + EXPECT_EQ(vertexData.id, numVertices); + + // Iterate over edges + EXPECT_NE(goldenTable.find(srcTok), goldenTable.end()) + << "Failed to find edges with tok_id:" << srcTok << "\t with index: " << (vid - 1); + std::vector goldenEdges = goldenTable[srcTok]; + + for (typename Graph::EdgeHandle eh : graph.edges(vert)) { + typename Graph::EdgeData eData = graph.getEdgeData(eh); + + EXPECT_EQ(eData.src, srcTok); + + typename Graph::VertexTokenID dstTok = graph.getTokenID(graph.getEdgeDst(eh)); + EXPECT_EQ(eData.dst, dstTok); + + auto mirrorTopology = graph.getTopologyID(dstTok); + auto masterTopology = graph.getGlobalTopologyID(dstTok); + if (mirrorTopology != masterTopology) { + // If global, and local have different value. + // It means current one have mirror. Mirror is local, but master is not. + ASSERT_TRUE(graph.isLocal(mirrorTopology)); + ASSERT_TRUE(!graph.isLocal(masterTopology)); + bool found = false; + // Mirror must exist in mirror range. + auto it = graph.getMirrorRange(); + for (auto v = it.begin(); v != it.end(); v++) { + if (*v == mirrorTopology) { + found = true; + } + } + ASSERT_TRUE(found); + found = false; + auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); + for (auto elem : mirror_master_array) { + if ((lift(elem, getMirror) == mirrorTopology) && + (lift(elem, getMaster) == masterTopology)) { + found = true; + } + } + ASSERT_TRUE(found); + } else { + // If I don't have mirror, that could be because it is in local, or never be a destination + // from me. + if (graph.isLocal(mirrorTopology)) { + // If it is from me, it is in my master range. + bool found = false; + auto it = graph.getMasterRange(); + for (auto v = it.begin(); v != it.end(); v++) { + if (*v == mirrorTopology) { + found = true; + } + } + ASSERT_TRUE(found); + found = false; + // In mirror to master, this should never exist + auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); + for (auto elem : mirror_master_array) { + if ((lift(elem, getMirror) == mirrorTopology) || + (lift(elem, getMaster) == masterTopology)) { + ASSERT_TRUE(false); + } + } + } + } + + auto goldenEdgeIt = std::find(goldenEdges.begin(), goldenEdges.end(), dstTok); + EXPECT_NE(goldenEdgeIt, goldenEdges.end()) + << "Unable to find edge with src_tok: " << srcTok << "\tand dst_tok: " << dstTok + << "\tat vertex: " << (vid - 1); + ET dumbEdge = ET{numVertices, numVertices}; + graph.setEdgeData(eh, dumbEdge); + eData = graph.getEdgeData(eh); + EXPECT_EQ(eData.src, numVertices); + EXPECT_EQ(eData.dst, numVertices); + } + } + graph.deinitialize(); +} + +INSTANTIATE_TEST_SUITE_P( + SmallFiles, MirrorDLCSRInitEdgeList, + ::testing::Values(std::make_tuple("/pando/graphs/simple.el", 10), + std::make_tuple("/pando/graphs/rmat_571919_seed1_scale10_nV1024_nE10447.el", + 1024))); + +INSTANTIATE_TEST_SUITE_P( + DISABLED_BigFiles, MirrorDLCSRInitEdgeList, + ::testing::Values( + std::make_tuple("/pando/graphs/rmat_571919_seed1_scale11_nV2048_nE22601.el", 2048), + std::make_tuple("/pando/graphs/rmat_571919_seed1_scale12_nV4096_nE48335.el", 4096), + std::make_tuple("/pando/graphs/rmat_571919_seed1_scale13_nV8192_nE102016.el", 8192), + std::make_tuple("/pando/graphs/rmat_571919_seed1_scale14_nV16384_nE213350.el", 16384), From 45762f83dcc7c87f8e4d8695041c36973d9c9142 Mon Sep 17 00:00:00 2001 From: ywwu928 Date: Thu, 4 Apr 2024 10:27:08 -0500 Subject: [PATCH 02/28] minor bug fix --- test/import/test_wmd_importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index 59c76fbd..c446025c 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -403,4 +403,4 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple("/pando/graphs/rmat_571919_seed1_scale11_nV2048_nE22601.el", 2048), std::make_tuple("/pando/graphs/rmat_571919_seed1_scale12_nV4096_nE48335.el", 4096), std::make_tuple("/pando/graphs/rmat_571919_seed1_scale13_nV8192_nE102016.el", 8192), - std::make_tuple("/pando/graphs/rmat_571919_seed1_scale14_nV16384_nE213350.el", 16384), + std::make_tuple("/pando/graphs/rmat_571919_seed1_scale14_nV16384_nE213350.el", 16384))); From 0b4fa5e1034edb02cff345831aa3524e6bca5ad4 Mon Sep 17 00:00:00 2001 From: jeageun Date: Thu, 4 Apr 2024 15:35:26 +0000 Subject: [PATCH 03/28] Early exit to reduce the time and pass timeout --- test/import/test_wmd_importer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index c446025c..55c21514 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -340,6 +340,7 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { for (auto v = it.begin(); v != it.end(); v++) { if (*v == mirrorTopology) { found = true; + break; } } ASSERT_TRUE(found); @@ -349,6 +350,7 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { if ((lift(elem, getMirror) == mirrorTopology) && (lift(elem, getMaster) == masterTopology)) { found = true; + break; } } ASSERT_TRUE(found); @@ -362,6 +364,7 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { for (auto v = it.begin(); v != it.end(); v++) { if (*v == mirrorTopology) { found = true; + break; } } ASSERT_TRUE(found); From e687a66b39e6bc79c76bb122ae93e78f87f6c22f Mon Sep 17 00:00:00 2001 From: jeageun Date: Thu, 4 Apr 2024 16:48:17 +0000 Subject: [PATCH 04/28] Use hashtable instead of array during test --- test/import/test_wmd_importer.cpp | 50 ++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index 55c21514..4af73e1c 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -282,7 +283,6 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { const std::string elFile = std::get<0>(GetParam()); const std::uint64_t numVertices = std::get<1>(GetParam()); - pando::Array filename; EXPECT_EQ(pando::Status::Success, filename.initialize(elFile.size())); for (uint64_t i = 0; i < elFile.size(); i++) @@ -299,6 +299,39 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { // Iterate over vertices std::uint64_t vid = 0; + // Populate mirror_master_dictionary for faster lookup + std::uint64_t numHosts = static_cast(pando::getPlaceDims().node.id); + galois::WaitGroup wg; + PANDO_CHECK(wg.initialize(numHosts)); + auto wgh = wg.getHandle(); + + galois::HostLocalStorage< + galois::HashTable, pando::GlobalPtr>> + mirror_master_table; + + auto genMirrorMasterDict = + +[](Graph graph, + galois::HostLocalStorage< + galois::HashTable, pando::GlobalPtr>> + mirror_master_table, + galois::WaitGroup::HandleType wgh) { + auto _mirror_master_table = mirror_master_table.getLocalRef(); + auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); + PANDO_CHECK(fmap(_mirror_master_table, initialize, lift(mirror_master_array, size))); + for (auto elem : mirror_master_array) { + fmap(_mirror_master_table, put, lift(elem, getMirror), lift(elem, getMaster)); + } + mirror_master_table.getLocalRef() = _mirror_master_table; + wgh.done(); + }; + + for (std::uint64_t i = 0; i < numHosts; i++) { + pando::Place place = + pando::Place{pando::NodeIndex{static_cast(i)}, pando::anyPod, pando::anyCore}; + PANDO_CHECK(pando::executeOn(place, genMirrorMasterDict, graph, mirror_master_table, wgh)); + } + PANDO_CHECK(wg.wait()); + for (typename Graph::VertexTopologyID vert : graph.vertices()) { EXPECT_EQ(vid, graph.getVertexIndex(vert)); vid++; @@ -345,12 +378,10 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { } ASSERT_TRUE(found); found = false; - auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); - for (auto elem : mirror_master_array) { - if ((lift(elem, getMirror) == mirrorTopology) && - (lift(elem, getMaster) == masterTopology)) { + pando::GlobalPtr val; + if (fmap(mirror_master_table.getLocalRef(), get, mirrorTopology, val)) { + if (val == masterTopology) { found = true; - break; } } ASSERT_TRUE(found); @@ -370,10 +401,9 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { ASSERT_TRUE(found); found = false; // In mirror to master, this should never exist - auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); - for (auto elem : mirror_master_array) { - if ((lift(elem, getMirror) == mirrorTopology) || - (lift(elem, getMaster) == masterTopology)) { + pando::GlobalPtr val; + if (fmap(mirror_master_table.getLocalRef(), get, mirrorTopology, val)) { + if (val == masterTopology) { ASSERT_TRUE(false); } } From eb0eaa7ecd8caf00b58fda7adbc49e0d51f2d7c2 Mon Sep 17 00:00:00 2001 From: jeageun Date: Thu, 4 Apr 2024 18:43:44 +0000 Subject: [PATCH 05/28] Fix bug and performance issue on the test --- .../graphs/dist_local_csr.hpp | 1 + .../graphs/mirror_dist_local_csr.hpp | 1 + .../import/wmd_graph_importer.hpp | 1 + test/import/test_wmd_importer.cpp | 69 ++++--------------- 4 files changed, 16 insertions(+), 56 deletions(-) diff --git a/include/pando-lib-galois/graphs/dist_local_csr.hpp b/include/pando-lib-galois/graphs/dist_local_csr.hpp index c11bf046..7e32438c 100644 --- a/include/pando-lib-galois/graphs/dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/dist_local_csr.hpp @@ -777,6 +777,7 @@ class DistLocalCSR { PANDO_CHECK(pando::executeOn(place, createMirrors, partEdges, mirrorList, V2PM, i, wgh)); } PANDO_CHECK(wg.wait()); + wg.deinitialize(); return mirrorList; } diff --git a/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp b/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp index d1216922..4c86db1d 100644 --- a/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp @@ -390,6 +390,7 @@ class MirrorDistLocalCSR { numVertices += lift(mirrorList[i], size); } PANDO_CHECK(wg.wait()); + wg.deinitialize(); return pando::Status::Success; } diff --git a/include/pando-lib-galois/import/wmd_graph_importer.hpp b/include/pando-lib-galois/import/wmd_graph_importer.hpp index d6078687..75742717 100644 --- a/include/pando-lib-galois/import/wmd_graph_importer.hpp +++ b/include/pando-lib-galois/import/wmd_graph_importer.hpp @@ -125,6 +125,7 @@ void buildEdgeCountToSend( })); PANDO_CHECK(wg.wait()); labeledEdgeCounts = sumArray; + wg.deinitialize(); } [[nodiscard]] pando::Expected< diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index 4af73e1c..dfbe2702 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -299,39 +299,6 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { // Iterate over vertices std::uint64_t vid = 0; - // Populate mirror_master_dictionary for faster lookup - std::uint64_t numHosts = static_cast(pando::getPlaceDims().node.id); - galois::WaitGroup wg; - PANDO_CHECK(wg.initialize(numHosts)); - auto wgh = wg.getHandle(); - - galois::HostLocalStorage< - galois::HashTable, pando::GlobalPtr>> - mirror_master_table; - - auto genMirrorMasterDict = - +[](Graph graph, - galois::HostLocalStorage< - galois::HashTable, pando::GlobalPtr>> - mirror_master_table, - galois::WaitGroup::HandleType wgh) { - auto _mirror_master_table = mirror_master_table.getLocalRef(); - auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); - PANDO_CHECK(fmap(_mirror_master_table, initialize, lift(mirror_master_array, size))); - for (auto elem : mirror_master_array) { - fmap(_mirror_master_table, put, lift(elem, getMirror), lift(elem, getMaster)); - } - mirror_master_table.getLocalRef() = _mirror_master_table; - wgh.done(); - }; - - for (std::uint64_t i = 0; i < numHosts; i++) { - pando::Place place = - pando::Place{pando::NodeIndex{static_cast(i)}, pando::anyPod, pando::anyCore}; - PANDO_CHECK(pando::executeOn(place, genMirrorMasterDict, graph, mirror_master_table, wgh)); - } - PANDO_CHECK(wg.wait()); - for (typename Graph::VertexTopologyID vert : graph.vertices()) { EXPECT_EQ(vid, graph.getVertexIndex(vert)); vid++; @@ -370,40 +337,28 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { bool found = false; // Mirror must exist in mirror range. auto it = graph.getMirrorRange(); - for (auto v = it.begin(); v != it.end(); v++) { - if (*v == mirrorTopology) { + ASSERT_TRUE(*it.begin() <= mirrorTopology && mirrorTopology < *it.end()); + auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); + for (auto elem : mirror_master_array) { + if ((lift(elem, getMirror) == mirrorTopology) && + (lift(elem, getMaster) == masterTopology)) { found = true; break; } } ASSERT_TRUE(found); - found = false; - pando::GlobalPtr val; - if (fmap(mirror_master_table.getLocalRef(), get, mirrorTopology, val)) { - if (val == masterTopology) { - found = true; - } - } - ASSERT_TRUE(found); } else { // If I don't have mirror, that could be because it is in local, or never be a destination // from me. - if (graph.isLocal(mirrorTopology)) { + if (graph.isLocal(masterTopology)) { // If it is from me, it is in my master range. - bool found = false; auto it = graph.getMasterRange(); - for (auto v = it.begin(); v != it.end(); v++) { - if (*v == mirrorTopology) { - found = true; - break; - } - } - ASSERT_TRUE(found); - found = false; + ASSERT_TRUE(*it.begin() <= masterTopology && masterTopology < *it.end()); // In mirror to master, this should never exist - pando::GlobalPtr val; - if (fmap(mirror_master_table.getLocalRef(), get, mirrorTopology, val)) { - if (val == masterTopology) { + auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); + for (auto elem : mirror_master_array) { + if ((lift(elem, getMirror) == mirrorTopology) || + (lift(elem, getMaster) == masterTopology)) { ASSERT_TRUE(false); } } @@ -422,6 +377,8 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { } } graph.deinitialize(); + + // wg.deinitialize(); } INSTANTIATE_TEST_SUITE_P( From aeb3aa2beab489893cbc225520349ce0e7dac24c Mon Sep 17 00:00:00 2001 From: jeageun Date: Thu, 4 Apr 2024 19:33:30 +0000 Subject: [PATCH 06/28] Increase Hostlocalstorage heap size --- include/pando-lib-galois/containers/host_local_storage.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pando-lib-galois/containers/host_local_storage.hpp b/include/pando-lib-galois/containers/host_local_storage.hpp index 41f20ecc..629c724f 100644 --- a/include/pando-lib-galois/containers/host_local_storage.hpp +++ b/include/pando-lib-galois/containers/host_local_storage.hpp @@ -18,7 +18,7 @@ namespace galois { namespace HostLocalStorageHeap { -constexpr std::uint64_t Size = 1 << 10; +constexpr std::uint64_t Size = 1 << 20; constexpr std::uint64_t Granule = 128; struct ModestArray { std::byte arr[Size]; From c4ac8b05eb2745052a9cb7cf6fc3d9d217e245b4 Mon Sep 17 00:00:00 2001 From: jeageun Date: Thu, 4 Apr 2024 21:50:04 +0000 Subject: [PATCH 07/28] Remove heavy comparison part --- test/import/test_wmd_importer.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index dfbe2702..c13bdafd 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -334,10 +334,11 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { // It means current one have mirror. Mirror is local, but master is not. ASSERT_TRUE(graph.isLocal(mirrorTopology)); ASSERT_TRUE(!graph.isLocal(masterTopology)); - bool found = false; - // Mirror must exist in mirror range. + // bool found = false; + // Mirror must exist in mirror range. auto it = graph.getMirrorRange(); ASSERT_TRUE(*it.begin() <= mirrorTopology && mirrorTopology < *it.end()); + /* auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); for (auto elem : mirror_master_array) { if ((lift(elem, getMirror) == mirrorTopology) && @@ -347,6 +348,7 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { } } ASSERT_TRUE(found); + */ } else { // If I don't have mirror, that could be because it is in local, or never be a destination // from me. @@ -355,6 +357,8 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { auto it = graph.getMasterRange(); ASSERT_TRUE(*it.begin() <= masterTopology && masterTopology < *it.end()); // In mirror to master, this should never exist + // TODO(Jeageun): This part will be done by output file scan... + /* auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); for (auto elem : mirror_master_array) { if ((lift(elem, getMirror) == mirrorTopology) || @@ -362,6 +366,7 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { ASSERT_TRUE(false); } } + */ } } From ebb23d575772c465f8b00a64d59c2ee3383d9319 Mon Sep 17 00:00:00 2001 From: jeageun Date: Thu, 4 Apr 2024 23:58:21 +0000 Subject: [PATCH 08/28] Split tabledata validation and relation validation --- cmake/PANDOTesting.cmake | 33 +++++++++ scripts/mirror_master_validate.py | 27 +++++++ test/CMakeLists.txt | 2 + test/import/test_wmd_importer.cpp | 26 +------ test/test_mirror_master_table.cpp | 118 ++++++++++++++++++++++++++++++ 5 files changed, 181 insertions(+), 25 deletions(-) create mode 100644 scripts/mirror_master_validate.py create mode 100644 test/test_mirror_master_table.cpp diff --git a/cmake/PANDOTesting.cmake b/cmake/PANDOTesting.cmake index 48ca8497..0f8b78c0 100644 --- a/cmake/PANDOTesting.cmake +++ b/cmake/PANDOTesting.cmake @@ -148,3 +148,36 @@ function(pando_add_bin_test TARGET ARGS INPUTFILE OKFILE) endif() endfunction() + +function(pando_add_bin_python_test TARGET ARGS INPUTFILE) + if (NOT PANDO_RT_BACKEND STREQUAL "DRVX") + if (${GASNet_CONDUIT} STREQUAL "smp") + set(DRIVER_SCRIPT ${PROJECT_SOURCE_DIR}/pando-rt/scripts/preprun.sh) + elseif (${GASNet_CONDUIT} STREQUAL "mpi") + set(DRIVER_SCRIPT ${PROJECT_SOURCE_DIR}/pando-rt/scripts/preprun_mpi.sh) + else () + message(FATAL_ERROR "No runner script for GASNet conduit ${GASNet_CONDUIT}") + endif () + + set(NUM_PXNS 2) + set(NUM_CORES 4) + + add_test(NAME ${TARGET}-${INPUTFILE}-pythonvalidate + COMMAND bash -c "${DRIVER_SCRIPT} -n ${NUM_PXNS} -c ${NUM_CORES} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET} ${ARGS} ${INPUTFILE} | python3 ${PROJECT_SOURCE_DIR}/scripts/mirror_master_validate.py") + + else() + + set(DRIVER_SCRIPT ${PROJECT_SOURCE_DIR}/scripts/run-drv.sh) + + set(NUM_PXNS 2) + set(NUM_CORES 4) + set(NUM_HTHREADS 8) + + get_filename_component(FNAME ${TARGET} NAME) + + add_test(NAME ${TARGET}-${INPUTFILE}-pythonvalidate + COMMAND bash -c "LAUNCH_DIR=${CMAKE_SOURCE_DIR} ${DRIVER_SCRIPT} -p ${NUM_HTHREADS} -n ${NUM_PXNS} -c ${NUM_CORES} \ + ${CMAKE_CURRENT_BINARY_DIR}/lib${FNAME}.so ${ARGS} ${INPUTFILE} | python3 ${PROJECT_SOURCE_DIR}/scripts/mirror_master_validate.py") + + endif() +endfunction() diff --git a/scripts/mirror_master_validate.py b/scripts/mirror_master_validate.py new file mode 100644 index 00000000..8e8b71a3 --- /dev/null +++ b/scripts/mirror_master_validate.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +import sys + +def tablevalidation(): + dic = {} + for line in sys.stdin: + parts = line.strip().split(', ') + if len(parts) < 3: + continue + + operation, key, value = parts + + if operation == "SET": + dic[key] = value + elif operation == "FALSE": + if key in dic and dic[key] == value: + sys.exit(1) + elif operation == "TRUE": + if key not in dic or dic[key] != value: + sys.exit(1) + return "PASS" + +# Call the function to process input from stdin +result = tablevalidation() +print(result) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 41e5f080..398f57c1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -28,11 +28,13 @@ add_subdirectory(utility) pando_add_exec(import_csrlist import_csrlist.cpp) pando_add_exec(import_dirOptCsrList import_dirOptCsrList.cpp) pando_add_exec(import_ifstream import_ifstream.cpp) +pando_add_exec(mirror_master_table test_mirror_master_table.cpp) pando_add_bin_test(import_dirOptCsrList "-n 10 -f" ${pando-lib-galois_SOURCE_DIR}/graphs/repeats.el ${pando-lib-galois_SOURCE_DIR}/ok/repeats.el-import-dirOptCsrList-10.ok) if (NOT PANDO_RT_BACKEND STREQUAL "DRVX") # for speed reasons +pando_add_bin_python_test(mirror_master_table "-n 1024 -f" ${pando-lib-galois_SOURCE_DIR}/graphs/rmat_571919_seed1_scale10_nV1024_nE10447.el) pando_add_bin_test(import_dirOptCsrList "-n 1024 -f" ${pando-lib-galois_SOURCE_DIR}/graphs/rmat_571919_seed1_scale10_nV1024_nE10447.el ${pando-lib-galois_SOURCE_DIR}/ok/rmat_571919_seed1_scale10_nV1024_nE10447.el-import-dirOptCsrList-1024.ok) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index c13bdafd..9a5d80e2 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -334,21 +334,9 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { // It means current one have mirror. Mirror is local, but master is not. ASSERT_TRUE(graph.isLocal(mirrorTopology)); ASSERT_TRUE(!graph.isLocal(masterTopology)); - // bool found = false; - // Mirror must exist in mirror range. + // Mirror must exist in mirror range. auto it = graph.getMirrorRange(); ASSERT_TRUE(*it.begin() <= mirrorTopology && mirrorTopology < *it.end()); - /* - auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); - for (auto elem : mirror_master_array) { - if ((lift(elem, getMirror) == mirrorTopology) && - (lift(elem, getMaster) == masterTopology)) { - found = true; - break; - } - } - ASSERT_TRUE(found); - */ } else { // If I don't have mirror, that could be because it is in local, or never be a destination // from me. @@ -357,16 +345,6 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { auto it = graph.getMasterRange(); ASSERT_TRUE(*it.begin() <= masterTopology && masterTopology < *it.end()); // In mirror to master, this should never exist - // TODO(Jeageun): This part will be done by output file scan... - /* - auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); - for (auto elem : mirror_master_array) { - if ((lift(elem, getMirror) == mirrorTopology) || - (lift(elem, getMaster) == masterTopology)) { - ASSERT_TRUE(false); - } - } - */ } } @@ -382,8 +360,6 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { } } graph.deinitialize(); - - // wg.deinitialize(); } INSTANTIATE_TEST_SUITE_P( diff --git a/test/test_mirror_master_table.cpp b/test/test_mirror_master_table.cpp new file mode 100644 index 00000000..c3f51ba6 --- /dev/null +++ b/test/test_mirror_master_table.cpp @@ -0,0 +1,118 @@ + +// SPDX-License-Identifier: MIT +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void printUsageExit(char* argv0) { + std::cerr << "Usage: " << argv0 << " -n numVertices -f filepath" << std::endl; + std::exit(EXIT_FAILURE); +} + +template +using GV = pando::GlobalPtr>; + +template +using V = pando::Vector; + +template +using G = pando::GlobalPtr; +void runTest(const char* elFile, std::uint64_t numVertices); + +int pandoMain(int argc, char** argv) { + std::uint64_t numVertices = 0; + char* filepath = nullptr; + optind = 0; + int opt; + + while ((opt = getopt(argc, argv, "n:f:")) != -1) { + switch (opt) { + case 'n': + numVertices = strtoull(optarg, nullptr, 10); + break; + case 'f': + filepath = optarg; + break; + default: + printUsageExit(argv[0]); + } + } + if (numVertices == 0) { + printUsageExit(argv[0]); + } + if (filepath == nullptr) { + printUsageExit(argv[0]); + } + runTest(filepath, numVertices); + return 0; +} + +void runTest(const char* elFile, std::uint64_t numVertices) { + using ET = galois::ELEdge; + using VT = galois::ELVertex; + using Graph = galois::MirrorDistLocalCSR; + galois::HostLocalStorageHeap::HeapInit(); + pando::Array filename; + std::size_t length = strlen(elFile); + PANDO_CHECK(filename.initialize(length + 1)); + + for (std::size_t i = 0; i < length; i++) { + filename[i] = elFile[i]; + } + filename[length] = '\0'; // Ensure the string is null-terminated + + Graph graph = + galois::initializeELDLCSR(filename, numVertices); + + if (pando::getCurrentPlace().node.id == 0) { + // Iterate over vertices + std::uint64_t vid = 0; + auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); + for (auto elem : mirror_master_array) { + std::cout << "SET, " << lift(elem, getMirror).address << ", " << lift(elem, getMaster).address + << std::endl; + } + + for (typename Graph::VertexTopologyID vert : graph.vertices()) { + vid++; + for (typename Graph::EdgeHandle eh : graph.edges(vert)) { + typename Graph::VertexTokenID dstTok = graph.getTokenID(graph.getEdgeDst(eh)); + + auto mirrorTopology = graph.getTopologyID(dstTok); + auto masterTopology = graph.getGlobalTopologyID(dstTok); + if (mirrorTopology != masterTopology) { + // If global, and local have different value. + // It means current one have mirror. Mirror is local, but master is not. + std::cout << "TRUE, " << mirrorTopology.address << ", " << masterTopology.address + << std::endl; + } else { + // If I don't have mirror, that could be because it is in local, or never be a destination + // from me. + if (graph.isLocal(masterTopology)) { + // If it is from me, it is in my master range. + std::cout << "FALSE, " << mirrorTopology.address << ", " << masterTopology.address + << std::endl; + } + } + } + } + } + pando::waitAll(); + graph.deinitialize(); +} From aa8b24a961b28da7f37447030a618aad859234c0 Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 00:08:10 +0000 Subject: [PATCH 09/28] Remove the reason for timeout. After discussion with Ying-wei, verify that he don't use it --- test/sync/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sync/CMakeLists.txt b/test/sync/CMakeLists.txt index f2650522..e0121016 100644 --- a/test/sync/CMakeLists.txt +++ b/test/sync/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2023. University of Texas at Austin. All rights reserved. -pando_add_driver_test(test_simple_lock test_simple_lock.cpp) +# pando_add_driver_test(test_simple_lock test_simple_lock.cpp) pando_add_driver_test(test_global_barrier test_global_barrier.cpp) pando_add_driver_test(test_wait_group test_wait_group.cpp) pando_add_driver_test(test_atomic test_atomic.cpp) From 7be744cd28dbc22b50a1d3a2a7604034b3a0f734 Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 00:20:49 +0000 Subject: [PATCH 10/28] Revert changes --- test/CMakeLists.txt | 3 +++ test/sync/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 398f57c1..9db3f07d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,6 +32,9 @@ pando_add_exec(mirror_master_table test_mirror_master_table.cpp) pando_add_bin_test(import_dirOptCsrList "-n 10 -f" ${pando-lib-galois_SOURCE_DIR}/graphs/repeats.el ${pando-lib-galois_SOURCE_DIR}/ok/repeats.el-import-dirOptCsrList-10.ok) +pando_add_bin_python_test(mirror_master_table "-n 10 -f" ${pando-lib-galois_SOURCE_DIR}/graphs/repeats.el) +pando_add_bin_python_test(mirror_master_table "-n 10 -f" ${pando-lib-galois_SOURCE_DIR}/graphs/simple.el) + if (NOT PANDO_RT_BACKEND STREQUAL "DRVX") # for speed reasons pando_add_bin_python_test(mirror_master_table "-n 1024 -f" ${pando-lib-galois_SOURCE_DIR}/graphs/rmat_571919_seed1_scale10_nV1024_nE10447.el) diff --git a/test/sync/CMakeLists.txt b/test/sync/CMakeLists.txt index e0121016..f2650522 100644 --- a/test/sync/CMakeLists.txt +++ b/test/sync/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright (c) 2023. University of Texas at Austin. All rights reserved. -# pando_add_driver_test(test_simple_lock test_simple_lock.cpp) +pando_add_driver_test(test_simple_lock test_simple_lock.cpp) pando_add_driver_test(test_global_barrier test_global_barrier.cpp) pando_add_driver_test(test_wait_group test_wait_group.cpp) pando_add_driver_test(test_atomic test_atomic.cpp) From 03ce181443ed98100b94108451d535aa47dbc85c Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 15:52:24 +0000 Subject: [PATCH 11/28] getLocal to getLocalRef --- include/pando-lib-galois/graphs/dist_local_csr.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/pando-lib-galois/graphs/dist_local_csr.hpp b/include/pando-lib-galois/graphs/dist_local_csr.hpp index 7e32438c..79e4a789 100644 --- a/include/pando-lib-galois/graphs/dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/dist_local_csr.hpp @@ -490,7 +490,7 @@ class DistLocalCSR { /** Host Information **/ std::uint64_t getPhysicalHostID(VertexTokenID tid) { std::uint64_t virtualHostID = tid % this->numVHosts(); - std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocal(), get, virtualHostID); + std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocalRef(), get, virtualHostID); return physicalHost; } @@ -831,7 +831,7 @@ class DistLocalCSR { +[](galois::HostIndexedMap>> partEdges, pando::GlobalRef> pHV) { PANDO_CHECK(fmap(pHV, initialize, 0)); - pando::Vector> localEdges = partEdges.getLocal(); + pando::Vector> localEdges = partEdges.getLocalRef(); for (pando::Vector e : localEdges) { EdgeType e0 = e[0]; VertexType v0 = VertexType(e0.src, e0.srcType); @@ -956,7 +956,7 @@ class DistLocalCSR { galois::PerThreadVector edgeCounts) { numVertices = vertices.sizeAll(); numEdges = edges.sizeAll(); - pando::Array oldV2PM = oldGraph.virtualToPhysicalMap.getLocal(); + pando::Array oldV2PM = oldGraph.virtualToPhysicalMap.getLocalRef(); pando::Array v2PM; PANDO_CHECK_RETURN(v2PM.initialize(oldV2PM.size())); for (uint64_t i = 0; i < oldV2PM.size(); i++) { @@ -1003,7 +1003,7 @@ class DistLocalCSR { } currentCSR.vertexEdgeOffsets[currLocalVertex] = Vertex{¤tCSR.edgeDestinations[currLocalEdge]}; - state.arrayOfCSRs.getLocal() = currentCSR; + state.arrayOfCSRs.getLocalRef() = currentCSR; }); arrayOfCSRs = state.arrayOfCSRs; @@ -1030,7 +1030,7 @@ class DistLocalCSR { data); currLocalEdge++; } - state.dlcsr.arrayOfCSRs.getLocal() = currentCSR; + state.dlcsr.arrayOfCSRs.getLocalRef() = currentCSR; }); *this = state2.dlcsr; From ddb2aa3f54c49da4c664373e5c02ce6f07227f89 Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 16:15:14 +0000 Subject: [PATCH 12/28] Add waitall --- test/import/test_wmd_importer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index 9a5d80e2..203ec781 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -359,6 +359,7 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { EXPECT_EQ(eData.dst, numVertices); } } + pando::waitAll(); graph.deinitialize(); } From cc26df40928cfcd3df8ffe7d45423054a16547d3 Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 17:01:50 +0000 Subject: [PATCH 13/28] Revert "Add waitall" This reverts commit ddb2aa3f54c49da4c664373e5c02ce6f07227f89. --- test/import/test_wmd_importer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index 203ec781..9a5d80e2 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -359,7 +359,6 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { EXPECT_EQ(eData.dst, numVertices); } } - pando::waitAll(); graph.deinitialize(); } From 480ac63dcd4f1f7db1079b73f0109cf72795db01 Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 17:02:00 +0000 Subject: [PATCH 14/28] Revert "getLocal to getLocalRef" This reverts commit 03ce181443ed98100b94108451d535aa47dbc85c. --- include/pando-lib-galois/graphs/dist_local_csr.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/pando-lib-galois/graphs/dist_local_csr.hpp b/include/pando-lib-galois/graphs/dist_local_csr.hpp index 79e4a789..7e32438c 100644 --- a/include/pando-lib-galois/graphs/dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/dist_local_csr.hpp @@ -490,7 +490,7 @@ class DistLocalCSR { /** Host Information **/ std::uint64_t getPhysicalHostID(VertexTokenID tid) { std::uint64_t virtualHostID = tid % this->numVHosts(); - std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocalRef(), get, virtualHostID); + std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocal(), get, virtualHostID); return physicalHost; } @@ -831,7 +831,7 @@ class DistLocalCSR { +[](galois::HostIndexedMap>> partEdges, pando::GlobalRef> pHV) { PANDO_CHECK(fmap(pHV, initialize, 0)); - pando::Vector> localEdges = partEdges.getLocalRef(); + pando::Vector> localEdges = partEdges.getLocal(); for (pando::Vector e : localEdges) { EdgeType e0 = e[0]; VertexType v0 = VertexType(e0.src, e0.srcType); @@ -956,7 +956,7 @@ class DistLocalCSR { galois::PerThreadVector edgeCounts) { numVertices = vertices.sizeAll(); numEdges = edges.sizeAll(); - pando::Array oldV2PM = oldGraph.virtualToPhysicalMap.getLocalRef(); + pando::Array oldV2PM = oldGraph.virtualToPhysicalMap.getLocal(); pando::Array v2PM; PANDO_CHECK_RETURN(v2PM.initialize(oldV2PM.size())); for (uint64_t i = 0; i < oldV2PM.size(); i++) { @@ -1003,7 +1003,7 @@ class DistLocalCSR { } currentCSR.vertexEdgeOffsets[currLocalVertex] = Vertex{¤tCSR.edgeDestinations[currLocalEdge]}; - state.arrayOfCSRs.getLocalRef() = currentCSR; + state.arrayOfCSRs.getLocal() = currentCSR; }); arrayOfCSRs = state.arrayOfCSRs; @@ -1030,7 +1030,7 @@ class DistLocalCSR { data); currLocalEdge++; } - state.dlcsr.arrayOfCSRs.getLocalRef() = currentCSR; + state.dlcsr.arrayOfCSRs.getLocal() = currentCSR; }); *this = state2.dlcsr; From 58ed4e37714b32d0f8c63af705817951f0f900de Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 17:41:20 +0000 Subject: [PATCH 15/28] remove get function --- include/pando-lib-galois/graphs/dist_local_csr.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pando-lib-galois/graphs/dist_local_csr.hpp b/include/pando-lib-galois/graphs/dist_local_csr.hpp index 7e32438c..a52ee4de 100644 --- a/include/pando-lib-galois/graphs/dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/dist_local_csr.hpp @@ -490,7 +490,7 @@ class DistLocalCSR { /** Host Information **/ std::uint64_t getPhysicalHostID(VertexTokenID tid) { std::uint64_t virtualHostID = tid % this->numVHosts(); - std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocal(), get, virtualHostID); + std::uint64_t physicalHost = virtualToPhysicalMap.getLocal()[virtualHostID]; return physicalHost; } From 0b3fc6b0a12ce07d79672949fde536ef698be2b3 Mon Sep 17 00:00:00 2001 From: jeageun Date: Fri, 5 Apr 2024 21:49:09 +0000 Subject: [PATCH 16/28] Force to have 3 things to load in validation on python --- scripts/mirror_master_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mirror_master_validate.py b/scripts/mirror_master_validate.py index 8e8b71a3..7324a6df 100644 --- a/scripts/mirror_master_validate.py +++ b/scripts/mirror_master_validate.py @@ -7,7 +7,7 @@ def tablevalidation(): dic = {} for line in sys.stdin: parts = line.strip().split(', ') - if len(parts) < 3: + if len(parts) != 3: continue operation, key, value = parts From d6cfca724fe6ec7f70f176c69f938143eac0fe3a Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Thu, 4 Apr 2024 15:18:29 -0700 Subject: [PATCH 17/28] Thread Local Vectors and Host Cached Arrays (#16) * Stash things * Added HostLocalArray * Almost there * Assign almost done * It compiles * Finally all testing ready * These tests were using fmap wrong * Fixed further test errors * Fixed a few more test errors --- .../containers/host_cached_array.hpp | 341 +++++++++++++++ .../containers/per_thread.hpp | 2 +- .../containers/thread_local_storage.hpp | 24 +- .../containers/thread_local_vector.hpp | 407 ++++++++++++++++++ .../import/wmd_graph_importer.hpp | 4 +- .../pando-lib-galois/utility/gptr_monad.hpp | 51 ++- .../pando-lib-galois/utility/prefix_sum.hpp | 27 +- test/containers/CMakeLists.txt | 2 + test/containers/test_host_cached_array.cpp | 277 ++++++++++++ test/containers/test_per_thread.cpp | 52 ++- test/containers/test_thread_local_vector.cpp | 402 +++++++++++++++++ test/utility/test_prefix_sum.cpp | 6 +- 12 files changed, 1518 insertions(+), 77 deletions(-) create mode 100644 include/pando-lib-galois/containers/host_cached_array.hpp create mode 100644 include/pando-lib-galois/containers/thread_local_vector.hpp create mode 100644 test/containers/test_host_cached_array.cpp create mode 100644 test/containers/test_thread_local_vector.cpp diff --git a/include/pando-lib-galois/containers/host_cached_array.hpp b/include/pando-lib-galois/containers/host_cached_array.hpp new file mode 100644 index 00000000..894cf66a --- /dev/null +++ b/include/pando-lib-galois/containers/host_cached_array.hpp @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#ifndef PANDO_LIB_GALOIS_CONTAINERS_HOST_CACHED_ARRAY_HPP_ +#define PANDO_LIB_GALOIS_CONTAINERS_HOST_CACHED_ARRAY_HPP_ + +#include +#include +#include +#include + +#include "pando-rt/export.h" +#include +#include +#include +#include +#include +#include +#include + +namespace galois { + +template +class HostCachedArrayIterator; + +/** + * @brief This is an array like container that has an array on each host */ +template +class HostCachedArray { +public: + HostCachedArray() noexcept = default; + + HostCachedArray(const HostCachedArray&) = default; + HostCachedArray(HostCachedArray&&) = default; + + ~HostCachedArray() = default; + + HostCachedArray& operator=(const HostCachedArray&) = default; + HostCachedArray& operator=(HostCachedArray&&) = default; + + using iterator = HostCachedArrayIterator; + using reverse_iterator = std::reverse_iterator; + + /** + * @brief Takes in iterators with semantics like memoryType and a size to initialize the sizes of + * the objects + * + * @tparam It the iterator type + * @param[in] beg The beginning of the iterator to memoryType like objects + * @param[in] end The end of the iterator to memoryType like objects + * @param[in] size The size of the data to encapsulate in this abstraction + */ + template + [[nodiscard]] pando::Status initialize(Range range) { + assert(range.size() == m_data.size()); + size_ = 0; + PANDO_CHECK_RETURN(m_data.initialize()); + PANDO_CHECK_RETURN(galois::doAll( + range, m_data, + +[](Range range, pando::GlobalRef>> data) { + PANDO_CHECK(lift(data, initialize)); + auto ref = lift(data, getLocalRef); + PANDO_CHECK(fmap( + ref, initialize, + *(range.begin() + static_cast(pando::getCurrentPlace().node.id)))); + })); + PANDO_CHECK_RETURN(galois::doAll( + m_data, m_data, + +[](decltype(m_data) complete, galois::HostIndexedMap> data) { + for (std::uint64_t i = 0; i < data.size(); i++) { + data[i] = fmap(complete[i], operator[], i); + } + })); + for (std::uint64_t i = 0; i < m_data.size(); i++) { + auto ref = fmap(m_data[i], operator[], i); + size_ += lift(ref, size); + } + return pando::Status::Success; + } + + void deinitialize() { + PANDO_CHECK(galois::doAll( + m_data, +[](galois::HostIndexedMap> data) { + const std::uint64_t i = static_cast(pando::getCurrentPlace().node.id); + auto ref = data[i]; + liftVoid(ref, deinitialize); + liftVoid(data, deinitialize); + })); + m_data.deinitialize(); + } + + /** + * @brief Returns a pointer to the given index within a specific host + * @warning this is unsafe + */ + pando::GlobalPtr getSpecific(std::uint64_t host, std::uint64_t localIdx) noexcept { + HostIndexedMap> cache = m_data.getLocalRef(); + return &fmap(cache[host], get, localIdx); + } + + /** + * @brief Returns a pointer to the given index within a specific host + * @warning this is unsafe + */ + pando::GlobalRef getSpecificRef(std::uint64_t host, std::uint64_t localIdx) noexcept { + return *this->getSpecific(host, localIdx); + } + + /** + * @brief Returns a pointer to the given index + */ + pando::GlobalPtr get(std::uint64_t i) const noexcept { + HostIndexedMap> cache = m_data.getLocalRef(); + auto curr = cache.begin(); + for (; curr != cache.end(); curr++) { + auto size = lift(*curr, size); + if (i < size) { + break; + } + i -= size; + } + if (curr == cache.end()) + return nullptr; + return &fmap(*curr, get, i); + } + + /** + * @brief Returns a pointer to the given index + */ + pando::GlobalPtr get(std::uint64_t i) noexcept { + HostIndexedMap> cache = m_data.getLocalRef(); + auto curr = cache.begin(); + for (; curr != cache.end(); curr++) { + auto size = lift(*curr, size); + if (i < size) { + break; + } + i -= size; + } + if (curr == cache.end()) + return nullptr; + pando::GlobalRef> arr = *curr; + return &fmap(*curr, get, i); + } + + constexpr pando::GlobalRef operator[](std::uint64_t pos) noexcept { + return *this->get(pos); + } + + constexpr pando::GlobalRef operator[](std::uint64_t pos) const noexcept { + return *this->get(pos); + } + + constexpr bool empty() const noexcept { + return this->size() == 0; + } + + constexpr std::uint64_t size() noexcept { + return size_; + } + + constexpr std::uint64_t size() const noexcept { + return size_; + } + + constexpr std::uint64_t capacity() noexcept { + return size(); + } + + iterator begin() noexcept { + return iterator(*this, 0); + } + + iterator begin() const noexcept { + return iterator(*this, 0); + } + + iterator end() noexcept { + return iterator(*this, size_); + } + + iterator end() const noexcept { + return iterator(*this, size_); + } + + /** + * @brief reverse iterator to the first element + */ + reverse_iterator rbegin() noexcept { + return reverse_iterator(end()--); + } + + /** + * @copydoc rbegin() + */ + reverse_iterator rbegin() const noexcept { + return reverse_iterator(end()--); + } + + /** + * @brief reverse iterator to the last element + */ + reverse_iterator rend() noexcept { + return reverse_iterator(begin()--); + } + + /** + * @copydoc rend() + */ + reverse_iterator rend() const noexcept { + return reverse_iterator(begin()--); + } + + friend bool operator==(const HostCachedArray& a, const HostCachedArray& b) { + return a.size() == b.size() && a.m_data.getLocal() == b.m_data.getLocal(); + } + +private: + /// @brief The data structure storing the data this stores a cache once constructed + galois::HostLocalStorage>> m_data; + /// @brief Stores the amount of data in the array, may be less than allocated + uint64_t size_ = 0; +}; + +/** + * @brief an iterator that stores the DistArray and the current position to provide random access + * iterator semantics + */ +template +class HostCachedArrayIterator { + HostCachedArray m_arr; + std::uint64_t m_pos; + +public: + using iterator_category = std::random_access_iterator_tag; + using difference_type = std::int64_t; + using value_type = T; + using pointer = pando::GlobalPtr; + using reference = pando::GlobalRef; + + HostCachedArrayIterator(HostCachedArray arr, std::uint64_t pos) : m_arr(arr), m_pos(pos) {} + + constexpr HostCachedArrayIterator() noexcept = default; + constexpr HostCachedArrayIterator(HostCachedArrayIterator&&) noexcept = default; + constexpr HostCachedArrayIterator(const HostCachedArrayIterator&) noexcept = default; + ~HostCachedArrayIterator() = default; + + constexpr HostCachedArrayIterator& operator=(const HostCachedArrayIterator&) noexcept = default; + constexpr HostCachedArrayIterator& operator=(HostCachedArrayIterator&&) noexcept = default; + + reference operator*() const noexcept { + return m_arr[m_pos]; + } + + reference operator*() noexcept { + return m_arr[m_pos]; + } + + pointer operator->() { + return m_arr.get(m_pos); + } + + HostCachedArrayIterator& operator++() { + m_pos++; + return *this; + } + + HostCachedArrayIterator operator++(int) { + HostCachedArrayIterator tmp = *this; + ++(*this); + return tmp; + } + + HostCachedArrayIterator& operator--() { + m_pos--; + return *this; + } + + HostCachedArrayIterator operator--(int) { + HostCachedArrayIterator tmp = *this; + --(*this); + return tmp; + } + + constexpr HostCachedArrayIterator operator+(std::uint64_t n) const noexcept { + return HostCachedArrayIterator(m_arr, m_pos + n); + } + + constexpr HostCachedArrayIterator& operator+=(std::uint64_t n) noexcept { + m_pos += n; + return *this; + } + + constexpr HostCachedArrayIterator operator-(std::uint64_t n) const noexcept { + return HostCachedArrayIterator(m_arr, m_pos - n); + } + + constexpr difference_type operator-(HostCachedArrayIterator b) const noexcept { + return m_pos - b.m_pos; + } + + reference operator[](std::uint64_t n) noexcept { + return m_arr[m_pos + n]; + } + + reference operator[](std::uint64_t n) const noexcept { + return m_arr[m_pos + n]; + } + + friend bool operator==(const HostCachedArrayIterator& a, const HostCachedArrayIterator& b) { + return a.m_pos == b.m_pos && a.m_arr == b.m_arr; + } + + friend bool operator!=(const HostCachedArrayIterator& a, const HostCachedArrayIterator& b) { + return !(a == b); + } + + friend bool operator<(const HostCachedArrayIterator& a, const HostCachedArrayIterator& b) { + return a.m_pos < b.m_pos; + } + + friend bool operator<=(const HostCachedArrayIterator& a, const HostCachedArrayIterator& b) { + return a.m_pos <= b.m_pos; + } + + friend bool operator>(const HostCachedArrayIterator& a, const HostCachedArrayIterator& b) { + return a.m_pos > b.m_pos; + } + + friend bool operator>=(const HostCachedArrayIterator& a, const HostCachedArrayIterator& b) { + return a.m_pos >= b.m_pos; + } + + friend pando::Place localityOf(HostCachedArrayIterator& a) { + pando::GlobalPtr ptr = &a.m_arr[a.m_pos]; + return pando::localityOf(ptr); + } +}; +} // namespace galois + +#endif // PANDO_LIB_GALOIS_CONTAINERS_HOST_CACHED_ARRAY_HPP_ diff --git a/include/pando-lib-galois/containers/per_thread.hpp b/include/pando-lib-galois/containers/per_thread.hpp index 0061a7e4..804dc510 100644 --- a/include/pando-lib-galois/containers/per_thread.hpp +++ b/include/pando-lib-galois/containers/per_thread.hpp @@ -389,7 +389,7 @@ class PerThreadVector { galois::PrefixSum prefixSum(m_data, m_indices); - PANDO_CHECK_RETURN(prefixSum.initialize()); + PANDO_CHECK_RETURN(prefixSum.initialize(pando::getPlaceDims().node.id)); prefixSum.computePrefixSum(m_indices.size()); indices_computed = true; diff --git a/include/pando-lib-galois/containers/thread_local_storage.hpp b/include/pando-lib-galois/containers/thread_local_storage.hpp index bf0dd1a3..f8c8957d 100644 --- a/include/pando-lib-galois/containers/thread_local_storage.hpp +++ b/include/pando-lib-galois/containers/thread_local_storage.hpp @@ -37,13 +37,29 @@ class ThreadLocalStorage { using iterator = ThreadLocalStorageIt; using reverse_iterator = std::reverse_iterator; + [[nodiscard]] static constexpr std::uint64_t getThreadsPerCore() noexcept { + std::uint64_t threads = static_cast(pando::getThreadDims().id); + return threads; + } + + [[nodiscard]] static constexpr std::uint64_t getThreadsPerPod() noexcept { + const auto place = pando::getPlaceDims(); + const std::uint64_t cores = + static_cast(place.core.x) * static_cast(place.core.y); + return cores * getThreadsPerCore(); + } + + [[nodiscard]] static constexpr std::uint64_t getThreadsPerHost() noexcept { + const auto place = pando::getPlaceDims(); + const std::uint64_t pods = + static_cast(place.pod.x) * static_cast(place.pod.y); + return pods * getThreadsPerPod(); + } + [[nodiscard]] static constexpr std::uint64_t getNumThreads() noexcept { const auto place = pando::getPlaceDims(); std::uint64_t nodes = static_cast(place.node.id); - std::uint64_t pods = static_cast(place.pod.x * place.pod.y); - std::uint64_t cores = static_cast(place.core.x * place.core.y); - std::uint64_t threads = static_cast(pando::getThreadDims().id); - return nodes * pods * cores * threads; + return nodes * getThreadsPerHost(); } [[nodiscard]] constexpr std::uint64_t getCurrentThreadIdx() const noexcept { diff --git a/include/pando-lib-galois/containers/thread_local_vector.hpp b/include/pando-lib-galois/containers/thread_local_vector.hpp new file mode 100644 index 00000000..0893d14c --- /dev/null +++ b/include/pando-lib-galois/containers/thread_local_vector.hpp @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. +#ifndef PANDO_LIB_GALOIS_CONTAINERS_THREAD_LOCAL_VECTOR_HPP_ +#define PANDO_LIB_GALOIS_CONTAINERS_THREAD_LOCAL_VECTOR_HPP_ +#include +#include +#include + +#include "pando-rt/export.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace galois { + +template +class ThreadLocalVector { +public: + ThreadLocalVector() noexcept = default; + ThreadLocalVector(const ThreadLocalVector&) = default; + ThreadLocalVector(ThreadLocalVector&&) = default; + + ~ThreadLocalVector() = default; + + ThreadLocalVector& operator=(const ThreadLocalVector&) = default; + ThreadLocalVector& operator=(ThreadLocalVector&&) = default; + + using iterator = ThreadLocalStorageIt>; + using reverse_iterator = std::reverse_iterator; + + [[nodiscard]] pando::Status initialize() { + pando::Vector vec; + PANDO_CHECK_RETURN(vec.initialize(0)); + this->m_data = PANDO_EXPECT_RETURN(galois::copyToAllThreads(vec)); + return pando::Status::Success; + } + + void deinitialize() { + if (indicesInitialized) { + m_indices.deinitialize(); + } + for (pando::Vector vec : m_data) { + vec.deinitialize(); + } + m_data.deinitialize(); + } + + pando::GlobalPtr> getLocal() { + return m_data.getLocal(); + } + + pando::GlobalPtr> getLocal() const noexcept { + return m_data.getLocal(); + } + + pando::GlobalRef> getLocalRef() { + return *m_data.getLocal(); + } + + pando::GlobalRef> getLocalRef() const noexcept { + return *m_data.getLocal(); + } + + pando::GlobalPtr> get(std::uint64_t i) noexcept { + return m_data.get(i); + } + + pando::GlobalPtr> get(std::uint64_t i) const noexcept { + return m_data.get(i); + } + + constexpr pando::GlobalRef> operator[](std::uint64_t pos) noexcept { + return *get(pos); + } + + constexpr pando::GlobalRef> operator[](std::uint64_t pos) const noexcept { + return *get(pos); + } + + /** + * @brief Appends to the current hardware thread's vector. + */ + [[nodiscard]] pando::Status pushBack(T val) { + return fmap(this->getLocalRef(), pushBack, val); + } + + /** + * @brief Returns the total number of elements in the PerThreadVector + */ + std::uint64_t sizeAll() const { + if (indicesComputed) { + return *m_indices.rbegin(); + } + std::uint64_t size = 0; + for (std::uint64_t i = 0; i < m_data.size(); i++) { + pando::Vector vec = m_data[i]; + size += vec.size(); + } + return size; + } + + /** + * @brief Returns the total number of per thread vectors + */ + size_t size() const { + return m_data.size(); + } + + void clear() { + indicesComputed = false; + for (std::uint64_t i = 0; i < m_data.size(); i++) { + liftVoid(m_data[i], clear); + } + } + +private: + static uint64_t transmute(pando::Vector p) { + return p.size(); + } + static uint64_t scan_op(pando::Vector p, uint64_t l) { + return p.size() + l; + } + static uint64_t combiner(uint64_t f, uint64_t s) { + return f + s; + } + +public: + [[nodiscard]] pando::Status computeIndices() { + if (!indicesInitialized) { + PANDO_CHECK_RETURN(m_indices.initialize()); + indicesInitialized = true; + } + + using SRC = galois::ThreadLocalStorage>; + using DST = galois::ThreadLocalStorage; + using SRC_Val = pando::Vector; + using DST_Val = uint64_t; + + galois::PrefixSum + prefixSum(m_data, m_indices); + PANDO_CHECK_RETURN(prefixSum.initialize(pando::getPlaceDims().node.id)); + + prefixSum.computePrefixSum(m_indices.size()); + indicesComputed = true; + + prefixSum.deinitialize(); + return pando::Status::Success; + } + + /** + * @brief Returns the global index that elements for host start + * + * @param host passing in `hosts + 1` is legal + * @param index passed by reference will hold the global index + */ + [[nodiscard]] static pando::Expected hostIndexOffset( + galois::ThreadLocalStorage indices, uint64_t host) noexcept { + if (host == 0) + return static_cast(0); + const auto place = + pando::Place(pando::NodeIndex(host), pando::PodIndex(0, 0), pando::CoreIndex(0, 0)); + const auto idx = indices.getThreadIdxFromPlace(place, pando::ThreadIndex(0)); + return indices[idx - 1]; + } + + [[nodiscard]] pando::Status hostFlattenAppend(galois::HostLocalStorage> flat) { + pando::Status err; + + if (!indicesComputed) { + PANDO_CHECK_RETURN(computeIndices()); + } + + // TODO(AdityaAtulTewari) Make this properly parallel. + // Initialize the per host vectors + for (std::uint64_t i = 0; i < flat.getNumHosts(); i++) { + auto ref = flat[i]; + std::uint64_t start = PANDO_EXPECT_RETURN(hostIndexOffset(m_indices, i)); + std::uint64_t end = PANDO_EXPECT_RETURN(hostIndexOffset(m_indices, i + 1)); + err = fmap(ref, reserve, lift(ref, size) + end - start); + PANDO_CHECK_RETURN(err); + for (std::uint64_t j = 0; j < end - start; j++) { + PANDO_CHECK_RETURN(fmap(ref, pushBack, T())); + } + } + + auto tpl = galois::make_tpl(static_cast(*this), flat); + // Reduce into the per host vectors + auto f = +[](decltype(tpl) assign, std::uint64_t i, uint64_t) { + auto [data, flat] = assign; + std::uint64_t host = i / ThreadLocalStorage::getThreadsPerHost(); + std::uint64_t start = PANDO_EXPECT_CHECK(data.hostIndexOffset(data.m_indices, host)); + std::uint64_t curr = (i == 0) ? 0 : data.m_indices[i - 1]; + std::uint64_t end = PANDO_EXPECT_CHECK(data.hostIndexOffset(data.m_indices, host + 1)); + + auto ref = flat[host]; + pando::Vector localVec = data[i]; + std::uint64_t size = lift(ref, size) - (end - start); + for (T elt : localVec) { + fmap(ref, get, size + curr - start) = elt; + curr++; + } + }; + galois::onEach(tpl, f); + return pando::Status::Success; + } + +private: + class SizeIt { + public: + SizeIt() noexcept = default; + SizeIt(const SizeIt&) = default; + SizeIt(SizeIt&&) = default; + ~SizeIt() = default; + SizeIt& operator=(const SizeIt&) = default; + SizeIt& operator=(SizeIt&&) = default; + SizeIt(ThreadLocalStorage indices, std::uint64_t host) + : m_indices(indices), m_host(host) {} + using output_type = std::int64_t; + using difference_type = std::int64_t; + + output_type operator*() const noexcept { + const std::uint64_t start = PANDO_EXPECT_CHECK(hostIndexOffset(m_indices, m_host)); + const std::uint64_t end = PANDO_EXPECT_CHECK(hostIndexOffset(m_indices, m_host + 1)); + return end - start; + } + + SizeIt& operator++() { + m_host++; + return *this; + } + + SizeIt operator++(int) { + SizeIt tmp = *this; + ++(*this); + return tmp; + } + + SizeIt& operator--() { + m_host--; + return *this; + } + + SizeIt operator--(int) { + SizeIt tmp = *this; + --(*this); + return tmp; + } + + constexpr SizeIt operator+(std::uint64_t n) const noexcept { + return SizeIt(m_indices, m_host + n); + } + + constexpr SizeIt& operator+=(std::uint64_t n) noexcept { + m_host += n; + return *this; + } + + constexpr SizeIt operator-(std::uint64_t n) const noexcept { + return SizeIt(m_indices, m_host - n); + } + + constexpr difference_type operator-(SizeIt b) const noexcept { + return m_host - b.host; + } + + friend bool operator==(const SizeIt& a, const SizeIt& b) { + return a.m_host == b.m_host && a.m_indices == b.m_indices; + } + + friend bool operator!=(const SizeIt& a, const SizeIt& b) { + return !(a == b); + } + + friend bool operator<(const SizeIt& a, const SizeIt& b) { + return a.m_host < b.m_host; + } + + friend bool operator<=(const SizeIt& a, const SizeIt& b) { + return a.m_host <= b.m_host; + } + + friend bool operator>(const SizeIt& a, const SizeIt& b) { + return a.m_host > b.m_host; + } + + friend bool operator>=(const SizeIt& a, const SizeIt& b) { + return a.m_host >= b.m_host; + } + + friend pando::Place localityOf(SizeIt& a) { + return pando::Place{pando::NodeIndex(a.m_host), pando::anyPod, pando::anyCore}; + } + + private: + galois::ThreadLocalStorage m_indices; + std::uint64_t m_host; + }; + + struct SizeRange { + using iterator = SizeIt; + galois::ThreadLocalStorage m_indices; + SizeRange() noexcept = default; + SizeRange(const SizeRange&) = default; + SizeRange(SizeRange&&) = default; + ~SizeRange() = default; + SizeRange& operator=(const SizeRange&) = default; + SizeRange& operator=(SizeRange&&) = default; + explicit SizeRange(ThreadLocalStorage indices) : m_indices(indices) {} + iterator begin() const noexcept { + return iterator(m_indices, 0); + } + + iterator end() const noexcept { + std::uint64_t numHosts = pando::getPlaceDims().node.id; + return iterator(m_indices, numHosts); + } + std::uint64_t size() const noexcept { + return pando::getPlaceDims().node.id; + } + }; + +public: + [[nodiscard]] pando::Expected> hostCachedFlatten() { + if (!indicesComputed) { + PANDO_CHECK_RETURN(computeIndices()); + } + + galois::HostCachedArray hla; + // TODO(AdityaAtulTewari) Make this properly parallel. + // Initialize the per host vectors + PANDO_CHECK_RETURN(hla.initialize(SizeRange(m_indices))); + auto tpl = galois::make_tpl(static_cast(*this), hla); + // Reduce into the per host vectors + auto f = +[](decltype(tpl) assign, std::uint64_t i, uint64_t) { + auto [data, flat] = assign; + std::uint64_t host = i / ThreadLocalStorage::getThreadsPerHost(); + std::uint64_t start = PANDO_EXPECT_CHECK(hostIndexOffset(data.m_indices, host)); + std::uint64_t curr = (i == 0) ? 0 : data.m_indices[i - 1]; + pando::Vector localVec = data[i]; + for (T elt : localVec) { + flat.getSpecificRef(host, curr - start) = elt; + curr++; + } + }; + galois::onEach(tpl, f); + return hla; + } + + iterator begin() noexcept { + return iterator(this->m_data, 0); + } + + iterator begin() const noexcept { + return iterator(this->m_data, 0); + } + + iterator end() noexcept { + return iterator(this->m_data, size()); + } + + iterator end() const noexcept { + return iterator(this->m_data, size()); + } + + /** + * @brief reverse iterator to the first element + */ + reverse_iterator rbegin() noexcept { + return reverse_iterator(end()--); + } + + /** + * @copydoc rbegin() + */ + reverse_iterator rbegin() const noexcept { + return reverse_iterator(end()--); + } + + /** + * @brief reverse iterator to the last element + */ + reverse_iterator rend() noexcept { + return reverse_iterator(begin()--); + } + + /** + * @copydoc rend() + */ + reverse_iterator rend() const noexcept { + return reverse_iterator(begin()--); + } + +private: + galois::ThreadLocalStorage> m_data; + galois::ThreadLocalStorage m_indices; + bool indicesInitialized = false; + bool indicesComputed = false; +}; + +} // namespace galois + +#endif // PANDO_LIB_GALOIS_CONTAINERS_THREAD_LOCAL_VECTOR_HPP_ diff --git a/include/pando-lib-galois/import/wmd_graph_importer.hpp b/include/pando-lib-galois/import/wmd_graph_importer.hpp index 75742717..d8e11453 100644 --- a/include/pando-lib-galois/import/wmd_graph_importer.hpp +++ b/include/pando-lib-galois/import/wmd_graph_importer.hpp @@ -256,7 +256,7 @@ partitionEdgesParallely(HostIndexedMap> partitionedVer galois::internal::scan_op, galois::internal::combiner, galois::Array> prefixSum(arr, prefixArr); - PANDO_CHECK(prefixSum.initialize()); + PANDO_CHECK(prefixSum.initialize(pando::getPlaceDims().core.x * pando::getPlaceDims().core.y)); prefixSum.computePrefixSum(lift(localEdges, size)); } HostIndexedMap>> pHVEdge{}; @@ -422,7 +422,7 @@ template galois::internal::scan_op, galois::internal::combiner, galois::Array> prefixSum(arr, prefixArr); - PANDO_CHECK(prefixSum.initialize()); + PANDO_CHECK(prefixSum.initialize(pando::getPlaceDims().core.x * pando::getPlaceDims().core.y)); prefixSum.computePrefixSum(lift(localVertices, size)); } diff --git a/include/pando-lib-galois/utility/gptr_monad.hpp b/include/pando-lib-galois/utility/gptr_monad.hpp index c384ba1b..5b9baba2 100644 --- a/include/pando-lib-galois/utility/gptr_monad.hpp +++ b/include/pando-lib-galois/utility/gptr_monad.hpp @@ -7,43 +7,50 @@ /** * @brief lifts a function with no arguments to work on references */ -#define lift(ref, func) \ - __extension__({ \ - typename std::pointer_traits::element_type tmp = ref; \ - auto ret = tmp.func(); \ - ref = tmp; \ - ret; \ +#define lift(ref, func) \ + __extension__({ \ + auto refComputed##__LINE__ = (ref); \ + typename std::pointer_traits::element_type tmp = \ + refComputed##__LINE__; \ + auto ret = tmp.func(); \ + refComputed##__LINE__ = tmp; \ + ret; \ }) /** * @brief lifts a function with no arguments to work on a void return type */ -#define liftVoid(ref, func) \ - do { \ - typename std::pointer_traits::element_type tmp = ref; \ - tmp.func(); \ - ref = tmp; \ +#define liftVoid(ref, func) \ + do { \ + auto refComputed##__LINE__ = (ref); \ + typename std::pointer_traits::element_type tmp = \ + refComputed##__LINE__; \ + tmp.func(); \ + refComputed##__LINE__ = tmp; \ } while (0) /** * @brief maps a function over its arguments up to work on references */ -#define fmap(ref, func, ...) \ - __extension__({ \ - typename std::pointer_traits::element_type tmp = ref; \ - auto ret = tmp.func(__VA_ARGS__); \ - ref = tmp; \ - ret; \ +#define fmap(ref, func, ...) \ + __extension__({ \ + auto refComputed##__LINE__ = (ref); \ + typename std::pointer_traits::element_type tmp = \ + refComputed##__LINE__; \ + auto ret = tmp.func(__VA_ARGS__); \ + refComputed##__LINE__ = tmp; \ + ret; \ }) /** * @brief maps a function over it's arguments to work on references and return void */ -#define fmapVoid(ref, func, ...) \ - do { \ - typename std::pointer_traits::element_type tmp = ref; \ - tmp.func(__VA_ARGS__); \ - ref = tmp; \ +#define fmapVoid(ref, func, ...) \ + do { \ + auto refComputed##__LINE__ = (ref); \ + typename std::pointer_traits::element_type tmp = ref; \ + tmp.func(__VA_ARGS__); \ + ref = tmp; \ } while (0) #endif // PANDO_LIB_GALOIS_UTILITY_GPTR_MONAD_HPP_ diff --git a/include/pando-lib-galois/utility/prefix_sum.hpp b/include/pando-lib-galois/utility/prefix_sum.hpp index bedd1136..157e9662 100644 --- a/include/pando-lib-galois/utility/prefix_sum.hpp +++ b/include/pando-lib-galois/utility/prefix_sum.hpp @@ -99,8 +99,7 @@ class PrefixSum { A src; B dst; -public: - uint64_t numThreads; +private: using PArr = Conduit; Conduit paste; using WFLType = galois::WaterFallLock>; @@ -149,7 +148,7 @@ class PrefixSum { (void)ns; if (!wfl_id) { lock.template done<2>(wfl_id); - serial_pfxsum, + serial_pfxsum, after>(paste, paste, 0, 0, ns, lock); } else { lock.template wait<2>(wfl_id - 1); @@ -157,7 +156,8 @@ class PrefixSum { } /** Does the final prefix sums with the last part of the array being handled - * by tid = 0 */ + * by tid = 0 + */ inline void parallel_pfxsum_phase_2(uint64_t src_offset, uint64_t dst_offset, uint64_t ns, B_Val phase1_val, bool pfxsum) { if (pfxsum) { @@ -185,28 +185,19 @@ class PrefixSum { public: PrefixSum() = default; - PrefixSum(A src_, B dst_) : src(src_), dst(dst_), paste(B()), lock() { - uint64_t coreY = pando::getPlaceDims().core.y; - uint64_t cores = pando::getPlaceDims().core.x * coreY; - uint64_t threads = pando::getThreadDims().id; - uint64_t hosts = pando::getPlaceDims().node.id; - numThreads = hosts * cores * threads; - } + PrefixSum(A src_, B dst_) : src(src_), dst(dst_), paste(), lock() {} - [[nodiscard]] pando::Status initialize(std::uint64_t size) { - pando::Status err = lock.initialize(size); + [[nodiscard]] pando::Status initialize(std::uint64_t numWorkers) { + pando::Status err = lock.initialize(numWorkers); if (err != pando::Status::Success) { return err; } - err = paste.initialize(size); + err = paste.initialize(numWorkers); if (err != pando::Status::Success) { return err; } return pando::Status::Success; } - [[nodiscard]] pando::Status initialize() { - return initialize(numThreads); - } void deinitialize() { paste.deinitialize(); @@ -218,7 +209,7 @@ class PrefixSum { * @warning we expect ns to be less than equal to the length of source and destination */ void computePrefixSum(uint64_t ns) { - uint64_t workers = numThreads; + std::uint64_t workers = paste.size(); uint64_t workPerThread = ns / (workers + 1); if (workPerThread <= 10) { workers /= pando::getThreadDims().id; diff --git a/test/containers/CMakeLists.txt b/test/containers/CMakeLists.txt index bb6da64d..a2f6182d 100644 --- a/test/containers/CMakeLists.txt +++ b/test/containers/CMakeLists.txt @@ -8,3 +8,5 @@ pando_add_driver_test(test_stack test_stack.cpp) pando_add_driver_test(test_host_indexed_map test_host_indexed_map.cpp) pando_add_driver_test(test_host_local_storage test_host_local_storage.cpp) pando_add_driver_test(test_thread_local_storage test_thread_local_storage.cpp) +pando_add_driver_test(test_thread_local_vector test_thread_local_vector.cpp) +pando_add_driver_test(test_host_cached_array test_host_cached_array.cpp) diff --git a/test/containers/test_host_cached_array.cpp b/test/containers/test_host_cached_array.cpp new file mode 100644 index 00000000..1b90b357 --- /dev/null +++ b/test/containers/test_host_cached_array.cpp @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#include +#include + +#include + +#include +#include +#include +#include + +TEST(HostCachedArray, Empty) { + galois::HostCachedArray array; + pando::Array sizes; + EXPECT_EQ(sizes.initialize(pando::getPlaceDims().node.id), pando::Status::Success); + for (auto ref : sizes) { + ref = 0; + } + EXPECT_EQ(array.initialize(sizes), pando::Status::Success); + EXPECT_EQ(array.size(), 0); + EXPECT_TRUE(array.empty()); + array.deinitialize(); + sizes.deinitialize(); +} + +TEST(HostCachedArray, ExecuteOn) { + constexpr std::uint64_t goodVal = 0xDEADBEEF; + + constexpr std::uint64_t size = 5; + const std::uint64_t nodes = pando::getPlaceDims().node.id; + + pando::Array sizes; + EXPECT_EQ(sizes.initialize(nodes), pando::Status::Success); + for (auto ref : sizes) { + ref = size; + } + + // create array + galois::HostCachedArray array; + + EXPECT_EQ(array.initialize(sizes), pando::Status::Success); + + for (std::uint64_t i = 0; i < size * nodes; i++) { + array[i] = 0xDEADBEEF; + } + + pando::Status status; + auto func = +[](pando::NotificationHandle done, std::uint64_t goodVal, + galois::HostCachedArray hca) { + for (auto curr : hca) { + EXPECT_EQ(curr, goodVal); + } + done.notify(); + }; + pando::Notification notif; + EXPECT_EQ(notif.init(), pando::Status::Success); + status = pando::executeOn(pando::Place{pando::NodeIndex{0}, pando::anyPod, pando::anyCore}, func, + notif.getHandle(), goodVal, array); + EXPECT_EQ(status, pando::Status::Success); + notif.wait(); + + array.deinitialize(); + sizes.deinitialize(); +} + +TEST(HostCachedArray, Initialize) { + constexpr std::uint64_t size = 10; + + const std::uint64_t nodes = pando::getPlaceDims().node.id; + pando::Array sizes; + EXPECT_EQ(sizes.initialize(nodes), pando::Status::Success); + for (auto ref : sizes) { + ref = size; + } + + galois::HostCachedArray array; + EXPECT_EQ(array.initialize(sizes), pando::Status::Success); + EXPECT_EQ(array.size(), size * nodes); + + for (std::uint64_t i = 0; i < size * nodes; i++) { + std::int16_t nodeIdx = i / size; + EXPECT_EQ(pando::localityOf(&array[i]).node.id, nodeIdx); + array[i] = i; + } + + for (std::uint64_t i = 0; i < size * nodes; i++) { + EXPECT_EQ(array[i], i); + } + + array.deinitialize(); + sizes.deinitialize(); +} + +TEST(HostCachedArray, Swap) { + const std::uint64_t size0 = 10; + const std::uint64_t size1 = 16; + const std::uint64_t nodes = pando::getPlaceDims().node.id; + pando::Array sizes0; + pando::Array sizes1; + + EXPECT_EQ(sizes0.initialize(nodes), pando::Status::Success); + for (auto ref : sizes0) { + ref = size0; + } + + EXPECT_EQ(sizes1.initialize(nodes), pando::Status::Success); + for (auto ref : sizes1) { + ref = size1; + } + + galois::HostCachedArray array0; + EXPECT_EQ(array0.initialize(sizes0), pando::Status::Success); + for (std::uint64_t i = 0; i < size0 * nodes; i++) { + array0[i] = i; + } + + galois::HostCachedArray array1; + EXPECT_EQ(array1.initialize(sizes1), pando::Status::Success); + for (std::uint64_t i = 0; i < size1 * nodes; i++) { + array1[i] = i + (size0 * nodes); + } + + std::swap(array0, array1); + + for (std::uint64_t i = 0; i < size1 * nodes; i++) { + EXPECT_EQ(array0[i], i + (size0 * nodes)); + } + + for (std::uint64_t i = 0; i < size0 * nodes; i++) { + EXPECT_EQ(array1[i], i); + } + + sizes0.deinitialize(); + sizes1.deinitialize(); + array0.deinitialize(); + array1.deinitialize(); +} + +TEST(HostCachedArray, Iterator) { + const std::uint64_t size = 25; + + // create array + galois::HostCachedArray array; + + const std::uint64_t nodes = pando::getPlaceDims().node.id; + pando::Array sizes; + EXPECT_EQ(sizes.initialize(nodes), pando::Status::Success); + for (auto ref : sizes) { + ref = size; + } + + EXPECT_EQ(array.initialize(sizes), pando::Status::Success); + + for (std::uint64_t i = 0; i < size * nodes; i++) { + array[i] = i; + } + for (std::uint64_t i = 0; i < size; i++) { + EXPECT_EQ(array[i], i); + } + + std::uint64_t i = 0; + for (std::uint64_t val : array) { + EXPECT_EQ(val, i); + i++; + } + + array.deinitialize(); + sizes.deinitialize(); +} + +TEST(HostCachedArray, IteratorManual) { + const std::uint64_t size = 25; + + // create array + galois::HostCachedArray array; + + const std::uint64_t nodes = pando::getPlaceDims().node.id; + pando::Array sizes; + EXPECT_EQ(sizes.initialize(nodes), pando::Status::Success); + for (auto ref : sizes) { + ref = size; + } + + EXPECT_EQ(array.initialize(sizes), pando::Status::Success); + + for (std::uint64_t i = 0; i < size * nodes; i++) { + array[i] = i; + } + + for (std::uint64_t i = 0; i < size * nodes; i++) { + EXPECT_EQ(array[i], i); + } + + std::uint64_t i = 0; + for (auto curr = array.begin(); curr != array.end(); curr++) { + EXPECT_EQ(*curr, i); + i++; + } + + array.deinitialize(); + sizes.deinitialize(); +} + +TEST(HostCachedArray, ReverseIterator) { + const std::uint64_t size = 25; + + // create array + galois::HostCachedArray array; + + const std::uint64_t nodes = pando::getPlaceDims().node.id; + pando::Array sizes; + EXPECT_EQ(sizes.initialize(nodes), pando::Status::Success); + for (auto ref : sizes) { + ref = size; + } + + EXPECT_EQ(array.initialize(sizes), pando::Status::Success); + + for (std::uint64_t i = 0; i < size * nodes; i++) { + array[i] = i; + } + + for (std::uint64_t i = 0; i < size * nodes; i++) { + EXPECT_EQ(array[i], i); + } + + std::uint64_t i = array.size() - 1; + for (auto curr = array.rbegin(); curr != array.rend(); curr++) { + EXPECT_EQ(*curr, i); + i--; + } + + array.deinitialize(); + sizes.deinitialize(); +} + +TEST(HostCachedArray, IteratorExecuteOn) { + using It = galois::HostCachedArrayIterator; + constexpr std::uint64_t goodVal = 0xDEADBEEF; + + constexpr std::uint64_t size = 5; + const std::uint64_t nodes = pando::getPlaceDims().node.id; + + pando::Array sizes; + EXPECT_EQ(sizes.initialize(nodes), pando::Status::Success); + for (auto ref : sizes) { + ref = size; + } + + // create array + galois::HostCachedArray array; + + EXPECT_EQ(array.initialize(sizes), pando::Status::Success); + + for (std::uint64_t i = 0; i < size * nodes; i++) { + array[i] = 0xDEADBEEF; + } + + pando::Status status; + auto func = +[](pando::NotificationHandle done, std::uint64_t goodVal, It begin, It end) { + for (auto curr = begin; curr != end; curr++) { + EXPECT_EQ(*curr, goodVal); + } + done.notify(); + }; + pando::Notification notif; + EXPECT_EQ(notif.init(), pando::Status::Success); + status = pando::executeOn(pando::Place{pando::NodeIndex{0}, pando::anyPod, pando::anyCore}, func, + notif.getHandle(), goodVal, array.begin(), array.end()); + EXPECT_EQ(status, pando::Status::Success); + notif.wait(); + + array.deinitialize(); + sizes.deinitialize(); +} diff --git a/test/containers/test_per_thread.cpp b/test/containers/test_per_thread.cpp index bf34bac3..6ae0de58 100644 --- a/test/containers/test_per_thread.cpp +++ b/test/containers/test_per_thread.cpp @@ -304,8 +304,8 @@ TEST(PerThreadVector, Clear) { }); }); - galois::DAccumulator accum; - err = lift(accum, initialize); + galois::DAccumulator accum{}; + err = accum.initialize(); EXPECT_EQ(err, pando::Status::Success); err = galois::doAll( @@ -329,11 +329,8 @@ TEST(PerThreadVector, Clear) { } TEST(PerThreadVector, ClearCompute) { - pando::GlobalPtr> perThreadVecPtr = - getGlobalObject>(); galois::PerThreadVector perThreadVec; EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); - *perThreadVecPtr = perThreadVec; static uint64_t workItems = 1000; galois::DistArray work; @@ -463,15 +460,15 @@ TEST(PerThreadVector, ClearCompute) { TEST(Vector, IntVectorOfVectorsUniform) { pando::Vector> vec; EXPECT_EQ(vec.initialize(0), pando::Status::Success); - uint64_t size = 2000; - galois::HashTable table; + uint64_t size = 10; + galois::HashTable table{}; PANDO_CHECK(table.initialize(8)); uint64_t result = 0; // Creates a vector of vectors of size [i,1] for (uint64_t i = 0; i < size; i++) { - EXPECT_FALSE(fmap(table, get, i, result)); - PANDO_CHECK(fmap(table, put, i, lift(vec, size))); + EXPECT_FALSE(table.get(i, result)); + PANDO_CHECK(table.put(i, lift(vec, size))); pando::Vector v; EXPECT_EQ(v.initialize(1), pando::Status::Success); v[0] = i; @@ -480,8 +477,8 @@ TEST(Vector, IntVectorOfVectorsUniform) { // Pushes back i+i to each vector for (uint64_t i = 0; i < size; i++) { - EXPECT_TRUE(fmap(table, get, i, result)); - pando::GlobalRef> vec1 = fmap(vec, get, result); + EXPECT_TRUE(table.get(i, result)); + pando::GlobalRef> vec1 = vec.get(result); pando::Vector vec2 = vec1; EXPECT_EQ(vec2.get(0), i); EXPECT_EQ(fmap(vec1, pushBack, (i + i)), pando::Status::Success); @@ -494,8 +491,9 @@ TEST(Vector, IntVectorOfVectorsUniform) { EXPECT_EQ(vec2[1], i + i); EXPECT_EQ(vec2[0], i); EXPECT_EQ(vec2.size(), 2); - EXPECT_TRUE(fmap(table, get, i, result)); + EXPECT_TRUE(table.get(i, result)); EXPECT_EQ(result, i); + vec2.deinitialize(); } EXPECT_EQ(vec.size(), size); vec.deinitialize(); @@ -504,8 +502,8 @@ TEST(Vector, IntVectorOfVectorsUniform) { TEST(Vector, IntVectorOfVectorsRandom) { pando::Vector> vec; EXPECT_EQ(vec.initialize(0), pando::Status::Success); - uint64_t size = 2000; - galois::HashTable table; + uint64_t size = 10; + galois::HashTable table{}; PANDO_CHECK(table.initialize(8)); uint64_t result = 0; std::random_device rd; @@ -524,12 +522,12 @@ TEST(Vector, IntVectorOfVectorsRandom) { } else { map[src].push_back(dst); } - if (fmap(table, get, src, result)) { - pando::GlobalRef> vec1 = fmap(vec, get, result); + if (table.get(src, result)) { + pando::GlobalRef> vec1 = vec.get(result); pando::Vector vec2 = vec1; EXPECT_EQ(fmap(vec1, pushBack, dst), pando::Status::Success); } else { - PANDO_CHECK(fmap(table, put, src, lift(vec, size))); + PANDO_CHECK(table.put(src, lift(vec, size))); pando::Vector v; EXPECT_EQ(v.initialize(1), pando::Status::Success); v[0] = dst; @@ -539,8 +537,8 @@ TEST(Vector, IntVectorOfVectorsRandom) { // Validates the vectors for (auto it = map.begin(); it != map.end(); ++it) { - EXPECT_TRUE(fmap(table, get, it->first, result)); - pando::GlobalRef> vec1 = fmap(vec, get, result); + EXPECT_TRUE(table.get(it->first, result)); + pando::GlobalRef> vec1 = vec.get(result); pando::Vector vec2 = vec1; std::sort(vec2.begin(), vec2.end()); std::vector v = it->second; @@ -557,8 +555,8 @@ TEST(Vector, IntVectorOfVectorsRandom) { TEST(Vector, EdgelistVectorOfVectors) { pando::Vector> vec; EXPECT_EQ(vec.initialize(0), pando::Status::Success); - uint64_t size = 2000; - galois::HashTable table; + uint64_t size = 10; + galois::HashTable table{}; PANDO_CHECK(table.initialize(8)); uint64_t result = 0; std::random_device rd; @@ -578,13 +576,13 @@ TEST(Vector, EdgelistVectorOfVectors) { map[src].push_back(dst); } - if (fmap(table, get, src, result)) { - pando::GlobalRef> vec1 = fmap(vec, get, result); + if (table.get(src, result)) { + pando::GlobalRef> vec1 = vec.get(result); pando::Vector vec2 = vec1; galois::WMDEdge edge(src, dst, agile::TYPES::NONE, agile::TYPES::NONE, agile::TYPES::NONE); EXPECT_EQ(fmap(vec1, pushBack, edge), pando::Status::Success); } else { - PANDO_CHECK(fmap(table, put, src, lift(vec, size))); + PANDO_CHECK(table.put(src, lift(vec, size))); pando::Vector v; EXPECT_EQ(v.initialize(1), pando::Status::Success); galois::WMDEdge edge(src, dst, agile::TYPES::NONE, agile::TYPES::NONE, agile::TYPES::NONE); @@ -595,11 +593,11 @@ TEST(Vector, EdgelistVectorOfVectors) { // Validates the vectors for (auto it = map.begin(); it != map.end(); ++it) { - EXPECT_TRUE(fmap(table, get, it->first, result)); - pando::GlobalRef> vec1 = fmap(vec, get, result); + EXPECT_TRUE(table.get(it->first, result)); + pando::GlobalRef> vec1 = vec.get(result); pando::Vector vec2 = vec1; std::vector v = it->second; - EXPECT_EQ(lift(vec2, size), v.size()); + EXPECT_EQ(vec2.size(), v.size()); for (uint64_t k = 0; k < lift(vec2, size); k++) { galois::WMDEdge edge = vec2[k]; bool found = false; diff --git a/test/containers/test_thread_local_vector.cpp b/test/containers/test_thread_local_vector.cpp new file mode 100644 index 00000000..14987a03 --- /dev/null +++ b/test/containers/test_thread_local_vector.cpp @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2023. University of Texas at Austin. All rights reserved. + +#include +#include + +#include "pando-rt/export.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +template +pando::GlobalPtr getGlobalObject() { + const auto expected = + pando::allocateMemory(1, pando::getCurrentPlace(), pando::MemoryType::Main); + EXPECT_EQ(expected.hasValue(), true); + return expected.value(); +} + +/** +uint64_t getHostThreads() { + uint64_t x = pando::getPlaceDims().core.x; + uint64_t y = pando::getPlaceDims().core.y; + uint64_t threads = pando::getThreadDims().id; + return x * y * threads; +} +*/ + +struct State { + State() = default; + State(galois::WaitGroup::HandleType f, galois::DAccumulator s) : first(f), second(s) {} + + galois::WaitGroup::HandleType first; + galois::DAccumulator second; +}; + +} // namespace + +TEST(ThreadLocalVector, Init) { + galois::ThreadLocalVector perThreadVec{}; + EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); + pando::Vector work; + EXPECT_EQ(work.initialize(1), pando::Status::Success); + work[0] = 9801; + galois::doAll( + perThreadVec, work, +[](galois::ThreadLocalVector perThreadVec, uint64_t x) { + EXPECT_GE(pando::getCurrentThread().id, 0); + EXPECT_EQ(perThreadVec.pushBack(x), pando::Status::Success); + pando::Vector localVec = perThreadVec.getLocalRef(); + EXPECT_EQ(localVec.size(), 1); + }); + EXPECT_EQ(perThreadVec.sizeAll(), 1); + + std::uint64_t elts = 0; + for (pando::Vector vec : perThreadVec) { + elts += vec.size(); + } + EXPECT_EQ(elts, 1); + + auto hca = PANDO_EXPECT_CHECK(perThreadVec.hostCachedFlatten()); + EXPECT_EQ(hca.size(), 1); + uint64_t val = hca[0]; + EXPECT_EQ(val, 9801); + + hca.deinitialize(); + work.deinitialize(); + perThreadVec.deinitialize(); +} + +TEST(ThreadLocalVector, Parallel) { + galois::ThreadLocalVector perThreadVec{}; + EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); + + static const uint64_t workItems = 1000; + pando::Vector work; + EXPECT_EQ(work.initialize(workItems), pando::Status::Success); + galois::doAll( + perThreadVec, work, +[](galois::ThreadLocalVector& perThreadVec, uint64_t x) { + uint64_t originalID = pando::getCurrentThread().id; + EXPECT_GE(originalID, 0); + EXPECT_LT(originalID, pando::getThreadDims().id); + pando::Vector staleVec = perThreadVec.getLocalRef(); + + EXPECT_EQ(perThreadVec.pushBack(x), pando::Status::Success); + + pando::Vector localVec = perThreadVec.getLocalRef(); + EXPECT_GT(localVec.size(), 0); + EXPECT_LT(localVec.size(), workItems); + EXPECT_EQ(localVec.size(), staleVec.size() + 1); + }); + EXPECT_EQ(perThreadVec.sizeAll(), workItems); + + uint64_t elts = 0; + for (uint64_t i = 0; i < perThreadVec.size(); i++) { + pando::Vector vec = perThreadVec[i]; + elts += vec.size(); + for (uint64_t i = 0; i < vec.size(); i++) { + EXPECT_LT(vec[i], workItems); + } + } + EXPECT_EQ(elts, workItems); + EXPECT_EQ(perThreadVec.sizeAll(), workItems); + + galois::HostCachedArray hca = PANDO_EXPECT_CHECK(perThreadVec.hostCachedFlatten()); + EXPECT_EQ(hca.size(), workItems); + + hca.deinitialize(); + work.deinitialize(); + perThreadVec.deinitialize(); +} + +TEST(ThreadLocalVector, DoAll) { + galois::ThreadLocalVector perThreadVec; + EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); + + static const uint64_t workItems = 1000; + galois::DistArray work; + EXPECT_EQ(work.initialize(workItems), pando::Status::Success); + for (uint64_t i = 0; i < workItems; i++) { + work[i] = i; + } + + galois::DAccumulator sum{}; + EXPECT_EQ(sum.initialize(), pando::Status::Success); + EXPECT_EQ(sum.get(), 0); + + galois::doAll( + perThreadVec, work, +[](galois::ThreadLocalVector& perThreadVec, uint64_t x) { + uint64_t originalID = pando::getCurrentThread().id; + EXPECT_GE(originalID, 0); + EXPECT_LT(originalID, pando::getThreadDims().id); + pando::Vector staleVec = perThreadVec.getLocalRef(); + + EXPECT_EQ(perThreadVec.pushBack(x), pando::Status::Success); + + pando::Vector localVec = perThreadVec.getLocalRef(); + EXPECT_EQ(pando::localityOf(localVec.data()).node.id, pando::getCurrentPlace().node.id); + EXPECT_GT(localVec.size(), 0); + EXPECT_LT(localVec.size(), workItems); + EXPECT_EQ(localVec.size(), staleVec.size() + 1); + }); + EXPECT_EQ(perThreadVec.sizeAll(), workItems); + const std::uint64_t size = perThreadVec.sizeAll(); + + EXPECT_EQ(perThreadVec.computeIndices(), pando::Status::Success); + EXPECT_EQ(size, perThreadVec.sizeAll()); + + galois::WaitGroup wg; + EXPECT_EQ(wg.initialize(0), pando::Status::Success); + galois::doAll( + wg.getHandle(), State(wg.getHandle(), sum), perThreadVec, + +[](State state, pando::GlobalRef> vec) { + pando::Vector v = vec; + for (uint64_t i = 0; i < v.size(); i++) { + EXPECT_LT(v[i], workItems); + } + galois::doAll( + state.first, state.second, v, +[](galois::DAccumulator sum, uint64_t ref) { + EXPECT_LT(ref, workItems); + sum.add(ref); + }); + }); + EXPECT_EQ(wg.wait(), pando::Status::Success); + EXPECT_EQ(sum.reduce(), ((workItems - 1) + 0) * (workItems / 2)); + + galois::HostCachedArray hca = PANDO_EXPECT_CHECK(perThreadVec.hostCachedFlatten()); + EXPECT_EQ(hca.size(), workItems); + uint64_t copy_sum = 0; + for (uint64_t elt : hca) { + copy_sum += elt; + } + EXPECT_EQ(copy_sum, ((workItems - 1) + 0) * (workItems / 2)); + + hca.deinitialize(); + sum.deinitialize(); + work.deinitialize(); + wg.deinitialize(); + perThreadVec.deinitialize(); +} + +TEST(ThreadLocalVector, HostLocalStorageVector) { + constexpr std::uint64_t size = 32; + pando::Status err; + + galois::ThreadLocalVector ptv; + err = ptv.initialize(); + EXPECT_EQ(err, pando::Status::Success); + + galois::HostLocalStorage phu{}; + + galois::doAll( + ptv, phu, +[](galois::ThreadLocalVector ptv, std::uint64_t) { + galois::doAll( + ptv, galois::IotaRange(0, size), + +[](galois::ThreadLocalVector ptv, std::uint64_t i) { + pando::Status err; + err = ptv.pushBack(i); + EXPECT_EQ(err, pando::Status::Success); + }); + }); + + galois::HostLocalStorage> phv; + PANDO_CHECK(phv.initialize()); + for (auto vecRef : phv) { + EXPECT_EQ(fmap(vecRef, initialize, 0), pando::Status::Success); + } + + err = ptv.hostFlattenAppend(phv); + EXPECT_EQ(err, pando::Status::Success); + + for (pando::GlobalRef> vecRef : phv) { + EXPECT_EQ(lift(vecRef, size), size); + std::sort(lift(vecRef, begin), lift(vecRef, end)); + pando::Vector vec = vecRef; + for (std::uint64_t i = 0; i < size; i++) { + EXPECT_EQ(vec[i], i); + } + } +} + +TEST(ThreadLocalVector, Clear) { + constexpr std::uint64_t size = 32; + pando::Status err; + + galois::ThreadLocalVector ptv; + err = ptv.initialize(); + EXPECT_EQ(err, pando::Status::Success); + + galois::HostLocalStorage phu{}; + + galois::doAll( + ptv, phu, +[](galois::ThreadLocalVector ptv, std::uint64_t) { + galois::doAll( + ptv, galois::IotaRange(0, size), + +[](galois::ThreadLocalVector ptv, std::uint64_t i) { + pando::Status err; + err = ptv.pushBack(i); + EXPECT_EQ(err, pando::Status::Success); + }); + }); + + galois::DAccumulator accum{}; + EXPECT_EQ(accum.initialize(), pando::Status::Success); + + err = galois::doAll( + accum, ptv, + +[](galois::DAccumulator accum, + pando::GlobalRef> refVec) { + accum.add(lift(refVec, size)); + }); + EXPECT_EQ(err, pando::Status::Success); + EXPECT_EQ(accum.reduce(), size * static_cast(pando::getPlaceDims().node.id)); + + ptv.clear(); + + galois::doAll( + ptv, +[](pando::GlobalRef> refVec) { + EXPECT_EQ(0, lift(refVec, size)); + }); + + accum.deinitialize(); + ptv.deinitialize(); +} + +TEST(ThreadLocalVector, ClearCompute) { + galois::ThreadLocalVector perThreadVec; + EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); + + static uint64_t workItems = 1000; + galois::DistArray work; + EXPECT_EQ(work.initialize(workItems), pando::Status::Success); + for (uint64_t i = 0; i < workItems; i++) { + work[i] = i; + } + + galois::DAccumulator sum{}; + EXPECT_EQ(sum.initialize(), pando::Status::Success); + EXPECT_EQ(sum.get(), 0); + + galois::doAll( + perThreadVec, work, +[](galois::ThreadLocalVector& perThreadVec, uint64_t x) { + uint64_t originalID = pando::getCurrentThread().id; + EXPECT_GE(originalID, 0); + EXPECT_LT(originalID, pando::getThreadDims().id); + pando::Vector staleVec = perThreadVec.getLocalRef(); + + EXPECT_EQ(perThreadVec.pushBack(x), pando::Status::Success); + + pando::Vector localVec = perThreadVec.getLocalRef(); + EXPECT_EQ(pando::localityOf(localVec.data()).node.id, pando::getCurrentPlace().node.id); + EXPECT_GT(localVec.size(), 0); + EXPECT_LT(localVec.size(), workItems); + EXPECT_EQ(localVec.size(), staleVec.size() + 1); + }); + EXPECT_EQ(perThreadVec.sizeAll(), workItems); + + const std::uint64_t sizeAll0 = perThreadVec.sizeAll(); + EXPECT_EQ(perThreadVec.computeIndices(), pando::Status::Success); + EXPECT_EQ(sizeAll0, perThreadVec.sizeAll()); + + galois::WaitGroup wg; + EXPECT_EQ(wg.initialize(0), pando::Status::Success); + galois::doAll( + wg.getHandle(), State(wg.getHandle(), sum), perThreadVec, + +[](State state, pando::GlobalRef> vec) { + pando::Vector v = vec; + for (uint64_t i = 0; i < v.size(); i++) { + EXPECT_LT(v[i], workItems); + } + galois::doAll( + state.first, state.second, v, +[](galois::DAccumulator sum, uint64_t ref) { + EXPECT_LT(ref, workItems); + sum.add(ref); + }); + }); + EXPECT_EQ(wg.wait(), pando::Status::Success); + EXPECT_EQ(sum.reduce(), ((workItems - 1) + 0) * (workItems / 2)); + + galois::HostCachedArray hca = PANDO_EXPECT_CHECK(perThreadVec.hostCachedFlatten()); + EXPECT_EQ(hca.size(), workItems); + uint64_t copy_sum = 0; + for (uint64_t elt : hca) { + copy_sum += elt; + } + EXPECT_EQ(copy_sum, ((workItems - 1) + 0) * (workItems / 2)); + + hca.deinitialize(); + sum.deinitialize(); + work.deinitialize(); + wg.deinitialize(); + perThreadVec.clear(); + + workItems = 100; + EXPECT_EQ(work.initialize(workItems), pando::Status::Success); + for (uint64_t i = 0; i < workItems; i++) { + work[i] = i; + } + + EXPECT_EQ(sum.initialize(), pando::Status::Success); + EXPECT_EQ(sum.get(), 0); + + galois::doAll( + perThreadVec, work, +[](galois::ThreadLocalVector& perThreadVec, uint64_t x) { + uint64_t originalID = pando::getCurrentThread().id; + EXPECT_GE(originalID, 0); + EXPECT_LT(originalID, pando::getThreadDims().id); + pando::Vector staleVec = perThreadVec.getLocalRef(); + + EXPECT_EQ(perThreadVec.pushBack(x), pando::Status::Success); + + pando::Vector localVec = perThreadVec.getLocalRef(); + EXPECT_EQ(pando::localityOf(localVec.data()).node.id, pando::getCurrentPlace().node.id); + EXPECT_GT(localVec.size(), 0); + EXPECT_LT(localVec.size(), workItems); + EXPECT_EQ(localVec.size(), staleVec.size() + 1); + }); + EXPECT_EQ(perThreadVec.sizeAll(), workItems); + + const std::uint64_t sizeAll1 = perThreadVec.sizeAll(); + EXPECT_EQ(perThreadVec.computeIndices(), pando::Status::Success); + EXPECT_EQ(sizeAll1, perThreadVec.sizeAll()); + + EXPECT_EQ(wg.initialize(0), pando::Status::Success); + galois::doAll( + wg.getHandle(), State(wg.getHandle(), sum), perThreadVec, + +[](State state, pando::GlobalRef> vec) { + pando::Vector v = vec; + for (uint64_t i = 0; i < v.size(); i++) { + EXPECT_LT(v[i], workItems); + } + galois::doAll( + state.first, state.second, v, +[](galois::DAccumulator sum, uint64_t ref) { + EXPECT_LT(ref, workItems); + sum.add(ref); + }); + }); + EXPECT_EQ(wg.wait(), pando::Status::Success); + EXPECT_EQ(sum.reduce(), ((workItems - 1) + 0) * (workItems / 2)); + + hca = PANDO_EXPECT_CHECK(perThreadVec.hostCachedFlatten()); + EXPECT_EQ(hca.size(), workItems); + copy_sum = 0; + for (uint64_t elt : hca) { + copy_sum += elt; + } + EXPECT_EQ(copy_sum, ((workItems - 1) + 0) * (workItems / 2)); + + hca.deinitialize(); + sum.deinitialize(); + work.deinitialize(); + wg.deinitialize(); + perThreadVec.deinitialize(); +} diff --git a/test/utility/test_prefix_sum.cpp b/test/utility/test_prefix_sum.cpp index b000aacb..f017b484 100644 --- a/test/utility/test_prefix_sum.cpp +++ b/test/utility/test_prefix_sum.cpp @@ -50,7 +50,7 @@ TEST(PrefixSum, Init) { galois::PrefixSum, scan_op, combiner, galois::DistArray> prefixSum(arr, prefixArr); - EXPECT_EQ(prefixSum.initialize(), pando::Status::Success); + EXPECT_EQ(prefixSum.initialize(pando::getPlaceDims().node.id), pando::Status::Success); prefixSum.computePrefixSum(elts); uint64_t expected = 0; @@ -76,7 +76,7 @@ TEST(PrefixSum, PerThread) { galois::PrefixSum, galois::DistArray> prefixSum(arr.m_data, prefixArr); - EXPECT_EQ(prefixSum.initialize(), pando::Status::Success); + EXPECT_EQ(prefixSum.initialize(pando::getPlaceDims().node.id), pando::Status::Success); prefixSum.computePrefixSum(prefixArr.size()); EXPECT_EQ(prefixArr[prefixArr.size() - 1], arr.sizeAll()); } @@ -97,7 +97,7 @@ TEST(PrefixSum, Array) { scan_op, combiner, galois::Array>; PFXSUM pfxsum(arr, arr); - PANDO_CHECK(pfxsum.initialize()); + PANDO_CHECK(pfxsum.initialize(pando::getPlaceDims().core.x * pando::getPlaceDims().core.y)); pfxsum.computePrefixSum(size); From 4b8e9cff62b50e1001573f884b698a64a9c73318 Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Fri, 5 Apr 2024 06:50:22 -0700 Subject: [PATCH 18/28] Refactor test to be faster (#20) * Refactor test to be faster * Thread Local Storage test was too expensive --- include/pando-lib-galois/sync/simple_lock.hpp | 5 -- test/containers/test_thread_local_storage.cpp | 4 +- test/sync/test_simple_lock.cpp | 48 ++++++++----------- 3 files changed, 22 insertions(+), 35 deletions(-) diff --git a/include/pando-lib-galois/sync/simple_lock.hpp b/include/pando-lib-galois/sync/simple_lock.hpp index ab23a554..26d09ff1 100644 --- a/include/pando-lib-galois/sync/simple_lock.hpp +++ b/include/pando-lib-galois/sync/simple_lock.hpp @@ -48,11 +48,6 @@ class SimpleLock { * @warning one of the initialize methods must be called before use */ [[nodiscard]] pando::Status initialize(pando::Place place, pando::MemoryType memoryType) { - // auto desiredValue = static_cast(State::IsUnlocked); - // pando::atomicStore(pando::GlobalPtr(&m_state), pando::GlobalPtr(&desiredValue), - // std::memory_order_release); - const auto desiredValue = pando::allocateMemory(1, place, memoryType); if (!desiredValue.hasValue()) { return desiredValue.error(); diff --git a/test/containers/test_thread_local_storage.cpp b/test/containers/test_thread_local_storage.cpp index caae41d4..ef0c5408 100644 --- a/test/containers/test_thread_local_storage.cpp +++ b/test/containers/test_thread_local_storage.cpp @@ -108,9 +108,9 @@ TEST(ThreadLocalStorage, DoAll) { } TEST(ThreadLocalStorage, copyToAllThreads) { - const std::uint64_t SIZE = 100; + const std::uint64_t SIZE = 10; pando::Array arr; - EXPECT_EQ(pando::Status::Success, arr.initialize(100)); + EXPECT_EQ(pando::Status::Success, arr.initialize(SIZE)); for (std::uint64_t i = 0; i < SIZE; i++) { arr[i] = i; } diff --git a/test/sync/test_simple_lock.cpp b/test/sync/test_simple_lock.cpp index 3d180377..d4f92d9c 100644 --- a/test/sync/test_simple_lock.cpp +++ b/test/sync/test_simple_lock.cpp @@ -6,9 +6,11 @@ #include +#include #include #include #include +#include #include #include #include @@ -37,7 +39,7 @@ TEST(SimpleLock, TryLock) { TEST(SimpleLock, SimpleLockUnlock) { auto test = [] { - galois::SimpleLock mutex; + galois::SimpleLock mutex{}; EXPECT_EQ(mutex.initialize(), pando::Status::Success); mutex.lock(); mutex.unlock(); @@ -52,45 +54,35 @@ TEST(SimpleLock, SimpleLockUnlock) { } TEST(SimpleLock, ActualLockUnlock) { - auto dims = pando::getPlaceDims(); - galois::GlobalBarrier gb; - EXPECT_EQ(gb.initialize(dims.node.id), pando::Status::Success); galois::SimpleLock mutex; EXPECT_EQ(mutex.initialize(), pando::Status::Success); pando::Array array; EXPECT_EQ(array.initialize(10), pando::Status::Success); array.fill(0); - auto func = +[](galois::GlobalBarrier gb, galois::SimpleLock mutex, pando::Array array) { - mutex.lock(); - for (int i = 0; i < 10; i++) { - if ((i + 1 + pando::getCurrentPlace().node.id) <= 10) { - array[i] = i + 1 + pando::getCurrentPlace().node.id; - } else { - array[i] = i - 9 + pando::getCurrentPlace().node.id; - } - } - mutex.unlock(); - gb.done(); - }; - for (std::int16_t nodeId = 0; nodeId < dims.node.id; nodeId++) { - EXPECT_EQ( - pando::executeOn(pando::Place{pando::NodeIndex{nodeId}, pando::anyPod, pando::anyCore}, - func, gb, mutex, array), - pando::Status::Success); - } + galois::HostLocalStorage hls{}; + auto tpl = galois::make_tpl(mutex, array); + EXPECT_EQ(galois::doAll( + tpl, hls, + +[](decltype(tpl) tpl, pando::GlobalRef) { + auto [mutex, array] = tpl; + for (int i = 0; i < 10; i++) { + if ((i + 1 + pando::getCurrentPlace().node.id) <= 10) { + array[i] = i + 1 + pando::getCurrentPlace().node.id; + } else { + array[i] = i - 9 + pando::getCurrentPlace().node.id; + } + } + mutex.unlock(); + }), + pando::Status::Success); - EXPECT_EQ(gb.wait(), pando::Status::Success); - for (int i = 0; i < 10; i++) { - std::cout << array[i] << " "; - } - std::cout << std::endl; int sum = 0; for (int i = 0; i < 10; i++) { sum += array[i]; } EXPECT_EQ(sum, 55); - gb.deinitialize(); array.deinitialize(); + mutex.deinitialize(); } From 0dcd78a01d7c90de9432ca384fb09abaa4e07377 Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Fri, 5 Apr 2024 09:48:08 -0700 Subject: [PATCH 19/28] PerThreadRename: DistArray -> ThreadLocalStorage (#18) --- .../graphs/dist_local_csr.hpp | 9 ++-- .../import/ingest_rmat_el.hpp | 12 +++-- .../import/ingest_wmd_csv.hpp | 12 +++-- .../import/wmd_graph_importer.hpp | 6 +-- src/ingest_rmat_el.cpp | 15 +++--- src/ingest_wmd_csv.cpp | 5 +- test/import/test_cusp_importer.cpp | 50 +++++++++---------- 7 files changed, 54 insertions(+), 55 deletions(-) diff --git a/include/pando-lib-galois/graphs/dist_local_csr.hpp b/include/pando-lib-galois/graphs/dist_local_csr.hpp index a52ee4de..f4e76ff8 100644 --- a/include/pando-lib-galois/graphs/dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/dist_local_csr.hpp @@ -1043,8 +1043,8 @@ class DistLocalCSR { galois::PerThreadVector> localEdges; PANDO_CHECK_RETURN(localEdges.initialize()); - galois::DistArray> perThreadRename; - PANDO_CHECK(perThreadRename.initialize(localEdges.size())); + galois::ThreadLocalStorage> perThreadRename; + PANDO_CHECK(perThreadRename.initialize()); for (auto hashRef : perThreadRename) { hashRef = galois::HashTable{}; @@ -1088,13 +1088,14 @@ class DistLocalCSR { #ifdef FREE auto freePerThreadRename = - +[](galois::DistArray> perThreadRename) { + +[](galois::ThreadLocalStorage> + perThreadRename) { for (galois::HashTable hash : perThreadRename) { hash.deinitialize(); } - perThreadRename.deinitialize(); }; PANDO_CHECK(pando::executeOn(pando::anyPlace, freePerThreadRename, perThreadRename)); + perThreadRename.deinitialize(); #endif const bool isEdgeList = false; diff --git a/include/pando-lib-galois/import/ingest_rmat_el.hpp b/include/pando-lib-galois/import/ingest_rmat_el.hpp index 71fe90a2..c68bb6b3 100644 --- a/include/pando-lib-galois/import/ingest_rmat_el.hpp +++ b/include/pando-lib-galois/import/ingest_rmat_el.hpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -33,7 +34,7 @@ void loadELFilePerThread( galois::WaitGroup::HandleType wgh, pando::Array filename, std::uint64_t segmentsPerThread, std::uint64_t numThreads, std::uint64_t threadID, galois::PerThreadVector> localEdges, - galois::DistArray> perThreadRename, + galois::ThreadLocalStorage> perThreadRename, std::uint64_t numVertices); const char* elGetOne(const char* line, std::uint64_t& val); @@ -59,8 +60,8 @@ ReturnType initializeELDLCSR(pando::Array filename, std::uint64_t numVerti const std::uint64_t numThreads = localEdges.size() - pando::getPlaceDims().node.id; - galois::DistArray> perThreadRename; - PANDO_CHECK(perThreadRename.initialize(localEdges.size())); + galois::ThreadLocalStorage> perThreadRename; + PANDO_CHECK(perThreadRename.initialize()); for (auto hashRef : perThreadRename) { hashRef = galois::HashTable{}; @@ -92,13 +93,14 @@ ReturnType initializeELDLCSR(pando::Array filename, std::uint64_t numVerti #ifdef FREE auto freePerThreadRename = - +[](galois::DistArray> perThreadRename) { + +[](galois::ThreadLocalStorage> + perThreadRename) { for (galois::HashTable hash : perThreadRename) { hash.deinitialize(); } - perThreadRename.deinitialize(); }; PANDO_CHECK(pando::executeOn(pando::anyPlace, freePerThreadRename, perThreadRename)); + perThreadRename.deinitialize(); #endif PANDO_CHECK( diff --git a/include/pando-lib-galois/import/ingest_wmd_csv.hpp b/include/pando-lib-galois/import/ingest_wmd_csv.hpp index 1139c343..aecfb119 100644 --- a/include/pando-lib-galois/import/ingest_wmd_csv.hpp +++ b/include/pando-lib-galois/import/ingest_wmd_csv.hpp @@ -6,6 +6,7 @@ #include +#include #include #include #include @@ -16,7 +17,7 @@ void loadWMDFilePerThread( galois::WaitGroup::HandleType wgh, pando::Array filename, std::uint64_t segmentsPerThread, std::uint64_t numThreads, std::uint64_t threadID, galois::PerThreadVector> localEdges, - galois::DistArray> perThreadRename, + galois::ThreadLocalStorage> perThreadRename, galois::PerThreadVector localVertices, galois::DAccumulator totVerts); template @@ -79,8 +80,8 @@ galois::DistLocalCSR initializeWMDDLCSR(pando::Array galois::DAccumulator totVerts; PANDO_CHECK(totVerts.initialize()); - galois::DistArray> perThreadRename{}; - PANDO_CHECK(perThreadRename.initialize(localEdges.size())); + galois::ThreadLocalStorage> perThreadRename{}; + PANDO_CHECK(perThreadRename.initialize()); for (auto hashRef : perThreadRename) { hashRef = galois::HashTable{}; @@ -103,13 +104,14 @@ galois::DistLocalCSR initializeWMDDLCSR(pando::Array #ifdef FREE auto freePerThreadRename = - +[](galois::DistArray> perThreadRename) { + +[](galois::ThreadLocalStorage> + perThreadRename) { for (galois::HashTable hash : perThreadRename) { hash.deinitialize(); } - perThreadRename.deinitialize(); }; PANDO_CHECK(pando::executeOn(pando::anyPlace, freePerThreadRename, perThreadRename)); + perThreadRename.deinitialize(); #endif PANDO_CHECK( diff --git a/include/pando-lib-galois/import/wmd_graph_importer.hpp b/include/pando-lib-galois/import/wmd_graph_importer.hpp index d8e11453..b70e5fa0 100644 --- a/include/pando-lib-galois/import/wmd_graph_importer.hpp +++ b/include/pando-lib-galois/import/wmd_graph_importer.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -611,10 +612,9 @@ void loadEdgeFilePerThread( pando::NotificationHandle done, galois::EdgeParser parser, uint64_t segmentsPerThread, std::uint64_t numThreads, std::uint64_t threadID, galois::PerThreadVector> localEdges, - galois::DistArray> perThreadRename) { - auto hartID = localEdges.getLocalVectorID(); + galois::ThreadLocalStorage> perThreadRename) { auto localEdgeVec = localEdges.getThreadVector(); - auto hashRef = perThreadRename[hartID]; + auto hashRef = perThreadRename.getLocalRef(); auto parseLine = [&parser, &localEdgeVec, &hashRef](const char* currentLine) { if (currentLine[0] != parser.comment) { diff --git a/src/ingest_rmat_el.cpp b/src/ingest_rmat_el.cpp index be5f06f9..b5e7a989 100644 --- a/src/ingest_rmat_el.cpp +++ b/src/ingest_rmat_el.cpp @@ -20,15 +20,14 @@ auto generateRMATParser( }; } -void galois::loadELFilePerThread(galois::WaitGroup::HandleType wgh, pando::Array filename, - std::uint64_t segmentsPerThread, std::uint64_t numThreads, - std::uint64_t threadID, - galois::PerThreadVector> localEdges, - DistArray> perThreadRename, - std::uint64_t numVertices) { - auto hartID = localEdges.getLocalVectorID(); +void galois::loadELFilePerThread( + galois::WaitGroup::HandleType wgh, pando::Array filename, std::uint64_t segmentsPerThread, + std::uint64_t numThreads, std::uint64_t threadID, + galois::PerThreadVector> localEdges, + ThreadLocalStorage> perThreadRename, + std::uint64_t numVertices) { auto parser = - generateRMATParser(&localEdges.getThreadVector(), &perThreadRename[hartID], numVertices); + generateRMATParser(&localEdges.getThreadVector(), perThreadRename.getLocal(), numVertices); PANDO_CHECK( internal::loadGraphFilePerThread(filename, segmentsPerThread, numThreads, threadID, parser)); wgh.done(); diff --git a/src/ingest_wmd_csv.cpp b/src/ingest_wmd_csv.cpp index b561a198..69527409 100644 --- a/src/ingest_wmd_csv.cpp +++ b/src/ingest_wmd_csv.cpp @@ -32,14 +32,13 @@ void galois::loadWMDFilePerThread( galois::WaitGroup::HandleType wgh, pando::Array filename, std::uint64_t segmentsPerThread, std::uint64_t numThreads, std::uint64_t threadID, galois::PerThreadVector> localEdges, - galois::DistArray> perThreadRename, + galois::ThreadLocalStorage> perThreadRename, galois::PerThreadVector localVertices, galois::DAccumulator totVerts) { std::uint64_t countLocalVertices = 0; pando::Array tokens; PANDO_CHECK(tokens.initialize(10)); - auto hartID = localVertices.getLocalVectorID(); - auto parser = generateWMDParser(tokens, &localEdges.getThreadVector(), &perThreadRename[hartID], + auto parser = generateWMDParser(tokens, &localEdges.getThreadVector(), perThreadRename.getLocal(), &localVertices.getThreadVector(), &countLocalVertices); PANDO_CHECK( internal::loadGraphFilePerThread(filename, segmentsPerThread, numThreads, threadID, parser)); diff --git a/test/import/test_cusp_importer.cpp b/test/import/test_cusp_importer.cpp index d310bb89..e264eb83 100644 --- a/test/import/test_cusp_importer.cpp +++ b/test/import/test_cusp_importer.cpp @@ -6,6 +6,7 @@ #include +#include #include #include #include @@ -679,25 +680,20 @@ TEST(loadGraphFilePerThread, loadGraph) { for (uint64_t i = 0; i < wmdFile.size(); i++) filename[i] = wmdFile[i]; - galois::DistArray> perThreadRename; - PANDO_CHECK(perThreadRename.initialize(localEdges.size())); - for (std::uint64_t i = 0; i < localEdges.size(); i++) { + galois::ThreadLocalStorage> perThreadRename; + PANDO_CHECK(perThreadRename.initialize()); + for (std::uint64_t i = 0; i < perThreadRename.size(); i++) { perThreadRename[i] = galois::HashTable(); pando::Status err = fmap(perThreadRename[i], initialize, 0); EXPECT_EQ(err, pando::Status::Success); } - for (std::uint64_t i = 0; i < numThreads; i++) { - perThreadRename[i] = galois::HashTable(); - pando::Status err = fmap(perThreadRename[i], initialize, 0); - EXPECT_EQ(err, pando::Status::Success); - } - - galois::DAccumulator totVerts; + galois::DAccumulator totVerts{}; EXPECT_EQ(totVerts.initialize(), pando::Status::Success); galois::WaitGroup wg; EXPECT_EQ(pando::Status::Success, wg.initialize(numThreads)); + auto wgh = wg.getHandle(); for (uint64_t i = 0; i < numThreads; i++) { @@ -713,13 +709,13 @@ TEST(loadGraphFilePerThread, loadGraph) { wg.deinitialize(); - auto freeStuff = - +[](galois::DistArray> perThreadRename) { - for (galois::HashTable hash : perThreadRename) { - hash.deinitialize(); - } - perThreadRename.deinitialize(); - }; + auto freeStuff = +[](galois::ThreadLocalStorage> + perThreadRename) { + for (galois::HashTable hash : perThreadRename) { + hash.deinitialize(); + } + }; + perThreadRename.deinitialize(); EXPECT_EQ(pando::Status::Success, pando::executeOn(pando::anyPlace, freeStuff, perThreadRename)); uint64_t numVertices = 0; @@ -756,9 +752,9 @@ TEST(loadGraphFilePerThread, loadEdgeList) { const std::uint64_t numThreads = localEdges.size() - pando::getPlaceDims().node.id; - galois::DistArray> perThreadRename{}; - PANDO_CHECK(perThreadRename.initialize(localEdges.size())); - for (std::uint64_t i = 0; i < localEdges.size(); i++) { + galois::ThreadLocalStorage> perThreadRename{}; + PANDO_CHECK(perThreadRename.initialize()); + for (std::uint64_t i = 0; i < perThreadRename.size(); i++) { perThreadRename[i] = galois::HashTable(); pando::Status err = fmap(perThreadRename[i], initialize, 0); EXPECT_EQ(err, pando::Status::Success); @@ -778,14 +774,14 @@ TEST(loadGraphFilePerThread, loadEdgeList) { } EXPECT_EQ(wg.wait(), pando::Status::Success); - auto freeStuff = - +[](galois::DistArray> perThreadRename) { - for (galois::HashTable hash : perThreadRename) { - hash.deinitialize(); - } - perThreadRename.deinitialize(); - }; + auto freeStuff = +[](galois::ThreadLocalStorage> + perThreadRename) { + for (galois::HashTable hash : perThreadRename) { + hash.deinitialize(); + } + }; EXPECT_EQ(pando::Status::Success, pando::executeOn(pando::anyPlace, freeStuff, perThreadRename)); + perThreadRename.deinitialize(); uint64_t numEdges = getNumEdges(edgelistFile); uint64_t edges = 0; From 7120711aef73a201cbce98caeafabd11546f468c Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Fri, 5 Apr 2024 12:16:04 -0700 Subject: [PATCH 20/28] Refactor a vector test and make sanitizer tests more accessible (#21) * shorten vector tests * Make builds easier to access * Small fix * Workflows updated to be cleaner * Reduce test sizes further * Try using cpusets * Try using labels to determine cpusets * Try using names to determine cpusets * Forgot the coverage type --- .github/workflows/docker.yml | 26 +++++++++++--------- .github/workflows/drivex.yml | 10 ++++++-- CMakeLists.txt | 23 ++++++++++++++++- Makefile | 8 +++--- pando-rt/test/containers/test_vector.cpp | 2 +- test/containers/test_per_thread.cpp | 4 +-- test/containers/test_thread_local_vector.cpp | 4 +-- 7 files changed, 54 insertions(+), 23 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 1f5d5a27..1311a1a0 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -18,8 +18,8 @@ concurrency: cancel-in-progress: true jobs: - prep-smp-ubuntu-2204-docker: - name: gcc / ${{ matrix.build-type }} / ${{ matrix.sanitizer-type }} + prep-mpi-ubuntu-2204-docker: + name: gcc / ${{ matrix.build-type }} runs-on: self-hosted permissions: contents: read @@ -35,13 +35,7 @@ jobs: shell: bash -l {0} strategy: matrix: - build-type: ['Release', 'RelWithDebInfo'] - sanitizer-type: ['nosan', 'san'] - exclude: - - build-type: 'RelWithDebInfo' - sanitizer-type: 'nosan' - - build-type: 'Release' - sanitizer-type: 'san' + build-type: ['Release', 'Sanitize'] steps: @@ -60,13 +54,21 @@ jobs: echo "SRC_DIR=$(pwd)" >> $GITHUB_ENV echo "PANDO_TEST_DISCOVERY_TIMEOUT=600" >> $GITHUB_ENV echo "IMAGE_VERSION=$(git log --pretty="%h" -1 Dockerfile.dev)" >> $GITHUB_ENV - if [ ${{ matrix.sanitizer-type }} == 'san' ]; then + if [ ${{ matrix.build-type }} == 'Sanitize' ]; then echo "PANDO_BUILD_DOCS=OFF" >> $GITHUB_ENV - echo "PANDO_CONTAINER_ENV=-e=PANDO_PREP_L1SP_HART=32768 -ePANDO_PREP_MAIN_NODE=8589934592 -e=PANDO_EXTRA_CXX_FLAGS='\"-fsanitize=address -fsanitize=undefined\"'" >> $GITHUB_ENV + echo "PANDO_CONTAINER_ENV=-e=PANDO_PREP_L1SP_HART=32768 -ePANDO_PREP_MAIN_NODE=8589934592" >> $GITHUB_ENV fi - if [ ${{ matrix.sanitizer-type }} == 'nosan' ]; then + if [ ${{ matrix.build-type }} == 'Release' ]; then + echo "PANDO_BUILD_DOCS=OFF" >> $GITHUB_ENV echo "PANDO_CONTAINER_ENV=-e=PANDORT_TESTS=ON" >> $GITHUB_ENV fi + echo ${{ runner.name }} + if [ ${{ runner.name }} == 'zerberus-0' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'zerberus-1' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31'" >> $GITHUB_ENV + fi cat $GITHUB_ENV - name: Configure diff --git a/.github/workflows/drivex.yml b/.github/workflows/drivex.yml index a1dc2a8a..c20570de 100644 --- a/.github/workflows/drivex.yml +++ b/.github/workflows/drivex.yml @@ -19,7 +19,7 @@ concurrency: jobs: docker-drivex-ubuntu-2204: - name: gcc / ${{ matrix.build-type }} / ${{ matrix.sanitizer-type }} + name: gcc / ${{ matrix.build-type }} runs-on: self-hosted permissions: contents: read @@ -36,7 +36,6 @@ jobs: strategy: matrix: build-type: ['Release'] - sanitizer-type: ['nosan'] steps: @@ -56,6 +55,13 @@ jobs: echo "PANDO_TEST_DISCOVERY_TIMEOUT=600" >> $GITHUB_ENV echo "IMAGE_VERSION=$(git log --pretty="%h" -1 Dockerfile.dev)" >> $GITHUB_ENV echo "PANDO_CONTAINER_ENV=-e=PANDORT_TESTS=ON" >> $GITHUB_ENV + echo ${{ runner.name }} + if [ ${{ runner.name }} == 'zerberus-0' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'zerberus-1' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31'" >> $GITHUB_ENV + fi cat $GITHUB_ENV - name: Configure diff --git a/CMakeLists.txt b/CMakeLists.txt index a40d51c9..36784f60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ endif() option(ENABLE_PANDORT_TESTS "Enable pando-rt to run tests" OFF) if (NOT ENABLE_PANDORT_TESTS) + message("Not Enabling Pandort testing") set(BUILD_TESTING_SAVED "${BUILD_TESTING}") set(BUILD_TESTING OFF) endif() @@ -43,13 +44,33 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") ) endif() +#create the sanitize build type +set(CMAKE_CXX_FLAGS_SANITIZE + "-O3 -g -fsanitize=address -fsanitize=undefined -DNDEBUG" CACHE STRING "Flags used by the C++ compiler during sanitizer builds" + FORCE ) +set(CMAKE_C_FLAGS_SANITIZE + "-O3 -g -fsanitize=address -fsanitize=undefined -DNDEBUG" CACHE STRING "Flags used by the C compiler during sanitizer builds" + FORCE ) +set(CMAKE_EXE_LINKER_FLAGS_SANITIZE + "" CACHE STRING "Flags used for linking binaries during sanitizer builds" + FORCE ) +set(CMAKE_SHARED_LINKER_FLAGS_SANITIZE + "" CACHE STRING "Flags used for linking shared libraries during sanitizer builds" + FORCE ) + +MARK_AS_ADVANCED( + CMAKE_CXX_FLAGS_SANITIZE + CMAKE_C_FLAGS_SANITIZE + CMAKE_EXE_LINKER_FLAGS_SANITIZE + CMAKE_SHARED_LINKER_FLAGS_SANITIZE) + # default build type +set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Sanitize" "Coverage") set(DEFAULT_BUILD_TYPE "Release") if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to default '${DEFAULT_BUILD_TYPE}' as none was specified.") set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "Choose the type of build." FORCE) # possible values of build type for cmake-gui - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif () # target diff --git a/Makefile b/Makefile index 45997b12..f2c061c3 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,7 @@ CONTAINER_BUILD_DIR ?= /pando/dockerbuild CONTAINER_WORKDIR ?= ${CONTAINER_SRC_DIR} CONTAINER_CONTEXT ?= default CONTAINER_OPTS ?= +CONTAINER_CPUSET ?= CONTAINER_CMD ?= setarch `uname -m` -R bash -l INTERACTIVE ?= i @@ -110,6 +111,7 @@ docker: -v ${SRC_DIR}/:${CONTAINER_SRC_DIR} \ ${PANDO_CONTAINER_MOUNTS} \ ${PANDO_CONTAINER_ENV} \ + ${CONTAINER_CPUSET} \ --privileged \ --workdir=${CONTAINER_WORKDIR} ${CONTAINER_OPTS} -${INTERACTIVE}t \ ${IMAGE_NAME}:${VERSION} \ @@ -181,19 +183,19 @@ drive-deps: run-tests-mpi: set -o pipefail && \ . ~/.profile && \ - cd ${CONTAINER_BUILD_DIR} && ctest -j4 --verbose | tee test.out && \ + cd ${CONTAINER_BUILD_DIR} && ctest -j2 --verbose | tee test.out && \ ! grep -E "Failure" test.out && ! grep -E "runtime error" test.out run-tests-smp: set -o pipefail && \ . ~/.profile && \ - cd ${CONTAINER_BUILD_DIR}-smp && ctest -j4 --verbose | tee test.out && \ + cd ${CONTAINER_BUILD_DIR}-smp && ctest -j2 --verbose | tee test.out && \ ! grep -E "Failure" test.out && ! grep -E "runtime error" test.out run-tests-drv: set -o pipefail && \ . ~/.profile && \ - cd ${DRV_BUILD_DIR} && ctest -j4 --verbose | tee test.out && \ + cd ${DRV_BUILD_DIR} && ctest -j2 --verbose | tee test.out && \ ! grep -E "Failure" test.out && ! grep -E "runtime error" test.out run-tests: run-tests-mpi diff --git a/pando-rt/test/containers/test_vector.cpp b/pando-rt/test/containers/test_vector.cpp index fe556228..76fb1e9c 100644 --- a/pando-rt/test/containers/test_vector.cpp +++ b/pando-rt/test/containers/test_vector.cpp @@ -162,7 +162,7 @@ TEST(Vector, StressCreateDestroy) { TEST(Vector, StressPushBack) { const std::uint64_t size = 8; - const std::uint64_t finalSz = 1 << 10; + const std::uint64_t finalSz = 1 << 6; pando::Vector vector; EXPECT_EQ(vector.initialize(size), pando::Status::Success); diff --git a/test/containers/test_per_thread.cpp b/test/containers/test_per_thread.cpp index 6ae0de58..c25e31b1 100644 --- a/test/containers/test_per_thread.cpp +++ b/test/containers/test_per_thread.cpp @@ -134,7 +134,7 @@ TEST(PerThreadVector, DoAll) { EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); *perThreadVecPtr = perThreadVec; - static const uint64_t workItems = 1000; + static const uint64_t workItems = 100; galois::DistArray work; EXPECT_EQ(work.initialize(workItems), pando::Status::Success); for (uint64_t i = 0; i < workItems; i++) { @@ -332,7 +332,7 @@ TEST(PerThreadVector, ClearCompute) { galois::PerThreadVector perThreadVec; EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); - static uint64_t workItems = 1000; + static uint64_t workItems = 100; galois::DistArray work; EXPECT_EQ(work.initialize(workItems), pando::Status::Success); for (uint64_t i = 0; i < workItems; i++) { diff --git a/test/containers/test_thread_local_vector.cpp b/test/containers/test_thread_local_vector.cpp index 14987a03..cef9ece5 100644 --- a/test/containers/test_thread_local_vector.cpp +++ b/test/containers/test_thread_local_vector.cpp @@ -122,7 +122,7 @@ TEST(ThreadLocalVector, DoAll) { galois::ThreadLocalVector perThreadVec; EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); - static const uint64_t workItems = 1000; + static const uint64_t workItems = 100; galois::DistArray work; EXPECT_EQ(work.initialize(workItems), pando::Status::Success); for (uint64_t i = 0; i < workItems; i++) { @@ -275,7 +275,7 @@ TEST(ThreadLocalVector, ClearCompute) { galois::ThreadLocalVector perThreadVec; EXPECT_EQ(perThreadVec.initialize(), pando::Status::Success); - static uint64_t workItems = 1000; + static uint64_t workItems = 100; galois::DistArray work; EXPECT_EQ(work.initialize(workItems), pando::Status::Success); for (uint64_t i = 0; i < workItems; i++) { From d9302ea6ad566be278b04dfc1a709f552c739e19 Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Fri, 5 Apr 2024 21:50:01 +0000 Subject: [PATCH 21/28] atomics needed some fixes as well as rolling back aggression on sanitize --- CMakeLists.txt | 4 ++-- include/pando-lib-galois/sync/simple_lock.hpp | 6 +----- pando-rt/src/atomic.cpp | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 36784f60..c5a80d40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,10 +46,10 @@ endif() #create the sanitize build type set(CMAKE_CXX_FLAGS_SANITIZE - "-O3 -g -fsanitize=address -fsanitize=undefined -DNDEBUG" CACHE STRING "Flags used by the C++ compiler during sanitizer builds" + "-O2 -g -fsanitize=address -fsanitize=undefined -DNDEBUG" CACHE STRING "Flags used by the C++ compiler during sanitizer builds" FORCE ) set(CMAKE_C_FLAGS_SANITIZE - "-O3 -g -fsanitize=address -fsanitize=undefined -DNDEBUG" CACHE STRING "Flags used by the C compiler during sanitizer builds" + "-O2 -g -fsanitize=address -fsanitize=undefined -DNDEBUG" CACHE STRING "Flags used by the C compiler during sanitizer builds" FORCE ) set(CMAKE_EXE_LINKER_FLAGS_SANITIZE "" CACHE STRING "Flags used for linking binaries during sanitizer builds" diff --git a/include/pando-lib-galois/sync/simple_lock.hpp b/include/pando-lib-galois/sync/simple_lock.hpp index 26d09ff1..2c9c300c 100644 --- a/include/pando-lib-galois/sync/simple_lock.hpp +++ b/include/pando-lib-galois/sync/simple_lock.hpp @@ -48,11 +48,7 @@ class SimpleLock { * @warning one of the initialize methods must be called before use */ [[nodiscard]] pando::Status initialize(pando::Place place, pando::MemoryType memoryType) { - const auto desiredValue = pando::allocateMemory(1, place, memoryType); - if (!desiredValue.hasValue()) { - return desiredValue.error(); - } - m_state = desiredValue.value(); + m_state = PANDO_EXPECT_RETURN(pando::allocateMemory(1, place, memoryType)); *m_state = static_cast(State::IsUnlocked); pando::atomicThreadFence(std::memory_order_release); return pando::Status::Success; diff --git a/pando-rt/src/atomic.cpp b/pando-rt/src/atomic.cpp index 257e4c9f..22c9f27d 100644 --- a/pando-rt/src/atomic.cpp +++ b/pando-rt/src/atomic.cpp @@ -348,11 +348,11 @@ bool atomicCompareExchangeImpl(GlobalPtr ptr, GlobalPtr expected, GlobalPt if (handle.value() == expectedValue) { // success postAtomicOpFence(success); - *expected = expectedValue; return true; } else { // failure postAtomicOpFence(failure); + *expected = handle.value(); return false; } } From bbb273b04cb9678df59542b910dec931688b8403 Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Fri, 5 Apr 2024 15:53:38 -0700 Subject: [PATCH 22/28] all references work with fmap (#25) --- .../pando-lib-galois/utility/gptr_monad.hpp | 59 +++++++++-------- test/utility/test_gptr_monad.cpp | 66 +++++++++++++++++++ 2 files changed, 96 insertions(+), 29 deletions(-) diff --git a/include/pando-lib-galois/utility/gptr_monad.hpp b/include/pando-lib-galois/utility/gptr_monad.hpp index 5b9baba2..e4e254d8 100644 --- a/include/pando-lib-galois/utility/gptr_monad.hpp +++ b/include/pando-lib-galois/utility/gptr_monad.hpp @@ -7,50 +7,51 @@ /** * @brief lifts a function with no arguments to work on references */ -#define lift(ref, func) \ - __extension__({ \ - auto refComputed##__LINE__ = (ref); \ - typename std::pointer_traits::element_type tmp = \ - refComputed##__LINE__; \ - auto ret = tmp.func(); \ - refComputed##__LINE__ = tmp; \ - ret; \ +#define lift(ref, func) \ + __extension__({ \ + auto ptrComputed##__LINE__ = &(ref); \ + typename std::pointer_traits::element_type tmp = \ + *ptrComputed##__LINE__; \ + auto ret = tmp.func(); \ + *ptrComputed##__LINE__ = tmp; \ + ret; \ }) /** * @brief lifts a function with no arguments to work on a void return type */ -#define liftVoid(ref, func) \ - do { \ - auto refComputed##__LINE__ = (ref); \ - typename std::pointer_traits::element_type tmp = \ - refComputed##__LINE__; \ - tmp.func(); \ - refComputed##__LINE__ = tmp; \ +#define liftVoid(ref, func) \ + do { \ + auto ptrComputed##__LINE__ = &(ref); \ + typename std::pointer_traits::element_type tmp = \ + *ptrComputed##__LINE__; \ + tmp.func(); \ + *ptrComputed##__LINE__ = tmp; \ } while (0) /** * @brief maps a function over its arguments up to work on references */ -#define fmap(ref, func, ...) \ - __extension__({ \ - auto refComputed##__LINE__ = (ref); \ - typename std::pointer_traits::element_type tmp = \ - refComputed##__LINE__; \ - auto ret = tmp.func(__VA_ARGS__); \ - refComputed##__LINE__ = tmp; \ - ret; \ +#define fmap(ref, func, ...) \ + __extension__({ \ + auto ptrComputed##__LINE__ = &(ref); \ + typename std::pointer_traits::element_type tmp = \ + *ptrComputed##__LINE__; \ + auto ret = tmp.func(__VA_ARGS__); \ + *ptrComputed##__LINE__ = tmp; \ + ret; \ }) /** * @brief maps a function over it's arguments to work on references and return void */ -#define fmapVoid(ref, func, ...) \ - do { \ - auto refComputed##__LINE__ = (ref); \ - typename std::pointer_traits::element_type tmp = ref; \ - tmp.func(__VA_ARGS__); \ - ref = tmp; \ +#define fmapVoid(ref, func, ...) \ + do { \ + auto ptrComputed##__LINE__ = &(ref); \ + typename std::pointer_traits::element_type tmp = \ + *ptrComputed##__LINE__; \ + tmp.func(__VA_ARGS__); \ + *ptrComputed##__LINE__ = tmp; \ } while (0) #endif // PANDO_LIB_GALOIS_UTILITY_GPTR_MONAD_HPP_ diff --git a/test/utility/test_gptr_monad.cpp b/test/utility/test_gptr_monad.cpp index 1d23906c..d771cd4e 100644 --- a/test/utility/test_gptr_monad.cpp +++ b/test/utility/test_gptr_monad.cpp @@ -27,6 +27,14 @@ TEST(Fmap, GVectorInitialize) { pando::deallocateMemory(gvec, 1); } +TEST(Fmap, VectorInitialize) { + constexpr std::uint64_t SIZE = 10; + pando::Vector vec; + fmap(vec, initialize, SIZE); + EXPECT_EQ(vec.size(), SIZE); + vec.deinitialize(); +} + TEST(Fmap, GVectorPushBack) { constexpr std::uint64_t SIZE = 10; pando::GlobalPtr> gvec; @@ -54,6 +62,25 @@ TEST(Fmap, GVectorPushBack) { pando::deallocateMemory(gvec, 1); } +TEST(Fmap, VectorPushBack) { + constexpr std::uint64_t SIZE = 10; + pando::Vector vec; + PANDO_CHECK(fmap(vec, initialize, 0)); + + for (std::uint64_t i = 0; i < SIZE; i++) { + PANDO_CHECK(fmap(vec, pushBack, i)); + } + + EXPECT_EQ(vec.size(), SIZE); + std::uint64_t i = 0; + for (std::uint64_t v : vec) { + EXPECT_EQ(v, i); + i++; + } + vec.deinitialize(); + EXPECT_EQ(SIZE, i); +} + pando::Vector> generateFullyConnectedGraph(std::uint64_t SIZE) { pando::Vector> vec; EXPECT_EQ(vec.initialize(SIZE), pando::Status::Success); @@ -113,6 +140,29 @@ TEST(FmapVoid, GDistArrayCSR) { liftVoid(*ggraph, deinitialize); } +TEST(FmapVoid, DistArrayCSR) { + constexpr std::uint64_t SIZE = 10; + Graph graph{}; + auto vvec = generateFullyConnectedGraph(SIZE); + PANDO_CHECK(fmap(graph, initialize, vvec)); + PANDO_CHECK(deleteVectorVector(vvec)); + + for (std::uint64_t i = 0; i < SIZE; i++) { + fmapVoid(graph, setData, i, i); + for (std::uint64_t j = 0; j < SIZE; j++) { + fmapVoid(graph, setEdgeData, i, j, i * j); + } + } + + for (std::uint64_t i = 0; i < SIZE; i++) { + EXPECT_EQ(fmap(graph, getData, i), i); + for (std::uint64_t j = 0; j < SIZE; j++) { + EXPECT_EQ(fmap(graph, getEdgeData, i, j), i * j); + } + } + liftVoid(graph, deinitialize); +} + TEST(Lift, GVectorSize) { constexpr std::uint64_t SIZE = 10; pando::GlobalPtr> gvec; @@ -130,6 +180,14 @@ TEST(Lift, GVectorSize) { pando::deallocateMemory(gvec, 1); } +TEST(Lift, VectorSize) { + constexpr std::uint64_t SIZE = 10; + pando::Vector vec; + PANDO_CHECK(fmap(vec, initialize, SIZE)); + EXPECT_EQ(lift(vec, size), SIZE); + vec.deinitialize(); +} + TEST(LiftVoid, GVectorDeinitialize) { constexpr std::uint64_t SIZE = 10; pando::GlobalPtr> gvec; @@ -145,6 +203,14 @@ TEST(LiftVoid, GVectorDeinitialize) { pando::deallocateMemory(gvec, 1); } +TEST(LiftVoid, VectorDeinitialize) { + constexpr std::uint64_t SIZE = 10; + pando::Vector vec; + PANDO_CHECK(fmap(vec, initialize, SIZE)); + EXPECT_EQ(lift(vec, size), SIZE); + liftVoid(vec, deinitialize); +} + TEST(PANDO_EXPECT_RETURN, Success) { auto success = +[]() -> pando::Status { const std::int32_t value = 42; From a77c6fef110dd6ed8bac242114cff472594af9d8 Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Fri, 5 Apr 2024 17:10:18 -0700 Subject: [PATCH 23/28] Fix simplelock test (#26) --- test/sync/test_simple_lock.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/sync/test_simple_lock.cpp b/test/sync/test_simple_lock.cpp index d4f92d9c..a65cb82b 100644 --- a/test/sync/test_simple_lock.cpp +++ b/test/sync/test_simple_lock.cpp @@ -66,6 +66,7 @@ TEST(SimpleLock, ActualLockUnlock) { tpl, hls, +[](decltype(tpl) tpl, pando::GlobalRef) { auto [mutex, array] = tpl; + mutex.lock(); for (int i = 0; i < 10; i++) { if ((i + 1 + pando::getCurrentPlace().node.id) <= 10) { array[i] = i + 1 + pando::getCurrentPlace().node.id; From c762cbaea08a7d47f3d06112ba9a29ad4c4c6a9c Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Fri, 5 Apr 2024 17:29:28 -0700 Subject: [PATCH 24/28] Refactor the reading of vertices for partitioning (#23) * Fix simplelock test * Refactor the reading of vertices for partitioning --- .../import/ingest_wmd_csv.hpp | 14 +++++------ .../import/wmd_graph_importer.hpp | 23 +++++++++++-------- src/ingest_wmd_csv.cpp | 12 +++++----- test/import/test_cusp_importer.cpp | 5 ++-- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/include/pando-lib-galois/import/ingest_wmd_csv.hpp b/include/pando-lib-galois/import/ingest_wmd_csv.hpp index aecfb119..d5144279 100644 --- a/include/pando-lib-galois/import/ingest_wmd_csv.hpp +++ b/include/pando-lib-galois/import/ingest_wmd_csv.hpp @@ -16,9 +16,9 @@ namespace galois { void loadWMDFilePerThread( galois::WaitGroup::HandleType wgh, pando::Array filename, std::uint64_t segmentsPerThread, std::uint64_t numThreads, std::uint64_t threadID, - galois::PerThreadVector> localEdges, - galois::ThreadLocalStorage> perThreadRename, - galois::PerThreadVector localVertices, galois::DAccumulator totVerts); + PerThreadVector> localEdges, + ThreadLocalStorage> perThreadRename, + ThreadLocalVector localReadVertices, galois::DAccumulator totVerts); template pando::Status wmdCSVParse(const char* line, pando::Array tokens, @@ -66,8 +66,8 @@ galois::DistLocalCSR initializeWMDDLCSR(pando::Array galois::PerThreadVector> localEdges; PANDO_CHECK(localEdges.initialize()); - galois::PerThreadVector localVertices; - PANDO_CHECK(localVertices.initialize()); + galois::ThreadLocalVector localReadVertices; + PANDO_CHECK(localReadVertices.initialize()); const std::uint64_t numThreads = localEdges.size() - pando::getPlaceDims().node.id; const std::uint64_t hosts = static_cast(pando::getPlaceDims().node.id); @@ -93,7 +93,7 @@ galois::DistLocalCSR initializeWMDDLCSR(pando::Array pando::Place place = pando::Place{pando::NodeIndex{static_cast(i % hosts)}, pando::anyPod, pando::anyCore}; PANDO_CHECK(pando::executeOn(place, &galois::loadWMDFilePerThread, wgh, filename, 1, numThreads, - i, localEdges, perThreadRename, localVertices, totVerts)); + i, localEdges, perThreadRename, localReadVertices, totVerts)); } pando::GlobalPtr>> labeledEdgeCounts; @@ -132,7 +132,7 @@ galois::DistLocalCSR initializeWMDDLCSR(pando::Array /** Generate Vertex Partition **/ galois::HostIndexedMap> pHV = - internal::partitionVerticesParallel(std::move(localVertices), v2PM); + internal::partitionVerticesParallel(std::move(localReadVertices), v2PM); /** Generate Edge Partition **/ auto [partEdges, renamePerHost] = diff --git a/include/pando-lib-galois/import/wmd_graph_importer.hpp b/include/pando-lib-galois/import/wmd_graph_importer.hpp index b70e5fa0..e437960b 100644 --- a/include/pando-lib-galois/import/wmd_graph_importer.hpp +++ b/include/pando-lib-galois/import/wmd_graph_importer.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -370,11 +371,11 @@ template template [[nodiscard]] galois::HostIndexedMap> partitionVerticesParallel( - galois::PerThreadVector localVertices, pando::Array v2PM) { + galois::ThreadLocalVector&& localReadVertices, pando::Array v2PM) { DistArray>> perThreadVerticesPartition; - PANDO_CHECK(perThreadVerticesPartition.initialize(localVertices.size())); + PANDO_CHECK(perThreadVerticesPartition.initialize(localReadVertices.size())); std::uint64_t numHosts = static_cast(pando::getPlaceDims().node.id); - for (uint64_t i = 0; i < localVertices.size(); i++) { + for (uint64_t i = 0; i < localReadVertices.size(); i++) { PANDO_CHECK(lift(perThreadVerticesPartition[i], initialize)); HostIndexedMap> pVec = perThreadVerticesPartition[i]; for (uint64_t j = 0; j < numHosts; j++) { @@ -382,19 +383,20 @@ template } } + const std::uint64_t numThreads = localReadVertices.size(); HostIndexedMap> numVerticesPerHostPerThread{}; HostIndexedMap> prefixArrPerHostPerThread{}; PANDO_CHECK(numVerticesPerHostPerThread.initialize()); PANDO_CHECK(prefixArrPerHostPerThread.initialize()); for (std::uint64_t i = 0; i < numHosts; i++) { - PANDO_CHECK(fmap(numVerticesPerHostPerThread[i], initialize, lift(localVertices, size))); - PANDO_CHECK(fmap(prefixArrPerHostPerThread[i], initialize, lift(localVertices, size))); + PANDO_CHECK(fmap(numVerticesPerHostPerThread[i], initialize, numThreads)); + PANDO_CHECK(fmap(prefixArrPerHostPerThread[i], initialize, numThreads)); } auto newVec = - make_tpl(perThreadVerticesPartition, localVertices, v2PM, numVerticesPerHostPerThread); + make_tpl(perThreadVerticesPartition, localReadVertices, v2PM, numVerticesPerHostPerThread); galois::doAllEvenlyPartition( - newVec, lift(localVertices, size), +[](decltype(newVec) newVec, uint64_t tid, uint64_t) { + newVec, numThreads, +[](decltype(newVec) newVec, uint64_t tid, uint64_t) { auto [perThreadVerticesPT, localVerticesVec, v2PMap, prefixArr] = newVec; pando::GlobalPtr> localVerticesPtr = localVerticesVec.get(tid); pando::Vector localVertices = *localVerticesPtr; @@ -409,6 +411,7 @@ template *(arr.begin() + tid) = lift(vertVec[i], size); } }); + localReadVertices.deinitialize(); // Compute prefix sum using SRC = galois::Array; @@ -424,7 +427,7 @@ template galois::internal::combiner, galois::Array> prefixSum(arr, prefixArr); PANDO_CHECK(prefixSum.initialize(pando::getPlaceDims().core.x * pando::getPlaceDims().core.y)); - prefixSum.computePrefixSum(lift(localVertices, size)); + prefixSum.computePrefixSum(numThreads); } galois::HostIndexedMap> pHV{}; @@ -432,12 +435,12 @@ template for (uint64_t i = 0; i < numHosts; i++) { galois::Array prefixArr = prefixArrPerHostPerThread[i]; - PANDO_CHECK(fmap(pHV[i], initialize, prefixArr[lift(localVertices, size) - 1])); + PANDO_CHECK(fmap(pHV[i], initialize, prefixArr[numThreads - 1])); } auto phVec = make_tpl(pHV, prefixArrPerHostPerThread, perThreadVerticesPartition); galois::doAllEvenlyPartition( - phVec, lift(localVertices, size), +[](decltype(phVec) phVec, uint64_t threadID, uint64_t) { + phVec, numThreads, +[](decltype(phVec) phVec, uint64_t threadID, uint64_t) { auto [pHV, prefixArrPerHost, PHVertex] = phVec; std::uint64_t numHosts = static_cast(pando::getPlaceDims().node.id); for (uint64_t i = 0; i < numHosts; i++) { diff --git a/src/ingest_wmd_csv.cpp b/src/ingest_wmd_csv.cpp index 69527409..524888fe 100644 --- a/src/ingest_wmd_csv.cpp +++ b/src/ingest_wmd_csv.cpp @@ -7,13 +7,13 @@ auto generateWMDParser( pando::Array tokens, pando::GlobalPtr>> localEdges, pando::GlobalPtr> localRename, - pando::GlobalPtr> localVertices, uint64_t* totVerts) { + pando::GlobalPtr> localReadVertices, uint64_t* totVerts) { using galois::WMDEdge, galois::WMDVertex; using galois::internal::insertLocalEdgesPerThread; - return [localEdges, localRename, localVertices, totVerts, tokens](char* line) { - auto vfunc = [localVertices, totVerts](WMDVertex v) { + return [localEdges, localRename, localReadVertices, totVerts, tokens](char* line) { + auto vfunc = [localReadVertices, totVerts](WMDVertex v) { *totVerts += 1; - return fmap(*localVertices, pushBack, v); + return fmap(*localReadVertices, pushBack, v); }; auto efunc = [localEdges, localRename](WMDEdge e, agile::TYPES inverseEdgeType) { WMDEdge inverseE = e; @@ -33,13 +33,13 @@ void galois::loadWMDFilePerThread( std::uint64_t numThreads, std::uint64_t threadID, galois::PerThreadVector> localEdges, galois::ThreadLocalStorage> perThreadRename, - galois::PerThreadVector localVertices, + galois::ThreadLocalVector localReadVertices, galois::DAccumulator totVerts) { std::uint64_t countLocalVertices = 0; pando::Array tokens; PANDO_CHECK(tokens.initialize(10)); auto parser = generateWMDParser(tokens, &localEdges.getThreadVector(), perThreadRename.getLocal(), - &localVertices.getThreadVector(), &countLocalVertices); + localReadVertices.getLocal(), &countLocalVertices); PANDO_CHECK( internal::loadGraphFilePerThread(filename, segmentsPerThread, numThreads, threadID, parser)); diff --git a/test/import/test_cusp_importer.cpp b/test/import/test_cusp_importer.cpp index e264eb83..94a2aa7f 100644 --- a/test/import/test_cusp_importer.cpp +++ b/test/import/test_cusp_importer.cpp @@ -672,7 +672,7 @@ TEST(loadGraphFilePerThread, loadGraph) { uint64_t segmentsPerThread = 1; galois::PerThreadVector> localEdges; EXPECT_EQ(localEdges.initialize(), pando::Status::Success); - galois::PerThreadVector localVertices; + galois::ThreadLocalVector localVertices; EXPECT_EQ(localVertices.initialize(), pando::Status::Success); pando::Array filename; std::string wmdFile = "/pando/graphs/simple_wmd.csv"; @@ -723,7 +723,7 @@ TEST(loadGraphFilePerThread, loadGraph) { getNumVerticesAndEdges(wmdFile, numVertices, numEdges); uint64_t vert = 0; for (uint64_t i = 0; i < localVertices.size(); i++) { - pando::Vector vec = *localVertices.get(i); + pando::Vector vec = localVertices[i]; vert += vec.size(); } uint64_t edges = 0; @@ -736,6 +736,7 @@ TEST(loadGraphFilePerThread, loadGraph) { EXPECT_EQ(vert, numVertices); EXPECT_EQ(edges, 2 * numEdges); totVerts.deinitialize(); + localVertices.deinitialize(); } TEST(loadGraphFilePerThread, loadEdgeList) { From b9a14d1ca021447a7a83cb81cdedf4ca148c2c12 Mon Sep 17 00:00:00 2001 From: jeageun Date: Sat, 6 Apr 2024 01:08:06 +0000 Subject: [PATCH 25/28] rebase --- include/pando-lib-galois/graphs/dist_local_csr.hpp | 3 +-- include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/pando-lib-galois/graphs/dist_local_csr.hpp b/include/pando-lib-galois/graphs/dist_local_csr.hpp index f4e76ff8..9802ecb1 100644 --- a/include/pando-lib-galois/graphs/dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/dist_local_csr.hpp @@ -490,7 +490,7 @@ class DistLocalCSR { /** Host Information **/ std::uint64_t getPhysicalHostID(VertexTokenID tid) { std::uint64_t virtualHostID = tid % this->numVHosts(); - std::uint64_t physicalHost = virtualToPhysicalMap.getLocal()[virtualHostID]; + std::uint64_t physicalHost = fmap(virtualToPhysicalMap.getLocalRef(), get, virtualHostID); return physicalHost; } @@ -777,7 +777,6 @@ class DistLocalCSR { PANDO_CHECK(pando::executeOn(place, createMirrors, partEdges, mirrorList, V2PM, i, wgh)); } PANDO_CHECK(wg.wait()); - wg.deinitialize(); return mirrorList; } diff --git a/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp b/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp index 4c86db1d..d1216922 100644 --- a/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp +++ b/include/pando-lib-galois/graphs/mirror_dist_local_csr.hpp @@ -390,7 +390,6 @@ class MirrorDistLocalCSR { numVertices += lift(mirrorList[i], size); } PANDO_CHECK(wg.wait()); - wg.deinitialize(); return pando::Status::Success; } From c6cdf2aa1ed1ac20b8b2e36d2fcccb7157b84276 Mon Sep 17 00:00:00 2001 From: jeageun Date: Sat, 6 Apr 2024 01:08:40 +0000 Subject: [PATCH 26/28] rebase --- test/import/test_wmd_importer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/import/test_wmd_importer.cpp b/test/import/test_wmd_importer.cpp index 9a5d80e2..4f7085d7 100644 --- a/test/import/test_wmd_importer.cpp +++ b/test/import/test_wmd_importer.cpp @@ -283,6 +283,7 @@ TEST_P(MirrorDLCSRInitEdgeList, initializeEL) { const std::string elFile = std::get<0>(GetParam()); const std::uint64_t numVertices = std::get<1>(GetParam()); + pando::Array filename; EXPECT_EQ(pando::Status::Success, filename.initialize(elFile.size())); for (uint64_t i = 0; i < elFile.size(); i++) From d187163254537eab53bc43827e2594631008abbf Mon Sep 17 00:00:00 2001 From: jeageun Date: Sat, 6 Apr 2024 02:16:41 +0000 Subject: [PATCH 27/28] update test --- test/test_mirror_master_table.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_mirror_master_table.cpp b/test/test_mirror_master_table.cpp index c3f51ba6..8321db50 100644 --- a/test/test_mirror_master_table.cpp +++ b/test/test_mirror_master_table.cpp @@ -77,10 +77,9 @@ void runTest(const char* elFile, std::uint64_t numVertices) { } filename[length] = '\0'; // Ensure the string is null-terminated - Graph graph = - galois::initializeELDLCSR(filename, numVertices); - if (pando::getCurrentPlace().node.id == 0) { + Graph graph = + galois::initializeELDLCSR(filename, numVertices); // Iterate over vertices std::uint64_t vid = 0; auto mirror_master_array = graph.getLocalMirrorToRemoteMasterOrderedTable(); @@ -112,7 +111,7 @@ void runTest(const char* elFile, std::uint64_t numVertices) { } } } + graph.deinitialize(); } pando::waitAll(); - graph.deinitialize(); } From efaf15f9f4219405af7534d57f7698024748734e Mon Sep 17 00:00:00 2001 From: jeageun Date: Sat, 6 Apr 2024 04:14:56 +0000 Subject: [PATCH 28/28] Bug fix because of PodLocalStorageHeap initialization --- test/test_mirror_master_table.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_mirror_master_table.cpp b/test/test_mirror_master_table.cpp index 8321db50..0e0c2f12 100644 --- a/test/test_mirror_master_table.cpp +++ b/test/test_mirror_master_table.cpp @@ -68,10 +68,10 @@ void runTest(const char* elFile, std::uint64_t numVertices) { using VT = galois::ELVertex; using Graph = galois::MirrorDistLocalCSR; galois::HostLocalStorageHeap::HeapInit(); + galois::PodLocalStorageHeap::HeapInit(); pando::Array filename; std::size_t length = strlen(elFile); PANDO_CHECK(filename.initialize(length + 1)); - for (std::size_t i = 0; i < length; i++) { filename[i] = elFile[i]; }