diff --git a/include/pando-lib-galois/loops/do_all.hpp b/include/pando-lib-galois/loops/do_all.hpp index 7d1920c7..6993e654 100644 --- a/include/pando-lib-galois/loops/do_all.hpp +++ b/include/pando-lib-galois/loops/do_all.hpp @@ -17,8 +17,11 @@ #include #include +constexpr bool SCHEDULER_TIMER_ENABLE = false; + extern counter::Record perCoreRNG; extern counter::Record> perCoreDist; +extern counter::Record schedulerCount; namespace galois { @@ -47,8 +50,12 @@ enum SchedulerPolicy { RANDOM, UNSAFE_STRIPE, CORE_STRIPE, + NODE_ONLY, }; +constexpr SchedulerPolicy CURRENT_SCHEDULER_POLICY = SchedulerPolicy::RANDOM; +constexpr SchedulerPolicy EVENLY_PARITION_SCHEDULER_POLICY = SchedulerPolicy::CORE_STRIPE; + template struct LoopLocalSchedulerStruct {}; @@ -65,25 +72,31 @@ struct LoopLocalSchedulerStruct { template pando::Place schedulerImpl(pando::Place preferredLocality, [[maybe_unused]] LoopLocalSchedulerStruct& loopLocal) noexcept { + counter::HighResolutionCount schedulerTimer; + schedulerTimer.start(); if constexpr (Policy == RANDOM) { auto coreIdx = perCoreDist.getLocal()(perCoreRNG.getLocal()); assert(coreIdx < pando::getCoreDims().x); - return pando::Place(preferredLocality.node, pando::anyPod, pando::CoreIndex(coreIdx, 0)); + preferredLocality = + pando::Place(preferredLocality.node, pando::anyPod, pando::CoreIndex(coreIdx, 0)); } else if constexpr (Policy == UNSAFE_STRIPE) { auto threadIdx = ++loopLocal.lastThreadIdx; threadIdx %= getNumThreads(); - return std::get<0>(getPlaceFromThreadIdx(threadIdx)); + preferredLocality = std::get<0>(getPlaceFromThreadIdx(threadIdx)); } else if constexpr (Policy == CORE_STRIPE) { auto coreIdx = ++loopLocal.lastCoreIdx; coreIdx %= pando::getCoreDims().x; - return pando::Place(preferredLocality.node, pando::anyPod, pando::CoreIndex(coreIdx, 0)); + preferredLocality = + pando::Place(preferredLocality.node, pando::anyPod, pando::CoreIndex(coreIdx, 0)); + } else if constexpr (Policy == NODE_ONLY) { + preferredLocality = pando::Place(preferredLocality.node, pando::anyPod, pando::anyCore); } else { PANDO_ABORT("SCHEDULER POLICY NOT IMPLEMENTED"); } + counter::recordHighResolutionEvent(schedulerCount, schedulerTimer); + return preferredLocality; } -constexpr SchedulerPolicy CURRENT_SCHEDULER_POLICY = SchedulerPolicy::RANDOM; - inline pando::Place scheduler(pando::Place preferredLocality, LoopLocalSchedulerStruct& loopLocal) { return schedulerImpl(preferredLocality, loopLocal); @@ -357,7 +370,6 @@ class DoAll { template static pando::Status doAllEvenlyPartition(WaitGroup::HandleType wgh, State s, uint64_t workItems, const F& func) { - constexpr SchedulerPolicy EVENLY_PARITION_SCHEDULER_POLICY = SchedulerPolicy::CORE_STRIPE; LoopLocalSchedulerStruct loopLocal; pando::Status err = pando::Status::Success; if (workItems == 0) { diff --git a/pando-rt/src/init.cpp b/pando-rt/src/init.cpp index bc2e0969..5ddee8cf 100644 --- a/pando-rt/src/init.cpp +++ b/pando-rt/src/init.cpp @@ -34,6 +34,7 @@ counter::Record perCoreRNG; counter::Record> perCoreDist; +counter::Record schedulerCount = counter::Record(); namespace pando { @@ -211,6 +212,10 @@ int main(int argc, char* argv[]) { thisPlace.node.id, std::int8_t((i == std::uint64_t(dims.core.x + 1)) ? -1 : i), pointerCount.get(i)); + SPDLOG_WARN("Scheduler time on node: {}, core: {} was {}", + thisPlace.node.id, + std::int8_t((i == std::uint64_t(dims.core.x + 1)) ? -1 : i), + schedulerCount.get(i)); }