diff --git a/pando-rt/include/pando-rt/execution/execute_on_impl.hpp b/pando-rt/include/pando-rt/execution/execute_on_impl.hpp index 950e23a9..ebd60f74 100644 --- a/pando-rt/include/pando-rt/execution/execute_on_impl.hpp +++ b/pando-rt/include/pando-rt/execution/execute_on_impl.hpp @@ -8,6 +8,11 @@ #include "../status.hpp" #include "export.h" #include "task.hpp" +#include +#include + +extern counter::Record perCoreRNG; +extern counter::Record> perCoreDist; namespace pando { diff --git a/pando-rt/src/execute_on.cpp b/pando-rt/src/execute_on.cpp index 1340c227..6dff27b6 100644 --- a/pando-rt/src/execute_on.cpp +++ b/pando-rt/src/execute_on.cpp @@ -30,8 +30,10 @@ Status detail::executeOn(Place place, Task task) { const auto& coreDims = getCoreDims(); if (place.core == anyCore) { + auto coreIdx = perCoreDist.getLocal()(perCoreRNG.getLocal()); + assert(coreIdx < pando::getCoreDims().x); // anyCore: get scheduler core queue - const CoreIndex schedulerCoreIndex(coreDims.x, 0); + const CoreIndex schedulerCoreIndex(coreIdx, 0); place.core = schedulerCoreIndex; } else { if ((place.core < CoreIndex{0, 0}) || diff --git a/pando-rt/src/start.cpp b/pando-rt/src/start.cpp index 28dee371..028c6ee8 100644 --- a/pando-rt/src/start.cpp +++ b/pando-rt/src/start.cpp @@ -22,7 +22,7 @@ #include "drvx/drvx.hpp" #endif -constexpr std::uint64_t STEAL_THRESH_HOLD_SIZE = 32; +constexpr std::uint64_t STEAL_THRESH_HOLD_SIZE = 16; constexpr bool IDLE_TIMER_ENABLE = false; counter::Record idleCount = counter::Record(); @@ -80,7 +80,7 @@ extern "C" int __start(int argc, char** argv) { break; case SchedulerFailState::STEAL: - for(std::int8_t i = 0; i <= coreDims.x && !task.has_value(); i++) { + for(std::int8_t i = 0; i < coreDims.x && !task.has_value(); i++) { auto* otherQueue = pando::Cores::getTaskQueue(pando::Place{thisPlace.node, thisPlace.pod, pando::CoreIndex(i, 0)}); if(!otherQueue || otherQueue == queue) {continue;} if(otherQueue->getApproxSize() > STEAL_THRESH_HOLD_SIZE) { diff --git a/pando-rt/test/execution/test_bulk_execute_on.cpp b/pando-rt/test/execution/test_bulk_execute_on.cpp index 722a49ec..99faf78b 100644 --- a/pando-rt/test/execution/test_bulk_execute_on.cpp +++ b/pando-rt/test/execution/test_bulk_execute_on.cpp @@ -15,25 +15,25 @@ namespace { -pando::NodeSpecificStorage counter; +pando::NodeSpecificStorage nodeCounter; struct IncrementCounter { void operator()(std::int64_t val) { - pando::atomicIncrement(counter.getPointer(), val, std::memory_order_relaxed); + pando::atomicIncrement(nodeCounter.getPointer(), val, std::memory_order_relaxed); } }; struct IncrementCounters { void operator()(std::int64_t x, std::int64_t y) { - pando::atomicIncrement(&counter, x, std::memory_order_relaxed); - pando::atomicIncrement(&counter, y, std::memory_order_relaxed); + pando::atomicIncrement(&nodeCounter, x, std::memory_order_relaxed); + pando::atomicIncrement(&nodeCounter, y, std::memory_order_relaxed); } }; } // namespace TEST(BulkExecuteOn, ThisNode) { - counter = 0; + nodeCounter = 0; const auto& thisPlace = pando::getCurrentPlace(); const pando::Place place{thisPlace.node, pando::PodIndex{0, 0}, pando::CoreIndex{0, 0}}; @@ -43,42 +43,42 @@ TEST(BulkExecuteOn, ThisNode) { pando::Status::Success); pando::waitUntil([&] { - return pando::atomicLoad(&counter, std::memory_order_relaxed) == 10; + return pando::atomicLoad(&nodeCounter, std::memory_order_relaxed) == 10; }); - EXPECT_EQ(pando::atomicLoad(&counter, std::memory_order_relaxed), 10); + EXPECT_EQ(pando::atomicLoad(&nodeCounter, std::memory_order_relaxed), 10); } TEST(BulkExecuteOn, ThisNodeAnyPod) { - counter = 0; + nodeCounter = 0; EXPECT_EQ(pando::bulkExecuteOn(pando::anyPod, IncrementCounter{}, std::make_tuple(1), std::make_tuple(2), std::make_tuple(3), std::make_tuple(4)), pando::Status::Success); pando::waitUntil([&] { - return pando::atomicLoad(&counter, std::memory_order_relaxed) == 10; + return pando::atomicLoad(&nodeCounter, std::memory_order_relaxed) == 10; }); - EXPECT_EQ(pando::atomicLoad(&counter, std::memory_order_relaxed), 10); + EXPECT_EQ(pando::atomicLoad(&nodeCounter, std::memory_order_relaxed), 10); } TEST(BulkExecuteOn, ThisNodeAnyCore) { - counter = 0; + nodeCounter = 0; EXPECT_EQ(pando::bulkExecuteOn(pando::anyCore, IncrementCounter{}, std::make_tuple(1), std::make_tuple(2), std::make_tuple(3), std::make_tuple(4)), pando::Status::Success); pando::waitUntil([&] { - return pando::atomicLoad(&counter, std::memory_order_relaxed) == 10; + return pando::atomicLoad(&nodeCounter, std::memory_order_relaxed) == 10; }); - EXPECT_EQ(pando::atomicLoad(&counter, std::memory_order_relaxed), 10); + EXPECT_EQ(pando::atomicLoad(&nodeCounter, std::memory_order_relaxed), 10); } TEST(BulkExecuteOn, ThisNodeMultipleArgs) { - counter = 0; + nodeCounter = 0; const auto& thisPlace = pando::getCurrentPlace(); const pando::Place place{thisPlace.node, pando::PodIndex{0, 0}, pando::CoreIndex{0, 0}}; @@ -89,8 +89,8 @@ TEST(BulkExecuteOn, ThisNodeMultipleArgs) { pando::Status::Success); pando::waitUntil([&] { - return pando::atomicLoad(&counter, std::memory_order_relaxed) == 20; + return pando::atomicLoad(&nodeCounter, std::memory_order_relaxed) == 20; }); - EXPECT_EQ(pando::atomicLoad(&counter, std::memory_order_relaxed), 20); + EXPECT_EQ(pando::atomicLoad(&nodeCounter, std::memory_order_relaxed), 20); }