From b46be6ef160d276401cb11f1445b22c86514a229 Mon Sep 17 00:00:00 2001 From: AdityaAtulTewari Date: Mon, 13 May 2024 21:53:18 +0000 Subject: [PATCH] Fixed affinity and added idle counters --- /* Reviewer notes: this patch wraps the benchmark counters in namespace counter, adds an empty HighResolutionCount specialization plus no-op recordHighResolutionEvent overloads, logs getrusage CPU time and per-core idle counts from main(), exports QT_CPUBIND built from the process affinity mask, and times idle spans in the __start scheduler loop. Angle-bracket template and include arguments are missing from this copy of the patch. Change made in review: the QT_CPUBIND loop in cores.cpp now advances coreNum, which was previously never incremented, so every entry named the same CPU. */ .../include/pando-rt/benchmark/counters.hpp | 125 +++++++++++------- pando-rt/src/init.cpp | 27 ++++ pando-rt/src/init.hpp | 3 + pando-rt/src/prep/cores.cpp | 22 +++ pando-rt/src/start.cpp | 13 ++ 5 files changed, 145 insertions(+), 45 deletions(-) diff --git a/pando-rt/include/pando-rt/benchmark/counters.hpp b/pando-rt/include/pando-rt/benchmark/counters.hpp index 1b079078..cf2f2b06 100644 --- a/pando-rt/include/pando-rt/benchmark/counters.hpp +++ b/pando-rt/include/pando-rt/benchmark/counters.hpp @@ -10,73 +10,108 @@ #include +#define UNUSED(x) ((void)(x)) -template -struct Record { - std::array counts; +namespace counter { - constexpr Record() { - for(auto& count : counts) { - count = T(); + template + struct Record { + std::array counts; + + constexpr Record() { + for(auto& count : counts) { + count = T(); + } } - } - Record(Record&) = delete; - Record& operator=(Record&) = delete; - Record(Record&&) = default; - Record& operator=(Record&&) = default; + Record(Record&) = delete; + Record& operator=(Record&) = delete; + Record(Record&&) = default; + Record& operator=(Record&&) = default; - void reset () { - for(auto& count : counts) { - count = T(); + void reset () { + for(auto& count : counts) { + count = T(); + } } - } - template - void record(A val, F func, bool isOnCP, - decltype(pando::getCurrentPlace().core.x) corex, - decltype(pando::getCurrentPlace().core.x) coreDims) { - std::uint64_t idx = isOnCP ? coreDims + 1 : corex; - counts[idx] += func(val); - } + /* Adds func(val) to one slot of counts: slot coreDims + 1 when isOnCP is true, otherwise slot corex. */ template + void record(A val, F func, bool isOnCP, + decltype(pando::getCurrentPlace().core.x) corex, + decltype(pando::getCurrentPlace().core.x) coreDims) { + std::uint64_t idx = isOnCP ?
coreDims + 1 : corex; + counts[idx] += func(val); + } - template - void record(A val, F func) { - auto thisPlace = pando::getCurrentPlace(); - auto coreDims = pando::getCoreDims(); - record(val, func, pando::isOnCP(), thisPlace.core.x, coreDims.x); - } + template + void record(A val, F func) { + auto thisPlace = pando::getCurrentPlace(); + auto coreDims = pando::getCoreDims(); + record(val, func, pando::isOnCP(), thisPlace.core.x, coreDims.x); + } - T& get(std::uint64_t i) { - return counts[i]; - } -}; + T& get(std::uint64_t i) { + return counts[i]; + } + }; -struct HighResolutionCount { - std::chrono::time_point begin; + template + struct HighResolutionCount; - inline void start() { - begin = std::chrono::high_resolution_clock::now(); - } + template<> + struct HighResolutionCount { + std::chrono::time_point begin; - inline std::chrono::nanoseconds stop() const noexcept{ - auto stop = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(stop - begin); - } + inline void start() { + begin = std::chrono::high_resolution_clock::now(); + } + + inline std::chrono::nanoseconds stop() const noexcept{ + auto stop = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(stop - begin); + } + + }; + + /* Specialization whose start() does nothing and whose stop() returns a zero duration. */ template<> + struct HighResolutionCount { + inline void start() { + } + + inline std::chrono::nanoseconds stop() const noexcept{ + return std::chrono::nanoseconds(); + } + }; inline static void recordHighResolutionEvent(Record& r, - HighResolutionCount c, bool isOnCP, + HighResolutionCount c, bool isOnCP, decltype(pando::getCurrentPlace().core.x) corex, decltype(pando::getCurrentPlace().core.x) coreDims) { - r.record(c, [](const HighResolutionCount& c) { + r.record(c, [](const HighResolutionCount& c) { return c.stop().count(); }, isOnCP, corex, coreDims); } - inline static void
recordHighResolutionEvent(Record& r, HighResolutionCount c) { - r.record(c, [](const HighResolutionCount& c){ + inline static void recordHighResolutionEvent(Record& r, + HighResolutionCount c, bool isOnCP, + decltype(pando::getCurrentPlace().core.x) corex, + decltype(pando::getCurrentPlace().core.x) coreDims) { + UNUSED(r); + UNUSED(c); + UNUSED(isOnCP); + UNUSED(corex); + UNUSED(coreDims); + } + + inline static void recordHighResolutionEvent(Record& r, HighResolutionCount c) { + r.record(c, [](const HighResolutionCount& c){ return c.stop().count(); }); } + + inline static void recordHighResolutionEvent(Record& r, HighResolutionCount c) { + UNUSED(r); + UNUSED(c); + } }; #endif diff --git a/pando-rt/src/init.cpp b/pando-rt/src/init.cpp index 0f1a2f21..05361367 100644 --- a/pando-rt/src/init.cpp +++ b/pando-rt/src/init.cpp @@ -4,11 +4,15 @@ #include "init.hpp" +#include +#include + #include "memory_resources.hpp" #include "pando-rt/locality.hpp" #include "pando-rt/stdlib.hpp" #include "specific_storage.hpp" #include "start.hpp" +#include #if defined(PANDO_RT_USE_BACKEND_PREP) #include "prep/config.hpp" @@ -153,6 +157,12 @@ const bool initLogger = [] { // PREP entry point int main(int argc, char* argv[]) { + + struct rusage start, end; + int rc; + rc = getrusage(RUSAGE_SELF, &start); + if(rc != 0) {PANDO_ABORT("GETRUSAGE FAILED");} + // initialize machine state (e.g., number of harts/cores/PXNs and memory sizes etc) if (auto status = pando::powerOn(argc, argv); status != pando::Status::Success) { PANDO_ABORT("PREP initialization failed"); @@ -162,6 +172,23 @@ int main(int argc, char* argv[]) { pando::powerOff(); + rc = getrusage(RUSAGE_SELF, &end); + if(rc != 0) {PANDO_ABORT("GETRUSAGE FAILED");} + auto thisPlace = pando::getCurrentPlace(); + /* Logs user plus system CPU time consumed by this process between the two getrusage calls, converted to nanoseconds. */ SPDLOG_INFO("Total time on node: {}, was {}ns", + thisPlace.node.id, + end.ru_utime.tv_sec * 1000000000 + end.ru_utime.tv_usec * 1000 - + (start.ru_utime.tv_sec * 1000000000 + start.ru_utime.tv_usec * 1000) + + end.ru_stime.tv_sec * 1000000000 + end.ru_stime.tv_usec * 1000 - + (start.ru_stime.tv_sec * 1000000000 + start.ru_stime.tv_usec * 1000)); + auto
dims = pando::getPlaceDims(); + for(std::uint64_t i = 0; i < std::uint64_t(dims.core.x + 2); i++) { + SPDLOG_INFO("Idle time on node: {}, core: {} was {}", + thisPlace.node.id, + std::int8_t((i == std::uint64_t(dims.core.x + 1)) ? -1 : i), + idleCount.get(i)); + } + return result; } diff --git a/pando-rt/src/init.hpp b/pando-rt/src/init.hpp index fd0d0043..65dad7e3 100644 --- a/pando-rt/src/init.hpp +++ b/pando-rt/src/init.hpp @@ -3,6 +3,9 @@ #ifndef PANDO_RT_SRC_INIT_HPP_ #define PANDO_RT_SRC_INIT_HPP_ +#include + +extern counter::Record idleCount; namespace pando { diff --git a/pando-rt/src/prep/cores.cpp b/pando-rt/src/prep/cores.cpp index 90204dd3..ff782f76 100644 --- a/pando-rt/src/prep/cores.cpp +++ b/pando-rt/src/prep/cores.cpp @@ -295,6 +295,28 @@ Status Cores::initialize(int (*entry)(int, char**), int argc, char* argv[]) { setenv("QTHREAD_NUM_SHEPHERDS", shepherdCount.c_str(), 0); } + { + /* Builds QT_CPUBIND as coreCount + 1 colon-separated CPU ids drawn from this process's affinity mask. setenv is called with overwrite 0, so a QT_CPUBIND already present in the environment is kept. */ std::string shepherdBinding = ""; + std::vector availableCores; + + cpu_set_t mask; + if (sched_getaffinity(0, sizeof(cpu_set_t), &mask) == -1) { + PANDO_ABORT("FAILED TO GET SCHEDULER AFFINITY"); + } + + /* NOTE(review): _SC_NPROCESSORS_ONLN is a count of online CPUs, not the highest CPU id; if the mask only holds ids at or above nproc, availableCores stays empty and the modulo below divides by zero - confirm or guard. */ long nproc = sysconf(_SC_NPROCESSORS_ONLN); + for (long i = 0; i < nproc; i++) { + if(CPU_ISSET(i, &mask)) availableCores.push_back(i); + } + + /* Round-robin cursor into availableCores: advanced once per entry so consecutive entries name distinct CPUs, wrapping when there are more entries than CPUs. */ std::uint64_t coreNum = 0; + for(std::uint64_t i = 0; i < config.compute.coreCount + 1; i++) { + if(i != 0) shepherdBinding += ":"; + shepherdBinding += fmt::format("{}", availableCores[coreNum++ % availableCores.size()]); + } + setenv("QT_CPUBIND", shepherdBinding.c_str(), 0); + } + // initialize qthread library if (auto status = qthread_initialize(); status != 0) { SPDLOG_ERROR("Error initializing qthreads: {}", status); diff --git a/pando-rt/src/start.cpp b/pando-rt/src/start.cpp index 7f1220d0..13b975ff 100644 --- a/pando-rt/src/start.cpp +++ b/pando-rt/src/start.cpp @@ -12,6 +12,7 @@ #include "pando-rt/status.hpp" #include "pando-rt/stdlib.hpp" #include "pando-rt/pando-rt.hpp" +#include #ifdef PANDO_RT_USE_BACKEND_PREP
#include "prep/cores.hpp" @@ -23,6 +24,9 @@ constexpr std::uint64_t STEAL_THRESH_HOLD_SIZE = 4096; +/* NOTE(review): presumably this flag selects the no-op counter variants (the template arguments are missing from this copy); if so, the idle counts logged by main() stay zero while it is false - confirm. */ constexpr bool IDLE_TIMER_ENABLE = false; +counter::Record idleCount = counter::Record(); + enum SchedulerFailState{ YIELD, STEAL, @@ -35,6 +39,8 @@ extern "C" int __start(int argc, char** argv) { pando::initialize(); + counter::HighResolutionCount idleTimer; + if (pando::isOnCP()) { // invokes user's main function (pandoMain) result = pandoMain(argc, argv); @@ -54,16 +60,20 @@ extern "C" int __start(int argc, char** argv) { SchedulerFailState failState = SchedulerFailState::YIELD; do { + idleTimer.start(); task = queue->tryDequeue(ctok); if (!task.has_value()) { switch(failState) { case SchedulerFailState::YIELD: #ifdef PANDO_RT_USE_BACKEND_PREP + counter::recordHighResolutionEvent(idleCount, idleTimer, false, thisPlace.core.x, coreDims.x); pando::hartYield(); //In Drvx hart yielding is a 1000 cycle wait which is too much + idleTimer.start(); #endif failState = SchedulerFailState::STEAL; break; + case SchedulerFailState::STEAL: for(std::int8_t i = 0; i <= coreDims.x && !task.has_value(); i++) { auto* otherQueue = pando::Cores::getTaskQueue(pando::Place{thisPlace.node, thisPlace.pod, pando::CoreIndex(i, 0)}); @@ -77,6 +87,9 @@ extern "C" int __start(int argc, char** argv) { } } if(task.has_value()) { (*task)(); task = std::nullopt; } + else { + counter::recordHighResolutionEvent(idleCount, idleTimer, false, thisPlace.core.x, coreDims.x); + } } while (*coreActive == true); } else if (thisPlace.core.x == coreDims.x) { // scheduler