From 59fd363aaac6d2b28940447542b89e61c902a1f3 Mon Sep 17 00:00:00 2001 From: ywwu928 <44017355+ywwu928@users.noreply.github.com> Date: Wed, 26 Jun 2024 09:27:57 -0500 Subject: [PATCH] Fix to guarantee Bypass mode is more accurate (#123) * add barriers before and after bypass for accuracy * move bypass flag to DrvAPISysControlVariable --- pando-drv/api/DrvAPIInfo.hpp | 10 ++++++++++ pando-drv/api/DrvAPISysConfig.hpp | 14 ++++++++++++++ pando-rt/include/pando-rt/drv_info.hpp | 3 --- pando-rt/include/pando-rt/sync/wait.hpp | 11 ++++++++++- pando-rt/src/drv_info.cpp | 9 ++++++--- pando-rt/src/wait.cpp | 24 +++++++++++++++++------- 6 files changed, 57 insertions(+), 14 deletions(-) diff --git a/pando-drv/api/DrvAPIInfo.hpp b/pando-drv/api/DrvAPIInfo.hpp index ea3df7f4..bc0e395c 100644 --- a/pando-drv/api/DrvAPIInfo.hpp +++ b/pando-drv/api/DrvAPIInfo.hpp @@ -279,6 +279,16 @@ inline int64_t getCoreHartsDone(int64_t pxn_id, int8_t pod_id, int8_t core_id) { return DrvAPISysConfig::Get()->getCoreHartsDone(pxn_id, pod_id, core_id); } +inline void clearGlobalBypassFlag() { + return DrvAPISysConfig::Get()->clearGlobalBypassFlag(); +} +inline void setGlobalBypassFlag() { + return DrvAPISysConfig::Get()->setGlobalBypassFlag(); +} +inline bool getGlobalBypassFlag() { + return DrvAPISysConfig::Get()->getGlobalBypassFlag(); +} + } // namespace DrvAPI #endif diff --git a/pando-drv/api/DrvAPISysConfig.hpp b/pando-drv/api/DrvAPISysConfig.hpp index 572fe3bd..9ac13ee6 100644 --- a/pando-drv/api/DrvAPISysConfig.hpp +++ b/pando-drv/api/DrvAPISysConfig.hpp @@ -43,6 +43,8 @@ struct DrvAPISysControl std::vector> pod_cores_finalized_; //!< number of cores finalized per pod std::vector>> core_state_; //!< state for each core std::vector>> core_harts_done_; //!< number of harts done per core + + int8_t global_bypass_flag_; //!< global flag to determine whether to bypass or not }; @@ -62,6 +64,7 @@ class DrvAPISysConfig control_.pod_cores_finalized_.resize(data.num_pxn_); 
control_.core_state_.resize(data.num_pxn_); control_.core_harts_done_.resize(data.num_pxn_); + control_.global_bypass_flag_ = 0; for (int i = 0; i < data_.num_pxn_; i++) { control_.pxn_cores_initialized_[i] = 0; control_.pxn_barrier_exit_[i] = 0; @@ -185,6 +188,17 @@ class DrvAPISysConfig return __atomic_load_n(&(control_.core_harts_done_.at(pxn_id).at(pod_id).at(core_id)), static_cast(std::memory_order_relaxed)); } + void clearGlobalBypassFlag() { + __atomic_store_n(&(control_.global_bypass_flag_), 0, static_cast(std::memory_order_relaxed)); + } + void setGlobalBypassFlag() { + __atomic_store_n(&(control_.global_bypass_flag_), 1, static_cast(std::memory_order_relaxed)); + } + bool getGlobalBypassFlag() { + int8_t value = __atomic_load_n(&(control_.global_bypass_flag_), static_cast(std::memory_order_relaxed)); + return value == 1; + } + static DrvAPISysConfig *Get() { return &sysconfig; } static DrvAPISysConfig sysconfig; private: diff --git a/pando-rt/include/pando-rt/drv_info.hpp b/pando-rt/include/pando-rt/drv_info.hpp index 22f0e8a3..68f50122 100644 --- a/pando-rt/include/pando-rt/drv_info.hpp +++ b/pando-rt/include/pando-rt/drv_info.hpp @@ -5,8 +5,6 @@ #define PANDO_RT_DRV_INFO_HPP_ #ifdef PANDO_RT_USE_BACKEND_DRVX -#include "DrvAPIMemory.hpp" -#include "DrvAPIThread.hpp" namespace DrvAPI { void setStageInit(); @@ -23,7 +21,6 @@ bool isStageInit(); #define PANDO_DRV_SET_STAGE_OTHER() {DrvAPI::setStageOther();} #define PANDO_DRV_INCREMENT_PHASE() {DrvAPI::incrementPhase();} -extern bool bypass_flag; void setBypassFlag(); void clearBypassFlag(); bool getBypassFlag(); diff --git a/pando-rt/include/pando-rt/sync/wait.hpp b/pando-rt/include/pando-rt/sync/wait.hpp index d0bf50c4..c42ca3a3 100644 --- a/pando-rt/include/pando-rt/sync/wait.hpp +++ b/pando-rt/include/pando-rt/sync/wait.hpp @@ -83,8 +83,17 @@ void monitorUntilNot(GlobalPtr ptr, T value) { #endif } +#ifdef PANDO_RT_USE_BACKEND_DRVX /** - * @brief Waits for all tasks to finish executing. 
+ * @brief A specific node waits for all tasks to finish executing. + * + * @ingroup ROOT + */ +PANDO_RT_EXPORT void waitAllTasks(); +#endif + +/** + * @brief All nodes wait for all tasks to finish executing. * * @note This is a collective operation and needs to be called by all nodes. * diff --git a/pando-rt/src/drv_info.cpp b/pando-rt/src/drv_info.cpp index 33ec5e4e..ed54efbd 100644 --- a/pando-rt/src/drv_info.cpp +++ b/pando-rt/src/drv_info.cpp @@ -4,6 +4,7 @@ #ifdef PANDO_RT_USE_BACKEND_DRVX #include "pando-rt/drv_info.hpp" #include +#include "drvx/drvx.hpp" namespace DrvAPI { @@ -137,12 +138,14 @@ void incrementPhase() { bool bypass_flag = false; void setBypassFlag() { - bypass_flag = true; + pando::waitAllTasks(); + DrvAPI::setGlobalBypassFlag(); } void clearBypassFlag() { - bypass_flag = false; + pando::waitAllTasks(); + DrvAPI::clearGlobalBypassFlag(); } bool getBypassFlag() { - return bypass_flag; + return DrvAPI::getGlobalBypassFlag(); } #endif // PANDO_RT_USE_BACKEND_DRVX \ No newline at end of file diff --git a/pando-rt/src/wait.cpp b/pando-rt/src/wait.cpp index 11a49e5a..56651d2e 100644 --- a/pando-rt/src/wait.cpp +++ b/pando-rt/src/wait.cpp @@ -45,6 +45,22 @@ void waitUntil(const Function& f) { #endif } +#ifdef PANDO_RT_USE_BACKEND_DRVX +void waitAllTasks() { + if (!isOnCP()) { + PANDO_ABORT("Can only be called from the CP"); + } + + for (std::int64_t i = 0; i < Drvx::getNodeDims().id; i++) { + for (std::int64_t j = 0; j < Drvx::getPodDims().x; j++) { + while (DrvAPI::getPodTasksRemaining(i, j) != 0) { + hartYield(1000); + } + } + } +} +#endif + void waitAll() { if (!isOnCP()) { PANDO_ABORT("Can only be called from the CP"); @@ -89,13 +105,7 @@ void waitAll() { #endif #elif defined(PANDO_RT_USE_BACKEND_DRVX) CommandProcessor::barrier(); - for (std::int64_t i = 0; i < Drvx::getNodeDims().id; i++) { - for (std::int64_t j = 0; j < Drvx::getPodDims().x; j++) { - while (DrvAPI::getPodTasksRemaining(i, j) != 0) { - hartYield(1000); - } - } - } + 
waitAllTasks(); CommandProcessor::barrier(); #endif }