From 2c2c6992b4169613ba0c956a2a2bd120552f446a Mon Sep 17 00:00:00 2001 From: jmsexton03 Date: Thu, 27 May 2021 15:38:31 -0700 Subject: [PATCH] Add roctx ranges to TinyProfiler (#2057) ## Summary Adds roctx markup, analogous to the existing nvtx markup. ## Additional background The new features are controlled by the added compile flag USE_ROCTX. This assumes that roctracer/libroctracer64 and roctracer/libroctx64 are installed in locations analogous to those of other ROCm libraries, such as rocrand. Example run to generate results.json: ``` export HIP_PATH=/path/to/rocm/root cd amrex/Tests/GPU/CNS/Exec/Sod make USE_HIP=TRUE TINY_PROFILE=TRUE USE_ROCTX=TRUE USE_MPI=FALSE NO_CONFIG_CHECKING=TRUE srun -n 1 ${HIP_PATH}/bin/rocprof --hsa-trace --stats --timestamp on --roctx-trace ./CNS3d.*.TPROF.HIP.ex inputs ``` The resulting results.json file can be viewed in a browser at chrome://tracing/, as described in the rocprof documentation: https://github.com/ROCm-Developer-Tools/rocprofiler/blob/amd-master/doc/rocprof.md#43rd-party-visualization-tools Co-authored-by: Axel Huebl Co-authored-by: Weiqun Zhang --- .../dependencies/dependencies_hip.sh | 2 +- .../source/BuildingAMReX.rst | 2 ++ Src/Base/AMReX_GpuDevice.cpp | 28 +++++++++++++------ Src/Base/AMReX_TinyProfiler.H | 6 +++- Src/Base/AMReX_TinyProfiler.cpp | 23 +++++++++++---- Tools/CMake/AMReXOptions.cmake | 4 +++ Tools/CMake/AMReXParallelBackends.cmake | 10 +++++-- Tools/CMake/AMReXSetDefines.cmake | 1 + Tools/CMake/AMReX_Config.H.in | 1 + Tools/GNUMake/comps/hip.mak | 14 ++++++++-- 10 files changed, 72 insertions(+), 19 deletions(-) diff --git a/.github/workflows/dependencies/dependencies_hip.sh b/.github/workflows/dependencies/dependencies_hip.sh index 52ed4f53284..16eec4287ef 100755 --- a/.github/workflows/dependencies/dependencies_hip.sh +++ b/.github/workflows/dependencies/dependencies_hip.sh @@ -36,7 +36,7 @@ sudo apt-get install -y --no-install-recommends \ libnuma-dev \ libopenmpi-dev \ openmpi-bin \ - rocm-dev rocrand rocprim + rocm-dev roctracer-dev 
rocprofiler-dev rocrand rocprim # activate # diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst b/Docs/sphinx_documentation/source/BuildingAMReX.rst index a8bd337a578..69dd0da98a6 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX.rst @@ -473,6 +473,8 @@ The list of available options is reported in the :ref:`table ` bel +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ | AMReX_PROFPARSER | Build with profile parser support | NO | YES, NO | +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ + | AMReX_ROCTX | Build with roctx markup profiling support | NO | YES, NO | + +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ | AMReX_FPE | Build with Floating Point Exceptions checks | NO | YES, NO | +------------------------------+-------------------------------------------------+-------------------------+-----------------------+ | AMReX_ASSERTIONS | Build with assertions turned on | NO | YES, NO | diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index ef7bb423602..689c82cfbf7 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -15,7 +15,17 @@ #if defined(AMREX_USE_CUDA) #include #if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING) -#include "nvToolsExt.h" +#include +#endif +#endif + +#if defined(AMREX_USE_HIP) +#include +#if defined(AMREX_USE_ROCTX) +#include +#if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING) +#include +#endif #endif #endif @@ -311,8 +321,6 @@ Device::Initialize () #if (defined(AMREX_PROFILING) || defined(AMREX_TINY_PROFILING)) nvtxRangeEnd(nvtx_init); #endif - profilerStart(); - if (amrex::Verbose()) { #if defined(AMREX_USE_MPI) && (__CUDACC_VER_MAJOR__ >= 10) 
if (num_devices_used == ParallelDescriptor::NProcs()) @@ -330,8 +338,6 @@ Device::Initialize () #endif // AMREX_USE_MPI && NVCC >= 10 } - profilerStart(); - #elif defined(AMREX_USE_HIP) if (amrex::Verbose()) { if (ParallelDescriptor::NProcs() > 1) { @@ -349,14 +355,15 @@ Device::Initialize () } #endif + Device::profilerStart(); + } void Device::Finalize () { -#ifdef AMREX_USE_CUDA - cudaProfilerStop(); -#endif + + Device::profilerStop(); for (int i = 0; i < max_gpu_streams; ++i) { @@ -996,7 +1003,10 @@ Device::profilerStart () { #ifdef AMREX_USE_CUDA AMREX_GPU_SAFE_CALL(cudaProfilerStart()); +#elif (defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)) + roctracer_start(); #endif + } void @@ -1004,6 +1014,8 @@ Device::profilerStop () { #ifdef AMREX_USE_CUDA AMREX_GPU_SAFE_CALL(cudaProfilerStop()); +#elif (defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)) + roctracer_stop(); #endif } diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index ab67a109280..1a0660fae57 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -6,7 +6,11 @@ #include #ifdef AMREX_USE_CUDA -#include "nvToolsExt.h" +#include +#endif + +#if defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) +#include #endif #include diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index 7cf532de33a..bac68880481 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -92,11 +92,16 @@ TinyProfiler::start () noexcept ttstack.emplace_back(std::make_tuple(t, 0.0, &fname)); global_depth = ttstack.size(); +#ifdef AMREX_USE_GPU + if (device_synchronize_around_region) { + amrex::Gpu::Device::synchronize(); + } +#endif + #ifdef AMREX_USE_CUDA - if (device_synchronize_around_region) { - amrex::Gpu::Device::synchronize(); - } nvtxRangePush(fname.c_str()); +#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) + roctxRangePush(fname.c_str()); #endif for (auto const& region : regionstack) @@ -173,11 +178,16 @@ 
TinyProfiler::stop () noexcept std::get<1>(parent) += dtin; } -#ifdef AMREX_USE_CUDA +#ifdef AMREX_USE_GPU if (device_synchronize_around_region) { amrex::Gpu::Device::synchronize(); } +#endif + +#ifdef AMREX_USE_CUDA nvtxRangePop(); +#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) + roctxRangePop(); #endif } else { improperly_nested_timers.insert(fname); @@ -247,11 +257,14 @@ TinyProfiler::stop (unsigned boxUintID) noexcept std::get<1>(parent) += dtin; } -#ifdef AMREX_USE_CUDA if (device_synchronize_around_region) { amrex::Gpu::Device::synchronize(); } + +#ifdef AMREX_USE_CUDA nvtxRangePop(); +#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) + roctxRangePop(); #endif } else { diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake index bbef0de2dd1..25ea576d986 100644 --- a/Tools/CMake/AMReXOptions.cmake +++ b/Tools/CMake/AMReXOptions.cmake @@ -321,6 +321,10 @@ cmake_dependent_option(AMReX_PROFPARSER "Enable profile parser" OFF "AMReX_BASE_PROFILE;AMReX_TRACE_PROFILE;AMReX_AMRDATA" OFF) print_option( AMReX_PROFPARSER ) +cmake_dependent_option(AMReX_ROCTX "Enable roctx markup for HIP with ROCm" OFF + "AMReX_GPU_BACKEND STREQUAL HIP" OFF) +print_option( AMReX_ROCTX ) + set(AMReX_TP_PROFILE_VALUES IGNORE CRAYPAT FORGE VTUNE) set(AMReX_TP_PROFILE IGNORE CACHE STRING "Third-party profiling options: ") set_property(CACHE AMReX_TP_PROFILE PROPERTY STRINGS ${AMReX_TP_PROFILE_VALUES}) diff --git a/Tools/CMake/AMReXParallelBackends.cmake b/Tools/CMake/AMReXParallelBackends.cmake index 85c2f999e61..b5b78511471 100644 --- a/Tools/CMake/AMReXParallelBackends.cmake +++ b/Tools/CMake/AMReXParallelBackends.cmake @@ -175,7 +175,7 @@ if (AMReX_HIP) get_filename_component(_this_comp ${CMAKE_CXX_COMPILER} NAME) if (NOT (_this_comp IN_LIST _valid_hip_compilers) ) - message(FATAL_ERROR "\nCMAKE_CXX_COMPILER is incompatible with HIP.\n" + message(FATAL_ERROR "\nCMAKE_CXX_COMPILER=${_this_comp} is incompatible with HIP.\n" "Set CMAKE_CXX_COMPILER to 
either hipcc or nvcc for HIP builds.\n") endif () @@ -200,7 +200,7 @@ if (AMReX_HIP) if(HIP_FOUND) message(STATUS "Found HIP: ${HIP_VERSION}") - message(STATUS "HIP: Platform=${HIP_PLATFORM} Compiler=${HIP_COMPILER}") + message(STATUS "HIP: Platform=${HIP_PLATFORM} Compiler=${HIP_COMPILER} Path=${HIP_PATH}") else() message(FATAL_ERROR "Could not find HIP." " Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.") @@ -222,6 +222,12 @@ if (AMReX_HIP) find_package(rocrand REQUIRED CONFIG) find_package(rocprim REQUIRED CONFIG) find_package(hiprand REQUIRED CONFIG) + if (AMReX_ROCTX) + # To be modernized in the future, please see: + # https://github.com/ROCm-Developer-Tools/roctracer/issues/56 + target_include_directories(amrex PUBLIC ${HIP_PATH}/../roctracer/include ${HIP_PATH}/../rocprofiler/include) + target_link_libraries(amrex PUBLIC "-L${HIP_PATH}/../roctracer/lib/ -lroctracer64" "-L${HIP_PATH}/../roctracer/lib -lroctx64") + endif () target_link_libraries(amrex PUBLIC hip::hiprand roc::rocrand roc::rocprim) # ARCH flags -- these must be PUBLIC for all downstream targets to use, diff --git a/Tools/CMake/AMReXSetDefines.cmake b/Tools/CMake/AMReXSetDefines.cmake index 9267361f52c..79e8951e1d7 100644 --- a/Tools/CMake/AMReXSetDefines.cmake +++ b/Tools/CMake/AMReXSetDefines.cmake @@ -32,6 +32,7 @@ add_amrex_define( AMREX_COMM_PROFILING IF AMReX_COMM_PROFILE ) # Tiny profiler add_amrex_define( AMREX_TINY_PROFILING NO_LEGACY IF AMReX_TINY_PROFILE ) +add_amrex_define( AMREX_USE_ROCTX NO_LEGACY IF AMReX_ROCTX ) # Mem profiler add_amrex_define( AMREX_MEM_PROFILING NO_LEGACY IF AMReX_MEM_PROFILE ) diff --git a/Tools/CMake/AMReX_Config.H.in b/Tools/CMake/AMReX_Config.H.in index 66c2c1086e1..e6c71d60be7 100644 --- a/Tools/CMake/AMReX_Config.H.in +++ b/Tools/CMake/AMReX_Config.H.in @@ -10,6 +10,7 @@ #cmakedefine AMREX_COMM_PROFILING #cmakedefine BL_COMM_PROFILING #cmakedefine AMREX_TINY_PROFILING +#cmakedefine 
AMREX_USE_ROCTX #cmakedefine AMREX_MEM_PROFILING #cmakedefine AMREX_TESTING #cmakedefine AMREX_USE_MPI diff --git a/Tools/GNUMake/comps/hip.mak b/Tools/GNUMake/comps/hip.mak index 0e4e4146272..aa693125c12 100644 --- a/Tools/GNUMake/comps/hip.mak +++ b/Tools/GNUMake/comps/hip.mak @@ -2,6 +2,9 @@ ifneq ($(NO_CONFIG_CHECKING),TRUE) HIP_PATH=$(realpath $(shell hipconfig --path)) + hipcc_version := $(shell hipcc --version | grep "HIP version: " | cut -d" " -f3) + hipcc_major_version := $(shell hipcc --version | grep "HIP version: " | cut -d" " -f3 | cut -d. -f1) + hipcc_minor_version := $(shell hipcc --version | grep "HIP version: " | cut -d" " -f3 | cut -d. -f2) ifeq ($(HIP_PATH),) $(error hipconfig failed. Is the HIP toolkit available?) endif @@ -18,8 +21,6 @@ else CXXSTD := c++14 endif -#if less than a given version, throw error. - # Generic flags, always used CXXFLAGS = -std=$(CXXSTD) -m64 CFLAGS = -std=c99 -m64 @@ -114,6 +115,15 @@ ifeq ($(HIP_COMPILER),clang) # rocThrust - Header only # SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/rocthrust/include + ifeq ($(USE_ROCTX),TRUE) + # rocTracer + CXXFLAGS += -DAMREX_USE_ROCTX + HIPCC_FLAGS += -DAMREX_USE_ROCTX + SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/roctracer/include $(ROC_PATH)/rocprofiler/include + LIBRARY_LOCATIONS += $(ROC_PATH)/roctracer/lib $(ROC_PATH)/rocprofiler/lib + LIBRARIES += -Wl,--rpath=${ROC_PATH}/roctracer/lib -lroctracer64 -lroctx64 + endif + # hipcc passes a lot of unused arguments to clang LEGACY_DEPFLAGS += -Wno-unused-command-line-argument