Skip to content

Commit

Permalink
Add roctx ranges to TinyProfiler (#2057)
Browse files Browse the repository at this point in the history
## Summary
Adds roctx markup similarly to existing nvtx markup. 

## Additional background
The new features are controlled by the added compile flag USE_ROCTX. This assumes that the locations of roctracer/libroctracer64 and roctracer/libroctx64 are similar to those of other ROCm library installations, such as rocrand.

Example run to generate results.json
```
export HIP_PATH=/path/to/rocm/root
cd amrex/Tests/GPU/CNS/Exec/Sod
make USE_HIP=TRUE TINY_PROFILE=TRUE USE_ROCTX=TRUE USE_MPI=FALSE NO_CONFIG_CHECKING=TRUE
srun -n 1 ${HIP_PATH}/bin/rocprof  --hsa-trace --stats --timestamp on --roctx-trace  ./CNS3d.*.TPROF.HIP.ex inputs
```

The resulting results.json file can be viewed in a browser using chrome://tracing/, as described in the rocprof documentation:
https://github.com/ROCm-Developer-Tools/rocprofiler/blob/amd-master/doc/rocprof.md#43rd-party-visualization-tools

Co-authored-by: Axel Huebl <[email protected]>
Co-authored-by: Weiqun Zhang <[email protected]>
  • Loading branch information
3 people authored May 27, 2021
1 parent 01361ca commit 2c2c699
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dependencies/dependencies_hip.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ sudo apt-get install -y --no-install-recommends \
libnuma-dev \
libopenmpi-dev \
openmpi-bin \
rocm-dev rocrand rocprim
rocm-dev roctracer-dev rocprofiler-dev rocrand rocprim

# activate
#
Expand Down
2 changes: 2 additions & 0 deletions Docs/sphinx_documentation/source/BuildingAMReX.rst
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ The list of available options is reported in the :ref:`table <tab:cmakevar>` bel
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_PROFPARSER | Build with profile parser support | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_ROCTX | Build with roctx markup profiling support | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_FPE | Build with Floating Point Exceptions checks | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------------------+-----------------------+
| AMReX_ASSERTIONS | Build with assertions turned on | NO | YES, NO |
Expand Down
28 changes: 20 additions & 8 deletions Src/Base/AMReX_GpuDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,17 @@
#if defined(AMREX_USE_CUDA)
#include <cuda_profiler_api.h>
#if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING)
#include "nvToolsExt.h"
#include <nvToolsExt.h>
#endif
#endif

#if defined(AMREX_USE_HIP)
#include <hip/hip_runtime.h>
#if defined(AMREX_USE_ROCTX)
#include <roctracer_ext.h>
#if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING)
#include <roctx.h>
#endif
#endif
#endif

Expand Down Expand Up @@ -311,8 +321,6 @@ Device::Initialize ()
#if (defined(AMREX_PROFILING) || defined(AMREX_TINY_PROFILING))
nvtxRangeEnd(nvtx_init);
#endif
profilerStart();

if (amrex::Verbose()) {
#if defined(AMREX_USE_MPI) && (__CUDACC_VER_MAJOR__ >= 10)
if (num_devices_used == ParallelDescriptor::NProcs())
Expand All @@ -330,8 +338,6 @@ Device::Initialize ()
#endif // AMREX_USE_MPI && NVCC >= 10
}

profilerStart();

#elif defined(AMREX_USE_HIP)
if (amrex::Verbose()) {
if (ParallelDescriptor::NProcs() > 1) {
Expand All @@ -349,14 +355,15 @@ Device::Initialize ()
}
#endif

Device::profilerStart();

}

void
Device::Finalize ()
{
#ifdef AMREX_USE_CUDA
cudaProfilerStop();
#endif

Device::profilerStop();

for (int i = 0; i < max_gpu_streams; ++i)
{
Expand Down Expand Up @@ -996,14 +1003,19 @@ Device::profilerStart ()
{
#ifdef AMREX_USE_CUDA
AMREX_GPU_SAFE_CALL(cudaProfilerStart());
#elif (defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX))
roctracer_start();
#endif

}

// Invoke the profiler-stop call of the GPU backend selected at compile time.
//  - AMREX_USE_CUDA: calls cudaProfilerStop(), wrapped in AMREX_GPU_SAFE_CALL.
//  - AMREX_USE_HIP together with AMREX_USE_ROCTX: calls roctracer_stop().
//  - Any other configuration: the body is empty and the call is a no-op.
// The CUDA branch takes precedence because the HIP branch is an #elif.
void
Device::profilerStop ()
{
#ifdef AMREX_USE_CUDA
AMREX_GPU_SAFE_CALL(cudaProfilerStop());
#elif (defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX))
roctracer_stop();
#endif
}

Expand Down
6 changes: 5 additions & 1 deletion Src/Base/AMReX_TinyProfiler.H
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
#include <AMReX_REAL.H>

#ifdef AMREX_USE_CUDA
#include "nvToolsExt.h"
#include <nvToolsExt.h>
#endif

#if defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)
#include <roctx.h>
#endif

#include <deque>
Expand Down
23 changes: 18 additions & 5 deletions Src/Base/AMReX_TinyProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,16 @@ TinyProfiler::start () noexcept
ttstack.emplace_back(std::make_tuple(t, 0.0, &fname));
global_depth = ttstack.size();

#ifdef AMREX_USE_GPU
if (device_synchronize_around_region) {
amrex::Gpu::Device::synchronize();
}
#endif

#ifdef AMREX_USE_CUDA
if (device_synchronize_around_region) {
amrex::Gpu::Device::synchronize();
}
nvtxRangePush(fname.c_str());
#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)
roctxRangePush(fname.c_str());
#endif

for (auto const& region : regionstack)
Expand Down Expand Up @@ -173,11 +178,16 @@ TinyProfiler::stop () noexcept
std::get<1>(parent) += dtin;
}

#ifdef AMREX_USE_CUDA
#ifdef AMREX_USE_GPU
if (device_synchronize_around_region) {
amrex::Gpu::Device::synchronize();
}
#endif

#ifdef AMREX_USE_CUDA
nvtxRangePop();
#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)
roctxRangePop();
#endif
} else {
improperly_nested_timers.insert(fname);
Expand Down Expand Up @@ -247,11 +257,14 @@ TinyProfiler::stop (unsigned boxUintID) noexcept
std::get<1>(parent) += dtin;
}

#ifdef AMREX_USE_CUDA
if (device_synchronize_around_region) {
amrex::Gpu::Device::synchronize();
}

#ifdef AMREX_USE_CUDA
nvtxRangePop();
#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)
roctxRangePop();
#endif
} else
{
Expand Down
4 changes: 4 additions & 0 deletions Tools/CMake/AMReXOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,10 @@ cmake_dependent_option(AMReX_PROFPARSER "Enable profile parser" OFF
"AMReX_BASE_PROFILE;AMReX_TRACE_PROFILE;AMReX_AMRDATA" OFF)
print_option( AMReX_PROFPARSER )

cmake_dependent_option(AMReX_ROCTX "Enable roctx markup for HIP with ROCm" OFF
"AMReX_GPU_BACKEND STREQUAL HIP" OFF)
print_option( AMReX_ROCTX )

set(AMReX_TP_PROFILE_VALUES IGNORE CRAYPAT FORGE VTUNE)
set(AMReX_TP_PROFILE IGNORE CACHE STRING "Third-party profiling options: <CRAYPAT,FORGE,VTUNE>")
set_property(CACHE AMReX_TP_PROFILE PROPERTY STRINGS ${AMReX_TP_PROFILE_VALUES})
Expand Down
10 changes: 8 additions & 2 deletions Tools/CMake/AMReXParallelBackends.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ if (AMReX_HIP)
get_filename_component(_this_comp ${CMAKE_CXX_COMPILER} NAME)

if (NOT (_this_comp IN_LIST _valid_hip_compilers) )
message(FATAL_ERROR "\nCMAKE_CXX_COMPILER is incompatible with HIP.\n"
message(FATAL_ERROR "\nCMAKE_CXX_COMPILER=${_this_comp} is incompatible with HIP.\n"
"Set CMAKE_CXX_COMPILER to either hipcc or nvcc for HIP builds.\n")
endif ()

Expand All @@ -200,7 +200,7 @@ if (AMReX_HIP)

if(HIP_FOUND)
message(STATUS "Found HIP: ${HIP_VERSION}")
message(STATUS "HIP: Platform=${HIP_PLATFORM} Compiler=${HIP_COMPILER}")
message(STATUS "HIP: Platform=${HIP_PLATFORM} Compiler=${HIP_COMPILER} Path=${HIP_PATH}")
else()
message(FATAL_ERROR "Could not find HIP."
" Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.")
Expand All @@ -222,6 +222,12 @@ if (AMReX_HIP)
find_package(rocrand REQUIRED CONFIG)
find_package(rocprim REQUIRED CONFIG)
find_package(hiprand REQUIRED CONFIG)
if (AMReX_ROCTX)
# To be modernized in the future, please see:
# https://github.com/ROCm-Developer-Tools/roctracer/issues/56
target_include_directories(amrex PUBLIC ${HIP_PATH}/../roctracer/include ${HIP_PATH}/../rocprofiler/include)
target_link_libraries(amrex PUBLIC "-L${HIP_PATH}/../roctracer/lib/ -lroctracer64" "-L${HIP_PATH}/../roctracer/lib -lroctx64")
endif ()
target_link_libraries(amrex PUBLIC hip::hiprand roc::rocrand roc::rocprim)

# ARCH flags -- these must be PUBLIC for all downstream targets to use,
Expand Down
1 change: 1 addition & 0 deletions Tools/CMake/AMReXSetDefines.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ add_amrex_define( AMREX_COMM_PROFILING IF AMReX_COMM_PROFILE )

# Tiny profiler
add_amrex_define( AMREX_TINY_PROFILING NO_LEGACY IF AMReX_TINY_PROFILE )
add_amrex_define( AMREX_USE_ROCTX NO_LEGACY IF AMReX_ROCTX )

# Mem profiler
add_amrex_define( AMREX_MEM_PROFILING NO_LEGACY IF AMReX_MEM_PROFILE )
Expand Down
1 change: 1 addition & 0 deletions Tools/CMake/AMReX_Config.H.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#cmakedefine AMREX_COMM_PROFILING
#cmakedefine BL_COMM_PROFILING
#cmakedefine AMREX_TINY_PROFILING
#cmakedefine AMREX_USE_ROCTX
#cmakedefine AMREX_MEM_PROFILING
#cmakedefine AMREX_TESTING
#cmakedefine AMREX_USE_MPI
Expand Down
14 changes: 12 additions & 2 deletions Tools/GNUMake/comps/hip.mak
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

ifneq ($(NO_CONFIG_CHECKING),TRUE)
HIP_PATH=$(realpath $(shell hipconfig --path))
hipcc_version := $(shell hipcc --version | grep "HIP version: " | cut -d" " -f3)
hipcc_major_version := $(shell hipcc --version | grep "HIP version: " | cut -d" " -f3 | cut -d. -f1)
hipcc_minor_version := $(shell hipcc --version | grep "HIP version: " | cut -d" " -f3 | cut -d. -f2)
ifeq ($(HIP_PATH),)
$(error hipconfig failed. Is the HIP toolkit available?)
endif
Expand All @@ -18,8 +21,6 @@ else
CXXSTD := c++14
endif

#if less than a given version, throw error.

# Generic flags, always used
CXXFLAGS = -std=$(CXXSTD) -m64
CFLAGS = -std=c99 -m64
Expand Down Expand Up @@ -114,6 +115,15 @@ ifeq ($(HIP_COMPILER),clang)
# rocThrust - Header only
# SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/rocthrust/include

ifeq ($(USE_ROCTX),TRUE)
# rocTracer
CXXFLAGS += -DAMREX_USE_ROCTX
HIPCC_FLAGS += -DAMREX_USE_ROCTX
SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/roctracer/include $(ROC_PATH)/rocprofiler/include
LIBRARY_LOCATIONS += $(ROC_PATH)/roctracer/lib $(ROC_PATH)/rocprofiler/lib
LIBRARIES += -Wl,--rpath=${ROC_PATH}/roctracer/lib -lroctracer64 -lroctx64
endif

# hipcc passes a lot of unused arguments to clang
LEGACY_DEPFLAGS += -Wno-unused-command-line-argument

Expand Down

0 comments on commit 2c2c699

Please sign in to comment.