Array crash using RelWithDebInfo with CUDA. #1440

BradWhitlock · 2024-10-08T19:41:20Z

I think I was able to make a reproducer for an axom::Array crash. Build the following example program on rzansel. It builds in RelWithDebInfo mode and crashes in the axom::Array constructor, which tries to default-initialize the memory with placement new on the host even though the memory should be on the GPU.

After thinking about this some more, calling axom::Array constructors from a separately compiled .cpp file might be the real problem. In the original scenario, I did not do anything special in my CMakeLists.txt to indicate that the file has GPU code; I only added the source file to my library target. In this example, the bug.cpp file is marked as CUDA code so that axom::CUDA_EXEC will be available. The execute.cpp file, where the error happens, is not marked as a GPU file, though some code in axom::Array might need it to be.

bug.cpp

#include <axom/config.hpp>
#include <axom/core.hpp>

#include <iostream>

// Forward declaration of buildShapeMap(), which is defined in execute.cpp.
// Declaring it here lets this file call the function without seeing its body.
namespace axom
{
namespace mir
{
namespace views
{
void buildShapeMap(axom::Array<axom::IndexType> &indices,
                   axom::Array<axom::IndexType> &values,
                   int allocatorID);
} // end namespace views
} // end namespace mir
} // end namespace axom

// Asks buildShapeMap() to fill two arrays using the allocator ID associated
// with ExecSpace, then prints the size of each array.
template <typename ExecSpace>
void execute()
{
  axom::Array<axom::IndexType> indices;
  axom::Array<axom::IndexType> values;

  // The allocator ID is the only execution-space information handed to
  // buildShapeMap().
  const int allocatorID = axom::execution_space<ExecSpace>::allocatorID();
  axom::mir::views::buildShapeMap(indices, values, allocatorID);

  std::cout << "indices.size=" << indices.size() << std::endl;
  std::cout << "values.size=" << values.size() << std::endl;
}

// Entry point: runs the reproducer with the CUDA execution space when Axom
// was configured with CUDA (AXOM_USE_CUDA). Command-line arguments are not
// used, so the parameters are left unnamed to avoid unused-parameter warnings.
int main(int /*argc*/, char * /*argv*/[])
{
#if defined(AXOM_USE_CUDA)
  constexpr int BLOCK_SIZE = 256;
  using cuda_exec = axom::CUDA_EXEC<BLOCK_SIZE>;
  execute<cuda_exec>();
#else
  // Without CUDA nothing is executed; report that instead of exiting
  // silently, which would look like a successful run of the reproducer.
  std::cerr << "AXOM_USE_CUDA is not defined; the reproducer was not run."
            << std::endl;
#endif

  return 0;
}

execute.cpp

#include <axom/config.hpp>
#include <axom/core.hpp>

#include <vector>

// Definition of buildShapeMap().
namespace axom
{
namespace mir
{
namespace views
{
// Replaces `indices` and `values` with 6-element arrays constructed with
// `allocatorID`, then uses axom::copy to fill them from host-side vectors
// holding 0..5 and 5..0 respectively.
void buildShapeMap(axom::Array<axom::IndexType> &indices,
                   axom::Array<axom::IndexType> &values,
                   int allocatorID)
{
  // Host-side source data.
  std::vector<axom::IndexType> hostIndices{0, 1, 2, 3, 4, 5};
  std::vector<axom::IndexType> hostValues{5, 4, 3, 2, 1, 0};
  const auto count = static_cast<axom::IndexType>(hostValues.size());

  // The error would manifest here when calling Array::Array(count, count, allocatorID).
  indices = axom::Array<axom::IndexType>(count, count, allocatorID);
  values = axom::Array<axom::IndexType>(count, count, allocatorID);

  // Transfer the host data into the newly constructed arrays.
  axom::copy(indices.data(), hostIndices.data(), count * sizeof(axom::IndexType));
  axom::copy(values.data(), hostValues.data(), count * sizeof(axom::IndexType));
}

} // end namespace views
} // end namespace mir
} // end namespace axom

The CMakeLists.txt file below was hacked together from the QuickStart guide, and I made some additions so it would work with CUDA. I would have hoped that Axom would take care of that, but it didn't.

CMakeLists.txt

cmake_minimum_required(VERSION 3.21)

# Pin the host compilers and the build type before project() is called.
set(CMAKE_C_COMPILER "/usr/tce/packages/clang/clang-ibm-10.0.1-gcc-8.3.1/bin/clang" CACHE PATH "")
set(CMAKE_CXX_COMPILER "/usr/tce/packages/clang/clang-ibm-10.0.1-gcc-8.3.1/bin/clang++" CACHE PATH "")
# Alternative build types, left commented out for reference.
#set(CMAKE_BUILD_TYPE Debug)
#set(CMAKE_BUILD_TYPE Release)
set(CMAKE_BUILD_TYPE RelWithDebInfo)

project(bug)

# Point to an installed RelWithDebInfo Axom
# NOTE(review): the "[email protected]" segment below looks like an
# email-obfuscation artifact from the page this was copied from -- confirm and
# restore the real install directory name before use.
set(AXOM_DIR /usr/WS2/whitlocb/Axom/axom_mir/axom/[email protected]_cuda-relwithdebinfo)

#------------------------------------------------------------------------------
# Check for AXOM_DIR and use CMake's find_package to import axom's targets
#------------------------------------------------------------------------------
if(NOT DEFINED AXOM_DIR OR NOT EXISTS ${AXOM_DIR}/lib/cmake/axom-config.cmake)
    message(FATAL_ERROR "Missing required 'AXOM_DIR' variable pointing to an installed axom")
endif()

if (ENABLE_CUDA)
    enable_language(CUDA)
endif()

if (ENABLE_HIP)
    if (NOT ROCM_PATH)
        # Environment variables must be dereferenced as $ENV{...}; a bare
        # ENV{...} is handed to find_path() as a literal directory name and
        # never consults the environment.
        find_path(ROCM_PATH
            hip
            $ENV{ROCM_DIR}
            $ENV{ROCM_PATH}
            $ENV{HIP_PATH}
            ${HIP_PATH}/..
            ${HIP_ROOT_DIR}/../
            ${ROCM_ROOT_DIR}
            /opt/rocm)
    endif()
    set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};${ROCM_PATH}")
    find_package(hip REQUIRED CONFIG PATHS ${ROCM_PATH})
endif()

include(CMakeFindDependencyMacro)

# 70=Volta
set(CMAKE_CUDA_ARCHITECTURES "70")

# Only look for axom under AXOM_DIR; system and default search paths are skipped.
find_dependency(axom REQUIRED
                NO_DEFAULT_PATH 
                PATHS ${AXOM_DIR}/lib/cmake)


add_executable(bug bug.cpp execute.cpp)

# Only bug.cpp is compiled as CUDA. execute.cpp is intentionally left as a
# regular C++ source: that is the configuration in which the crash occurs.
set_source_files_properties(bug.cpp PROPERTIES
    LANGUAGE CUDA
)

set_target_properties(bug PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
)
target_link_libraries(bug axom::core ${CUDA_LIBRARIES})

The text was updated successfully, but these errors were encountered:

rhornung67 added the "bug" (Something isn't working), "GPU" (Issues related to GPU development), and "Reviewed" labels on Oct 21, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Array crash using RelWithDebInfo with CUDA. #1440

Array crash using RelWithDebInfo with CUDA. #1440

BradWhitlock commented Oct 8, 2024

Array crash using RelWithDebInfo with CUDA. #1440

Array crash using RelWithDebInfo with CUDA. #1440

Comments

BradWhitlock commented Oct 8, 2024