Skip to content

Commit

Permalink
Merge pull request #377 from srogatch/fix-gpu-multiple-sum
Browse files Browse the repository at this point in the history
Fix 0-initialization of elements in a multiple_sum structure on GPU
  • Loading branch information
roshandathathri authored Jul 9, 2021
2 parents 49a59a6 + 191e9ff commit b6605ed
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ if (GALOIS_ENABLE_GPU)
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_${GENCODE},code=sm_${GENCODE}>")
endforeach()

# This is necessary to allow building for CUDA 11.x (where CUB is bundled) and earlier versions (where CUB is not included)
add_definitions(-DTHRUST_IGNORE_CUB_VERSION_CHECK)

add_subdirectory(libgpu)
endif()
add_subdirectory(libpangolin)
Expand Down
4 changes: 3 additions & 1 deletion libgpu/include/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ template <int items, typename T>
struct multiple_sum {
T el[items];

__device__ __host__ multiple_sum() {}
// https://nvlabs.github.io/cub/classcub_1_1_block_scan.html#a6ed3f77795e582df31d3d6d9d950615e
// Per the CUB documentation linked above: this operation assumes that the value obtained by T's default constructor (or by zero-initialization if no user-defined default constructor exists) is suitable as the identity value zero for addition.
__device__ __host__ multiple_sum() : multiple_sum(T()) { }

__device__ __host__ multiple_sum(const T e) {
for (int i = 0; i < items; i++)
Expand Down

0 comments on commit b6605ed

Please sign in to comment.