Skip to content

Commit

Permalink
Merge C++ Standard Parallelism and SYCL2020 implementations
Browse files Browse the repository at this point in the history
  • Loading branch information
gonzalobg committed Jun 5, 2024
1 parent 9ff46ec commit b3786f6
Show file tree
Hide file tree
Showing 22 changed files with 359 additions and 1,164 deletions.
7 changes: 2 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,12 @@ include(cmake/register_models.cmake)
register_model(serial SERIAL SerialStream.cpp)
register_model(omp OMP OMPStream.cpp)
register_model(ocl OCL OCLStream.cpp)
register_model(std-data STD_DATA STDDataStream.cpp)
register_model(std-indices STD_INDICES STDIndicesStream.cpp)
register_model(std-ranges STD_RANGES STDRangesStream.cpp)
register_model(std STD STDStream.cpp)
register_model(hip HIP HIPStream.cpp)
register_model(cuda CUDA CUDAStream.cu)
register_model(kokkos KOKKOS KokkosStream.cpp)
register_model(sycl SYCL SYCLStream.cpp)
register_model(sycl2020-acc SYCL2020 SYCLStream2020.cpp)
register_model(sycl2020-usm SYCL2020 SYCLStream2020.cpp)
register_model(sycl2020 SYCL2020 SYCLStream2020.cpp)
register_model(acc ACC ACCStream.cpp)
# defining RAJA collides with the RAJA namespace, so the macro is named USE_RAJA instead
register_model(raja USE_RAJA RAJAStream.cpp)
Expand Down
3 changes: 3 additions & 0 deletions src/Stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@

#pragma once

#include <cstdint>
#include <array>
#include <vector>
#include <string>
#include "benchmark.h"

using std::intptr_t;

template <class T>
class Stream
{
Expand Down
20 changes: 4 additions & 16 deletions src/StreamModels.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,8 @@

#if defined(CUDA)
#include "CUDAStream.h"
#elif defined(STD_DATA)
#include "STDDataStream.h"
#elif defined(STD_INDICES)
#include "STDIndicesStream.h"
#elif defined(STD_RANGES)
#include "STDRangesStream.hpp"
#elif defined(STD)
#include "STDStream.h"
#elif defined(TBB)
#include "TBBStream.hpp"
#elif defined(THRUST)
Expand Down Expand Up @@ -63,17 +59,9 @@ std::unique_ptr<Stream<T>> make_stream(Args... args) {
// Use the Kokkos implementation
return std::make_unique<KokkosStream<T>>(args...);

#elif defined(STD_DATA)
#elif defined(STD)
// Use the C++ Standard Parallelism (STD) implementation
return std::make_unique<STDDataStream<T>>(args...);

#elif defined(STD_INDICES)
// Use the C++ STD index-oriented implementation
return std::make_unique<STDIndicesStream<T>>(args...);

#elif defined(STD_RANGES)
// Use the C++ STD ranges implementation
return std::make_unique<STDRangesStream<T>>(args...);
return std::make_unique<STDStream<T>>(args...);

#elif defined(TBB)
// Use the TBB implementation
Expand Down
21 changes: 13 additions & 8 deletions src/ci-test-compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,10 @@ build_gcc() {
*) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;;
esac
# some distributions, like Ubuntu bionic, implement std par with TBB, so conditionally link it here
run_build $name "${GCC_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${GCC_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${GCC_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA17"
# Requires GCC 14 and newer CMake for C++23 support
#run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA23"
run_build $name "${GCC_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=INDICES"
done

run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
Expand Down Expand Up @@ -251,9 +252,11 @@ build_clang() {
OFF) dpl_conditional_flags="-DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" ;;
*) dpl_conditional_flags="-DFETCH_ONEDPL=ON -DFETCH_TBB=ON -DUSE_TBB=ON -DCXX_EXTRA_FLAGS=-D_GLIBCXX_USE_TBB_PAR_BACKEND=0" ;;
esac
run_build $name "${CLANG_CXX:?}" std-data "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
run_build $name "${CLANG_CXX:?}" std-indices "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl"
# run_build $name "${CLANG_CXX:?}" std-ranges "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl" # not yet supported
run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA17"
# Requires GCC 14 and newer CMake for C++23 support
# run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=DATA23"
# TODO: the CI clang is too old to build the INDICES variant; re-enable the line below once it is upgraded
#run_build $name "${CLANG_CXX:?}" std "$cxx $dpl_conditional_flags -DUSE_ONEDPL=$use_onedpl -DSTDIMPL=INDICES"
done

run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
Expand All @@ -270,8 +273,10 @@ build_clang() {
build_nvhpc() {
local name="nvhpc_build"
local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}"
run_build $name "${NVHPC_NVCXX:?}" std-data "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
run_build $name "${NVHPC_NVCXX:?}" std-indices "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=DATA17"
# Requires GCC 14 and newer CMake for C++23 support
# run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=DATA23"
run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY -DSTDIMPL=INDICES"

run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"
Expand Down
3 changes: 3 additions & 0 deletions src/dpl_shim.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@ T *alloc_raw(size_t size) { return sycl::malloc_shared<T>(size, exe_policy.queue
template<typename T>
void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); }

#define WORKAROUND

#else

// auto exe_policy = dpl::execution::seq;
// auto exe_policy = dpl::execution::par;
static constexpr auto exe_policy = dpl::execution::par_unseq;
#define USE_STD_PTR_ALLOC_DEALLOC
#define WORKAROUND

#endif

Expand Down
13 changes: 7 additions & 6 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,21 +332,22 @@ void check_solution(const size_t num_times, T const* a, T const* b, T const* c,
size_t failed = 0;
T max_rel = std::numeric_limits<T>::epsilon() * T(100.0);
T max_rel_dot = std::numeric_limits<T>::epsilon() * T(10000000.0);
auto check = [&](const char* name, T is, T should, T max_rel, size_t i = size_t(-1)) {
auto check = [&](const char* name, T is, T should, T mrel, size_t i = size_t(-1)) {
// Relative difference:
T diff = std::abs(is - should);
T abs_is = std::abs(is);
T abs_sh = std::abs(should);
T largest = std::max(abs_is, abs_sh);
T same = diff <= largest * max_rel;
T same = diff <= largest * mrel;
if (!same || std::isnan(is)) {
++failed;
if (failed > 10) return;
std::cerr << "FAILED validation of " << name;
if (i != size_t(-1)) std::cerr << "[" << i << "]";
std::cerr << ": " << is << " (is) != " << should
<< " (should)" << ", diff=" << diff << " > "
<< largest * max_rel << std::endl;
<< largest * mrel << " (largest=" << largest
<< ", max_rel=" << mrel << ")" << std::endl;
}
};

Expand All @@ -360,9 +361,9 @@ void check_solution(const size_t num_times, T const* a, T const* b, T const* c,

// Calculate the L^infty-norm relative error
for (size_t i = 0; i < array_size; ++i) {
check("a", a[i], goldA, i, max_rel);
check("b", b[i], goldB, i, max_rel);
check("c", c[i], goldC, i, max_rel);
check("a", a[i], goldA, max_rel, i);
check("b", b[i], goldB, max_rel, i);
check("c", c[i], goldC, max_rel, i);
}

if (failed > 0 && !silence_errors)
Expand Down
117 changes: 0 additions & 117 deletions src/std-data/STDDataStream.cpp

This file was deleted.

Loading

0 comments on commit b3786f6

Please sign in to comment.