Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce ldg_ptr to Enable __ldg in Data Stores and simple_ptr_holder #1802

Merged
merged 17 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions include/gridtools/common/ldg_ptr.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* GridTools
*
* Copyright (c) 2014-2023, ETH Zurich
* All rights reserved.
*
* Please, refer to the LICENSE file in the root directory.
* SPDX-License-Identifier: BSD-3-Clause
*/
#pragma once

#include <cstddef>
#include <type_traits>
#include <utility>

#include "defs.hpp"
#include "host_device.hpp"

#ifdef GT_CUDACC
#include "cuda_type_traits.hpp"
#endif

namespace gridtools {

#ifdef GT_CUDACC
namespace impl_ {

template <class T>
class ldg_ptr {
T const *m_ptr;

static_assert(is_texture_type<T>::value);

public:
GT_FUNCTION constexpr ldg_ptr() {}
GT_FUNCTION constexpr explicit ldg_ptr(T const *ptr) : m_ptr(ptr) {}
GT_FUNCTION constexpr T operator*() const {
#ifdef GT_CUDA_ARCH
return __ldg(m_ptr);
#else
return *m_ptr;
#endif
}

GT_FUNCTION constexpr ldg_ptr &operator+=(std::ptrdiff_t diff) {
m_ptr += diff;
return *this;
}

GT_FUNCTION constexpr ldg_ptr &operator-=(std::ptrdiff_t diff) {
m_ptr -= diff;
return *this;
}

friend GT_FUNCTION constexpr bool operator==(ldg_ptr const &a, ldg_ptr const &b) {
return a.m_ptr == b.m_ptr;
}
friend GT_FUNCTION constexpr bool operator==(ldg_ptr const &a, T const *b) { return a.m_ptr == b; }
friend GT_FUNCTION constexpr bool operator==(T const *a, ldg_ptr const &b) { return a == b.m_ptr; }

friend GT_FUNCTION constexpr bool operator!=(ldg_ptr const &a, ldg_ptr const &b) {
return a.m_ptr != b.m_ptr;
}
friend GT_FUNCTION constexpr bool operator!=(ldg_ptr const &a, T const *b) { return a.m_ptr != b; }
friend GT_FUNCTION constexpr bool operator!=(T const *a, ldg_ptr const &b) { return a != b.m_ptr; }

friend GT_FUNCTION constexpr ldg_ptr &operator++(ldg_ptr &ptr) {
++ptr.m_ptr;
return ptr;
}

friend GT_FUNCTION constexpr ldg_ptr &operator--(ldg_ptr &ptr) {
--ptr.m_ptr;
return ptr;
}

friend GT_FUNCTION constexpr ldg_ptr operator++(ldg_ptr &ptr, int) {
ldg_ptr p = ptr;
++ptr.m_ptr;
return p;
}

friend GT_FUNCTION constexpr ldg_ptr operator--(ldg_ptr &ptr, int) {
ldg_ptr p = ptr;
--ptr.m_ptr;
return p;
}

friend GT_FUNCTION constexpr ldg_ptr operator+(ldg_ptr const &ptr, std::ptrdiff_t diff) {
return ldg_ptr(ptr.m_ptr + diff);
}

friend GT_FUNCTION constexpr ldg_ptr operator-(ldg_ptr const &ptr, std::ptrdiff_t diff) {
return ldg_ptr(ptr.m_ptr - diff);
}

friend GT_FUNCTION constexpr std::ptrdiff_t operator-(ldg_ptr const &ptr, ldg_ptr const &other) {
return ptr.m_ptr - other.m_ptr;
}
};
} // namespace impl_

template <class T>
GT_FUNCTION constexpr std::enable_if_t<is_texture_type<T>::value, impl_::ldg_ptr<T>> as_ldg_ptr(T const *ptr) {
return impl_::ldg_ptr<T>(ptr);
}

#endif

template <class T>
GT_FUNCTION constexpr T &&as_ldg_ptr(T &&value) {
return std::forward<T>(value);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I like this fallback. Doesn't it mean that if you wrap any pointer ad "ldg" pointer, which is not "ldg"-capable, it will silently do that. If this is the intent, then at least I don't like the naming.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn’t wrap anything, it just passes unsupported types through as is. We just call this function wherever we would like to use LDG when available. The name might be improvable though, so let me know if you have a better one …

}

} // namespace gridtools
3 changes: 2 additions & 1 deletion include/gridtools/sid/simple_ptr_holder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "../common/defs.hpp"
#include "../common/host_device.hpp"
#include "../common/ldg_ptr.hpp"

#define GT_FILENAME <gridtools/sid/simple_ptr_holder.hpp>
#include GT_ITERATE_ON_TARGETS()
Expand All @@ -38,7 +39,7 @@ namespace gridtools {
simple_ptr_holder() = default;
GT_TARGET GT_FORCE_INLINE constexpr simple_ptr_holder(T const &ptr) : m_val{ptr} {}
#endif
GT_TARGET GT_FORCE_INLINE constexpr T const &operator()() const { return m_val; }
GT_TARGET GT_FORCE_INLINE constexpr decltype(auto) operator()() const { return as_ldg_ptr(m_val); }
};

template <class T>
Expand Down
7 changes: 0 additions & 7 deletions include/gridtools/stencil/gpu/entry_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,6 @@ namespace gridtools {

template <class Keys>
struct deref_f {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template <class Key, class T>
GT_FUNCTION std::enable_if_t<is_texture_type<T>::value && meta::st_contains<Keys, Key>::value, T>
operator()(Key, T const *ptr) const {
return __ldg(ptr);
}
#endif
template <class Key, class Ptr>
GT_FUNCTION decltype(auto) operator()(Key, Ptr ptr) const {
return *ptr;
Expand Down
7 changes: 0 additions & 7 deletions include/gridtools/stencil/gpu_horizontal/entry_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,6 @@ namespace gridtools {
namespace gpu_horizontal_backend {
template <class Keys>
struct deref_f {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template <class Key, class T>
GT_FUNCTION std::enable_if_t<is_texture_type<T>::value && meta::st_contains<Keys, Key>::value, T>
operator()(Key, T const *ptr) const {
return __ldg(ptr);
}
#endif
template <class Key, class Ptr>
GT_FUNCTION decltype(auto) operator()(Key, Ptr ptr) const {
return *ptr;
Expand Down
3 changes: 2 additions & 1 deletion include/gridtools/storage/sid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "../common/hymap.hpp"
#include "../common/integral_constant.hpp"
#include "../common/layout_map.hpp"
#include "../common/ldg_ptr.hpp"
#include "../common/tuple.hpp"
#include "../common/tuple_util.hpp"
#include "../meta.hpp"
Expand All @@ -36,7 +37,7 @@ namespace gridtools {
template <class T>
struct ptr_holder {
T *m_val;
GT_FUNCTION constexpr T *operator()() const { return m_val; }
GT_FUNCTION constexpr auto operator()() const { return as_ldg_ptr(m_val); }

friend GT_FORCE_INLINE constexpr ptr_holder operator+(ptr_holder obj, int_t arg) {
return {obj.m_val + arg};
Expand Down
5 changes: 5 additions & 0 deletions tests/unit_tests/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ gridtools_add_unit_test(test_gt_math SOURCES test_gt_math.cpp NO_NVCC)
gridtools_add_unit_test(test_hypercube_iterator SOURCES test_hypercube_iterator.cpp NO_NVCC)
gridtools_add_unit_test(test_tuple SOURCES test_tuple.cpp NO_NVCC)
gridtools_add_unit_test(test_int_vector SOURCES test_int_vector.cpp NO_NVCC)
gridtools_add_unit_test(test_ldg_ptr SOURCES test_ldg_ptr.cpp NO_NVCC)

if(TARGET _gridtools_cuda)
gridtools_check_compilation(test_cuda_type_traits test_cuda_type_traits.cu)
Expand Down Expand Up @@ -49,4 +50,8 @@ if(TARGET _gridtools_cuda)
SOURCES test_tuple.cu
LIBRARIES _gridtools_cuda
LABELS cuda)
gridtools_add_unit_test(test_ldg_ptr_cuda
SOURCES test_ldg_ptr.cu
LIBRARIES _gridtools_cuda
LABELS cuda)
endif()
51 changes: 51 additions & 0 deletions tests/unit_tests/common/test_ldg_ptr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* GridTools
*
* Copyright (c) 2014-2023, ETH Zurich
* All rights reserved.
*
* Please, refer to the LICENSE file in the root directory.
* SPDX-License-Identifier: BSD-3-Clause
*/

#include <gridtools/common/ldg_ptr.hpp>

#include <gtest/gtest.h>

namespace gridtools {
namespace {
TEST(as_ldg_ptr, non_const_host) {
float data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f};

auto ptr = as_ldg_ptr(&data[2]);
EXPECT_EQ(*ptr, 2.0f);
EXPECT_EQ(ptr + 2, as_ldg_ptr(&data[4]));
EXPECT_EQ(ptr - 2, as_ldg_ptr(&data[0]));
EXPECT_EQ(*(ptr + 2), 4.0f);
EXPECT_EQ(*(ptr - 2), 0.0f);
EXPECT_EQ(*(++ptr), 3.0f);
EXPECT_EQ(*(ptr++), 3.0f);
EXPECT_EQ(*(ptr--), 4.0f);
EXPECT_EQ(*(--ptr), 2.0f);
EXPECT_EQ((ptr + 2) - ptr, 2);
*ptr = 5.0f;
EXPECT_EQ(*ptr, 5.0f);
}

TEST(as_ldg_ptr, const_host) {
float const data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f};

auto ptr = as_ldg_ptr(&data[2]);
EXPECT_EQ(*ptr, 2.0f);
EXPECT_EQ(ptr + 2, as_ldg_ptr(&data[4]));
EXPECT_EQ(ptr - 2, as_ldg_ptr(&data[0]));
EXPECT_EQ(*(ptr + 2), 4.0f);
EXPECT_EQ(*(ptr - 2), 0.0f);
EXPECT_EQ(*(++ptr), 3.0f);
EXPECT_EQ(*(ptr++), 3.0f);
EXPECT_EQ(*(ptr--), 4.0f);
EXPECT_EQ(*(--ptr), 2.0f);
EXPECT_EQ((ptr + 2) - ptr, 2);
}
} // namespace
} // namespace gridtools
86 changes: 86 additions & 0 deletions tests/unit_tests/common/test_ldg_ptr.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
havogt marked this conversation as resolved.
Show resolved Hide resolved
* GridTools
*
* Copyright (c) 2014-2023, ETH Zurich
* All rights reserved.
*
* Please, refer to the LICENSE file in the root directory.
* SPDX-License-Identifier: BSD-3-Clause
*/

#include <gridtools/common/ldg_ptr.hpp>

#include <gtest/gtest.h>

#include <cuda_test_helper.hpp>

namespace gridtools {
namespace {
havogt marked this conversation as resolved.
Show resolved Hide resolved
__device__ bool test_non_const_device() {
volatile float data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f};

auto ptr = as_ldg_ptr(&data[2]);
if (*ptr != 2.0f)
return false;
if (ptr + 2 != as_ldg_ptr(&data[4]))
return false;
if (ptr - 2 != as_ldg_ptr(&data[0]))
return false;
if (*(ptr + 2) != 4.0f)
return false;
if (*(ptr - 2) != 0.0f)
return false;
if (*(++ptr) != 3.0f)
return false;
if (*(ptr++) != 3.0f)
return false;
if (*(ptr--) != 4.0f)
return false;
if (*(--ptr) != 2.0f)
return false;
if ((ptr + 2) - ptr != 2)
return false;
*ptr = 5.0f;
if (*ptr != 5.0f)
return false;
return true;
}

TEST(as_ldg_ptr, non_const_device) {
EXPECT_TRUE(on_device::exec(GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(&test_non_const_device)));
}

__device__ bool test_const_device() {
volatile float const data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f};

auto ptr = as_ldg_ptr(&data[2]);
if (*ptr != 2.0f)
return false;
if (ptr + 2 != as_ldg_ptr(&data[4]))
return false;
if (ptr - 2 != as_ldg_ptr(&data[0]))
return false;
if (*(ptr + 2) != 4.0f)
return false;
if (*(ptr - 2) != 0.0f)
return false;
if (*(++ptr) != 3.0f)
return false;
if (*(ptr++) != 3.0f)
return false;
if (*(ptr--) != 4.0f)
return false;
if (*(--ptr) != 2.0f)
return false;
if ((ptr + 2) - ptr != 2)
return false;
return true;
}

TEST(as_ldg_ptr, const_device) {
EXPECT_TRUE(on_device::exec(GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(&test_const_device)));
}
} // namespace
} // namespace gridtools

#include "test_ldg_ptr.cpp"
Loading