Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
update scalar load/store
Browse files Browse the repository at this point in the history
  • Loading branch information
sunjiweiswift committed Jun 25, 2024
1 parent 177f21a commit 631b2a3
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions include/common/core/memory.hpp
Original file line number | Diff line number | Diff line change
@@ -355,9 +355,13 @@ __XETLA_API xetla_vector<T, N> xetla_load_global(
 __ESIMD_NS::cache_hint_L1<gpu::xetla::detail::get_cache_hint(L1H)>,
 __ESIMD_NS::cache_hint_L2<gpu::xetla::detail::get_cache_hint(L2H)>,
 __ESIMD_NS::alignment<alignment>};
-if constexpr (sizeof(T) * N < sizeof(uint32_t)) {
-xetla_vector<uint32_t, N> offsets(byte_offset, sizeof(T));
-return __ESIMD_NS::gather<T, N, uint32_t>(ptr, offsets);
+if constexpr (sizeof(T) * N < sizeof(uint32_t) || N == 1) {
+xetla_vector<T, N> ret;
+#pragma unroll
+for (uint32_t i = 0; i < N; i++) {
+ret[i] = ptr[i + byte_offset / sizeof(T)];
+}
+return ret;
 } else {
 return __ESIMD_NS::block_load<T, N>(ptr, byte_offset, props);
 }
@@ -501,9 +505,11 @@ __XETLA_API void xetla_store_global(
 __ESIMD_NS::cache_hint_L2<gpu::xetla::detail::get_cache_hint(L2H)>,
 __ESIMD_NS::alignment<alignment>};
 
-if constexpr (sizeof(T) * N < sizeof(uint32_t)) {
-xetla_vector<uint32_t, N> offsets(byte_offset, sizeof(T));
-return __ESIMD_NS::scatter<T, N, uint32_t>(ptr, offsets, vals);
+if constexpr (sizeof(T) * N < sizeof(uint32_t) || N == 1) {
+#pragma unroll
+for (uint32_t i = 0; i < N; i++) {
+ptr[i + byte_offset / sizeof(T)] = vals[i];
+}
 } else {
 __ESIMD_NS::block_store<T, N>(ptr, byte_offset, vals, props);
 }
Expand Down

0 comments on commit 631b2a3

Please sign in to comment.