Skip to content

Commit

Permalink
Add load_unaligned and allow construction from unaligned scalar arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
wbthomason committed Feb 6, 2025
1 parent 064fb7e commit a01a98f
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 12 deletions.
12 changes: 12 additions & 0 deletions src/impl/vamp/vector/avx.hh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ namespace vamp
return _mm256_load_ps(f);
}

// Unaligned load of 8 packed single-precision floats into an AVX register.
// Unlike load(), `f` need not be aligned to S::Alignment (32 bytes).
template <unsigned int = 0>
inline static constexpr auto load_unaligned(const ScalarT *const f) noexcept -> VectorT
{
    return _mm256_loadu_ps(f);
}

template <unsigned int = 0>
inline static constexpr auto store(ScalarT *f, VectorT v) noexcept -> void
{
Expand Down Expand Up @@ -408,6 +414,12 @@ namespace vamp

// Aligned load of 8 packed 32-bit integers into an AVX register.
// NOTE(review): `i` must be 32-byte aligned — _mm256_load_si256 faults on an
// unaligned address; use load_unaligned() otherwise.
template <unsigned int = 0>
inline static constexpr auto load(const ScalarT *const i) noexcept -> VectorT
{
    // reinterpret_cast replaces the original C-style cast (same semantics,
    // greppable and intent-revealing)
    return _mm256_load_si256(reinterpret_cast<const __m256i *>(i));
}

// Unaligned load of 8 packed 32-bit integers into an AVX register.
// Safe for any address, unlike load().
template <unsigned int = 0>
inline static constexpr auto load_unaligned(const ScalarT *const i) noexcept -> VectorT
{
    // reinterpret_cast replaces the original C-style cast (same semantics,
    // greppable and intent-revealing)
    return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(i));
}
Expand Down
41 changes: 29 additions & 12 deletions src/impl/vamp/vector/interface.hh
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ namespace vamp
inline static constexpr std::size_t num_rows = Sig::num_rows;
using DataT = typename Sig::DataT;

inline constexpr auto
to_array() const noexcept -> std::array<typename S::ScalarT, num_scalars_rounded>
inline constexpr auto to_array() const noexcept
-> std::array<typename S::ScalarT, num_scalars_rounded>
{
alignas(S::Alignment) std::array<typename S::ScalarT, num_scalars_rounded> result = {};
to_array(result);
Expand Down Expand Up @@ -718,9 +718,9 @@ namespace vamp
return S::template constant<0>(s);
}

inline constexpr void pack(const typename S::ScalarT *const scalar_data) noexcept
// Fills every SIMD lane of this vector from `scalar_data`, using aligned
// loads when `is_aligned` is true and unaligned loads otherwise.
// NOTE(review): load_vector reads num_vectors * S::VectorWidth scalars —
// caller must guarantee at least that much valid data.
inline constexpr void pack(const typename S::ScalarT *const scalar_data, bool is_aligned) noexcept
{
    load_vector(scalar_data, std::make_index_sequence<num_vectors>(), is_aligned);
}

template <auto fn, std::size_t stride = 1, std::size_t... I>
Expand All @@ -738,12 +738,22 @@ namespace vamp
}

template <std::size_t... I>
inline constexpr void
load_vector(const typename S::ScalarT *const scalar_array, std::index_sequence<I...>) noexcept
inline constexpr void load_vector(
const typename S::ScalarT *const scalar_array,
std::index_sequence<I...>,
bool is_aligned) noexcept
{
// TODO: This might segfault if we had to over-allocate vectors and the scalar data isn't
// full for the over-allocated size
(..., (std::get<I>(d()->data) = S::template load<0>(scalar_array + I * S::VectorWidth)));
if constexpr (is_aligned)
{
(..., (std::get<I>(d()->data) = S::template load<0>(scalar_array + I * S::VectorWidth)));
}
else
{
(...,
(std::get<I>(d()->data) = S::template load_unaligned<0>(scalar_array + I * S::VectorWidth)));
}
}

template <std::size_t... I>
Expand Down Expand Up @@ -815,10 +825,17 @@ namespace vamp
{
}

// TODO: Enable unaligned load for other constructors too
// Constructs a Vector from a raw scalar array, selecting aligned or unaligned
// SIMD loads via `is_aligned`.
constexpr Vector(const typename S::ScalarT *const scalar_data, bool is_aligned) noexcept
{
    // NOTE: assumes that scalar_data is a multiple of VectorWidth of valid data
    Interface::pack(scalar_data, is_aligned);
}

// Constructs a Vector from an aligned raw scalar array (aligned-load path).
constexpr Vector(const typename S::ScalarT *const scalar_data) noexcept
{
    // NOTE: assumes that scalar_data is a multiple of VectorWidth of valid data
    // and is aligned to S::Alignment.
    // FIX: dropped the stale one-argument `Interface::pack(scalar_data);`
    // call left alongside this one — pack now requires the `is_aligned`
    // argument, so the old call no longer compiles and would duplicate work.
    Interface::pack(scalar_data, true);
}

constexpr Vector(std::array<typename S::ScalarT, num_scalars> scalar_data) noexcept
Expand All @@ -831,13 +848,13 @@ namespace vamp
num_scalars + (S::VectorWidth - (num_scalars % S::VectorWidth))>
rounded_size_buffer{0};
std::copy(scalar_data.begin(), scalar_data.end(), rounded_size_buffer.begin());
Interface::pack(rounded_size_buffer.data());
Interface::pack(rounded_size_buffer.data(), true);
}
else
{
alignas(S::Alignment) std::array<typename S::ScalarT, num_scalars> aligned_buffer =
scalar_data;
Interface::pack(aligned_buffer.data());
Interface::pack(aligned_buffer.data(), true);
}
}

Expand All @@ -864,11 +881,11 @@ namespace vamp
num_scalars + (S::VectorWidth - (num_scalars % S::VectorWidth))>
rounded_size_buffer{0};
std::copy(scalar_data.begin(), scalar_data.end(), rounded_size_buffer.begin());
Interface::pack(rounded_size_buffer.data());
Interface::pack(rounded_size_buffer.data(), true);
}
else
{
Interface::pack(scalar_data.data());
Interface::pack(scalar_data.data(), true);
}
}
}
Expand Down
14 changes: 14 additions & 0 deletions src/impl/vamp/vector/neon.hh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ namespace vamp
return vld1q_f32(f);
}

// Unaligned load of 4 packed single-precision floats into a NEON register.
template <unsigned int = 0>
inline static auto load_unaligned(const ScalarT *const f) noexcept -> VectorT
{
    // NOTE(review): vld1q_f32 imposes no 16-byte alignment requirement, so the
    // same intrinsic serves both the aligned and unaligned load paths on ARM —
    // confirm against the ARM NEON intrinsics reference.
    return vld1q_f32(f);
}

template <unsigned int = 0>
inline static auto store(ScalarT *f, VectorT v) noexcept -> void
{
Expand Down Expand Up @@ -490,6 +497,13 @@ namespace vamp
return vld1q_s32((const int32_t *const)i);
}

// Unaligned load of 4 packed 32-bit integers into a NEON register.
// NOTE(review): vld1q_s32 imposes no 16-byte alignment requirement, so the
// same intrinsic serves both the aligned and unaligned load paths on ARM.
template <unsigned int = 0>
inline static auto load_unaligned(const ScalarT *const i) noexcept -> VectorT
{
    // reinterpret_cast replaces the original C-style cast (same semantics,
    // greppable and intent-revealing)
    return vld1q_s32(reinterpret_cast<const int32_t *>(i));
}

template <unsigned int = 0>
inline static auto store(ScalarT *i, VectorT v) noexcept -> void
{
Expand Down

0 comments on commit a01a98f

Please sign in to comment.