-
Notifications
You must be signed in to change notification settings - Fork 1
/
fwddecl.h
338 lines (260 loc) · 11.7 KB
/
fwddecl.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
/* SPDX-License-Identifier: GPL-3.0-or-later WITH GCC-exception-3.1 */
/* Copyright © 2023-2024 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH
* Matthias Kretz <[email protected]>
*/
#ifndef PROTOTYPE_FWDDECL_H_
#define PROTOTYPE_FWDDECL_H_
//#include <experimental/bits/simd_detail.h>
#include "simd_config.h"
#include <functional>
#include <stdfloat>
#include <type_traits>
namespace std
{
// ABI tag types. They are only declared in this header; the definitions are elsewhere.
// __detail::__native_abi_impl_recursive (in this file) instantiates _VecAbi and
// _Avx512Abi with (byte count / sizeof(element type)) as _Width and reads their
// nested _IsValid<T>::value to decide whether the tag may be used for T.
template <int _Width>
struct _VecAbi;
template <int _Width>
struct _Avx512Abi;
// Non-template tag. __native_abi_impl_recursive returns it as the last resort, when
// neither template tag above is valid at any of the widths it tried.
struct _ScalarAbi;
namespace __detail
{
// Forward declaration only. _Np is a size_t template argument; _Sanitized is a
// boolean flag that defaults to false.
// NOTE(review): presumably _Np is the number of mask bits and _Sanitized means the
// unused high bits are known to be zero -- confirm at the definition.
template <size_t _Np, bool _Sanitized = false>
struct _BitMask;
// Shorthand for _BitMask with the _Sanitized flag fixed to true.
template <size_t _Np>
using _SanitizedBitMask = _BitMask<_Np, true>;
// Maps a byte count to an unsigned integer type whose sizeof equals that count.
//
// The primary template is declared but not defined here, so a byte count without a
// matching specialization names an incomplete type.
template <size_t _Bytes>
  struct __make_unsigned_int;

template <>
  struct __make_unsigned_int<sizeof(unsigned char)>
  {
    using type = unsigned char;
  };

template <>
  struct __make_unsigned_int<sizeof(unsigned short)>
  {
    using type = unsigned short;
  };

template <>
  struct __make_unsigned_int<sizeof(unsigned int)>
  {
    using type = unsigned int;
  };

// When unsigned long is no wider than unsigned int, the key is bumped by one so that
// this specialization does not redefine the unsigned int one.
template <>
  struct __make_unsigned_int<sizeof(unsigned long)
                               + (sizeof(unsigned long) == sizeof(unsigned int) ? 1 : 0)>
  {
    using type = unsigned long;
  };

// Same trick one level up: avoid clashing with the unsigned long specialization when
// unsigned long long has the same size.
template <>
  struct __make_unsigned_int<sizeof(unsigned long long)
                               + (sizeof(unsigned long long) == sizeof(unsigned long) ? 1 : 0)>
  {
    using type = unsigned long long;
  };
template <typename _Tp>
using __make_unsigned_int_t = typename __make_unsigned_int<sizeof(_Tp)>::type;
template <typename _Tp>
using __make_signed_int_t = make_signed_t<__make_unsigned_int_t<_Tp>>;
template <size_t _Bs>
using __mask_integer_from = make_signed_t<typename __make_unsigned_int<_Bs>::type>;
// Trait that is true only for the element types explicitly listed below; every other
// type falls back to the primary template and yields false.
template <typename _Tp>
  struct __is_vectorizable
  : false_type
  {};

// TODO
//template <> struct __is_vectorizable<std::byte> : true_type {};

// Character types.
template <> struct __is_vectorizable<char>     : true_type {};
template <> struct __is_vectorizable<wchar_t>  : true_type {};
template <> struct __is_vectorizable<char8_t>  : true_type {};
template <> struct __is_vectorizable<char16_t> : true_type {};
template <> struct __is_vectorizable<char32_t> : true_type {};

// Signed and unsigned standard integer types.
template <> struct __is_vectorizable<signed char>        : true_type {};
template <> struct __is_vectorizable<unsigned char>      : true_type {};
template <> struct __is_vectorizable<short>              : true_type {};
template <> struct __is_vectorizable<unsigned short>     : true_type {};
template <> struct __is_vectorizable<int>                : true_type {};
template <> struct __is_vectorizable<unsigned int>       : true_type {};
template <> struct __is_vectorizable<long>               : true_type {};
template <> struct __is_vectorizable<unsigned long>      : true_type {};
template <> struct __is_vectorizable<long long>          : true_type {};
template <> struct __is_vectorizable<unsigned long long> : true_type {};

// Standard floating-point types.
template <> struct __is_vectorizable<float>  : true_type {};
template <> struct __is_vectorizable<double> : true_type {};

// Extended floating-point types, each guarded by its feature macro.
// The float16_t specialization is intentionally left disabled.
#ifdef __STDCPP_FLOAT16_T__
//template <> struct __is_vectorizable<std::float16_t> : true_type {};
#endif
#ifdef __STDCPP_FLOAT32_T__
template <> struct __is_vectorizable<std::float32_t> : true_type {};
#endif
#ifdef __STDCPP_FLOAT64_T__
template <> struct __is_vectorizable<std::float64_t> : true_type {};
#endif
// Identity metafunction with an extra type parameter that is ignored: `type` is
// always _Tp. Passing a template parameter as the second argument turns an otherwise
// non-dependent type into a dependent one; __native_abi_impl_recursive does this for
// _ScalarAbi.
template <typename _Tp, typename _Ignored>
  struct __make_dependent
  {
    using type = _Tp;
  };

// Shorthand for __make_dependent<_Tp, _Dep>::type, i.e. _Tp.
template <typename _Tp, typename _Dep>
  using __make_dependent_t = typename __make_dependent<_Tp, _Dep>::type;
// Selects an ABI tag for element type _Tp, starting from a byte count of _Bs.
//
// For the current byte count the candidates are tried in this order:
//   1. _Avx512Abi<_Bs / sizeof(_Tp)>
//   2. _VecAbi<_Bs / sizeof(_Tp)>
// The first candidate whose _IsValid<_Tp>::value is true is returned. If neither is
// valid and _Bs is still larger than sizeof(_Tp), the search repeats with half the
// byte count; otherwise the result is _ScalarAbi.
template <int _Bs, typename _Tp>
  consteval auto
  __native_abi_impl_recursive()
  {
    constexpr int __elements = _Bs / sizeof(_Tp);
    using _Avx512Tag = _Avx512Abi<__elements>;
    using _VecTag = _VecAbi<__elements>;

    if constexpr (_Avx512Tag::template _IsValid<_Tp>::value)
      return _Avx512Tag();
    else if constexpr (_VecTag::template _IsValid<_Tp>::value)
      return _VecTag();
    else if constexpr (_Bs > sizeof(_Tp))
      return __native_abi_impl_recursive<_Bs / 2, _Tp>();
    else
      // _ScalarAbi is routed through __make_dependent_t so that the name depends on _Tp.
      return __make_dependent_t<_ScalarAbi, _Tp>();
  }
// Empty placeholder tag. __native_abi_impl returns it for element types for which
// __is_vectorizable is false.
struct _InvalidAbi
{};
// Entry point of the native-ABI selection for element type _Tp.
//
// Returns _InvalidAbi when __is_vectorizable<_Tp> is false; otherwise returns whatever
// __native_abi_impl_recursive picks when the search starts at 256 bytes.
template <typename _Tp>
  consteval auto
  __native_abi_impl()
  {
    if constexpr (!__is_vectorizable<_Tp>::value)
      return _InvalidAbi();
    else
      {
        // Always 1, but written in terms of _Tp so that the byte count below is a
        // dependent expression (original note: this makes _VecAbi a dependent type).
        constexpr int __unit = sizeof(_Tp) / sizeof(_Tp);
        return __native_abi_impl_recursive<__unit * 256, _Tp>();
      }
  }
// ABI tag selected for element type _Tp: the type returned by __native_abi_impl<_Tp>().
// This is _InvalidAbi when _Tp is not vectorizable.
template <typename _Tp>
using _NativeAbi = decltype(__native_abi_impl<_Tp>());
// Integer type used in this header for the size template arguments (_Np) and as the
// return type of reduce_count / reduce_min_index / reduce_max_index.
using _SimdSizeType = int;
// Forward declaration only; the definition is elsewhere. Its nested `type` is used as
// the ABI argument of basic_simd / basic_simd_mask by the simd and simd_mask aliases.
// NOTE(review): presumably maps (element type, element count) to an ABI tag -- confirm
// at the definition.
template <typename _Tp, _SimdSizeType _Np>
struct _DeduceAbi;
// Shorthand for _DeduceAbi<_Tp, _Np>::type.
template <typename _Tp, _SimdSizeType _Np>
using __deduce_t = typename _DeduceAbi<_Tp, _Np>::type;
}
// Further ABI-related templates; only declared in this header.
// NOTE(review): the names suggest _AbiArray repeats one ABI _Np times and _AbiCombine
// groups several ABIs -- confirm at the definitions.
template <typename _Abi0, __detail::_SimdSizeType _Np>
struct _AbiArray;
template <__detail::_SimdSizeType _Np, typename _Tag>
struct _AbiCombine;
// Primary SIMD value type (declaration only). When no ABI is given, the tag selected
// by __detail::_NativeAbi<_Tp> is used.
template <typename _Tp, typename _Abi = __detail::_NativeAbi<_Tp>>
class basic_simd;
// Mask type (declaration only). It is parameterized on a byte count rather than on an
// element type; the default ABI is the native ABI of the signed integer type that
// occupies _Bytes bytes.
template <size_t _Bytes,
typename _Abi = __detail::_NativeAbi<__detail::__mask_integer_from<_Bytes>>>
class basic_simd_mask;
// Declaration only; takes an arbitrary pack of flag types.
template <typename... _Flags>
struct simd_flags;
// Primary template of the is_simd trait: false for every type that has no
// specialization.
template <typename _Tp>
  struct is_simd
  : false_type
  {};

// Variable-template shorthand for is_simd<_Tp>::value.
template <typename _Tp>
  inline constexpr bool is_simd_v = is_simd<_Tp>::value;

// Primary template of the is_simd_mask trait: false for every type that has no
// specialization.
template <typename _Tp>
  struct is_simd_mask
  : false_type
  {};

// Variable-template shorthand for is_simd_mask<_Tp>::value.
template <typename _Tp>
  inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
// Primary template is intentionally empty: it has no `value` member.
// NOTE(review): specializations that provide `value` are expected elsewhere -- confirm.
// The ABI argument defaults to the native ABI of _Tp.
template <typename _Tp, typename _Abi = __detail::_NativeAbi<_Tp>>
struct simd_size
{};
// Shorthand for simd_size<_Tp, _Abi>::value; only usable where a specialization
// supplies that member.
template <typename _Tp, typename _Abi = __detail::_NativeAbi<_Tp>>
inline constexpr __detail::_SimdSizeType simd_size_v = simd_size<_Tp, _Abi>::value;
// Primary template is empty (no `value` member). The second argument defaults to
// _Tp::value_type, so _Tp must have a nested value_type when _Up is not given.
template <typename _Tp, typename _Up = typename _Tp::value_type>
struct simd_alignment
{};
// Shorthand for simd_alignment<_Tp, _Up>::value.
template <typename _Tp, typename _Up = typename _Tp::value_type>
inline constexpr size_t simd_alignment_v = simd_alignment<_Tp, _Up>::value;
// Primary template is empty: it has no nested `type`.
// NOTE(review): presumably specialized elsewhere to produce _Vp with its element type
// replaced by _Tp -- confirm at the specializations.
template <typename _Tp, typename _Vp>
struct rebind_simd
{};
// Shorthand for rebind_simd<_Tp, _Vp>::type.
template <typename _Tp, typename _Vp>
using rebind_simd_t = typename rebind_simd<_Tp, _Vp>::type;
// Primary template is empty: it has no nested `type`.
// NOTE(review): presumably specialized elsewhere to produce _Vp with its element count
// replaced by _Np -- confirm at the specializations.
template <__detail::_SimdSizeType _Np, typename _Vp>
struct resize_simd
{};
// Shorthand for resize_simd<_Np, _Vp>::type.
template <__detail::_SimdSizeType _Np, typename _Vp>
using resize_simd_t = typename resize_simd<_Np, _Vp>::type;
// basic_simd of _Tp whose ABI is deduced from (_Tp, _Np) via __detail::__deduce_t.
// When _Np is omitted it is the size() of basic_simd<_Tp> with its default ABI.
template <typename _Tp, __detail::_SimdSizeType _Np = basic_simd<_Tp>::size()>
using simd = basic_simd<_Tp, __detail::__deduce_t<_Tp, _Np>>;
// Matching mask alias: basic_simd_mask keyed by sizeof(_Tp), using the same deduced
// ABI and the same default for _Np as the simd alias.
template <typename _Tp, __detail::_SimdSizeType _Np = basic_simd<_Tp>::size()>
using simd_mask = basic_simd_mask<sizeof(_Tp), __detail::__deduce_t<_Tp, _Np>>;
// mask_reductions.h
//
// Forward declarations of the mask reductions (the marker above presumably names the
// file that defines them -- confirm). Each reduction is declared twice: once for
// basic_simd_mask and once for a plain bool, the latter constrained with
// same_as<bool> so that only an argument of type bool itself is accepted.
// NOTE(review): semantics are not visible here; the names follow the usual
// all/any/none/count/first-index/last-index mask reductions -- confirm at the
// definitions.
template <size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr bool
all_of(const basic_simd_mask<_Bs, _Abi>& __k) noexcept;
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr bool
all_of(same_as<bool> auto __x) noexcept;
template <size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr bool
any_of(const basic_simd_mask<_Bs, _Abi>& __k) noexcept;
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr bool
any_of(same_as<bool> auto __x) noexcept;
template <size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr bool
none_of(const basic_simd_mask<_Bs, _Abi>& __k) noexcept;
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr bool
none_of(same_as<bool> auto __x) noexcept;
// The counting and index reductions return __detail::_SimdSizeType.
template <size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr __detail::_SimdSizeType
reduce_count(const basic_simd_mask<_Bs, _Abi>& __k) noexcept;
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr __detail::_SimdSizeType
reduce_count(same_as<bool> auto __x) noexcept;
// Unlike every other declaration in this group, the basic_simd_mask overloads of
// reduce_min_index and reduce_max_index are not declared noexcept.
// NOTE(review): presumably because they have a precondition on __k -- confirm.
template <size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr __detail::_SimdSizeType
reduce_min_index(const basic_simd_mask<_Bs, _Abi>& __k);
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr __detail::_SimdSizeType
reduce_min_index(same_as<bool> auto __x) noexcept;
template <size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr __detail::_SimdSizeType
reduce_max_index(const basic_simd_mask<_Bs, _Abi>& __k);
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr __detail::_SimdSizeType
reduce_max_index(same_as<bool> auto __x) noexcept;
// Forward declarations of split and concatenation.
//
// simd_split: the first template argument (_V for vectors, _M for masks) does not
// appear in the parameter list, so callers must name it explicitly; the return type
// is deduced from the definition.
template <typename _V, typename _Tp, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr auto
simd_split(const basic_simd<_Tp, _Abi>& __x) noexcept;
template <typename _M, size_t _Bs, typename _Abi>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr auto
simd_split(const basic_simd_mask<_Bs, _Abi>& __x) noexcept;
// simd_cat: accepts any number of basic_simd objects with the same element type but
// possibly different ABIs. The result is a simd whose size is the sum of the argument
// sizes.
template <typename _Tp, typename... _Abis>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr
simd<_Tp, (simd_size_v<_Tp, _Abis> + ...)>
simd_cat(const basic_simd<_Tp, _Abis>&... __xs) noexcept;
// Mask counterpart: all arguments share the byte count _Bs. The result is the
// simd_mask of the _Bs-byte signed integer type with the summed size.
template <size_t _Bs, typename... _Abis>
_GLIBCXX_SIMD_ALWAYS_INLINE constexpr
simd_mask<__detail::__mask_integer_from<_Bs>, (basic_simd_mask<_Bs, _Abis>::size.value + ...)>
simd_cat(const basic_simd_mask<_Bs, _Abis>&... __xs) noexcept;
// Forward declarations of the reduce overloads. All of them return a single _Tp.
//
// Unmasked reduction. The operation must be invocable with two simd<_Tp, 1>
// arguments and defaults to plus<>. Not declared noexcept.
template <typename _Tp, typename _Abi,
std::invocable<simd<_Tp, 1>, simd<_Tp, 1>> _BinaryOperation = plus<>>
constexpr _Tp
reduce(const basic_simd<_Tp, _Abi>& __x, _BinaryOperation __binary_op = {});
// Masked reduction with a caller-supplied operation and identity element. Neither
// has a default. __identity_element is wrapped in __type_identity_t, so _Tp is
// deduced from __x only. Not declared noexcept.
template <typename _Tp, typename _Abi,
std::invocable<simd<_Tp, 1>, simd<_Tp, 1>> _BinaryOperation>
constexpr _Tp
reduce(const basic_simd<_Tp, _Abi>& __x, const typename basic_simd<_Tp, _Abi>::mask_type& __k,
__type_identity_t<_Tp> __identity_element, _BinaryOperation __binary_op);
// Masked reductions for the standard function objects. These take no identity
// element and are noexcept. plus<> is the default operation when only a mask is given.
template <typename _Tp, typename _Abi>
constexpr _Tp
reduce(const basic_simd<_Tp, _Abi>& __x, const typename basic_simd<_Tp, _Abi>::mask_type& __k,
plus<> __binary_op = {}) noexcept;
template <typename _Tp, typename _Abi>
constexpr _Tp
reduce(const basic_simd<_Tp, _Abi>& __x, const typename basic_simd<_Tp, _Abi>::mask_type& __k,
multiplies<> __binary_op) noexcept;
// The bitwise variants are restricted to integral element types.
template <std::integral _Tp, typename _Abi>
constexpr _Tp
reduce(const basic_simd<_Tp, _Abi>& __x, const typename basic_simd<_Tp, _Abi>::mask_type& __k,
bit_and<> __binary_op) noexcept;
template <std::integral _Tp, typename _Abi>
constexpr _Tp
reduce(const basic_simd<_Tp, _Abi>& __x, const typename basic_simd<_Tp, _Abi>::mask_type& __k,
bit_or<> __binary_op) noexcept;
template <std::integral _Tp, typename _Abi>
constexpr _Tp
reduce(const basic_simd<_Tp, _Abi>& __x, const typename basic_simd<_Tp, _Abi>::mask_type& __k,
bit_xor<> __binary_op) noexcept;
// Forward declarations of the minimum / maximum reductions. Each returns a single
// _Tp, requires a totally ordered element type, and is noexcept. Each comes in an
// unmasked form and a form that also takes a mask __k.
// NOTE(review): the result for a mask that selects no element is not visible here --
// confirm at the definitions.
template <std::totally_ordered _Tp, typename _Abi>
constexpr _Tp
reduce_min(const basic_simd<_Tp, _Abi>& __x) noexcept;
template <std::totally_ordered _Tp, typename _Abi>
constexpr _Tp
reduce_min(const basic_simd<_Tp, _Abi>& __x,
const typename basic_simd<_Tp, _Abi>::mask_type& __k) noexcept;
template <std::totally_ordered _Tp, typename _Abi>
constexpr _Tp
reduce_max(const basic_simd<_Tp, _Abi>& __x) noexcept;
template <std::totally_ordered _Tp, typename _Abi>
constexpr _Tp
reduce_max(const basic_simd<_Tp, _Abi>& __x,
const typename basic_simd<_Tp, _Abi>::mask_type& __k) noexcept;
}
#endif // PROTOTYPE_FWDDECL_H_