3#ifndef DUNE_COMMON_SIMD_LOOP_HH
4#define DUNE_COMMON_SIMD_LOOP_HH
24# pragma GCC diagnostic push
25# pragma GCC diagnostic ignored "-Wbool-operation"
26# pragma GCC diagnostic ignored "-Wint-in-bool-context"
27# define GCC_WARNING_DISABLED
35#if __has_warning("-Wbitwise-instead-of-logical")
36# pragma clang diagnostic push
37# pragma clang diagnostic ignored "-Wbitwise-instead-of-logical"
38# define CLANG_WARNING_DISABLED
46 #define DUNE_PRAGMA_OMP_SIMD _Pragma("omp simd")
48 #define DUNE_PRAGMA_OMP_SIMD
63 template<
class T, std::
size_t S, std::
size_t A = 0>
64 class alignas(A==0?alignof(T):A)
LoopSIMD :
public std::array<T,S> {
70 assert(
reinterpret_cast<uintptr_t
>(
this) % std::min(
alignof(
LoopSIMD<T,S,A>),
alignof(std::max_align_t)) == 0);
78 template<std::
size_t OA>
80 : std::array<T,S>(other)
82 assert(
reinterpret_cast<uintptr_t
>(
this) % std::min(
alignof(
LoopSIMD<T,S,A>),
alignof(std::max_align_t)) == 0);
90#define DUNE_SIMD_LOOP_PREFIX_OP(SYMBOL) \
91 auto operator SYMBOL() { \
92 DUNE_PRAGMA_OMP_SIMD \
93 for(std::size_t i=0; i<S; i++){ \
98 static_assert(true, "expecting ;")
100 DUNE_SIMD_LOOP_PREFIX_OP(++);
101 DUNE_SIMD_LOOP_PREFIX_OP(--);
102#undef DUNE_SIMD_LOOP_PREFIX_OP
105#define DUNE_SIMD_LOOP_UNARY_OP(SYMBOL) \
106 auto operator SYMBOL() const { \
107 LoopSIMD<T,S,A> out; \
108 DUNE_PRAGMA_OMP_SIMD \
109 for(std::size_t i=0; i<S; i++){ \
110 out[i] = SYMBOL((*this)[i]); \
114 static_assert(true, "expecting ;")
// Unary +, - and ~ as const member operators. Each expansion applies the
// operator to every element of *this and stores the results in a fresh
// LoopSIMD<T,S,A>; the operand itself is not modified.
116 DUNE_SIMD_LOOP_UNARY_OP(+);
117 DUNE_SIMD_LOOP_UNARY_OP(-);
118 DUNE_SIMD_LOOP_UNARY_OP(~);
120 auto operator!()
const {
123 for(std::size_t i=0; i<S; i++){
124 out[i] = !((*this)[i]);
128#undef DUNE_SIMD_LOOP_UNARY_OP
131#define DUNE_SIMD_LOOP_POSTFIX_OP(SYMBOL) \
132 auto operator SYMBOL(int){ \
133 LoopSIMD<T,S,A> out = *this; \
137 static_assert(true, "expecting ;")
139 DUNE_SIMD_LOOP_POSTFIX_OP(++);
140 DUNE_SIMD_LOOP_POSTFIX_OP(--);
141#undef DUNE_SIMD_LOOP_POSTFIX_OP
144#define DUNE_SIMD_LOOP_ASSIGNMENT_OP(SYMBOL) \
145 auto operator SYMBOL(const Simd::Scalar<T> s) { \
146 DUNE_PRAGMA_OMP_SIMD \
147 for(std::size_t i=0; i<S; i++){ \
148 (*this)[i] SYMBOL s; \
153 auto operator SYMBOL(const LoopSIMD<T,S,A> &v) { \
154 DUNE_PRAGMA_OMP_SIMD \
155 for(std::size_t i=0; i<S; i++){ \
156 (*this)[i] SYMBOL v[i]; \
160 static_assert(true, "expecting ;")
// Compound-assignment member operators. Each invocation expands to two
// overloads: one that applies a Simd::Scalar<T> operand to every element of
// *this, and one that combines *this element by element with another
// LoopSIMD<T,S,A>. The trailing ';' is absorbed by the static_assert that
// terminates the macro.
// arithmetic
162 DUNE_SIMD_LOOP_ASSIGNMENT_OP(+=);
163 DUNE_SIMD_LOOP_ASSIGNMENT_OP(-=);
164 DUNE_SIMD_LOOP_ASSIGNMENT_OP(*=);
165 DUNE_SIMD_LOOP_ASSIGNMENT_OP(/=);
166 DUNE_SIMD_LOOP_ASSIGNMENT_OP(%=);
// shifts
167 DUNE_SIMD_LOOP_ASSIGNMENT_OP(<<=);
168 DUNE_SIMD_LOOP_ASSIGNMENT_OP(>>=);
// bitwise
169 DUNE_SIMD_LOOP_ASSIGNMENT_OP(&=);
170 DUNE_SIMD_LOOP_ASSIGNMENT_OP(|=);
171 DUNE_SIMD_LOOP_ASSIGNMENT_OP(^=);
172#undef DUNE_SIMD_LOOP_ASSIGNMENT_OP
176#define DUNE_SIMD_LOOP_BINARY_OP(SYMBOL) \
177 template<class T, std::size_t S, std::size_t A> \
178 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const Simd::Scalar<T> s) { \
179 LoopSIMD<T,S,A> out; \
180 DUNE_PRAGMA_OMP_SIMD \
181 for(std::size_t i=0; i<S; i++){ \
182 out[i] = v[i] SYMBOL s; \
186 template<class T, std::size_t S, std::size_t A> \
187 auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S,A> &v) { \
188 LoopSIMD<T,S,A> out; \
189 DUNE_PRAGMA_OMP_SIMD \
190 for(std::size_t i=0; i<S; i++){ \
191 out[i] = s SYMBOL v[i]; \
195 template<class T, std::size_t S, std::size_t A> \
196 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
197 const LoopSIMD<T,S,A> &w) { \
198 LoopSIMD<T,S,A> out; \
199 DUNE_PRAGMA_OMP_SIMD \
200 for(std::size_t i=0; i<S; i++){ \
201 out[i] = v[i] SYMBOL w[i]; \
205 static_assert(true, "expecting ;")
// Free binary operators. Each invocation expands to three function templates
// taking (vector, scalar), (scalar, vector) and (vector, vector) operands,
// where "scalar" is Simd::Scalar<T> and "vector" is LoopSIMD<T,S,A>. The
// element-wise results are collected in a LoopSIMD<T,S,A>.
// arithmetic
207 DUNE_SIMD_LOOP_BINARY_OP(+);
208 DUNE_SIMD_LOOP_BINARY_OP(-);
209 DUNE_SIMD_LOOP_BINARY_OP(*);
210 DUNE_SIMD_LOOP_BINARY_OP(/);
211 DUNE_SIMD_LOOP_BINARY_OP(%);
// bitwise
213 DUNE_SIMD_LOOP_BINARY_OP(&);
214 DUNE_SIMD_LOOP_BINARY_OP(|);
215 DUNE_SIMD_LOOP_BINARY_OP(^);
217#undef DUNE_SIMD_LOOP_BINARY_OP
220#define DUNE_SIMD_LOOP_BITSHIFT_OP(SYMBOL) \
221 template<class T, std::size_t S, std::size_t A, class U> \
222 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const U s) { \
223 LoopSIMD<T,S,A> out; \
224 DUNE_PRAGMA_OMP_SIMD \
225 for(std::size_t i=0; i<S; i++){ \
226 out[i] = v[i] SYMBOL s; \
230 template<class T, std::size_t S, std::size_t A, class U, std::size_t AU> \
231 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
232 const LoopSIMD<U,S,AU> &w) { \
233 LoopSIMD<T,S,A> out; \
234 DUNE_PRAGMA_OMP_SIMD \
235 for(std::size_t i=0; i<S; i++){ \
236 out[i] = v[i] SYMBOL w[i]; \
240 static_assert(true, "expecting ;")
// Shift operators. Unlike the other binary operators, the right-hand operand
// may have a different type: either an arbitrary U applied to every element,
// or a LoopSIMD<U,S,AU> supplying one shift count per element. The result is
// collected in a LoopSIMD<T,S,A> matching the left-hand operand.
242 DUNE_SIMD_LOOP_BITSHIFT_OP(<<);
243 DUNE_SIMD_LOOP_BITSHIFT_OP(>>);
245#undef DUNE_SIMD_LOOP_BITSHIFT_OP
248#define DUNE_SIMD_LOOP_COMPARISON_OP(SYMBOL) \
249 template<class T, std::size_t S, std::size_t A, class U> \
250 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const U s) { \
251 Simd::Mask<LoopSIMD<T,S,A>> out; \
252 DUNE_PRAGMA_OMP_SIMD \
253 for(std::size_t i=0; i<S; i++){ \
254 out[i] = v[i] SYMBOL s; \
258 template<class T, std::size_t S, std::size_t A> \
259 auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S,A> &v) { \
260 Simd::Mask<LoopSIMD<T,S,A>> out; \
261 DUNE_PRAGMA_OMP_SIMD \
262 for(std::size_t i=0; i<S; i++){ \
263 out[i] = s SYMBOL v[i]; \
267 template<class T, std::size_t S, std::size_t A> \
268 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
269 const LoopSIMD<T,S,A> &w) { \
270 Simd::Mask<LoopSIMD<T,S,A>> out; \
271 DUNE_PRAGMA_OMP_SIMD \
272 for(std::size_t i=0; i<S; i++){ \
273 out[i] = v[i] SYMBOL w[i]; \
277 static_assert(true, "expecting ;")
// Comparison operators. Each invocation expands to three function templates:
// (vector, U) with an arbitrary right-hand type U, (Simd::Scalar<T>, vector),
// and (vector, vector). The element-wise results are collected in a
// Simd::Mask<LoopSIMD<T,S,A>>.
279 DUNE_SIMD_LOOP_COMPARISON_OP(<);
280 DUNE_SIMD_LOOP_COMPARISON_OP(>);
281 DUNE_SIMD_LOOP_COMPARISON_OP(<=);
282 DUNE_SIMD_LOOP_COMPARISON_OP(>=);
283 DUNE_SIMD_LOOP_COMPARISON_OP(==);
284 DUNE_SIMD_LOOP_COMPARISON_OP(!=);
285#undef DUNE_SIMD_LOOP_COMPARISON_OP
288#define DUNE_SIMD_LOOP_BOOLEAN_OP(SYMBOL) \
289 template<class T, std::size_t S, std::size_t A> \
290 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const Simd::Scalar<T> s) { \
291 Simd::Mask<LoopSIMD<T,S,A>> out; \
292 DUNE_PRAGMA_OMP_SIMD \
293 for(std::size_t i=0; i<S; i++){ \
294 out[i] = v[i] SYMBOL s; \
298 template<class T, std::size_t S, std::size_t A> \
299 auto operator SYMBOL(const Simd::Mask<T> s, const LoopSIMD<T,S,A> &v) { \
300 Simd::Mask<LoopSIMD<T,S,A>> out; \
301 DUNE_PRAGMA_OMP_SIMD \
302 for(std::size_t i=0; i<S; i++){ \
303 out[i] = s SYMBOL v[i]; \
307 template<class T, std::size_t S, std::size_t A> \
308 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
309 const LoopSIMD<T,S,A> &w) { \
310 Simd::Mask<LoopSIMD<T,S,A>> out; \
311 DUNE_PRAGMA_OMP_SIMD \
312 for(std::size_t i=0; i<S; i++){ \
313 out[i] = v[i] SYMBOL w[i]; \
317 static_assert(true, "expecting ;")
// Logical && and ||, evaluated element by element into a
// Simd::Mask<LoopSIMD<T,S,A>>. Being overloaded operators, these evaluate
// both operands; there is no short-circuiting.
// NOTE(review): the (vector, scalar) overload takes Simd::Scalar<T> while the
// (scalar, vector) overload takes Simd::Mask<T> -- confirm that this
// asymmetry is intended.
319 DUNE_SIMD_LOOP_BOOLEAN_OP(&&);
320 DUNE_SIMD_LOOP_BOOLEAN_OP(||);
321#undef DUNE_SIMD_LOOP_BOOLEAN_OP
324 template<
class T, std::
size_t S, std::
size_t A>
325 std::ostream& operator<< (std::ostream &os,
const LoopSIMD<T,S,A> &v) {
327 for(std::size_t i=0; i<S-1; i++) {
335 namespace Overloads {
342 template<
class T, std::
size_t S, std::
size_t A>
343 struct ScalarType<LoopSIMD<T,S,A>> {
344 using type = Simd::Scalar<T>;
347 template<
class U,
class T, std::
size_t S, std::
size_t A>
348 struct RebindType<U, LoopSIMD<T,S,A>> {
349 using type = LoopSIMD<Simd::Rebind<U, T>,S,A>;
// Lane count of a LoopSIMD<T,S,A>: the S array elements multiplied by the
// number of lanes of the element type T (lanes<T>()). An element type that
// itself has several lanes therefore yields the product of both counts.
353 template<
class T, std::
size_t S, std::
size_t A>
354 struct LaneCount<LoopSIMD<T,S,A>> :
index_constant<S*lanes<T>()> {};
356 template<
class T, std::
size_t S, std::
size_t A>
357 auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S,A> &&v)
358 ->
decltype(std::move(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()])))
360 return std::move(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()]));
363 template<
class T, std::
size_t S, std::
size_t A>
364 auto lane(ADLTag<5>, std::size_t l,
const LoopSIMD<T,S,A> &v)
365 ->
decltype(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()]))
367 return Simd::lane(l%lanes<T>(), v[l/lanes<T>()]);
370 template<
class T, std::
size_t S, std::
size_t A>
371 auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S,A> &v)
372 ->
decltype(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()]))
374 return Simd::lane(l%lanes<T>(), v[l/lanes<T>()]);
377 template<
class T, std::
size_t S, std::
size_t AM, std::
size_t AD>
379 LoopSIMD<T,S,AD> ifTrue, LoopSIMD<T,S,AD> ifFalse) {
380 LoopSIMD<T,S,AD> out;
381 for(std::size_t i=0; i<S; i++) {
387 template<
class M,
class T, std::
size_t S, std::
size_t A>
388 auto cond(ADLTag<5, std::is_same<
bool, Simd::Scalar<M> >::value
389 && Simd::lanes<M>() ==
Simd::lanes<LoopSIMD<T,S,A> >()>,
390 M
mask, LoopSIMD<T,S,A> ifTrue, LoopSIMD<T,S,A> ifFalse)
398 template<
class M, std::
size_t S, std::
size_t A>
399 bool anyTrue(ADLTag<5>, LoopSIMD<M,S,A>
mask) {
401 for(std::size_t i=0; i<S; i++) {
407 template<
class M, std::
size_t S, std::
size_t A>
410 for(std::size_t i=0; i<S; i++) {
416 template<
class M, std::
size_t S, std::
size_t A>
419 for(std::size_t i=0; i<S; i++) {
425 template<
class M, std::
size_t S, std::
size_t A>
428 for(std::size_t i=0; i<S; i++) {
445#define DUNE_SIMD_LOOP_CMATH_UNARY_OP(expr) \
446 template<class T, std::size_t S, std::size_t A, typename Sfinae = \
447 typename std::enable_if_t<!std::is_integral<Simd::Scalar<T>>::value> > \
448 auto expr(const LoopSIMD<T,S,A> &v) { \
450 LoopSIMD<T,S,A> out; \
451 for(std::size_t i=0; i<S; i++) { \
452 out[i] = expr(v[i]); \
456 static_assert(true, "expecting ;")
458#define DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(expr, returnType) \
459 template<class T, std::size_t S, std::size_t A, typename Sfinae = \
460 typename std::enable_if_t<!std::is_integral<Simd::Scalar<T>>::value> > \
461 auto expr(const LoopSIMD<T,S,A> &v) { \
463 LoopSIMD<returnType,S> out; \
464 for(std::size_t i=0; i<S; i++) { \
465 out[i] = expr(v[i]); \
469 static_assert(true, "expecting ;")
// Element-wise wrappers for the unary math functions listed below. Every
// generated overload is enabled only when Simd::Scalar<T> is not an integral
// type (see the enable_if in the macro definitions).
//  - DUNE_SIMD_LOOP_CMATH_UNARY_OP stores its results in a LoopSIMD<T,S,A>.
//  - DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN stores its results in a
//    LoopSIMD<returnType,S>, i.e. with the given element type and without
//    forwarding the alignment parameter A.

// trigonometric and inverse trigonometric
471 DUNE_SIMD_LOOP_CMATH_UNARY_OP(cos);
472 DUNE_SIMD_LOOP_CMATH_UNARY_OP(sin);
473 DUNE_SIMD_LOOP_CMATH_UNARY_OP(tan);
474 DUNE_SIMD_LOOP_CMATH_UNARY_OP(acos);
475 DUNE_SIMD_LOOP_CMATH_UNARY_OP(asin);
476 DUNE_SIMD_LOOP_CMATH_UNARY_OP(atan);
// hyperbolic and inverse hyperbolic
477 DUNE_SIMD_LOOP_CMATH_UNARY_OP(cosh);
478 DUNE_SIMD_LOOP_CMATH_UNARY_OP(sinh);
479 DUNE_SIMD_LOOP_CMATH_UNARY_OP(tanh);
480 DUNE_SIMD_LOOP_CMATH_UNARY_OP(acosh);
481 DUNE_SIMD_LOOP_CMATH_UNARY_OP(asinh);
482 DUNE_SIMD_LOOP_CMATH_UNARY_OP(atanh);
// exponential and logarithmic (ilogb yields int elements)
484 DUNE_SIMD_LOOP_CMATH_UNARY_OP(exp);
485 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log);
486 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log10);
487 DUNE_SIMD_LOOP_CMATH_UNARY_OP(exp2);
488 DUNE_SIMD_LOOP_CMATH_UNARY_OP(expm1);
489 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(ilogb,
int);
490 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log1p);
491 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log2);
492 DUNE_SIMD_LOOP_CMATH_UNARY_OP(logb);
// roots
494 DUNE_SIMD_LOOP_CMATH_UNARY_OP(sqrt);
495 DUNE_SIMD_LOOP_CMATH_UNARY_OP(cbrt);
// error and gamma functions
497 DUNE_SIMD_LOOP_CMATH_UNARY_OP(erf);
498 DUNE_SIMD_LOOP_CMATH_UNARY_OP(erfc);
499 DUNE_SIMD_LOOP_CMATH_UNARY_OP(tgamma);
500 DUNE_SIMD_LOOP_CMATH_UNARY_OP(lgamma);
// rounding (the l*/ll* variants yield long / long long elements)
502 DUNE_SIMD_LOOP_CMATH_UNARY_OP(ceil);
503 DUNE_SIMD_LOOP_CMATH_UNARY_OP(floor);
504 DUNE_SIMD_LOOP_CMATH_UNARY_OP(
trunc);
505 DUNE_SIMD_LOOP_CMATH_UNARY_OP(
round);
506 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(lround,
long);
507 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(llround,
long long);
508 DUNE_SIMD_LOOP_CMATH_UNARY_OP(rint);
509 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(lrint,
long);
510 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(llrint,
long long);
511 DUNE_SIMD_LOOP_CMATH_UNARY_OP(nearbyint);
// absolute value
513 DUNE_SIMD_LOOP_CMATH_UNARY_OP(fabs);
514 DUNE_SIMD_LOOP_CMATH_UNARY_OP(abs);
516#undef DUNE_SIMD_LOOP_CMATH_UNARY_OP
517#undef DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN
539#define DUNE_SIMD_LOOP_STD_UNARY_OP(expr) \
540 template<class T, std::size_t S, std::size_t A> \
541 auto expr(const LoopSIMD<T,S,A> &v) { \
543 LoopSIMD<T,S,A> out; \
544 for(std::size_t i=0; i<S; i++) { \
545 out[i] = expr(v[i]); \
550 template<class T, std::size_t S, std::size_t A> \
551 auto expr(const LoopSIMD<std::complex<T>,S,A> &v) { \
553 LoopSIMD<T,S,A> out; \
554 for(std::size_t i=0; i<S; i++) { \
555 out[i] = expr(v[i]); \
559 static_assert(true, "expecting ;")
// Element-wise real() and imag(). Each invocation defines two overloads, one
// for LoopSIMD<T,S,A> and one for LoopSIMD<std::complex<T>,S,A>; both collect
// their results in a LoopSIMD<T,S,A>.
561 DUNE_SIMD_LOOP_STD_UNARY_OP(real);
562 DUNE_SIMD_LOOP_STD_UNARY_OP(imag);
564#undef DUNE_SIMD_LOOP_STD_UNARY_OP
566#define DUNE_SIMD_LOOP_STD_BINARY_OP(expr) \
567 template<class T, std::size_t S, std::size_t A> \
568 auto expr(const LoopSIMD<T,S,A> &v, const LoopSIMD<T,S,A> &w) { \
570 LoopSIMD<T,S,A> out; \
571 for(std::size_t i=0; i<S; i++) { \
572 out[i] = expr(v[i],w[i]); \
576 static_assert(true, "expecting ;")
// Element-wise binary max() and min() of two LoopSIMD<T,S,A> operands; the
// results are collected in a LoopSIMD<T,S,A>.
578 DUNE_SIMD_LOOP_STD_BINARY_OP(
max);
579 DUNE_SIMD_LOOP_STD_BINARY_OP(
min);
581#undef DUNE_SIMD_LOOP_STD_BINARY_OP
583 namespace MathOverloads {
584 template<
class T, std::
size_t S, std::
size_t A>
585 auto isNaN(
const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
586 Simd::Mask<LoopSIMD<T,S,A>> out;
587 for(
auto l : range(S))
588 out[l] = Dune::isNaN(v[l]);
592 template<
class T, std::
size_t S, std::
size_t A>
593 auto isInf(
const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
594 Simd::Mask<LoopSIMD<T,S,A>> out;
595 for(
auto l : range(S))
596 out[l] = Dune::isInf(v[l]);
600 template<
class T, std::
size_t S, std::
size_t A>
601 auto isFinite(
const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
602 Simd::Mask<LoopSIMD<T,S,A>> out;
603 for(
auto l : range(S))
604 out[l] = Dune::isFinite(v[l]);
609 template<
class T, std::
size_t S, std::
size_t A>
610 struct IsNumber<LoopSIMD<T,S,A>> :
611 public std::integral_constant<bool, IsNumber<T>::value>{
614#ifdef CLANG_WARNING_DISABLED
615# pragma clang diagnostic pop
616# undef CLANG_WARNING_DISABLED
619#ifdef GCC_WARNING_DISABLED
620# pragma GCC diagnostic pop
621# undef GCC_WARNING_DISABLED
Traits for type conversions and type information.
std::integral_constant< std::size_t, i > index_constant
An index constant with value i.
Definition: indices.hh:30
I round(const T &val, typename EpsilonType< T >::Type epsilon)
round using epsilon
Definition: float_cmp.cc:311
I trunc(const T &val, typename EpsilonType< T >::Type epsilon)
truncate using epsilon
Definition: float_cmp.cc:407
Mask< V > mask(ADLTag< 0, std::is_same< V, Mask< V > >::value >, const V &v)
implements Simd::mask()
Definition: defaults.hh:153
bool allFalse(ADLTag< 0 >, const Mask &mask)
implements Simd::allFalse()
Definition: defaults.hh:124
bool allTrue(ADLTag< 0 >, const Mask &mask)
implements Simd::allTrue()
Definition: defaults.hh:104
bool anyFalse(ADLTag< 0 >, const Mask &mask)
implements Simd::anyFalse()
Definition: defaults.hh:114
auto min(ADLTag< 0 >, const V &v1, const V &v2)
implements binary Simd::min()
Definition: defaults.hh:89
auto max(ADLTag< 0 >, const V &v1, const V &v2)
implements binary Simd::max()
Definition: defaults.hh:81
bool anyTrue(const Mask &mask)
Whether any entry is true
Definition: interface.hh:429
V cond(M &&mask, const V &ifTrue, const V &ifFalse)
Like the ?: operator.
Definition: interface.hh:386
bool allTrue(const Mask &mask)
Whether all entries are true
Definition: interface.hh:439
bool anyFalse(const Mask &mask)
Whether any entry is false
Definition: interface.hh:449
constexpr std::size_t lanes()
Number of lanes in a SIMD type.
Definition: interface.hh:305
decltype(auto) lane(std::size_t l, V &&v)
Extract an element of a SIMD type.
Definition: interface.hh:324
Rebind< bool, V > Mask
Mask type of some SIMD type.
Definition: interface.hh:289
bool allFalse(const Mask &mask)
Whether all entries are false
Definition: interface.hh:459
typename Overloads::ScalarType< std::decay_t< V > >::type Scalar
Element type of some SIMD type.
Definition: interface.hh:235
Some useful basic math stuff.
Dune namespace.
Definition: alignedallocator.hh:13
const T1 cond(bool b, const T1 &v1, const T2 &v2)
conditional evaluate
Definition: conditional.hh:28
Include file for users of the SIMD abstraction layer.