3#ifndef DUNE_COMMON_SIMD_LOOP_HH
4#define DUNE_COMMON_SIMD_LOOP_HH
25# pragma GCC diagnostic push
26# pragma GCC diagnostic ignored "-Wbool-operation"
27# pragma GCC diagnostic ignored "-Wint-in-bool-context"
28# define GCC_WARNING_DISABLED
36#if __has_warning("-Wbitwise-instead-of-logical")
37# pragma clang diagnostic push
38# pragma clang diagnostic ignored "-Wbitwise-instead-of-logical"
39# define CLANG_WARNING_DISABLED
47 #define DUNE_PRAGMA_OMP_SIMD _Pragma("omp simd")
49 #define DUNE_PRAGMA_OMP_SIMD
64 template<
class T, std::
size_t S, std::
size_t A = 0>
65 class alignas(A==0?alignof(T):A)
LoopSIMD :
public std::array<T,S> {
79 template<std::
size_t OA>
81 : std::array<T,S>(other)
91#define DUNE_SIMD_LOOP_PREFIX_OP(SYMBOL) \
92 auto operator SYMBOL() { \
93 DUNE_PRAGMA_OMP_SIMD \
94 for(std::size_t i=0; i<S; i++){ \
99 static_assert(true, "expecting ;")
101 DUNE_SIMD_LOOP_PREFIX_OP(++);
102 DUNE_SIMD_LOOP_PREFIX_OP(--);
103#undef DUNE_SIMD_LOOP_PREFIX_OP
106#define DUNE_SIMD_LOOP_UNARY_OP(SYMBOL) \
107 auto operator SYMBOL() const { \
108 LoopSIMD<T,S,A> out; \
109 DUNE_PRAGMA_OMP_SIMD \
110 for(std::size_t i=0; i<S; i++){ \
111 out[i] = SYMBOL((*this)[i]); \
115 static_assert(true, "expecting ;")
117 DUNE_SIMD_LOOP_UNARY_OP(+);
118 DUNE_SIMD_LOOP_UNARY_OP(-);
119 DUNE_SIMD_LOOP_UNARY_OP(~);
121 auto operator!()
const {
124 for(std::size_t i=0; i<S; i++){
125 out[i] = !((*this)[i]);
129#undef DUNE_SIMD_LOOP_UNARY_OP
132#define DUNE_SIMD_LOOP_POSTFIX_OP(SYMBOL) \
133 auto operator SYMBOL(int){ \
134 LoopSIMD<T,S,A> out = *this; \
138 static_assert(true, "expecting ;")
140 DUNE_SIMD_LOOP_POSTFIX_OP(++);
141 DUNE_SIMD_LOOP_POSTFIX_OP(--);
142#undef DUNE_SIMD_LOOP_POSTFIX_OP
145#define DUNE_SIMD_LOOP_ASSIGNMENT_OP(SYMBOL) \
146 auto operator SYMBOL(const Simd::Scalar<T> s) { \
147 DUNE_PRAGMA_OMP_SIMD \
148 for(std::size_t i=0; i<S; i++){ \
149 (*this)[i] SYMBOL s; \
154 auto operator SYMBOL(const LoopSIMD<T,S,A> &v) { \
155 DUNE_PRAGMA_OMP_SIMD \
156 for(std::size_t i=0; i<S; i++){ \
157 (*this)[i] SYMBOL v[i]; \
161 static_assert(true, "expecting ;")
163 DUNE_SIMD_LOOP_ASSIGNMENT_OP(+=);
164 DUNE_SIMD_LOOP_ASSIGNMENT_OP(-=);
165 DUNE_SIMD_LOOP_ASSIGNMENT_OP(*=);
166 DUNE_SIMD_LOOP_ASSIGNMENT_OP(/=);
167 DUNE_SIMD_LOOP_ASSIGNMENT_OP(%=);
168 DUNE_SIMD_LOOP_ASSIGNMENT_OP(<<=);
169 DUNE_SIMD_LOOP_ASSIGNMENT_OP(>>=);
170 DUNE_SIMD_LOOP_ASSIGNMENT_OP(&=);
171 DUNE_SIMD_LOOP_ASSIGNMENT_OP(|=);
172 DUNE_SIMD_LOOP_ASSIGNMENT_OP(^=);
173#undef DUNE_SIMD_LOOP_ASSIGNMENT_OP
177#define DUNE_SIMD_LOOP_BINARY_OP(SYMBOL) \
178 template<class T, std::size_t S, std::size_t A> \
179 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const Simd::Scalar<T> s) { \
180 LoopSIMD<T,S,A> out; \
181 DUNE_PRAGMA_OMP_SIMD \
182 for(std::size_t i=0; i<S; i++){ \
183 out[i] = v[i] SYMBOL s; \
187 template<class T, std::size_t S, std::size_t A> \
188 auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S,A> &v) { \
189 LoopSIMD<T,S,A> out; \
190 DUNE_PRAGMA_OMP_SIMD \
191 for(std::size_t i=0; i<S; i++){ \
192 out[i] = s SYMBOL v[i]; \
196 template<class T, std::size_t S, std::size_t A> \
197 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
198 const LoopSIMD<T,S,A> &w) { \
199 LoopSIMD<T,S,A> out; \
200 DUNE_PRAGMA_OMP_SIMD \
201 for(std::size_t i=0; i<S; i++){ \
202 out[i] = v[i] SYMBOL w[i]; \
206 static_assert(true, "expecting ;")
208 DUNE_SIMD_LOOP_BINARY_OP(+);
209 DUNE_SIMD_LOOP_BINARY_OP(-);
210 DUNE_SIMD_LOOP_BINARY_OP(*);
211 DUNE_SIMD_LOOP_BINARY_OP(/);
212 DUNE_SIMD_LOOP_BINARY_OP(%);
214 DUNE_SIMD_LOOP_BINARY_OP(&);
215 DUNE_SIMD_LOOP_BINARY_OP(|);
216 DUNE_SIMD_LOOP_BINARY_OP(^);
218#undef DUNE_SIMD_LOOP_BINARY_OP
221#define DUNE_SIMD_LOOP_BITSHIFT_OP(SYMBOL) \
222 template<class T, std::size_t S, std::size_t A, class U> \
223 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const U s) { \
224 LoopSIMD<T,S,A> out; \
225 DUNE_PRAGMA_OMP_SIMD \
226 for(std::size_t i=0; i<S; i++){ \
227 out[i] = v[i] SYMBOL s; \
231 template<class T, std::size_t S, std::size_t A, class U, std::size_t AU> \
232 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
233 const LoopSIMD<U,S,AU> &w) { \
234 LoopSIMD<T,S,A> out; \
235 DUNE_PRAGMA_OMP_SIMD \
236 for(std::size_t i=0; i<S; i++){ \
237 out[i] = v[i] SYMBOL w[i]; \
241 static_assert(true, "expecting ;")
243 DUNE_SIMD_LOOP_BITSHIFT_OP(<<);
244 DUNE_SIMD_LOOP_BITSHIFT_OP(>>);
246#undef DUNE_SIMD_LOOP_BITSHIFT_OP
249#define DUNE_SIMD_LOOP_COMPARISON_OP(SYMBOL) \
250 template<class T, std::size_t S, std::size_t A, class U> \
251 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const U s) { \
252 Simd::Mask<LoopSIMD<T,S,A>> out; \
253 DUNE_PRAGMA_OMP_SIMD \
254 for(std::size_t i=0; i<S; i++){ \
255 out[i] = v[i] SYMBOL s; \
259 template<class T, std::size_t S, std::size_t A> \
260 auto operator SYMBOL(const Simd::Scalar<T> s, const LoopSIMD<T,S,A> &v) { \
261 Simd::Mask<LoopSIMD<T,S,A>> out; \
262 DUNE_PRAGMA_OMP_SIMD \
263 for(std::size_t i=0; i<S; i++){ \
264 out[i] = s SYMBOL v[i]; \
268 template<class T, std::size_t S, std::size_t A> \
269 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
270 const LoopSIMD<T,S,A> &w) { \
271 Simd::Mask<LoopSIMD<T,S,A>> out; \
272 DUNE_PRAGMA_OMP_SIMD \
273 for(std::size_t i=0; i<S; i++){ \
274 out[i] = v[i] SYMBOL w[i]; \
278 static_assert(true, "expecting ;")
280 DUNE_SIMD_LOOP_COMPARISON_OP(<);
281 DUNE_SIMD_LOOP_COMPARISON_OP(>);
282 DUNE_SIMD_LOOP_COMPARISON_OP(<=);
283 DUNE_SIMD_LOOP_COMPARISON_OP(>=);
284 DUNE_SIMD_LOOP_COMPARISON_OP(==);
285 DUNE_SIMD_LOOP_COMPARISON_OP(!=);
286#undef DUNE_SIMD_LOOP_COMPARISON_OP
289#define DUNE_SIMD_LOOP_BOOLEAN_OP(SYMBOL) \
290 template<class T, std::size_t S, std::size_t A> \
291 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, const Simd::Scalar<T> s) { \
292 Simd::Mask<LoopSIMD<T,S,A>> out; \
293 DUNE_PRAGMA_OMP_SIMD \
294 for(std::size_t i=0; i<S; i++){ \
295 out[i] = v[i] SYMBOL s; \
299 template<class T, std::size_t S, std::size_t A> \
300 auto operator SYMBOL(const Simd::Mask<T> s, const LoopSIMD<T,S,A> &v) { \
301 Simd::Mask<LoopSIMD<T,S,A>> out; \
302 DUNE_PRAGMA_OMP_SIMD \
303 for(std::size_t i=0; i<S; i++){ \
304 out[i] = s SYMBOL v[i]; \
308 template<class T, std::size_t S, std::size_t A> \
309 auto operator SYMBOL(const LoopSIMD<T,S,A> &v, \
310 const LoopSIMD<T,S,A> &w) { \
311 Simd::Mask<LoopSIMD<T,S,A>> out; \
312 DUNE_PRAGMA_OMP_SIMD \
313 for(std::size_t i=0; i<S; i++){ \
314 out[i] = v[i] SYMBOL w[i]; \
318 static_assert(true, "expecting ;")
320 DUNE_SIMD_LOOP_BOOLEAN_OP(&&);
321 DUNE_SIMD_LOOP_BOOLEAN_OP(||);
322#undef DUNE_SIMD_LOOP_BOOLEAN_OP
325 template<
class T, std::
size_t S, std::
size_t A>
326 std::ostream& operator<< (std::ostream &os,
const LoopSIMD<T,S,A> &v) {
328 for(std::size_t i=0; i<S-1; i++) {
336 namespace Overloads {
343 template<
class T, std::
size_t S, std::
size_t A>
344 struct ScalarType<LoopSIMD<T,S,A>> {
345 using type = Simd::Scalar<T>;
348 template<
class U,
class T, std::
size_t S, std::
size_t A>
349 struct RebindType<U, LoopSIMD<T,S,A>> {
350 using type = LoopSIMD<Simd::Rebind<U, T>,S,A>;
// LaneCount specialization for LoopSIMD<T,S,A>.
// The lane count is S*lanes<T>(): LoopSIMD derives from std::array<T,S>,
// so each of the S entries contributes lanes<T>() lanes of the entry type T.
// The alignment parameter A does not enter the count.
354 template<
class T, std::
size_t S, std::
size_t A>
355 struct LaneCount<LoopSIMD<T,S,A>> :
index_constant<S*lanes<T>()> {};
357 template<
class T, std::
size_t S, std::
size_t A>
358 auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S,A> &&v)
359 ->
decltype(std::move(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()])))
361 return std::move(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()]));
364 template<
class T, std::
size_t S, std::
size_t A>
365 auto lane(ADLTag<5>, std::size_t l,
const LoopSIMD<T,S,A> &v)
366 ->
decltype(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()]))
368 return Simd::lane(l%lanes<T>(), v[l/lanes<T>()]);
371 template<
class T, std::
size_t S, std::
size_t A>
372 auto lane(ADLTag<5>, std::size_t l, LoopSIMD<T,S,A> &v)
373 ->
decltype(
Simd::lane(l%lanes<T>(), v[l/lanes<T>()]))
375 return Simd::lane(l%lanes<T>(), v[l/lanes<T>()]);
378 template<
class T, std::
size_t S, std::
size_t AM, std::
size_t AD>
380 LoopSIMD<T,S,AD> ifTrue, LoopSIMD<T,S,AD> ifFalse) {
381 LoopSIMD<T,S,AD> out;
382 for(std::size_t i=0; i<S; i++) {
388 template<
class M,
class T, std::
size_t S, std::
size_t A>
389 auto cond(ADLTag<5, std::is_same<
bool, Simd::Scalar<M> >::value
390 && Simd::lanes<M>() ==
Simd::lanes<LoopSIMD<T,S,A> >()>,
391 M
mask, LoopSIMD<T,S,A> ifTrue, LoopSIMD<T,S,A> ifFalse)
399 template<
class M, std::
size_t S, std::
size_t A>
400 bool anyTrue(ADLTag<5>, LoopSIMD<M,S,A>
mask) {
402 for(std::size_t i=0; i<S; i++) {
408 template<
class M, std::
size_t S, std::
size_t A>
411 for(std::size_t i=0; i<S; i++) {
417 template<
class M, std::
size_t S, std::
size_t A>
420 for(std::size_t i=0; i<S; i++) {
426 template<
class M, std::
size_t S, std::
size_t A>
429 for(std::size_t i=0; i<S; i++) {
446#define DUNE_SIMD_LOOP_CMATH_UNARY_OP(expr) \
447 template<class T, std::size_t S, std::size_t A, typename Sfinae = \
448 typename std::enable_if_t<!std::is_integral<Simd::Scalar<T>>::value> > \
449 auto expr(const LoopSIMD<T,S,A> &v) { \
451 LoopSIMD<T,S,A> out; \
452 for(std::size_t i=0; i<S; i++) { \
453 out[i] = expr(v[i]); \
457 static_assert(true, "expecting ;")
459#define DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(expr, returnType) \
460 template<class T, std::size_t S, std::size_t A, typename Sfinae = \
461 typename std::enable_if_t<!std::is_integral<Simd::Scalar<T>>::value> > \
462 auto expr(const LoopSIMD<T,S,A> &v) { \
464 LoopSIMD<returnType,S> out; \
465 for(std::size_t i=0; i<S; i++) { \
466 out[i] = expr(v[i]); \
470 static_assert(true, "expecting ;")
472 DUNE_SIMD_LOOP_CMATH_UNARY_OP(cos);
473 DUNE_SIMD_LOOP_CMATH_UNARY_OP(sin);
474 DUNE_SIMD_LOOP_CMATH_UNARY_OP(tan);
475 DUNE_SIMD_LOOP_CMATH_UNARY_OP(acos);
476 DUNE_SIMD_LOOP_CMATH_UNARY_OP(asin);
477 DUNE_SIMD_LOOP_CMATH_UNARY_OP(atan);
478 DUNE_SIMD_LOOP_CMATH_UNARY_OP(cosh);
479 DUNE_SIMD_LOOP_CMATH_UNARY_OP(sinh);
480 DUNE_SIMD_LOOP_CMATH_UNARY_OP(tanh);
481 DUNE_SIMD_LOOP_CMATH_UNARY_OP(acosh);
482 DUNE_SIMD_LOOP_CMATH_UNARY_OP(asinh);
483 DUNE_SIMD_LOOP_CMATH_UNARY_OP(atanh);
485 DUNE_SIMD_LOOP_CMATH_UNARY_OP(exp);
486 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log);
487 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log10);
488 DUNE_SIMD_LOOP_CMATH_UNARY_OP(exp2);
489 DUNE_SIMD_LOOP_CMATH_UNARY_OP(expm1);
490 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(ilogb,
int);
491 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log1p);
492 DUNE_SIMD_LOOP_CMATH_UNARY_OP(log2);
493 DUNE_SIMD_LOOP_CMATH_UNARY_OP(logb);
495 DUNE_SIMD_LOOP_CMATH_UNARY_OP(sqrt);
496 DUNE_SIMD_LOOP_CMATH_UNARY_OP(cbrt);
498 DUNE_SIMD_LOOP_CMATH_UNARY_OP(erf);
499 DUNE_SIMD_LOOP_CMATH_UNARY_OP(erfc);
500 DUNE_SIMD_LOOP_CMATH_UNARY_OP(tgamma);
501 DUNE_SIMD_LOOP_CMATH_UNARY_OP(lgamma);
503 DUNE_SIMD_LOOP_CMATH_UNARY_OP(ceil);
504 DUNE_SIMD_LOOP_CMATH_UNARY_OP(floor);
505 DUNE_SIMD_LOOP_CMATH_UNARY_OP(
trunc);
506 DUNE_SIMD_LOOP_CMATH_UNARY_OP(
round);
507 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(lround,
long);
508 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(llround,
long long);
509 DUNE_SIMD_LOOP_CMATH_UNARY_OP(rint);
510 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(lrint,
long);
511 DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN(llrint,
long long);
512 DUNE_SIMD_LOOP_CMATH_UNARY_OP(nearbyint);
514 DUNE_SIMD_LOOP_CMATH_UNARY_OP(fabs);
515 DUNE_SIMD_LOOP_CMATH_UNARY_OP(abs);
517#undef DUNE_SIMD_LOOP_CMATH_UNARY_OP
518#undef DUNE_SIMD_LOOP_CMATH_UNARY_OP_WITH_RETURN
540#define DUNE_SIMD_LOOP_STD_UNARY_OP(expr) \
541 template<class T, std::size_t S, std::size_t A> \
542 auto expr(const LoopSIMD<T,S,A> &v) { \
544 LoopSIMD<T,S,A> out; \
545 for(std::size_t i=0; i<S; i++) { \
546 out[i] = expr(v[i]); \
551 template<class T, std::size_t S, std::size_t A> \
552 auto expr(const LoopSIMD<std::complex<T>,S,A> &v) { \
554 LoopSIMD<T,S,A> out; \
555 for(std::size_t i=0; i<S; i++) { \
556 out[i] = expr(v[i]); \
560 static_assert(true, "expecting ;")
562 DUNE_SIMD_LOOP_STD_UNARY_OP(real);
563 DUNE_SIMD_LOOP_STD_UNARY_OP(imag);
565#undef DUNE_SIMD_LOOP_STD_UNARY_OP
567#define DUNE_SIMD_LOOP_STD_BINARY_OP(expr) \
568 template<class T, std::size_t S, std::size_t A> \
569 auto expr(const LoopSIMD<T,S,A> &v, const LoopSIMD<T,S,A> &w) { \
571 LoopSIMD<T,S,A> out; \
572 for(std::size_t i=0; i<S; i++) { \
573 out[i] = expr(v[i],w[i]); \
577 static_assert(true, "expecting ;")
579 DUNE_SIMD_LOOP_STD_BINARY_OP(
max);
580 DUNE_SIMD_LOOP_STD_BINARY_OP(
min);
582#undef DUNE_SIMD_LOOP_STD_BINARY_OP
584 namespace MathOverloads {
585 template<
class T, std::
size_t S, std::
size_t A>
586 auto isNaN(
const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
587 Simd::Mask<LoopSIMD<T,S,A>> out;
588 for(
auto l : range(S))
589 out[l] = Dune::isNaN(v[l]);
593 template<
class T, std::
size_t S, std::
size_t A>
594 auto isInf(
const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
595 Simd::Mask<LoopSIMD<T,S,A>> out;
596 for(
auto l : range(S))
597 out[l] = Dune::isInf(v[l]);
601 template<
class T, std::
size_t S, std::
size_t A>
602 auto isFinite(
const LoopSIMD<T,S,A> &v, PriorityTag<3>, ADLTag) {
603 Simd::Mask<LoopSIMD<T,S,A>> out;
604 for(
auto l : range(S))
605 out[l] = Dune::isFinite(v[l]);
610 template<
class T, std::
size_t S, std::
size_t A>
611 struct IsNumber<LoopSIMD<T,S,A>> :
612 public std::integral_constant<bool, IsNumber<T>::value>{
615#ifdef CLANG_WARNING_DISABLED
616# pragma clang diagnostic pop
617# undef CLANG_WARNING_DISABLED
620#ifdef GCC_WARNING_DISABLED
621# pragma GCC diagnostic pop
622# undef GCC_WARNING_DISABLED
std::integral_constant< std::size_t, i > index_constant
An index constant with value i.
Definition: indices.hh:30
I round(const T &val, typename EpsilonType< T >::Type epsilon)
round using epsilon
Definition: float_cmp.cc:311
I trunc(const T &val, typename EpsilonType< T >::Type epsilon)
truncate using epsilon
Definition: float_cmp.cc:407
Mask< V > mask(ADLTag< 0, std::is_same< V, Mask< V > >::value >, const V &v)
implements Simd::mask()
Definition: defaults.hh:153
bool allFalse(ADLTag< 0 >, const Mask &mask)
implements Simd::allFalse()
Definition: defaults.hh:124
bool allTrue(ADLTag< 0 >, const Mask &mask)
implements Simd::allTrue()
Definition: defaults.hh:104
bool anyFalse(ADLTag< 0 >, const Mask &mask)
implements Simd::anyFalse()
Definition: defaults.hh:114
auto min(ADLTag< 0 >, const V &v1, const V &v2)
implements binary Simd::min()
Definition: defaults.hh:89
auto max(ADLTag< 0 >, const V &v1, const V &v2)
implements binary Simd::max()
Definition: defaults.hh:81
bool anyTrue(const Mask &mask)
Whether any entry is true
Definition: interface.hh:429
V cond(M &&mask, const V &ifTrue, const V &ifFalse)
Like the ?: operator.
Definition: interface.hh:386
bool allTrue(const Mask &mask)
Whether all entries are true
Definition: interface.hh:439
bool anyFalse(const Mask &mask)
Whether any entry is false
Definition: interface.hh:449
constexpr std::size_t lanes()
Number of lanes in a SIMD type.
Definition: interface.hh:305
decltype(auto) lane(std::size_t l, V &&v)
Extract an element of a SIMD type.
Definition: interface.hh:324
Rebind< bool, V > Mask
Mask type of some SIMD type.
Definition: interface.hh:289
bool allFalse(const Mask &mask)
Whether all entries are false
Definition: interface.hh:459
typename Overloads::ScalarType< std::decay_t< V > >::type Scalar
Element type of some SIMD type.
Definition: interface.hh:235
Some useful basic math stuff.
Dune namespace.
Definition: alignedallocator.hh:13
const T1 cond(bool b, const T1 &v1, const T2 &v2)
conditionally evaluate
Definition: conditional.hh:28
Include file for users of the SIMD abstraction layer.
Traits for type conversions and type information.