1#ifndef DUNE_FEM_FLOPS_HH
2#define DUNE_FEM_FLOPS_HH
14#include <dune/fem/misc/mpimanager.hh>
15#include <dune/fem/misc/threads/threadsafevalue.hh>
16#include <dune/fem/storage/singleton.hh>
33 typedef std::vector< float > values_t ;
39 void evaluateCounters(
float& realTime,
45 int retval = PAPI_flops(&realTime, &procTime, &flop, &mFlops);
46 if( retval < PAPI_OK )
48 std::cerr <<
"ERROR: PAPI_FP_OPS event is not available, check papi_avail!" << std::endl;
55 : values_( values_t(3,
float(0.0)) ),
60 static unsigned long threadId ()
62 return MPIManager :: thread();
68 if( ! MPIManager :: singleThreadMode() )
71 PAPI_thread_init( threadId );
72 PAPI_register_thread();
75 float realtime, proctime, mflops;
77 evaluateCounters( realtime, proctime, mflops, flop );
88 values_t& values = *values_;
89 long long& flop = *flop_;
90 evaluateCounters( values[ 0 ], values[ 1 ], values[ 2 ], flop );
99 void printCounter( std::ostream& out )
const
102 assert( MPIManager :: singleThreadMode () );
105 const int threads = MPIManager :: maxThreads ();
106 for(
int i=0; i<threads; ++i )
108 allStopped += stopped_[ i ];
113 if( allStopped != threads )
116 typedef std::vector< double > result_t ;
117 result_t values( 5, 0.0 );
119 for(
int i=0; i<3; ++i )
120 values[ i ] = values_[ 0 ][ i ];
121 values[ 3 ] = flop_[ 0 ];
124 for(
int i=1; i<threads; ++i )
126 values[ 0 ] = std::max( values[ 0 ],
double(values_[ i ][ 0 ]) );
127 values[ 1 ] = std::max( values[ 1 ],
double(values_[ i ][ 1 ]) );
128 values[ 2 ] += values_[ i ][ 2 ];
129 values[ 3 ] += flop_[ i ];
132 values[ 3 ] /= 1.0e9 ;
134 values[ 4 ] = values[ 3 ] / values[ 0 ];
136 result_t
max( values );
137 result_t
min( values );
138 result_t sum( values );
149 if( comm.
rank() == 0 )
151 out <<
"FlopCounter::typ: real proc mflops flop flop/real " << std::endl;
152 printValues( out,
"FlopCounter::sum: ", sum );
153 printValues( out,
"FlopCounter::max: ",
max );
154 printValues( out,
"FlopCounter::min: ",
min );
185 static void print( std::ostream& out )
191 template <
class vec_t>
192 void printValues( std::ostream& out,
const std::string name,
const vec_t& values )
const
195 for(
unsigned int i=0; i<values.size(); ++i )
197 out << values[ i ] <<
" ";
Collective communication interface and sequential default implementation.
Definition: communication.hh:100
T max(const T &in) const
Compute the maximum of the argument over all processes and return the result in every process....
Definition: communication.hh:248
int rank() const
Return rank, is between 0 and size()-1.
Definition: communication.hh:114
T sum(const T &in) const
Compute the sum of the argument over all processes and return the result in every process....
Definition: communication.hh:188
T min(const T &in) const
Compute the minimum of the argument over all processes and return the result in every process....
Definition: communication.hh:228
A class wrapper for the function PAPI_flops from the package PAPI. The results are CPU time,...
Definition: flops.hh:32
static void start()
Start counters.
Definition: flops.hh:171
static void print(std::ostream &out)
Print values to the given ostream; all values are gathered to the master rank before printing.
Definition: flops.hh:185
static void stop()
Stop counters.
Definition: flops.hh:177
return singleton instance of given Object type.
Definition: singleton.hh:93
static DUNE_EXPORT Object & instance(Args &&... args)
Return the singleton instance of the given Object type.
Definition: singleton.hh:123
Default exception if a function was called while the object is not in a valid state for that function...
Definition: exceptions.hh:281
#define DUNE_THROW(E, m)
Definition: exceptions.hh:218
constexpr auto max
Function object that returns the greater of the given values.
Definition: hybridutilities.hh:484
constexpr auto min
Function object that returns the smaller of the given values.
Definition: hybridutilities.hh:506
Dune namespace.
Definition: alignedallocator.hh:13
constexpr std::integral_constant< std::size_t, sizeof...(II)> size(std::integer_sequence< T, II... >)
Return the size of the sequence.
Definition: integersequence.hh:75