DUNE-FEM (unstable)

flops.hh
1#ifndef DUNE_FEM_FLOPS_HH
2#define DUNE_FEM_FLOPS_HH
3
4#if HAVE_PAPI
5#include <papi.h>
6#endif
7
8//- system includes
9#include <iostream>
10#include <vector>
11#include <cassert>
12
13//- dune-fem includes
14#include <dune/fem/misc/mpimanager.hh>
15#include <dune/fem/misc/threads/threadsafevalue.hh>
16#include <dune/fem/storage/singleton.hh>
17
18namespace Dune {
19
20 namespace Fem {
21
22 // FlopCounter
23 // -----------
24
32 {
33 typedef std::vector< float > values_t ;
37
38 // call PAPI_flops for given values
// Query PAPI_flops and store its results in the given references.
//
// \param[out] realTime  passed to PAPI_flops as first argument
// \param[out] procTime  passed to PAPI_flops as second argument
// \param[out] mFlops    passed to PAPI_flops as fourth argument
// \param[out] flop      passed to PAPI_flops as third argument
//
// If HAVE_PAPI is not set the body is empty and no argument is modified.
void evaluateCounters( float& realTime, float& procTime,
                       float& mFlops, long long& flop )
{
#if HAVE_PAPI
  // every return code below PAPI_OK is reported on std::cerr; the
  // failure is neither retried nor propagated to the caller
  const bool failed = ( PAPI_flops( &realTime, &procTime, &flop, &mFlops ) < PAPI_OK );
  if( failed )
    std::cerr << "ERROR: PAPI_FP_OPS event is not available, check papi_avail!" << std::endl;
#endif
}
52
53 // constructor
55 : values_( values_t(3, float(0.0)) ),
56 stopped_( 0 )
57 {
58 }
59
60 static unsigned long threadId ()
61 {
62 return MPIManager :: thread();
63 }
64
65 // initialize counters
66 void startCounter()
67 {
68 if( ! MPIManager :: singleThreadMode() )
69 {
70#if HAVE_PAPI
71 PAPI_thread_init( threadId );
72 PAPI_register_thread();
73#endif
74 }
75 float realtime, proctime, mflops;
76 long long flop ;
77 evaluateCounters( realtime, proctime, mflops, flop );
78 // mark as not stopped
79 *stopped_ = 0;
80 }
81
82 // stop counters and store values
83 void stopCounter()
84 {
85 if( *stopped_ == 0 )
86 {
87 // get reference to thread local value
88 values_t& values = *values_;
89 long long& flop = *flop_;
90 evaluateCounters( values[ 0 ], values[ 1 ], values[ 2 ], flop );
91
92 // mark thread as stopped
93 *stopped_ = 1 ;
94 }
95 }
96
97 // print values to given ostream, all values are gathered to
98 // the master rank
99 void printCounter( std::ostream& out ) const
100 {
101 // make sure this method is called in single thread mode only
102 assert( MPIManager :: singleThreadMode () );
103
104 int allStopped = 0 ;
105 const int threads = MPIManager :: maxThreads ();
106 for( int i=0; i<threads; ++i )
107 {
108 allStopped += stopped_[ i ];
109 }
110
111 // make sure all other thread have been stopped, otherwise
112 // the results wont be coorect
113 if( allStopped != threads )
114 DUNE_THROW(InvalidStateException,"Not all thread have been stopped");
115
116 typedef std::vector< double > result_t ;
117 result_t values( 5, 0.0 );
118
119 for( int i=0; i<3; ++i )
120 values[ i ] = values_[ 0 ][ i ];
121 values[ 3 ] = flop_[ 0 ];
122
123 // tkae maximum for times and sum flops for all threads
124 for( int i=1; i<threads; ++i )
125 {
126 values[ 0 ] = std::max( values[ 0 ], double(values_[ i ][ 0 ]) );
127 values[ 1 ] = std::max( values[ 1 ], double(values_[ i ][ 1 ]) );
128 values[ 2 ] += values_[ i ][ 2 ];
129 values[ 3 ] += flop_[ i ];
130 }
131 // convert to GFLOP
132 values[ 3 ] /= 1.0e9 ;
133 // compute mflops ourselfs
134 values[ 4 ] = values[ 3 ] / values[ 0 ];
135
136 result_t max( values );
137 result_t min( values );
138 result_t sum( values );
139
140 typedef MPIManager :: Communication Communication;
141 const Communication& comm = MPIManager :: comm();
142
143 const int size = max.size();
144 // compute max, min, and sum of flop values
145 comm.max( &max[ 0 ], size );
146 comm.min( &min[ 0 ], size );
147 comm.sum( &sum[ 0 ], size );
148
149 if( comm.rank() == 0 )
150 {
151 out << "FlopCounter::typ: real proc mflops flop flop/real " << std::endl;
152 printValues( out, "FlopCounter::sum: ", sum );
153 printValues( out, "FlopCounter::max: ", max );
154 printValues( out, "FlopCounter::min: ", min );
155 }
156 }
157
158 friend class Dune::Fem::Singleton< FlopCounter >;
159
160 static FlopCounter& instance()
161 {
163 }
164
165 public:
171 static void start( )
172 {
173 instance().startCounter();
174 }
175
177 static void stop( )
178 {
179 instance().stopCounter();
180 }
181
185 static void print( std::ostream& out )
186 {
187 instance().printCounter( out );
188 }
189
190 protected:
191 template <class vec_t>
192 void printValues( std::ostream& out, const std::string name, const vec_t& values ) const
193 {
194 out << name << " ";
195 for( unsigned int i=0; i<values.size(); ++i )
196 {
197 out << values[ i ] << " ";
198 }
199 out << std::endl;
200 }
201 };
202
203 } // namespace Fem
204} // namespace Dune
205#endif
Collective communication interface and sequential default implementation.
Definition: communication.hh:100
T max(const T &in) const
Compute the maximum of the argument over all processes and return the result in every process....
Definition: communication.hh:248
int rank() const
Return rank, is between 0 and size()-1.
Definition: communication.hh:114
T sum(const T &in) const
Compute the sum of the argument over all processes and return the result in every process....
Definition: communication.hh:188
T min(const T &in) const
Compute the minimum of the argument over all processes and return the result in every process....
Definition: communication.hh:228
A class wrapper for the function PAPI_flops from the package PAPI. The results are CPU time,...
Definition: flops.hh:32
static void start()
Start counters.
Definition: flops.hh:171
static void print(std::ostream &out)
print values to given ostream, all values are gathered to the master rank before printing
Definition: flops.hh:185
static void stop()
stop counters
Definition: flops.hh:177
return singleton instance of given Object type.
Definition: singleton.hh:93
static DUNE_EXPORT Object & instance(Args &&... args)
return singleton instance of given Object type.
Definition: singleton.hh:123
Default exception if a function was called while the object is not in a valid state for that function...
Definition: exceptions.hh:281
#define DUNE_THROW(E, m)
Definition: exceptions.hh:218
constexpr auto max
Function object that returns the greater of the given values.
Definition: hybridutilities.hh:484
constexpr auto min
Function object that returns the smaller of the given values.
Definition: hybridutilities.hh:506
Dune namespace.
Definition: alignedallocator.hh:13
constexpr std::integral_constant< std::size_t, sizeof...(II)> size(std::integer_sequence< T, II... >)
Return the size of the sequence.
Definition: integersequence.hh:75
Creative Commons License   |  Legal Statements / Impressum  |  Hosted by TU Dresden  |  generated with Hugo v0.111.3 (Nov 24, 23:30, 2024)