DUNE-FEM (unstable)

codegen.hh
1#ifndef DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
2#define DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
3
4#include <cassert>
5#include <cstdlib>
6#include <iostream>
7#include <fstream>
8#include <sstream>
9#include <vector>
10#include <set>
11#include <map>
12
14#include <dune/fem/io/io.hh>
15#include <dune/fem/io/parameter.hh>
16
17#include <dune/fem/quadrature/cachingquadrature.hh>
18#include <dune/fem/storage/singleton.hh>
19
20namespace Dune
21{
22
23 namespace Fem
24 {
25
26 namespace Codegen
27 {
28
29 class CodegenInfoFinished : public Dune :: Exception {};
30
31 class CodegenInfo
32 {
33 std::string path_;
34 std::string outFileName_;
35
36 int nopMax_;
37 int nopMin_;
38
39 int baseMax_;
40 int baseMin_;
41
42 typedef std::set< int > DimRangeSetType;
43 typedef std::set< void* > BaseSetPointerType;
44 DimRangeSetType savedimRanges_;
45 mutable DimRangeSetType dimRanges_;
46 BaseSetPointerType savedBaseSets_;
47
48 typedef void codegenfunc_t (std::ostream& out,
49 const int dim,
50 const int dimRange,
51 const size_t numRows,
52 const size_t numCols );
53
54 typedef std::pair< std::string, int > EntryType;
55 std::vector< EntryType > filenames_;
56
57 typedef std::vector< int > KeyType;
58 typedef std::set< KeyType > KeyStorageType;
59
60 typedef std::pair< int, std::pair< int, int > > EvalPairType ;
61 typedef std::set< EvalPairType > EvalSetType ;
62
63 std::map< codegenfunc_t* , KeyStorageType > entryMap_;
64 EvalSetType evalSet_;
65
66 public:
67 CodegenInfo()
68 : path_( Dune::Fem::Parameter::commonInputPath() + "/autogeneratedcode"),
69 outFileName_( "autogeneratedcode.hh" ),
70 nopMax_(0), nopMin_(0), baseMax_(0), baseMin_(0),
71 dimRanges_(), savedBaseSets_(), filenames_(),
72 entryMap_(),
73 evalSet_()
74 {
75 }
76
77 static CodegenInfo& instance()
78 {
80 }
81
83 void clear() {
84 savedBaseSets_.clear();
85 filenames_.clear();
86 dimRanges_.clear();
87 entryMap_.clear();
88 evalSet_.clear();
89 nopMax_ = 0;
90 nopMin_ = 0;
91 baseMax_ = 0;
92 baseMin_ = 0;
93 }
94
96 void setPath(const std::string& path ) { path_ = path ; }
97
99 void setFileName(const std::string& filename ) { outFileName_ = filename ; }
100
101 template <class BaseSet>
102 void addDimRange(const BaseSet* baseSet,
103 const int dimRange)
104 {
105 void* ptr = (void *) baseSet;
106 if( savedBaseSets_.find( ptr ) == savedBaseSets_.end() )
107 {
108 savedBaseSets_.insert( ptr );
109#ifndef NDEBUG
110 std::cout << "Add dimRange " << dimRange << std::endl;
111#endif
112 dimRanges_.insert( dimRange ) ;
113 }
114 }
115
116 void addEntry(const std::string& fileprefix,
117 codegenfunc_t* codegenfunc,
118 const int dim, const int dimRange, const int quadNop, const int numBase )
119 {
120 KeyStorageType& keyMap = entryMap_[ codegenfunc ];
121
122 typedef KeyStorageType :: iterator iterator;
123 KeyType key( 4 );
124 key[ 0 ] = dim;
125 key[ 1 ] = dimRange;
126 key[ 2 ] = quadNop;
127 key[ 3 ] = numBase;
128
129 // search for key, if already exists, then do nothing
130 iterator it = keyMap.find( key );
131 if( it != keyMap.end() ) return ;
132
133 // make sure dimRange was registered
134 assert( dimRanges_.find( dimRange ) != dimRanges_.end() );
135
136 // create directory to store files
137 createDirectory( path_ );
138
139 std::stringstream filename;
140 filename << fileprefix << dimRange << "_" << quadNop << "_" << numBase << ".hh";
141
142 // second check, if file exists, do nothing
143 int pos = exists( filename.str() );
144 if( pos >= 0 ) return;
145
146 std::string filenameToWrite( path_ + "/" + filename.str() );
147
148 // write code to string stream and compare with existing file
149 // if file does not exist, then write code to newly generated file
150 std::stringstream code;
151 // call code generation function
152 codegenfunc( code, dim, dimRange, quadNop, numBase );
153
154 bool writeCode = false ;
155
156 {
157 std::ifstream infile( filenameToWrite );
158 if( infile )
159 {
160 std::stringstream checkstr;
161 checkstr << infile.rdbuf();
162
163 // if both string are identical we can stop here
164 // and don't write the header thus avoiding recompilation
165 if( checkstr.str().compare( code.str() ) != 0 )
166 {
167 // if strings differ then write code
168 writeCode = true;
169 }
170 infile.close();
171 }
172 else
173 {
174 // if file does not exist, then write code
175 writeCode = true;
176 }
177 }
178
179 if( writeCode )
180 {
181 // if file does not exist, then write code to newly generated file
182 std::ofstream file( filenameToWrite, std::ios::out );
183 file << code.str();
184 file.close();
185#ifndef NDEBUG
186 std::cout << "Generate code " << fileprefix << " for (" << dimRange << ","
187 << quadNop << "," << numBase << ")" << std::endl;
188#endif
189 }
190
191 // insert evaluate pair in any case
192 // otherwise the lists with include files is wrong
193 EvalPairType evalPair ( dimRange, std::make_pair(quadNop, numBase) );
194 evalSet_.insert( evalPair );
195
196 if( baseMin_ == 0 ) baseMin_ = numBase;
197 if( nopMin_ == 0 ) nopMin_ = quadNop;
198
199 EntryType entry ( filename.str() , dimRange );
200 filenames_.push_back( entry );
201 nopMax_ = std::max( quadNop, nopMax_ );
202 nopMin_ = std::min( quadNop, nopMin_ );
203 baseMax_ = std::max( numBase, baseMax_ );
204 baseMin_ = std::min( numBase, baseMin_ );
205 }
206
207 void finalize () const
208 {
209 if( checkAbort() )
210 {
211 dumpInfo();
212 std::cerr << "All automated code generated, bye, bye !! " << std::endl;
213 DUNE_THROW(CodegenInfoFinished,"All automated code generated, bye, bye !!");
214 }
215 }
216
217
218 bool dumpInfo(const bool writeToCurrentDir = false ) const
219 {
220 if( writeToCurrentDir )
221 {
222 // write file with correct include to current directory
223 // this is usually not needed if include paths are correct
224 std::ofstream file( outFileName_.c_str() );
225 file << "#include \"" <<path_<< "/" << outFileName_ << "\"";
226 }
227
228 {
229 std::string filename( path_ + "/" + outFileName_ );
230 std::stringstream file;
231
232 file << "#ifdef CODEGEN_INCLUDEMAXNUMS" << std::endl;
233 file << "#ifndef CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl;
234 file << "#define CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
235 file << "#define MAX_NUMBER_OF_QUAD_POINTS " << nopMax_ << std::endl;
236 file << "#define MIN_NUMBER_OF_QUAD_POINTS " << nopMin_ << std::endl;
237 file << "#define MAX_NUMBER_OF_BASE_FCT " << baseMax_ << std::endl;
238 file << "#define MIN_NUMBER_OF_BASE_FCT " << baseMin_ << std::endl << std::endl;
239#if 0
240 file << "/* include all headers with inner loop extern declarations */" << std::endl;
241 file << "#define CODEGEN_COMPILE_INNERLOOPS 1" << std::endl;
242 file << "namespace Dune {" << std::endl;
243 file << "namespace Fem {" << std::endl;
244 file << "namespace Codegen {" << std::endl;
245 for( size_t i = 0; i < filenames_.size(); ++i )
246 {
247 file << "#include \""<< filenames_[ i ].first << "\"" << std::endl;
248 }
249 file << "}}} // end namespaces" << std::endl;
250 file << "#undef CODEGEN_COMPILE_INNERLOOPS" << std::endl << std::endl;
251 file << "#include \"" << filename << ".c\"" <<std::endl;
252#endif
253
254 file << "#endif // CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
255 file << "#elif defined CODEGEN_INCLUDEEVALCALLERS" << std::endl;
256 file << "#ifndef CODEGEN_EVALCALLERS_INCLUDED" << std::endl;
257 file << "#define CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
258 file << "namespace Dune {"<< std::endl;
259 file << "namespace Fem {"<< std::endl;
260 file << "namespace Codegen {"<< std::endl;
261 typedef EvalSetType :: iterator iterator ;
262 const iterator endit = evalSet_.end();
263 for( iterator it = evalSet_.begin(); it != endit; ++it )
264 {
265 int dimRange = it->first;
266 int quadNop = it->second.first;
267 int numBase = it->second.second;
268 file << " template <class Traits>" << std::endl;
269 file << " struct EvaluateImplementation< Traits, " << dimRange << " , " << quadNop << " , " << numBase << " >" << std::endl;
270 file << " : public EvaluateRealImplementation< Traits, " << dimRange << " , " << quadNop << " , " << numBase << " >" << std::endl;
271 file << " {" << std::endl;
272 file << " typedef EvaluateRealImplementation< Traits, " << dimRange << " , " << quadNop << " , " << numBase << " > BaseType;" << std::endl;
273 file << " typedef typename BaseType :: RangeVectorType RangeVectorType;" << std::endl;
274 file << " EvaluateImplementation( const RangeVectorType& rv ) : BaseType ( rv ) {}" << std::endl;
275 file << " };"<< std::endl;
276 file << std::endl;
277 }
278 file << "}}} // end namespaces"<< std::endl;
279 file << "#endif // CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
280 file << "#else" << std::endl << std::endl ;
281 file << "#ifndef CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
282 file << "#define CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
283 //file << "#undef CODEGEN_COMPILE_INNERLOOPS" << std::endl;
284 file << "namespace Dune {" << std::endl;
285 file << "namespace Fem {" << std::endl;
286 file << "namespace Codegen {" << std::endl;
287 for( size_t i = 0; i < filenames_.size(); ++i )
288 {
289 file << "#include \""<< filenames_[ i ].first << "\"" << std::endl;
290 }
291 file << "}}} // end namespaces" << std::endl;
292 file << "#endif // CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl << std::endl;
293 file << "#endif // CODEGEN_INCLUDEMAXNUMS" << std::endl;
294
295 std::ifstream infile( filename );
296 if( infile )
297 {
298 std::stringstream checkstr;
299 checkstr << infile.rdbuf();
300
301 // if both string are identical we can stop here
302 // and don't write the header thus avoiding recompilation
303 if( checkstr.str().compare( file.str() ) == 0 )
304 return false;
305 }
306
307 std::ofstream outfile( filename );
308 outfile << file.str();
309 outfile.close();
310
311#if 0
312 // write C file with implementation of inner loop functions
313 filename += ".c";
314 std::ofstream Cfile( filename.c_str() );
315
316 Cfile << "#include <stdlib.h>" << std::endl;
317 Cfile << "/* include all headers with inner loop implementation */" << std::endl;
318 Cfile << "#define CODEGEN_COMPILE_INNERLOOPS 2" << std::endl;
319 for( size_t i = 0; i < filenames_.size(); ++i )
320 {
321 Cfile << "#include \""<< filenames_[ i ].first << "\"" << std::endl;
322 }
323#endif
324 return true;
325 }
326 }
327 protected:
328 int exists( const std::string& filename ) const
329 {
330 for( size_t i = 0; i < filenames_.size(); ++i )
331 {
332 if( filename == filenames_[ i ].first )
333 {
334 return i;
335 }
336 }
337 return -1;
338 }
339
340 bool checkAbort() const
341 {
342 DimRangeSetType found ;
343 bool canAbort = true ;
344 for( size_t i = 0; i < filenames_.size(); ++i )
345 {
346 found.insert( filenames_[ i ].second );
347 if ( filenames_[ i ].second > 0 )
348 {
349 canAbort = false ;
350 }
351 }
352 typedef DimRangeSetType :: iterator iterator ;
353 for( iterator it = found.begin(); it != found.end(); ++it )
354 {
355 dimRanges_.erase( *it );
356 }
357
358 if( canAbort && dimRanges_.size() == 0 )
359 return true ;
360 else
361 return false ;
362 }
363 };
364
366 template < int simdWidth = 4 >
368 {
369 static const char* restrictKey()
370 {
371 return "__restrict__";
372 }
373
374 static const char* doubletype()
375 {
376#ifdef COUNT_FLOPS
377 return "Dune::Fem::Double";
378#else
379 return "double";
380#endif
381 }
382
383 static void writePreCompHeader(std::ostream& out, const int stage )
384 {
385 const char* codegenPreCompVar = "CODEGEN_COMPILE_INNERLOOPS";
386 if( stage == -1 )
387 {
388 out << "#ifndef HEADERCHECK" << std::endl;
389 //out << "#if ! " << codegenPreCompVar << std::endl;
390 }
391 else if( stage == 0 )
392 {
393 out << "#else" << std::endl;
394 out << "#if " << codegenPreCompVar << " == 1" << std::endl;
395 out << "extern \"C\" {" << std::endl
396 << " extern inline" << std::endl;
397 out << "#endif" << std::endl;
398 }
399 else if( stage == 1 )
400 {
401 out << "#if " << codegenPreCompVar << " == 1" << std::endl;
402 out << " ;" << std::endl;
403 out << "}" << std::endl;
404 out << "#else" << std::endl;
405 }
406 else
407 {
408 //out << "#endif" << std::endl;
409 //out << "#endif" << std::endl;
410 out << "#endif" << std::endl;
411 }
412 }
413
414 // generate inner loop function name
415 static std::string generateFunctionName( const std::string& prefix,
416 const int simdW, const int dimRange,
417 const size_t numRows, const size_t numCols )
418 {
419 std::stringstream funcName;
420 funcName << prefix << "_" << simdWidth << "_" << dimRange << "_" << numRows << "_" << numCols ;
421 return funcName.str();
422 }
423
424 static void writeInnerLoopEval(std::ostream& out, const int simdW, const int dimRange, const size_t numRows, const size_t numCols )
425 {
426 out << " // Loop over all quadrature points" << std::endl;
427 out << " for(int qp = 0; qp < " << numRows << " ; ++qp )" << std::endl;
428 out << " {" << std::endl;
429 /*
430 if( simdW == 1 )
431 {
432 out << " const " << doubletype() << " phi0 = rangeStorage[ quad.cachingPoint( row ) * " << numCols << " + col ][ 0 ];" << std::endl;
433 }
434 else
435 {
436 for( int i = 0 ; i< simdW ; ++ i )
437 out << " const " << doubletype() << " phi" << i << " = base" << i << "[ row ];" << std::endl;
438 }
439 */
440 for( int i = 0 ; i< simdW ; ++ i )
441 out << " const " << doubletype() << " phi" << i << " = base" << i << "[ qp ];" << std::endl;
442 for(int r = 0; r < dimRange; ++ r )
443 {
444 out << " result" << r << "[ qp ] += phi0 * dof0" << r;
445 for( int i=1; i<simdW; ++i )
446 out << " + phi" << i << " * dof" << i << r;
447 out << " ;" << std::endl;
448 }
449 out << " }" << std::endl;
450 }
451
452 static void evaluateCodegen(std::ostream& out,
453 const int dim,
454 const int dimRange,
455 const size_t numRows,
456 const size_t numCols )
457 {
458 const std::string funcName =
459 generateFunctionName( "evalRangesLoop", simdWidth, dimRange, numRows, numCols );
460
461 writePreCompHeader( out, -1 );
462
463 out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
464 out << "struct EvaluateRanges<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
465 out << "{" << std::endl;
466 out << " template< class QuadratureType,"<< std::endl;
467 out << " class RangeVectorType," << std::endl;
468 out << " class RangeFactorType," << std::endl;
469 out << " class LocalDofVectorType>" << std::endl;
470 out << " static void eval( const QuadratureType& quad," << std::endl;
471 out << " const RangeVectorType& rangeStorageTransposed," << std::endl;
472 out << " const LocalDofVectorType& dofs," << std::endl;
473 out << " RangeFactorType &rangeVector)" << std::endl;
474 out << " {" << std::endl;
475 //out << " typedef typename ScalarRangeType :: field_type field_type;" << std::endl;
476 //out << " typedef Field field_type;" << std::endl;
477 //out << " typedef typename RangeVectorType :: value_type value_type;" << std::endl;
478 //out << " typedef RangeType value_type;" << std::endl;
479
480 out << " typedef " << doubletype() << " Field;" << std::endl;
481 //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << numRows * dimRange << " );" << std::endl;
482 out << " static thread_local std::vector< Field > memory( " << numRows * dimRange << " );" << std::endl;
483
484 // make length simd conform
485 //out << " Field* resultTmp = (*memory).data();" << std::endl;
486 out << " Field* resultTmp = memory.data();" << std::endl;
487 out << " for( int i=0; i < " << numRows * dimRange << "; ++ i ) resultTmp[ i ] = 0;" << std::endl <<std::endl;
488
489 for(int r=0; r<dimRange ; ++r )
490 {
491 out << " Field* result" << r << " = resultTmp + " << r * numRows << " ;" << std::endl;
492 }
493 out << std::endl;
494
495 out << " const Field* baseData = rangeStorageTransposed.data();" << std::endl;
496 //for( int i=0; i< simdWidth; ++ i )
497 //{
498 // out << " Field base" << i << "[ " << numRows << " ];" << std::endl;
499 //}
500 out << std::endl;
501 const size_t simdCols = simdWidth * ( numCols / simdWidth );
502 if( simdCols > 0 )
503 {
504 out << " for( int col = 0, dof = 0 ; col < "<< simdCols <<" ; col += " << simdWidth << ", dof += " << simdWidth * dimRange<< " )"<<std::endl;
505 out << " {" << std::endl;
506
507 //out << " const value_type& rangeStorageRow = rangeStorage[ rowMap[ row ] ];" << std::endl;
508 //out << " for( int row = 0 ; row < " << numRows << " ; ++ row )" << std::endl;
509 //out << " {" << std::endl;
510
511 //out << " int idx = quad.cachingPoint( row ) * " << numCols << " + col;" << std::endl;
512 //for( int i=0; i< simdWidth; ++ i )
513 //{
514 // const char* plusplus = (i == simdWidth-1) ? " " : "++";
515 // out << " base" << i << "[ row ] = rangeStorage[ idx" << plusplus << " ][ 0 ];" << std::endl;
516 //}
517 //out << " }" << std::endl;
518
519
520 out << " " << funcName << "(" << std::endl;
521 out << " ";
522 for( int i = 0; i< simdWidth * dimRange; ++i )
523 out << " dofs[ dof + " << i << " ],";
524 out << std::endl;
525 for( int i=0; i< simdWidth; ++i )
526 out << " baseData + ((col + " << i << ") * " << numRows << ")," << std::endl;
527 out << " ";
528 for( int r = 0; r < dimRange-1; ++r)
529 out << "result" << r << ", ";
530 out << "result" << dimRange-1 << " );" << std::endl;
531 out << " }"<< std::endl;
532 out << std::endl;
533 }
534
535 if( numCols > simdCols )
536 {
537 out << " // remainder iteration" << std::endl;
538 out << " for( int col = " << simdCols << ", dof = " << simdCols * dimRange << " ; col < " << numCols << " ; ++col )" << std::endl;
539 out << " {" << std::endl;
540 for( int r=0; r<dimRange; ++r )
541 out << " const " << doubletype() << " dof0" << r << " = dofs[ dof++ ];" << std::endl;
542 out << " const Field* base0 = baseData + (col * " << numRows << ");" << std::endl;
543
544 writeInnerLoopEval( out, 1, dimRange, numRows, numCols );
545 out << " }" << std::endl;
546 out << std::endl;
547 }
548
549 out << " // store result" << std::endl;
550 out << " for(int row = 0; row < " << numRows << " ; ++row )" << std::endl;
551 out << " {" << std::endl;
552 out << " auto& result = rangeVector[ row ];" << std::endl;
553 for( int r = 0 ; r < dimRange; ++ r )
554 {
555 out << " result[ " << r << " ] = result" << r << "[ row ];" << std::endl;
556 }
557 out << " }" << std::endl;
558 out << " }" << std::endl << std::endl;
559 //out << "};" << std::endl;
560
561 //writePreCompHeader( out, 0 );
562 out << " static void " << funcName << "(" << std::endl;
563 for( int i=0; i<simdWidth; ++i )
564 {
565 out << " ";
566 for( int r=0; r<dimRange; ++ r )
567 {
568 if( r > 0 ) out << " ";
569 out << "const " << doubletype() << " dof"<< i << r << ",";
570 }
571 out << std::endl;
572 }
573 for( int i=0; i<simdWidth; ++ i )
574 out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
575 for( int r=0; r<dimRange; ++ r )
576 {
577 out << " " << doubletype() << "* "<< restrictKey() << " result" << r;
578 if( r == dimRange-1 ) out << " )" << std::endl;
579 else out << "," << std::endl;
580 }
581
582 //writePreCompHeader( out, 1 );
583 out << " {" << std::endl;
584 writeInnerLoopEval( out, simdWidth, dimRange, numRows, numCols );
585 out << " }" << std::endl;
586 out << "};" << std::endl;
587 writePreCompHeader( out, 2 );
588 }
589
590 static void writeInnerLoop(std::ostream& out, const int simdW, const int dimRange, const size_t numCols )
591 {
592 for( int i=0; i< simdW; ++i )
593 {
594 for( int r=0; r< dimRange; ++r )
595 {
596 out << " const " << doubletype() << " fac" << i << r << " = rangeFactor" << i << "[ " << r << " ];" << std::endl;
597 }
598 }
599 if( simdW == 1 )
600 out << " int rowCol = quad.cachingPoint( row ) * " << numCols <<";" << std::endl;
601 out << " for(int col = 0 ; col < " << numCols << " ; ++ col";
602 if( simdW == 1 )
603 out << ", ++rowCol";
604 out << " )" << std::endl;
605 out << " {" << std::endl;
606 for( int i = 0 ; i< simdW ; ++ i )
607 {
608 if( simdW == 1 )
609 out << " const " << doubletype() << " phi" << i << " = rangeStorage[ rowCol ][ 0 ];" << std::endl;
610 else
611 out << " const " << doubletype() << " phi" << i << " = base" << i << " [ col ];" << std::endl;
612 }
613 for(int r = 0; r < dimRange; ++ r )
614 {
615 out << " dofs" << r << "[ col ] += phi0 * fac0" << r;
616 for( int i=1; i<simdW; ++i )
617 {
618 out << " + phi" << i << " * fac" << i << r ;
619 }
620 out << " ;" << std::endl;
621 }
622 out << " }" << std::endl;
623 }
624
625 static void axpyCodegen(std::ostream& out,
626 const int dim, const int dimRange, const size_t numRows, const size_t numCols )
627 {
628 const std::string funcName =
629 generateFunctionName( "axpyRangesLoop", simdWidth, dimRange, numRows, numCols );
630
631 writePreCompHeader( out, -1 );
632
633 out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
634 out << "struct AxpyRanges<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
635 out << "{" << std::endl;
636
637 out << std::endl;
638 out << " template< class QuadratureType,"<< std::endl;
639 out << " class RangeVectorType," << std::endl;
640 out << " class RangeFactorType," << std::endl;
641 out << " class LocalDofVectorType>" << std::endl;
642 out << " static void axpy( const QuadratureType& quad," << std::endl;
643 out << " const RangeVectorType& rangeStorage," << std::endl;
644 out << " const RangeFactorType& rangeFactors," << std::endl;
645 out << " LocalDofVectorType& dofs)" << std::endl;
646 out << " {" << std::endl;
647
649 // axpy
651
652 //out << " typedef RangeType value_type;" << std::endl;
653 //out << " typedef typename ScalarRangeType :: field_type field_type;" << std::endl;
654
655 out << " typedef " << doubletype() << " Field;" << std::endl;
656 out << " static thread_local std::vector< Field > memory( " << numCols * dimRange << " );" << std::endl;
657 //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << numCols * dimRange << " );" << std::endl;
658
659 //out << " " << doubletype() << "* dofResult = (*memory).data();" << std::endl;
660 out << " " << doubletype() << "* dofResult = memory.data();" << std::endl;
661 out << " for( int i=0; i < " << numCols * dimRange << "; ++i ) dofResult[ i ] = 0;" << std::endl << std::endl;
662
663 out << " " << doubletype() << "* dofs0 = dofResult;" << std::endl;
664 for( int r = 1; r < dimRange; ++ r )
665 out << " " << doubletype() << "* dofs" << r << " = dofResult + " << r * numCols << ";" << std::endl;
666 out << std::endl;
667
668 const size_t simdRows = simdWidth * (numRows / simdWidth) ;
669
670 if( simdRows > 0 )
671 {
672 out << " for( int row = 0; row < "<< simdRows << " ; row += " << int(simdWidth) << " )" << std::endl;
673 out << " {" << std::endl;
674 for( int i=0; i<simdWidth; ++ i )
675 out << " const " << doubletype() << "* rangeFactor" << i << " = &rangeFactors[ row + " << i << " ][ 0 ];" << std::endl;
676 out << " " << funcName << "(";
677 for( int i = 0; i < simdWidth; ++i )
678 {
679 if( i>0 )
680 out << " ";
681 out << " &rangeStorage[ quad.cachingPoint( row + " << i << " ) * " << numCols << " ][ 0 ]," << std::endl;
682 }
683 out << " rangeFactor0, ";
684 for( int i=1; i<simdWidth; ++ i )
685 out << "rangeFactor" << i << ",";
686 out << std::endl;
687 out << " ";
688 for( int r = 0; r < dimRange; ++ r )
689 {
690 out << " dofs" << r ;
691 if( r == dimRange-1 )
692 out << " );" << std::endl;
693 else
694 out << ",";
695 }
696 out << std::endl;
697 out << " }" << std::endl;
698 out << std::endl;
699 }
700
701 if( numRows > simdRows )
702 {
703 out << " // remainder iteration" << std::endl;
704 out << " for( int row = " << simdRows << " ; row < " << numRows << " ; ++row )" << std::endl;
705 out << " {" << std::endl;
706 out << " const " << doubletype() << "* rangeFactor0 = &rangeFactors[ row ][ 0 ];" << std::endl;
707 writeInnerLoop( out, 1, dimRange, numCols );
708 out << " }" << std::endl;
709 out << std::endl;
710 }
711
712 out << " // sum up results (transform from variable based to point based layout)" << std::endl;
713 out << " for( int col = 0, dof = 0 ; col < "<< numCols << " ; ++col )" << std::endl;
714 out << " {" << std::endl;
715 for( int r = 0 ; r < dimRange; ++ r )
716 out << " dofs[ dof++ ] += dofs" << r << "[ col ];" << std::endl;
717 out << " }" << std::endl;
718
719 out << " }" << std::endl << std::endl;
720 //out << "};" << std::endl;
721
723 // inner loop
725 //writePreCompHeader( out, 0 );
726 out << " static void " << funcName << "(" << std::endl;
727 out << " const " << doubletype() << "* " << restrictKey() << " base0," << std::endl;
728 for( int i=1; i<simdWidth; ++ i )
729 out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
730 for( int i=0; i<simdWidth; ++ i )
731 out << " const " << doubletype() << "* " << restrictKey() << " rangeFactor" << i << "," << std::endl;
732 for( int r = 0; r < dimRange; ++r )
733 {
734 out << " " << doubletype() << "* " << restrictKey() << " dofs" << r;
735 if( r == dimRange-1 )
736 out << " )" << std::endl;
737 else
738 out << "," << std::endl;
739 }
740 //writePreCompHeader( out, 1 );
741 out << " {" << std::endl;
742 writeInnerLoop( out, simdWidth, dimRange, numCols );
743 out << " }" << std::endl;
744 out << "};" << std::endl;
745 writePreCompHeader( out, 2 );
746 }
747
748 static void writeInnerJacEvalLoop(std::ostream& out, const int simdW, const int dim,
749 const int dimRange, const size_t numRows, const size_t numCols )
750 {
751 out << " for(int row = 0; row < " << numRows << " ; ++row )" << std::endl;
752 out << " {" << std::endl;
753 if( simdW == 1 )
754 {
755 /*
756 out << " int idx = quad.cachingPoint( row ) * " << numCols << " + col ;" << std::endl;
757 out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
758 for( int i = 0 ; i< simdW ; ++ i )
759 {
760 const char* plusplus = (i == simdW-1) ? " " : "++";
761 out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
762 for( int d = 0 ; d < dim; ++ d )
763 out << " const " << doubletype() << " phi" << i << d << " = gradPhi" << i << "[ " << d << " ];" << std::endl;
764 }
765 */
766 /*
767 for( int i = 0 ; i< simdW ; ++ i )
768 {
769 const char* plusplus = (i == simdW-1) ? " " : "++";
770 out << " gradPhi" << i << " = jacStorage[ idx" << plusplus << " ][ 0 ];" << std::endl;
771 //out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
772 for( int d = 0 ; d < dim; ++ d )
773 out << " const " << doubletype() << " phi" << i << d << " = gradPhi" << i << "[ " << d << " ];" << std::endl;
774 }
775 */
776 // out << " const "<< doubletype() << "* base" << i << " = jacobians + (" << dim * numRows << " * (col + "<< i << "));" << std::endl;
777 for( int i = 0 ; i< simdW ; ++ i )
778 {
779 //out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
780 for( int d = 0 ; d < dim; ++ d )
781 out << " const " << doubletype() << " phi" << i << d << " = base" << i << "[ row * " << dim << " + " << d << " ];" << std::endl;
782 }
783 out << std::endl;
784 }
785 else
786 {
787 out << " const int idx = row * " << dim<< ";" << std::endl;
788 for( int d = 0; d < dim ; ++ d )
789 {
790 for( int i = 0 ; i< simdW ; ++ i )
791 out << " const " << doubletype() << " phi" << i << d << " = base" << i << "[ idx + " << d << " ];" << std::endl;
792 }
793 }
794 for( int d = 0; d < dim ; ++ d )
795 {
796 for(int r = 0; r < dimRange; ++ r )
797 {
798 out << " result" << r << d << "[ row ] += phi0" << d << " * dof0" << r;
799 for( int i=1; i<simdW; ++i )
800 out << " + phi" << i << d << " * dof" << i << r;
801 out << " ;" << std::endl;
802 }
803 }
804 out << " }" << std::endl;
805 }
806
807 static void evaluateJacobiansCodegen(std::ostream& out,
808 const int dim, const int dimRange, const size_t numRows, const size_t numCols )
809 {
810 const std::string funcName =
811 generateFunctionName( "evalJacobianLoop", simdWidth, dimRange, numRows, numCols );
812
813 writePreCompHeader( out, -1 );
814
815 out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
816 out << "struct EvaluateJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
817 out << "{" << std::endl;
818 out << " template< class QuadratureType,"<< std::endl;
819 out << " class JacobianRangeVectorType," << std::endl;
820 out << " class LocalDofVectorType," << std::endl;
821 out << " class JacobianRangeFactorType>" << std::endl;
822 out << " static void eval( const QuadratureType&," << std::endl;
823 out << " const EmptyGeometry&," << std::endl;
824 out << " const JacobianRangeVectorType&," << std::endl;
825 out << " const LocalDofVectorType&," << std::endl;
826 out << " JacobianRangeFactorType &)" << std::endl;
827 out << " {" << std::endl;
828 out << " std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
829 out << " abort();" << std::endl;
830 out << " }" << std::endl;
831 out << "};" << std::endl << std::endl;
832 out << "template <class BaseFunctionSet, class Geometry> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
833 out << "struct EvaluateJacobians<BaseFunctionSet, Geometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
834 out << "{" << std::endl;
835 out << " template< class QuadratureType,"<< std::endl;
836 out << " class JacobianRangeVectorType," << std::endl;
837 out << " class LocalDofVectorType," << std::endl;
838 out << " class JacobianRangeFactorType>" << std::endl;
839 out << " static void eval( const QuadratureType& quad," << std::endl;
840 out << " const Geometry& geometry," << std::endl;
841 out << " const JacobianRangeVectorType& jacStorage," << std::endl;
842 out << " const LocalDofVectorType& dofs," << std::endl;
843 out << " JacobianRangeFactorType& jacFactors)" << std::endl;
844 out << " {" << std::endl;
845 out << " evalJac( quad, geometry, jacStorage, dofs, jacFactors, jacFactors[ 0 ] );" << std::endl;
846 out << " }" << std::endl;
847 out << "private:" << std::endl;
848 out << " template< class QuadratureType,"<< std::endl;
849 out << " class JacobianRangeVectorType," << std::endl;
850 out << " class LocalDofVectorType," << std::endl;
851 out << " class JacobianRangeFactorType," << std::endl;
852 out << " class GlobalJacobianRangeType>" << std::endl;
853 out << " static void evalJac( const QuadratureType& quad," << std::endl;
854 out << " const Geometry& geometry," << std::endl;
855 out << " const JacobianRangeVectorType& jacStorage," << std::endl;
856 out << " const LocalDofVectorType& dofs," << std::endl;
857 out << " JacobianRangeFactorType& jacVector," << std::endl;
858 out << " const GlobalJacobianRangeType&)" << std::endl;
859 out << " {" << std::endl;
860 //out << " typedef typename JacobianRangeVectorType :: value_type value_type;" << std::endl;
861 //out << " typedef JacobianRangeType value_type;" << std::endl;
862 //out << " typedef typename JacobianRangeType :: field_type field_type;" << std::endl;
863 out << " typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
864
865 const size_t nDofs = numRows * dimRange * dim ;
866 out << " typedef " << doubletype() << " Field;" << std::endl;
867 out << " static thread_local std::vector< Field > memory( " << nDofs << " );" << std::endl;
868 //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << nDofs << " );" << std::endl;
869
870 for( int d = 0; d < dim ; ++ d )
871 {
872 // make length simd conform
873 //out << " Field* resultTmp" << d << " = (*memory).data() + " << d * numRows * dimRange << ";" << std::endl;
874 out << " Field* resultTmp" << d << " = memory.data() + " << d * numRows * dimRange << ";" << std::endl;
875 }
876 out << " for( int i=0; i<" << numRows * dimRange << "; ++i ) " << std::endl;
877 out << " {" << std::endl;
878 for( int d = 0; d < dim ; ++ d )
879 out << " resultTmp" << d << "[ i ] = 0;" << std::endl;
880 out << " }" << std::endl << std::endl;
881
882 for( int d = 0; d < dim ; ++ d )
883 {
884 for(int r=0; r<dimRange ; ++r )
885 {
886 out << " Field* result" << r << d <<" = resultTmp" << d << " + " << r * numRows << " ;" << std::endl;
887 }
888 }
889 out << std::endl;
890
891 const size_t simdNumCols = simdWidth * ( numCols / simdWidth );
892 out << " typedef typename GlobalJacobianRangeType :: row_type JacobianRangeType;" << std::endl;
893 out << " const " << doubletype() << "* jacobians = jacStorage.data();" << std::endl;
894 if( simdNumCols > 0 )
895 {
896 /*
897 for( int d = 0; d < dim; ++d )
898 {
899 for( int i=0; i< simdWidth; ++ i )
900 {
901 out << " Field* base" << i << d << " = memory.data() + " << nDofs + ((d * simdWidth) + i) * numRows << ";" << std::endl;
902 }
903 }
904 */
905
906 //for( int i=0; i< simdWidth; ++ i )
907 //out << " JacobianRangeType gradPhi" << i << ";" << std::endl;
908
909 out << " for( int col = 0, dof = 0 ; col < "<< simdNumCols <<" ; col += " << simdWidth << ", dof += " << simdWidth * dimRange<< " )"<<std::endl;
910 out << " {" << std::endl;
911 /*
912 out << " for( int row = 0; row < " << numRows << " ; ++ row )" << std::endl;
913 out << " {" << std::endl;
914 out << " int idx = quad.cachingPoint( row ) * " << numCols << " + col ;" << std::endl;
915 out << " // use reference to GeometryJacobianType to make code compile with SPGrid Geometry" << std::endl;
916 */
917 //out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
918 /*
919 for( int i=0; i< simdWidth; ++ i )
920 {
921 const char* plusplus = (i == simdWidth-1) ? " " : "++";
922 out << " gjit.mv( jacStorage[ idx" << plusplus << " ][ 0 ], gradPhi" << i << " );" << std::endl;
923 }
924 */
925 /*
926 for( int i=0; i< simdWidth; ++ i )
927 {
928 const char* plusplus = (i == simdWidth-1) ? " " : "++";
929 out << " gradPhi" << i << " = jacStorage[ idx" << plusplus << " ][ 0 ];" << std::endl;
930 }
931 out << std::endl;
932 */
933 /*
934 for( int d = 0; d < dim; ++ d )
935 {
936 for( int i=0; i< simdWidth; ++ i )
937 {
938 out << " base" << i << d << "[ row ] = gradPhi"<< i << "[ " << d << " ];" << std::endl;
939 }
940 }
941 out << " }" << std::endl << std::endl;
942 */
943 for( int i=0; i< simdWidth; ++ i )
944 {
945 out << " const "<< doubletype() << "* base" << i << " = jacobians + (" << dim * numRows << " * (col + "<< i << "));" << std::endl;
946 }
947
948 out << " " << funcName << "(";
949 for( int i = 0; i< simdWidth * dimRange; ++i )
950 out << " dofs[ dof + " << i << " ],";
951 out << std::endl << " ";
952 for( int i=0; i< simdWidth; ++i )
953 out << "base" << i << ", ";
954 out << std::endl << " ";
955 for( int d = 0; d < dim; ++ d )
956 {
957 for( int r = 0; r < dimRange; ++r)
958 {
959 out << "result" << r << d;
960 if( (r == dimRange - 1) && (d == dim-1 ) ) out << std::endl;
961 else out << ", ";
962 }
963 }
964 out << " );" << std::endl;
965 out << " }"<< std::endl;
966 out << std::endl;
967 }
968
969 // remainder iteration
970 if( numCols > simdNumCols )
971 {
972 out << " // remainder iteration" << std::endl;
973 out << " for( int col = " << simdNumCols << ", dof = " << simdNumCols * dimRange << " ; col < " << numCols << " ; ++col )" << std::endl;
974 out << " {" << std::endl;
975 out << " const "<< doubletype() << "* base0" << " = jacobians + (" << dim * numRows << " * col);" << std::endl;
976 for( int r=0; r<dimRange; ++r )
977 out << " const " << doubletype() << " dof0" << r << " = dofs[ dof++ ];" << std::endl;
978 writeInnerJacEvalLoop( out, 1, dim, dimRange, numRows, numCols );
979 out << " }" << std::endl;
980 out << std::endl;
981 }
982
983 out << " // store result" << std::endl;
984 out << " JacobianRangeType tmp;" << std::endl;
985 out << " for(int row = 0; row < " << numRows << " ; ++row )" << std::endl;
986 out << " {" << std::endl;
987 out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
988 out << " GlobalJacobianRangeType& result = jacVector[ row ];" << std::endl;
989 for( int r = 0 ; r < dimRange; ++ r )
990 {
991 for( int d = 0 ; d < dim; ++ d )
992 {
993 out << " tmp[ " << d << " ] = result" << r << d << "[ row ];" << std::endl;
994 /*
995 {
996 out << " result[ " << r << " ][ " << d <<" ] = result" << r << d << "[ row ];" << std::endl;
997 }
998 */
999 }
1000 out << " gjit.mv( tmp, result[ "<< r << " ] );" << std::endl;
1001 }
1002 out << " }" << std::endl;
1003 out << " }" << std::endl << std::endl;
1004 //out << "};" << std::endl;
1005
1006 //writePreCompHeader( out, 0 );
1007 out << " static void " << funcName << "(" << std::endl;
1008 for( int i=0; i<simdWidth; ++i )
1009 {
1010 out << " ";
1011 for( int r=0; r<dimRange; ++ r )
1012 out << " const " << doubletype() << " dof"<< i << r << ",";
1013 out << std::endl;
1014 }
1015 for( int i=0; i<simdWidth; ++ i )
1016 out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
1017 for( int d=0; d<dim; ++ d )
1018 {
1019 for( int r=0; r<dimRange; ++ r )
1020 {
1021 out << " " << doubletype() << "* "<< restrictKey() << " result" << r << d;
1022 if( (r == dimRange - 1) && (d == dim-1 ) ) out << " )" << std::endl;
1023 else out << "," << std::endl;
1024 }
1025 }
1026
1027 //writePreCompHeader( out, 1 );
1028 out << " {" << std::endl;
1029 writeInnerJacEvalLoop( out, simdWidth, dim, dimRange, numRows, numCols );
1030 out << " }" << std::endl;
1031 out << "};" << std::endl;
1032 writePreCompHeader( out, 2 );
1033 }
1034
1035 static void writeInnerLoopAxpyJac(std::ostream& out, const int dim, const int dimRange, const size_t numCols )
1036 {
1037 out << " for( int col = 0; col < " << numCols << " ; ++col )" << std::endl;
1038 out << " {" << std::endl;
1039 for( int d =0; d < dim; ++d )
1040 out << " const " << doubletype() << " phi" << d << " = base[ (col * " << dim << ") + " << d << " ];" << std::endl;
1041
1042 for( int r = 0; r < dimRange; ++r )
1043 {
1044 out << " result" << r << "[ col ] += phi0 * jacFactorInv0" << r;
1045 for( int d=1; d < dim; ++d )
1046 out << " + phi" << d << " * jacFactorInv" << d << r;
1047 out << ";" << std::endl;
1048 }
1049 out << " }" << std::endl;
1050 }
1051
1052 static void axpyJacobianCodegen(std::ostream& out,
1053 const int dim, const int dimRange, const size_t numRows, const size_t numCols )
1054 {
1055 const std::string funcName =
1056 generateFunctionName( "axpyJacobianLoop", simdWidth, dimRange, numRows, numCols );
1057
1058 writePreCompHeader( out, -1 );
1059
1060 out << "template <class BaseFunctionSet> // dimRange = "<< dimRange << ", quadNop = " << numRows << ", scalarBasis = " << numCols << std::endl;
1061 out << "struct AxpyJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
1062 out << "{" << std::endl;
1063 out << " template< class QuadratureType,"<< std::endl;
1064 out << " class JacobianRangeVectorType," << std::endl;
1065 out << " class JacobianRangeFactorType," << std::endl;
1066 out << " class LocalDofVectorType>" << std::endl;
1067 out << " static void axpy( const QuadratureType&," << std::endl;
1068 out << " const EmptyGeometry&," << std::endl;
1069 out << " const JacobianRangeVectorType&," << std::endl;
1070 out << " const JacobianRangeFactorType&," << std::endl;
1071 out << " LocalDofVectorType&)" << std::endl;
1072 out << " {" << std::endl;
1073 out << " std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
1074 out << " abort();" << std::endl;
1075 out << " }" << std::endl;
1076 out << "};" << std::endl << std::endl;
1077 out << "template <class BaseFunctionSet, class Geometry>" << std::endl;
1078 out << "struct AxpyJacobians<BaseFunctionSet, Geometry, " << dimRange << ", " << numRows << ", " << numCols << ">" << std::endl;
1079 out << "{" << std::endl;
1080
1081 out << " template< class QuadratureType,"<< std::endl;
1082 out << " class JacobianRangeVectorType," << std::endl;
1083 out << " class JacobianRangeFactorType," << std::endl;
1084 out << " class LocalDofVectorType>" << std::endl;
1085 out << " static void axpy( const QuadratureType& quad," << std::endl;
1086 out << " const Geometry& geometry," << std::endl;
1087 out << " const JacobianRangeVectorType& jacStorage," << std::endl;
1088 out << " const JacobianRangeFactorType& jacFactors," << std::endl;
1089 out << " LocalDofVectorType& dofs)" << std::endl;
1090 out << " {" << std::endl;
1091 out << " typedef typename JacobianRangeFactorType :: value_type JacobianRangeType;" << std::endl << std::endl;
1092
1093 const size_t dofs = dimRange * numCols ;
1094 out << " typedef " << doubletype() << " Field;" << std::endl;
1095 out << " static thread_local std::vector< Field > memory( " << dofs << " );" << std::endl;
1096 //out << " static Dune::Fem::ThreadSafeValue< std::vector< Field > > memory( " << dofs << " );" << std::endl;
1097
1098 //out << " Field* result = (*memory).data();" << std::endl;
1099 out << " Field* result = memory.data();" << std::endl;
1100 out << " for( int i = 0 ; i < " << dofs << "; ++i ) result[ i ] = 0;" << std::endl << std::endl;
1101
1102 for( int r=0; r<dimRange; ++r )
1103 out << " Field* result" << r << " = result + " << r * numCols << ";" << std::endl;
1104 out << std::endl;
1105
1106 //for( int d =0; d < dim; ++d )
1107 //{
1108 // out << " Field* base"<< d << " = result + " << dofs + d * numCols << ";" << std::endl;
1109 //}
1110
1111 out << " const Field* base = jacStorage.data();" << std::endl << std::endl;
1112 out << " JacobianRangeType jacFactorTmp;" << std::endl;
1113 out << " for( int row = 0; row < " << numRows << " ; ++ row )" << std::endl;
1114 out << " {" << std::endl;
1115 out << " typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
1116 out << " // use reference to GeometryJacobianType to make code compile with SPGrid Geometry" << std::endl;
1117 out << " const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
1118 out << " for( int r = 0; r < " << dimRange << " ; ++r )" << std::endl;
1119 out << " {"<<std::endl;
1120 out << " gjit.mtv( jacFactors[ row ][ r ], jacFactorTmp[ r ] );" << std::endl;
1121 out << " }" << std::endl << std::endl;
1122
1123 /*
1124 out << " for( int col = 0, rowCol = quad.cachingPoint( row ) * " << numCols << "; col < " << numCols << " ; ++ col, ++rowCol )" << std::endl;
1125 out << " {" << std::endl;
1126 for( int d =0; d < dim; ++d )
1127 out << " base"<< d << "[ col ] = jacStorage[ rowCol ][ 0 ][ " << d << " ];" << std::endl;
1128
1129 out << " }" << std::endl;
1130 */
1131
1132 out << " // calculate updates" << std::endl;
1133 out << " " << funcName << "(";
1134 //for( int d =0; d < dim; ++d ) out << "base" << d << ", ";
1135 out << " base + ( quad.localCachingPoint( row ) * " << numCols * dim << " )," << std::endl;
1136 for( int i =0; i < dim; ++i )
1137 {
1138 out << " ";
1139 for( int r = 0; r < dimRange; ++ r )
1140 out << " jacFactorTmp[ " << r << " ][ " << i << " ], ";
1141 out << std::endl;
1142 }
1143 out << " ";
1144 for( int r = 0; r < dimRange; ++ r )
1145 {
1146 out << " result" << r;
1147 if( r == dimRange -1 )
1148 out << " );" << std::endl ;
1149 else
1150 out << ", ";
1151 }
1152 out << " }" << std::endl << std::endl;
1153
1154 out << " // sum up results (transform from variable based to point based layout)" << std::endl;
1155 out << " for( int col = 0, dof = 0 ; col < "<< numCols << " ; ++col )" << std::endl;
1156 out << " {" << std::endl;
1157 for( int r = 0 ; r < dimRange; ++ r )
1158 out << " dofs[ dof++ ] += result" << r << "[ col ];" << std::endl;
1159 out << " }" << std::endl;
1160
1161 out << " }" << std::endl << std::endl;
1162 //out << "};" << std::endl;
1163
1164
1166 // inner loop
1168 //writePreCompHeader( out, 0 );
1169
1170 out << " static void " << funcName << "(" << std::endl;
1171 out << " const " << doubletype() << "* " << restrictKey() << " base," << std::endl;
1172 //for( int i=1; i<dim; ++ i )
1173 // out << " const " << doubletype() << "* " << restrictKey() << " base" << i << "," << std::endl;
1174 for( int i=0; i<dim; ++i )
1175 {
1176 out << " ";
1177 for( int r=0; r<dimRange; ++ r )
1178 out << "const " << doubletype() << " jacFactorInv"<< i << r << ", ";
1179 out << std::endl;
1180 }
1181 for( int r = 0; r < dimRange; ++r )
1182 {
1183 out << " " << doubletype() << "* " << restrictKey() << " result" << r;
1184 if( r == dimRange-1 )
1185 out << " )" << std::endl;
1186 else
1187 out << "," << std::endl;
1188 }
1189 //writePreCompHeader( out, 1 );
1190 out << " {" << std::endl;
1191 writeInnerLoopAxpyJac( out, dim, dimRange, numCols );
1192 out << " }" << std::endl;
1193 out << "};" << std::endl;
1194 writePreCompHeader( out, 2 );
1195 }
1196 };
1197
1198 // if this pre processor variable is defined then
1199 // we assume that CODEGENERATOR_REPLACEMENT is CodeGenerator of choice
1200#ifndef FEM_CODEGENERATOR_REPLACEMENT
1202#else
1203 typedef FEM_CODEGENERATOR_REPLACEMENT CodeGeneratorType;
1204#endif
1205
1206 inline std::string autoFilename(const int dim, const int polynomialOrder )
1207 {
1208 std::stringstream name;
1209 name << "autogeneratedcode_" << dim << "d_k" << polynomialOrder << ".hh";
1210 return name.str();
1211 }
1212
1213 } // namespace Codegen
1214
1215
1216 template <class DiscreteFunctionSpace, class Vector>
1217 inline void generateCode (const DiscreteFunctionSpace& space,
1218 const Vector& elemQuadOrders,
1219 const Vector& faceQuadOrders,
1220 const std::string& outpath = "./",
1221 const std::string& filename = "autogeneratedcode.hh" )
1222 {
1223 using namespace Codegen;
1224
1225 const int dimRange = DiscreteFunctionSpace :: dimRange;
1226 const int dimDomain = DiscreteFunctionSpace :: dimDomain;
1227 const int dimGrad = dimRange*dimDomain;
1228
1229 typedef typename DiscreteFunctionSpace :: GridPartType GridPartType;
1230
1231 typedef CachingQuadrature< GridPartType, 0 > ElementQuadratureType;
1232 typedef CachingQuadrature< GridPartType, 1 > FaceQuadratureType;
1233
1234
1235 std::set< int > sizes;
1236 std::set< int > quadNops;
1237
1238 for( const auto& entity : space )
1239 {
1240 // continue if none, since the code below does not work in that case
1241 if( entity.type().isNone() ) continue;
1242
1243 // only use size of scalar basis function set, i.e. dimRange = 1
1244 const int scalarSize = space.basisFunctionSet( entity ).size() / dimRange ;
1245 sizes.insert( scalarSize );
1246
1247 const auto iend = space.gridPart().iend( entity );
1248 for( auto it = space.gridPart().ibegin( entity ); it != iend; ++it )
1249 {
1250 for( const auto& quadOrd : faceQuadOrders )
1251 {
1252 FaceQuadratureType quad( space.gridPart(), *it, quadOrd, FaceQuadratureType::INSIDE );
1253 quadNops.insert( quad.nop() );
1254 }
1255 }
1256 }
1257
1258 for( const auto& type : space.indexSet().types(0))
1259 {
1260 for( const auto& quadOrd : elemQuadOrders )
1261 {
1262 ElementQuadratureType quad( type, quadOrd );
1263 quadNops.insert( quad.nop() );
1264 }
1265 }
1266
1267 std::string path ( outpath );
1268 path += "/autogeneratedcode";
1269
1270 // set output path
1271 CodegenInfo& info = CodegenInfo::instance();
1272 info.setPath( path );
1273
1274 // add my dimrange
1275 info.addDimRange( &space, dimRange );
1276 int gradSpace;
1277 info.addDimRange( &gradSpace, dimGrad );
1278
1279 for( const auto& size : sizes )
1280 {
1281 for( const auto& quadNop : quadNops )
1282 {
1283 info.addEntry( "evalranges",
1284 CodeGeneratorType :: evaluateCodegen, dimDomain, dimRange, quadNop, size );
1285 info.addEntry( "evaljacobians",
1286 CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimRange, quadNop, size );
1287 info.addEntry( "axpyranges",
1288 CodeGeneratorType :: axpyCodegen, dimDomain, dimRange, quadNop, size );
1289 info.addEntry( "axpyjacobians",
1290 CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop, size );
1291
1292 info.addEntry( "evalranges",
1293 CodeGeneratorType :: evaluateCodegen, dimDomain, dimGrad, quadNop, size );
1294 info.addEntry( "evaljacobians",
1295 CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimGrad, quadNop, size );
1296 info.addEntry( "axpyranges",
1297 CodeGeneratorType :: axpyCodegen, dimDomain, dimGrad, quadNop, size );
1298 info.addEntry( "axpyjacobians",
1299 CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop, size );
1300 }
1301 }
1302
1303 //std::cerr << "Code for k="<< MAX_POLORD << " generated!! " << std::endl;
1304 //std::remove( autoFilename( CODEDIM, MAX_POLORD ).c_str() );
1305
1306 info.setFileName( filename );
1307 bool written = info.dumpInfo();
1308
1309 if( written )
1310 {
1311#ifndef NDEBUG
1312 std::cout << "Written code to " << filename << std::endl;
1313#endif
1315 // write include header
1317 std::ofstream file( outpath + "/" + filename );
1318
1319 if( file )
1320 {
1321 std::string header( filename );
1322 size_t size = header.size();
1323 // replace all . with _
1324 for( size_t i=0; i<size; ++i )
1325 {
1326 if( header[ i ] == '.' )
1327 header[ i ] = '_';
1328 }
1329
1330 file << "#ifndef " << header << "_INCLUDED" << std::endl;
1331 file << "#define " << header << "_INCLUDED" << std::endl;
1332 file << "#ifndef USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1333 file << "#define USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1334 file << "#endif" << std::endl;
1335 file << "// this is the file containing the necessary includes for the specialized codes" << std::endl;
1336 file << "#define DUNE_FEM_INCLUDE_AUTOGENERATEDCODE_FILENAME_SPEC \"" << path << "/" << filename << "\"" << std::endl;
1337 file << "#endif" << std::endl;
1338 }
1339 }
1340 else
1341 {
1342#ifndef NDEBUG
1343 std::cout << "No changes written to " << filename << std::endl << std::endl;
1344#endif
1345 }
1346 }
1347
1348 } // namespace Fem
1349
1350} // namespace Dune
1351
1352#endif // #ifndef DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
discrete function space
static DUNE_EXPORT Object & instance(Args &&... args)
return singleton instance of given Object type.
Definition: singleton.hh:123
A few common exception classes.
#define DUNE_THROW(E, m)
Definition: exceptions.hh:218
Dune namespace.
Definition: alignedallocator.hh:13
constexpr std::integral_constant< std::size_t, sizeof...(II)> size(std::integer_sequence< T, II... >)
Return the size of the sequence.
Definition: integersequence.hh:75
default code generator methods
Definition: codegen.hh:368
Creative Commons License   |  Legal Statements / Impressum  |  Hosted by TU Dresden  |  generated with Hugo v0.111.3 (Nov 12, 23:30, 2024)