1#ifndef DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
2#define DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
14#include <dune/fem/io/io.hh>
15#include <dune/fem/io/parameter.hh>
17#include <dune/fem/quadrature/cachingquadrature.hh>
18#include <dune/fem/storage/singleton.hh>
29 class CodegenInfoFinished :
public Dune :: Exception {};
34 std::string outFileName_;
42 typedef std::set< int > DimRangeSetType;
43 typedef std::set< void* > BaseSetPointerType;
44 DimRangeSetType savedimRanges_;
45 mutable DimRangeSetType dimRanges_;
46 BaseSetPointerType savedBaseSets_;
48 typedef void codegenfunc_t (std::ostream& out,
52 const size_t numCols );
54 typedef std::pair< std::string, int > EntryType;
55 std::vector< EntryType > filenames_;
57 typedef std::vector< int > KeyType;
58 typedef std::set< KeyType > KeyStorageType;
60 typedef std::pair< int, std::pair< int, int > > EvalPairType ;
61 typedef std::set< EvalPairType > EvalSetType ;
63 std::map< codegenfunc_t* , KeyStorageType > entryMap_;
68 : path_(
Dune::Fem::Parameter::commonInputPath() +
"/autogeneratedcode"),
69 outFileName_(
"autogeneratedcode.hh" ),
70 nopMax_(0), nopMin_(0), baseMax_(0), baseMin_(0),
71 dimRanges_(), savedBaseSets_(), filenames_(),
77 static CodegenInfo& instance()
84 savedBaseSets_.clear();
96 void setPath(
const std::string& path ) { path_ = path ; }
99 void setFileName(
const std::string& filename ) { outFileName_ = filename ; }
101 template <
class BaseSet>
102 void addDimRange(
const BaseSet* baseSet,
105 void* ptr = (
void *) baseSet;
106 if( savedBaseSets_.find( ptr ) == savedBaseSets_.end() )
108 savedBaseSets_.insert( ptr );
110 std::cout <<
"Add dimRange " << dimRange << std::endl;
112 dimRanges_.insert( dimRange ) ;
116 void addEntry(
const std::string& fileprefix,
117 codegenfunc_t* codegenfunc,
118 const int dim,
const int dimRange,
const int quadNop,
const int numBase )
120 KeyStorageType& keyMap = entryMap_[ codegenfunc ];
122 typedef KeyStorageType :: iterator iterator;
130 iterator it = keyMap.find( key );
131 if( it != keyMap.end() ) return ;
134 assert( dimRanges_.find( dimRange ) != dimRanges_.end() );
137 createDirectory( path_ );
139 std::stringstream filename;
140 filename << fileprefix << dimRange <<
"_" << quadNop <<
"_" << numBase <<
".hh";
143 int pos = exists( filename.str() );
144 if( pos >= 0 )
return;
146 std::string filenameToWrite( path_ +
"/" + filename.str() );
150 std::stringstream code;
152 codegenfunc( code, dim, dimRange, quadNop, numBase );
154 bool writeCode = false ;
157 std::ifstream infile( filenameToWrite );
160 std::stringstream checkstr;
161 checkstr << infile.rdbuf();
165 if( checkstr.str().compare( code.str() ) != 0 )
182 std::ofstream file( filenameToWrite, std::ios::out );
186 std::cout <<
"Generate code " << fileprefix <<
" for (" << dimRange <<
","
187 << quadNop <<
"," << numBase <<
")" << std::endl;
193 EvalPairType evalPair ( dimRange, std::make_pair(quadNop, numBase) );
194 evalSet_.insert( evalPair );
196 if( baseMin_ == 0 ) baseMin_ = numBase;
197 if( nopMin_ == 0 ) nopMin_ = quadNop;
199 EntryType entry ( filename.str() , dimRange );
200 filenames_.push_back( entry );
201 nopMax_ = std::max( quadNop, nopMax_ );
202 nopMin_ = std::min( quadNop, nopMin_ );
203 baseMax_ = std::max( numBase, baseMax_ );
204 baseMin_ = std::min( numBase, baseMin_ );
207 void finalize ()
const
212 std::cerr <<
"All automated code generated, bye, bye !! " << std::endl;
213 DUNE_THROW(CodegenInfoFinished,
"All automated code generated, bye, bye !!");
218 bool dumpInfo(
const bool writeToCurrentDir =
false )
const
220 if( writeToCurrentDir )
224 std::ofstream file( outFileName_.c_str() );
225 file <<
"#include \"" <<path_<<
"/" << outFileName_ <<
"\"";
229 std::string filename( path_ +
"/" + outFileName_ );
230 std::stringstream file;
232 file <<
"#ifdef CODEGEN_INCLUDEMAXNUMS" << std::endl;
233 file <<
"#ifndef CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl;
234 file <<
"#define CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
235 file <<
"#define MAX_NUMBER_OF_QUAD_POINTS " << nopMax_ << std::endl;
236 file <<
"#define MIN_NUMBER_OF_QUAD_POINTS " << nopMin_ << std::endl;
237 file <<
"#define MAX_NUMBER_OF_BASE_FCT " << baseMax_ << std::endl;
238 file <<
"#define MIN_NUMBER_OF_BASE_FCT " << baseMin_ << std::endl << std::endl;
240 file <<
"/* include all headers with inner loop extern declarations */" << std::endl;
241 file <<
"#define CODEGEN_COMPILE_INNERLOOPS 1" << std::endl;
242 file <<
"namespace Dune {" << std::endl;
243 file <<
"namespace Fem {" << std::endl;
244 file <<
"namespace Codegen {" << std::endl;
245 for(
size_t i = 0; i < filenames_.size(); ++i )
247 file <<
"#include \""<< filenames_[ i ].first <<
"\"" << std::endl;
249 file <<
"}}} // end namespaces" << std::endl;
250 file <<
"#undef CODEGEN_COMPILE_INNERLOOPS" << std::endl << std::endl;
251 file <<
"#include \"" << filename <<
".c\"" <<std::endl;
254 file <<
"#endif // CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
255 file <<
"#elif defined CODEGEN_INCLUDEEVALCALLERS" << std::endl;
256 file <<
"#ifndef CODEGEN_EVALCALLERS_INCLUDED" << std::endl;
257 file <<
"#define CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
258 file <<
"namespace Dune {"<< std::endl;
259 file <<
"namespace Fem {"<< std::endl;
260 file <<
"namespace Codegen {"<< std::endl;
261 typedef EvalSetType :: iterator iterator ;
262 const iterator endit = evalSet_.end();
263 for( iterator it = evalSet_.begin(); it != endit; ++it )
265 int dimRange = it->first;
266 int quadNop = it->second.first;
267 int numBase = it->second.second;
268 file <<
" template <class Traits>" << std::endl;
269 file <<
" struct EvaluateImplementation< Traits, " << dimRange <<
" , " << quadNop <<
" , " << numBase <<
" >" << std::endl;
270 file <<
" : public EvaluateRealImplementation< Traits, " << dimRange <<
" , " << quadNop <<
" , " << numBase <<
" >" << std::endl;
271 file <<
" {" << std::endl;
272 file <<
" typedef EvaluateRealImplementation< Traits, " << dimRange <<
" , " << quadNop <<
" , " << numBase <<
" > BaseType;" << std::endl;
273 file <<
" typedef typename BaseType :: RangeVectorType RangeVectorType;" << std::endl;
274 file <<
" EvaluateImplementation( const RangeVectorType& rv ) : BaseType ( rv ) {}" << std::endl;
275 file <<
" };"<< std::endl;
278 file <<
"}}} // end namespaces"<< std::endl;
279 file <<
"#endif // CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
280 file <<
"#else" << std::endl << std::endl ;
281 file <<
"#ifndef CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
282 file <<
"#define CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
284 file <<
"namespace Dune {" << std::endl;
285 file <<
"namespace Fem {" << std::endl;
286 file <<
"namespace Codegen {" << std::endl;
287 for(
size_t i = 0; i < filenames_.size(); ++i )
289 file <<
"#include \""<< filenames_[ i ].first <<
"\"" << std::endl;
291 file <<
"}}} // end namespaces" << std::endl;
292 file <<
"#endif // CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl << std::endl;
293 file <<
"#endif // CODEGEN_INCLUDEMAXNUMS" << std::endl;
295 std::ifstream infile( filename );
298 std::stringstream checkstr;
299 checkstr << infile.rdbuf();
303 if( checkstr.str().compare( file.str() ) == 0 )
307 std::ofstream outfile( filename );
308 outfile << file.str();
314 std::ofstream Cfile( filename.c_str() );
316 Cfile <<
"#include <stdlib.h>" << std::endl;
317 Cfile <<
"/* include all headers with inner loop implementation */" << std::endl;
318 Cfile <<
"#define CODEGEN_COMPILE_INNERLOOPS 2" << std::endl;
319 for(
size_t i = 0; i < filenames_.size(); ++i )
321 Cfile <<
"#include \""<< filenames_[ i ].first <<
"\"" << std::endl;
328 int exists(
const std::string& filename )
const
330 for(
size_t i = 0; i < filenames_.size(); ++i )
332 if( filename == filenames_[ i ].first )
340 bool checkAbort()
const
342 DimRangeSetType found ;
343 bool canAbort = true ;
344 for(
size_t i = 0; i < filenames_.size(); ++i )
346 found.insert( filenames_[ i ].second );
347 if ( filenames_[ i ].second > 0 )
352 typedef DimRangeSetType :: iterator iterator ;
353 for( iterator it = found.begin(); it != found.end(); ++it )
355 dimRanges_.erase( *it );
358 if( canAbort && dimRanges_.size() == 0 )
366 template <
int simdW
idth = 4 >
369 static const char* restrictKey()
371 return "__restrict__";
374 static const char* doubletype()
377 return "Dune::Fem::Double";
383 static void writePreCompHeader(std::ostream& out,
const int stage )
385 const char* codegenPreCompVar =
"CODEGEN_COMPILE_INNERLOOPS";
388 out <<
"#ifndef HEADERCHECK" << std::endl;
391 else if( stage == 0 )
393 out <<
"#else" << std::endl;
394 out <<
"#if " << codegenPreCompVar <<
" == 1" << std::endl;
395 out <<
"extern \"C\" {" << std::endl
396 <<
" extern inline" << std::endl;
397 out <<
"#endif" << std::endl;
399 else if( stage == 1 )
401 out <<
"#if " << codegenPreCompVar <<
" == 1" << std::endl;
402 out <<
" ;" << std::endl;
403 out <<
"}" << std::endl;
404 out <<
"#else" << std::endl;
410 out <<
"#endif" << std::endl;
415 static std::string generateFunctionName(
const std::string& prefix,
416 const int simdW,
const int dimRange,
417 const size_t numRows,
const size_t numCols )
419 std::stringstream funcName;
420 funcName << prefix <<
"_" << simdWidth <<
"_" << dimRange <<
"_" << numRows <<
"_" << numCols ;
421 return funcName.str();
424 static void writeInnerLoopEval(std::ostream& out,
const int simdW,
const int dimRange,
const size_t numRows,
const size_t numCols )
426 out <<
" // Loop over all quadrature points" << std::endl;
427 out <<
" for(int qp = 0; qp < " << numRows <<
" ; ++qp )" << std::endl;
428 out <<
" {" << std::endl;
440 for(
int i = 0 ; i< simdW ; ++ i )
441 out <<
" const " << doubletype() <<
" phi" << i <<
" = base" << i <<
"[ qp ];" << std::endl;
442 for(
int r = 0; r < dimRange; ++ r )
444 out <<
" result" << r <<
"[ qp ] += phi0 * dof0" << r;
445 for(
int i=1; i<simdW; ++i )
446 out <<
" + phi" << i <<
" * dof" << i << r;
447 out <<
" ;" << std::endl;
449 out <<
" }" << std::endl;
452 static void evaluateCodegen(std::ostream& out,
455 const size_t numRows,
456 const size_t numCols )
458 const std::string funcName =
459 generateFunctionName(
"evalRangesLoop", simdWidth, dimRange, numRows, numCols );
461 writePreCompHeader( out, -1 );
463 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
464 out <<
"struct EvaluateRanges<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
465 out <<
"{" << std::endl;
466 out <<
" template< class QuadratureType,"<< std::endl;
467 out <<
" class RangeVectorType," << std::endl;
468 out <<
" class RangeFactorType," << std::endl;
469 out <<
" class LocalDofVectorType>" << std::endl;
470 out <<
" static void eval( const QuadratureType& quad," << std::endl;
471 out <<
" const RangeVectorType& rangeStorageTransposed," << std::endl;
472 out <<
" const LocalDofVectorType& dofs," << std::endl;
473 out <<
" RangeFactorType &rangeVector)" << std::endl;
474 out <<
" {" << std::endl;
480 out <<
" typedef " << doubletype() <<
" Field;" << std::endl;
482 out <<
" static thread_local std::vector< Field > memory( " << numRows * dimRange <<
" );" << std::endl;
486 out <<
" Field* resultTmp = memory.data();" << std::endl;
487 out <<
" for( int i=0; i < " << numRows * dimRange <<
"; ++ i ) resultTmp[ i ] = 0;" << std::endl <<std::endl;
489 for(
int r=0; r<dimRange ; ++r )
491 out <<
" Field* result" << r <<
" = resultTmp + " << r * numRows <<
" ;" << std::endl;
495 out <<
" const Field* baseData = rangeStorageTransposed.data();" << std::endl;
501 const size_t simdCols = simdWidth * ( numCols / simdWidth );
504 out <<
" for( int col = 0, dof = 0 ; col < "<< simdCols <<
" ; col += " << simdWidth <<
", dof += " << simdWidth * dimRange<<
" )"<<std::endl;
505 out <<
" {" << std::endl;
520 out <<
" " << funcName <<
"(" << std::endl;
522 for(
int i = 0; i< simdWidth * dimRange; ++i )
523 out <<
" dofs[ dof + " << i <<
" ],";
525 for(
int i=0; i< simdWidth; ++i )
526 out <<
" baseData + ((col + " << i <<
") * " << numRows <<
")," << std::endl;
528 for(
int r = 0; r < dimRange-1; ++r)
529 out <<
"result" << r <<
", ";
530 out <<
"result" << dimRange-1 <<
" );" << std::endl;
531 out <<
" }"<< std::endl;
535 if( numCols > simdCols )
537 out <<
" // remainder iteration" << std::endl;
538 out <<
" for( int col = " << simdCols <<
", dof = " << simdCols * dimRange <<
" ; col < " << numCols <<
" ; ++col )" << std::endl;
539 out <<
" {" << std::endl;
540 for(
int r=0; r<dimRange; ++r )
541 out <<
" const " << doubletype() <<
" dof0" << r <<
" = dofs[ dof++ ];" << std::endl;
542 out <<
" const Field* base0 = baseData + (col * " << numRows <<
");" << std::endl;
544 writeInnerLoopEval( out, 1, dimRange, numRows, numCols );
545 out <<
" }" << std::endl;
549 out <<
" // store result" << std::endl;
550 out <<
" for(int row = 0; row < " << numRows <<
" ; ++row )" << std::endl;
551 out <<
" {" << std::endl;
552 out <<
" auto& result = rangeVector[ row ];" << std::endl;
553 for(
int r = 0 ; r < dimRange; ++ r )
555 out <<
" result[ " << r <<
" ] = result" << r <<
"[ row ];" << std::endl;
557 out <<
" }" << std::endl;
558 out <<
" }" << std::endl << std::endl;
562 out <<
" static void " << funcName <<
"(" << std::endl;
563 for(
int i=0; i<simdWidth; ++i )
566 for(
int r=0; r<dimRange; ++ r )
568 if( r > 0 ) out <<
" ";
569 out <<
"const " << doubletype() <<
" dof"<< i << r <<
",";
573 for(
int i=0; i<simdWidth; ++ i )
574 out <<
" const " << doubletype() <<
"* " << restrictKey() <<
" base" << i <<
"," << std::endl;
575 for(
int r=0; r<dimRange; ++ r )
577 out <<
" " << doubletype() <<
"* "<< restrictKey() <<
" result" << r;
578 if( r == dimRange-1 ) out <<
" )" << std::endl;
579 else out <<
"," << std::endl;
583 out <<
" {" << std::endl;
584 writeInnerLoopEval( out, simdWidth, dimRange, numRows, numCols );
585 out <<
" }" << std::endl;
586 out <<
"};" << std::endl;
587 writePreCompHeader( out, 2 );
590 static void writeInnerLoop(std::ostream& out,
const int simdW,
const int dimRange,
const size_t numCols )
592 for(
int i=0; i< simdW; ++i )
594 for(
int r=0; r< dimRange; ++r )
596 out <<
" const " << doubletype() <<
" fac" << i << r <<
" = rangeFactor" << i <<
"[ " << r <<
" ];" << std::endl;
600 out <<
" int rowCol = quad.cachingPoint( row ) * " << numCols <<
";" << std::endl;
601 out <<
" for(int col = 0 ; col < " << numCols <<
" ; ++ col";
604 out <<
" )" << std::endl;
605 out <<
" {" << std::endl;
606 for(
int i = 0 ; i< simdW ; ++ i )
609 out <<
" const " << doubletype() <<
" phi" << i <<
" = rangeStorage[ rowCol ][ 0 ];" << std::endl;
611 out <<
" const " << doubletype() <<
" phi" << i <<
" = base" << i <<
" [ col ];" << std::endl;
613 for(
int r = 0; r < dimRange; ++ r )
615 out <<
" dofs" << r <<
"[ col ] += phi0 * fac0" << r;
616 for(
int i=1; i<simdW; ++i )
618 out <<
" + phi" << i <<
" * fac" << i << r ;
620 out <<
" ;" << std::endl;
622 out <<
" }" << std::endl;
625 static void axpyCodegen(std::ostream& out,
626 const int dim,
const int dimRange,
const size_t numRows,
const size_t numCols )
628 const std::string funcName =
629 generateFunctionName(
"axpyRangesLoop", simdWidth, dimRange, numRows, numCols );
631 writePreCompHeader( out, -1 );
633 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
634 out <<
"struct AxpyRanges<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
635 out <<
"{" << std::endl;
638 out <<
" template< class QuadratureType,"<< std::endl;
639 out <<
" class RangeVectorType," << std::endl;
640 out <<
" class RangeFactorType," << std::endl;
641 out <<
" class LocalDofVectorType>" << std::endl;
642 out <<
" static void axpy( const QuadratureType& quad," << std::endl;
643 out <<
" const RangeVectorType& rangeStorage," << std::endl;
644 out <<
" const RangeFactorType& rangeFactors," << std::endl;
645 out <<
" LocalDofVectorType& dofs)" << std::endl;
646 out <<
" {" << std::endl;
655 out <<
" typedef " << doubletype() <<
" Field;" << std::endl;
656 out <<
" static thread_local std::vector< Field > memory( " << numCols * dimRange <<
" );" << std::endl;
660 out <<
" " << doubletype() <<
"* dofResult = memory.data();" << std::endl;
661 out <<
" for( int i=0; i < " << numCols * dimRange <<
"; ++i ) dofResult[ i ] = 0;" << std::endl << std::endl;
663 out <<
" " << doubletype() <<
"* dofs0 = dofResult;" << std::endl;
664 for(
int r = 1; r < dimRange; ++ r )
665 out <<
" " << doubletype() <<
"* dofs" << r <<
" = dofResult + " << r * numCols <<
";" << std::endl;
668 const size_t simdRows = simdWidth * (numRows / simdWidth) ;
672 out <<
" for( int row = 0; row < "<< simdRows <<
" ; row += " << int(simdWidth) <<
" )" << std::endl;
673 out <<
" {" << std::endl;
674 for(
int i=0; i<simdWidth; ++ i )
675 out <<
" const " << doubletype() <<
"* rangeFactor" << i <<
" = &rangeFactors[ row + " << i <<
" ][ 0 ];" << std::endl;
676 out <<
" " << funcName <<
"(";
677 for(
int i = 0; i < simdWidth; ++i )
681 out <<
" &rangeStorage[ quad.cachingPoint( row + " << i <<
" ) * " << numCols <<
" ][ 0 ]," << std::endl;
683 out <<
" rangeFactor0, ";
684 for(
int i=1; i<simdWidth; ++ i )
685 out <<
"rangeFactor" << i <<
",";
688 for(
int r = 0; r < dimRange; ++ r )
690 out <<
" dofs" << r ;
691 if( r == dimRange-1 )
692 out <<
" );" << std::endl;
697 out <<
" }" << std::endl;
701 if( numRows > simdRows )
703 out <<
" // remainder iteration" << std::endl;
704 out <<
" for( int row = " << simdRows <<
" ; row < " << numRows <<
" ; ++row )" << std::endl;
705 out <<
" {" << std::endl;
706 out <<
" const " << doubletype() <<
"* rangeFactor0 = &rangeFactors[ row ][ 0 ];" << std::endl;
707 writeInnerLoop( out, 1, dimRange, numCols );
708 out <<
" }" << std::endl;
712 out <<
" // sum up results (transform from variable based to point based layout)" << std::endl;
713 out <<
" for( int col = 0, dof = 0 ; col < "<< numCols <<
" ; ++col )" << std::endl;
714 out <<
" {" << std::endl;
715 for(
int r = 0 ; r < dimRange; ++ r )
716 out <<
" dofs[ dof++ ] += dofs" << r <<
"[ col ];" << std::endl;
717 out <<
" }" << std::endl;
719 out <<
" }" << std::endl << std::endl;
726 out <<
" static void " << funcName <<
"(" << std::endl;
727 out <<
" const " << doubletype() <<
"* " << restrictKey() <<
" base0," << std::endl;
728 for(
int i=1; i<simdWidth; ++ i )
729 out <<
" const " << doubletype() <<
"* " << restrictKey() <<
" base" << i <<
"," << std::endl;
730 for(
int i=0; i<simdWidth; ++ i )
731 out <<
" const " << doubletype() <<
"* " << restrictKey() <<
" rangeFactor" << i <<
"," << std::endl;
732 for(
int r = 0; r < dimRange; ++r )
734 out <<
" " << doubletype() <<
"* " << restrictKey() <<
" dofs" << r;
735 if( r == dimRange-1 )
736 out <<
" )" << std::endl;
738 out <<
"," << std::endl;
741 out <<
" {" << std::endl;
742 writeInnerLoop( out, simdWidth, dimRange, numCols );
743 out <<
" }" << std::endl;
744 out <<
"};" << std::endl;
745 writePreCompHeader( out, 2 );
748 static void writeInnerJacEvalLoop(std::ostream& out,
const int simdW,
const int dim,
749 const int dimRange,
const size_t numRows,
const size_t numCols )
751 out <<
" for(int row = 0; row < " << numRows <<
" ; ++row )" << std::endl;
752 out <<
" {" << std::endl;
777 for(
int i = 0 ; i< simdW ; ++ i )
780 for(
int d = 0 ; d < dim; ++ d )
781 out <<
" const " << doubletype() <<
" phi" << i << d <<
" = base" << i <<
"[ row * " << dim <<
" + " << d <<
" ];" << std::endl;
787 out <<
" const int idx = row * " << dim<<
";" << std::endl;
788 for(
int d = 0; d < dim ; ++ d )
790 for(
int i = 0 ; i< simdW ; ++ i )
791 out <<
" const " << doubletype() <<
" phi" << i << d <<
" = base" << i <<
"[ idx + " << d <<
" ];" << std::endl;
794 for(
int d = 0; d < dim ; ++ d )
796 for(
int r = 0; r < dimRange; ++ r )
798 out <<
" result" << r << d <<
"[ row ] += phi0" << d <<
" * dof0" << r;
799 for(
int i=1; i<simdW; ++i )
800 out <<
" + phi" << i << d <<
" * dof" << i << r;
801 out <<
" ;" << std::endl;
804 out <<
" }" << std::endl;
807 static void evaluateJacobiansCodegen(std::ostream& out,
808 const int dim,
const int dimRange,
const size_t numRows,
const size_t numCols )
810 const std::string funcName =
811 generateFunctionName(
"evalJacobianLoop", simdWidth, dimRange, numRows, numCols );
813 writePreCompHeader( out, -1 );
815 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
816 out <<
"struct EvaluateJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
817 out <<
"{" << std::endl;
818 out <<
" template< class QuadratureType,"<< std::endl;
819 out <<
" class JacobianRangeVectorType," << std::endl;
820 out <<
" class LocalDofVectorType," << std::endl;
821 out <<
" class JacobianRangeFactorType>" << std::endl;
822 out <<
" static void eval( const QuadratureType&," << std::endl;
823 out <<
" const EmptyGeometry&," << std::endl;
824 out <<
" const JacobianRangeVectorType&," << std::endl;
825 out <<
" const LocalDofVectorType&," << std::endl;
826 out <<
" JacobianRangeFactorType &)" << std::endl;
827 out <<
" {" << std::endl;
828 out <<
" std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
829 out <<
" abort();" << std::endl;
830 out <<
" }" << std::endl;
831 out <<
"};" << std::endl << std::endl;
832 out <<
"template <class BaseFunctionSet, class Geometry> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
833 out <<
"struct EvaluateJacobians<BaseFunctionSet, Geometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
834 out <<
"{" << std::endl;
835 out <<
" template< class QuadratureType,"<< std::endl;
836 out <<
" class JacobianRangeVectorType," << std::endl;
837 out <<
" class LocalDofVectorType," << std::endl;
838 out <<
" class JacobianRangeFactorType>" << std::endl;
839 out <<
" static void eval( const QuadratureType& quad," << std::endl;
840 out <<
" const Geometry& geometry," << std::endl;
841 out <<
" const JacobianRangeVectorType& jacStorage," << std::endl;
842 out <<
" const LocalDofVectorType& dofs," << std::endl;
843 out <<
" JacobianRangeFactorType& jacFactors)" << std::endl;
844 out <<
" {" << std::endl;
845 out <<
" evalJac( quad, geometry, jacStorage, dofs, jacFactors, jacFactors[ 0 ] );" << std::endl;
846 out <<
" }" << std::endl;
847 out <<
"private:" << std::endl;
848 out <<
" template< class QuadratureType,"<< std::endl;
849 out <<
" class JacobianRangeVectorType," << std::endl;
850 out <<
" class LocalDofVectorType," << std::endl;
851 out <<
" class JacobianRangeFactorType," << std::endl;
852 out <<
" class GlobalJacobianRangeType>" << std::endl;
853 out <<
" static void evalJac( const QuadratureType& quad," << std::endl;
854 out <<
" const Geometry& geometry," << std::endl;
855 out <<
" const JacobianRangeVectorType& jacStorage," << std::endl;
856 out <<
" const LocalDofVectorType& dofs," << std::endl;
857 out <<
" JacobianRangeFactorType& jacVector," << std::endl;
858 out <<
" const GlobalJacobianRangeType&)" << std::endl;
859 out <<
" {" << std::endl;
863 out <<
" typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
865 const size_t nDofs = numRows * dimRange * dim ;
866 out <<
" typedef " << doubletype() <<
" Field;" << std::endl;
867 out <<
" static thread_local std::vector< Field > memory( " << nDofs <<
" );" << std::endl;
870 for(
int d = 0; d < dim ; ++ d )
874 out <<
" Field* resultTmp" << d <<
" = memory.data() + " << d * numRows * dimRange <<
";" << std::endl;
876 out <<
" for( int i=0; i<" << numRows * dimRange <<
"; ++i ) " << std::endl;
877 out <<
" {" << std::endl;
878 for(
int d = 0; d < dim ; ++ d )
879 out <<
" resultTmp" << d <<
"[ i ] = 0;" << std::endl;
880 out <<
" }" << std::endl << std::endl;
882 for(
int d = 0; d < dim ; ++ d )
884 for(
int r=0; r<dimRange ; ++r )
886 out <<
" Field* result" << r << d <<
" = resultTmp" << d <<
" + " << r * numRows <<
" ;" << std::endl;
891 const size_t simdNumCols = simdWidth * ( numCols / simdWidth );
892 out <<
" typedef typename GlobalJacobianRangeType :: row_type JacobianRangeType;" << std::endl;
893 out <<
" const " << doubletype() <<
"* jacobians = jacStorage.data();" << std::endl;
894 if( simdNumCols > 0 )
909 out <<
" for( int col = 0, dof = 0 ; col < "<< simdNumCols <<
" ; col += " << simdWidth <<
", dof += " << simdWidth * dimRange<<
" )"<<std::endl;
910 out <<
" {" << std::endl;
943 for(
int i=0; i< simdWidth; ++ i )
945 out <<
" const "<< doubletype() <<
"* base" << i <<
" = jacobians + (" << dim * numRows <<
" * (col + "<< i <<
"));" << std::endl;
948 out <<
" " << funcName <<
"(";
949 for(
int i = 0; i< simdWidth * dimRange; ++i )
950 out <<
" dofs[ dof + " << i <<
" ],";
951 out << std::endl <<
" ";
952 for(
int i=0; i< simdWidth; ++i )
953 out <<
"base" << i <<
", ";
954 out << std::endl <<
" ";
955 for(
int d = 0; d < dim; ++ d )
957 for(
int r = 0; r < dimRange; ++r)
959 out <<
"result" << r << d;
960 if( (r == dimRange - 1) && (d == dim-1 ) ) out << std::endl;
964 out <<
" );" << std::endl;
965 out <<
" }"<< std::endl;
970 if( numCols > simdNumCols )
972 out <<
" // remainder iteration" << std::endl;
973 out <<
" for( int col = " << simdNumCols <<
", dof = " << simdNumCols * dimRange <<
" ; col < " << numCols <<
" ; ++col )" << std::endl;
974 out <<
" {" << std::endl;
975 out <<
" const "<< doubletype() <<
"* base0" <<
" = jacobians + (" << dim * numRows <<
" * col);" << std::endl;
976 for(
int r=0; r<dimRange; ++r )
977 out <<
" const " << doubletype() <<
" dof0" << r <<
" = dofs[ dof++ ];" << std::endl;
978 writeInnerJacEvalLoop( out, 1, dim, dimRange, numRows, numCols );
979 out <<
" }" << std::endl;
983 out <<
" // store result" << std::endl;
984 out <<
" JacobianRangeType tmp;" << std::endl;
985 out <<
" for(int row = 0; row < " << numRows <<
" ; ++row )" << std::endl;
986 out <<
" {" << std::endl;
987 out <<
" const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
988 out <<
" GlobalJacobianRangeType& result = jacVector[ row ];" << std::endl;
989 for(
int r = 0 ; r < dimRange; ++ r )
991 for(
int d = 0 ; d < dim; ++ d )
993 out <<
" tmp[ " << d <<
" ] = result" << r << d <<
"[ row ];" << std::endl;
1000 out <<
" gjit.mv( tmp, result[ "<< r <<
" ] );" << std::endl;
1002 out <<
" }" << std::endl;
1003 out <<
" }" << std::endl << std::endl;
1007 out <<
" static void " << funcName <<
"(" << std::endl;
1008 for(
int i=0; i<simdWidth; ++i )
1011 for(
int r=0; r<dimRange; ++ r )
1012 out <<
" const " << doubletype() <<
" dof"<< i << r <<
",";
1015 for(
int i=0; i<simdWidth; ++ i )
1016 out <<
" const " << doubletype() <<
"* " << restrictKey() <<
" base" << i <<
"," << std::endl;
1017 for(
int d=0; d<dim; ++ d )
1019 for(
int r=0; r<dimRange; ++ r )
1021 out <<
" " << doubletype() <<
"* "<< restrictKey() <<
" result" << r << d;
1022 if( (r == dimRange - 1) && (d == dim-1 ) ) out <<
" )" << std::endl;
1023 else out <<
"," << std::endl;
1028 out <<
" {" << std::endl;
1029 writeInnerJacEvalLoop( out, simdWidth, dim, dimRange, numRows, numCols );
1030 out <<
" }" << std::endl;
1031 out <<
"};" << std::endl;
1032 writePreCompHeader( out, 2 );
1035 static void writeInnerLoopAxpyJac(std::ostream& out,
const int dim,
const int dimRange,
const size_t numCols )
1037 out <<
" for( int col = 0; col < " << numCols <<
" ; ++col )" << std::endl;
1038 out <<
" {" << std::endl;
1039 for(
int d =0; d < dim; ++d )
1040 out <<
" const " << doubletype() <<
" phi" << d <<
" = base[ (col * " << dim <<
") + " << d <<
" ];" << std::endl;
1042 for(
int r = 0; r < dimRange; ++r )
1044 out <<
" result" << r <<
"[ col ] += phi0 * jacFactorInv0" << r;
1045 for(
int d=1; d < dim; ++d )
1046 out <<
" + phi" << d <<
" * jacFactorInv" << d << r;
1047 out <<
";" << std::endl;
1049 out <<
" }" << std::endl;
1052 static void axpyJacobianCodegen(std::ostream& out,
1053 const int dim,
const int dimRange,
const size_t numRows,
const size_t numCols )
1055 const std::string funcName =
1056 generateFunctionName(
"axpyJacobianLoop", simdWidth, dimRange, numRows, numCols );
1058 writePreCompHeader( out, -1 );
1060 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
1061 out <<
"struct AxpyJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
1062 out <<
"{" << std::endl;
1063 out <<
" template< class QuadratureType,"<< std::endl;
1064 out <<
" class JacobianRangeVectorType," << std::endl;
1065 out <<
" class JacobianRangeFactorType," << std::endl;
1066 out <<
" class LocalDofVectorType>" << std::endl;
1067 out <<
" static void axpy( const QuadratureType&," << std::endl;
1068 out <<
" const EmptyGeometry&," << std::endl;
1069 out <<
" const JacobianRangeVectorType&," << std::endl;
1070 out <<
" const JacobianRangeFactorType&," << std::endl;
1071 out <<
" LocalDofVectorType&)" << std::endl;
1072 out <<
" {" << std::endl;
1073 out <<
" std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
1074 out <<
" abort();" << std::endl;
1075 out <<
" }" << std::endl;
1076 out <<
"};" << std::endl << std::endl;
1077 out <<
"template <class BaseFunctionSet, class Geometry>" << std::endl;
1078 out <<
"struct AxpyJacobians<BaseFunctionSet, Geometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
1079 out <<
"{" << std::endl;
1081 out <<
" template< class QuadratureType,"<< std::endl;
1082 out <<
" class JacobianRangeVectorType," << std::endl;
1083 out <<
" class JacobianRangeFactorType," << std::endl;
1084 out <<
" class LocalDofVectorType>" << std::endl;
1085 out <<
" static void axpy( const QuadratureType& quad," << std::endl;
1086 out <<
" const Geometry& geometry," << std::endl;
1087 out <<
" const JacobianRangeVectorType& jacStorage," << std::endl;
1088 out <<
" const JacobianRangeFactorType& jacFactors," << std::endl;
1089 out <<
" LocalDofVectorType& dofs)" << std::endl;
1090 out <<
" {" << std::endl;
1091 out <<
" typedef typename JacobianRangeFactorType :: value_type JacobianRangeType;" << std::endl << std::endl;
1093 const size_t dofs = dimRange * numCols ;
1094 out <<
" typedef " << doubletype() <<
" Field;" << std::endl;
1095 out <<
" static thread_local std::vector< Field > memory( " << dofs <<
" );" << std::endl;
1099 out <<
" Field* result = memory.data();" << std::endl;
1100 out <<
" for( int i = 0 ; i < " << dofs <<
"; ++i ) result[ i ] = 0;" << std::endl << std::endl;
1102 for(
int r=0; r<dimRange; ++r )
1103 out <<
" Field* result" << r <<
" = result + " << r * numCols <<
";" << std::endl;
1111 out <<
" const Field* base = jacStorage.data();" << std::endl << std::endl;
1112 out <<
" JacobianRangeType jacFactorTmp;" << std::endl;
1113 out <<
" for( int row = 0; row < " << numRows <<
" ; ++ row )" << std::endl;
1114 out <<
" {" << std::endl;
1115 out <<
" typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
1116 out <<
" // use reference to GeometryJacobianType to make code compile with SPGrid Geometry" << std::endl;
1117 out <<
" const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
1118 out <<
" for( int r = 0; r < " << dimRange <<
" ; ++r )" << std::endl;
1119 out <<
" {"<<std::endl;
1120 out <<
" gjit.mtv( jacFactors[ row ][ r ], jacFactorTmp[ r ] );" << std::endl;
1121 out <<
" }" << std::endl << std::endl;
1132 out <<
" // calculate updates" << std::endl;
1133 out <<
" " << funcName <<
"(";
1135 out <<
" base + ( quad.localCachingPoint( row ) * " << numCols * dim <<
" )," << std::endl;
1136 for(
int i =0; i < dim; ++i )
1139 for(
int r = 0; r < dimRange; ++ r )
1140 out <<
" jacFactorTmp[ " << r <<
" ][ " << i <<
" ], ";
1144 for(
int r = 0; r < dimRange; ++ r )
1146 out <<
" result" << r;
1147 if( r == dimRange -1 )
1148 out <<
" );" << std::endl ;
1152 out <<
" }" << std::endl << std::endl;
1154 out <<
" // sum up results (transform from variable based to point based layout)" << std::endl;
1155 out <<
" for( int col = 0, dof = 0 ; col < "<< numCols <<
" ; ++col )" << std::endl;
1156 out <<
" {" << std::endl;
1157 for(
int r = 0 ; r < dimRange; ++ r )
1158 out <<
" dofs[ dof++ ] += result" << r <<
"[ col ];" << std::endl;
1159 out <<
" }" << std::endl;
1161 out <<
" }" << std::endl << std::endl;
1170 out <<
" static void " << funcName <<
"(" << std::endl;
1171 out <<
" const " << doubletype() <<
"* " << restrictKey() <<
" base," << std::endl;
1174 for(
int i=0; i<dim; ++i )
1177 for(
int r=0; r<dimRange; ++ r )
1178 out <<
"const " << doubletype() <<
" jacFactorInv"<< i << r <<
", ";
1181 for(
int r = 0; r < dimRange; ++r )
1183 out <<
" " << doubletype() <<
"* " << restrictKey() <<
" result" << r;
1184 if( r == dimRange-1 )
1185 out <<
" )" << std::endl;
1187 out <<
"," << std::endl;
1190 out <<
" {" << std::endl;
1191 writeInnerLoopAxpyJac( out, dim, dimRange, numCols );
1192 out <<
" }" << std::endl;
1193 out <<
"};" << std::endl;
1194 writePreCompHeader( out, 2 );
1200#ifndef FEM_CODEGENERATOR_REPLACEMENT
1206 inline std::string autoFilename(
const int dim,
const int polynomialOrder )
1208 std::stringstream name;
1209 name <<
"autogeneratedcode_" << dim <<
"d_k" << polynomialOrder <<
".hh";
1216 template <
class DiscreteFunctionSpace,
class Vector>
1218 const Vector& elemQuadOrders,
1219 const Vector& faceQuadOrders,
1220 const std::string& outpath =
"./",
1221 const std::string& filename =
"autogeneratedcode.hh" )
1223 using namespace Codegen;
1225 const int dimRange = DiscreteFunctionSpace :: dimRange;
1226 const int dimDomain = DiscreteFunctionSpace :: dimDomain;
1227 const int dimGrad = dimRange*dimDomain;
1229 typedef typename DiscreteFunctionSpace :: GridPartType GridPartType;
1231 typedef CachingQuadrature< GridPartType, 0 > ElementQuadratureType;
1232 typedef CachingQuadrature< GridPartType, 1 > FaceQuadratureType;
1235 std::set< int > sizes;
1236 std::set< int > quadNops;
1238 for(
const auto& entity : space )
1241 if( entity.type().isNone() )
continue;
1244 const int scalarSize = space.basisFunctionSet( entity ).size() / dimRange ;
1245 sizes.insert( scalarSize );
1247 const auto iend = space.gridPart().iend( entity );
1248 for(
auto it = space.gridPart().ibegin( entity ); it != iend; ++it )
1250 for(
const auto& quadOrd : faceQuadOrders )
1252 FaceQuadratureType quad( space.gridPart(), *it, quadOrd, FaceQuadratureType::INSIDE );
1253 quadNops.insert( quad.nop() );
1258 for(
const auto& type : space.indexSet().types(0))
1260 for(
const auto& quadOrd : elemQuadOrders )
1262 ElementQuadratureType quad( type, quadOrd );
1263 quadNops.insert( quad.nop() );
1267 std::string path ( outpath );
1268 path +=
"/autogeneratedcode";
1271 CodegenInfo& info = CodegenInfo::instance();
1272 info.setPath( path );
1275 info.addDimRange( &space, dimRange );
1277 info.addDimRange( &gradSpace, dimGrad );
1279 for(
const auto&
size : sizes )
1281 for(
const auto& quadNop : quadNops )
1283 info.addEntry(
"evalranges",
1284 CodeGeneratorType :: evaluateCodegen, dimDomain, dimRange, quadNop,
size );
1285 info.addEntry(
"evaljacobians",
1286 CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimRange, quadNop,
size );
1287 info.addEntry(
"axpyranges",
1288 CodeGeneratorType :: axpyCodegen, dimDomain, dimRange, quadNop,
size );
1289 info.addEntry(
"axpyjacobians",
1290 CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop,
size );
1292 info.addEntry(
"evalranges",
1293 CodeGeneratorType :: evaluateCodegen, dimDomain, dimGrad, quadNop,
size );
1294 info.addEntry(
"evaljacobians",
1295 CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimGrad, quadNop,
size );
1296 info.addEntry(
"axpyranges",
1297 CodeGeneratorType :: axpyCodegen, dimDomain, dimGrad, quadNop,
size );
1298 info.addEntry(
"axpyjacobians",
1299 CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop,
size );
1306 info.setFileName( filename );
1307 bool written = info.dumpInfo();
1312 std::cout <<
"Written code to " << filename << std::endl;
1317 std::ofstream file( outpath +
"/" + filename );
1321 std::string header( filename );
1322 size_t size = header.size();
1324 for(
size_t i=0; i<
size; ++i )
1326 if( header[ i ] ==
'.' )
1330 file <<
"#ifndef " << header <<
"_INCLUDED" << std::endl;
1331 file <<
"#define " << header <<
"_INCLUDED" << std::endl;
1332 file <<
"#ifndef USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1333 file <<
"#define USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1334 file <<
"#endif" << std::endl;
1335 file <<
"// this is the file containing the necessary includes for the specialized codes" << std::endl;
1336 file <<
"#define DUNE_FEM_INCLUDE_AUTOGENERATEDCODE_FILENAME_SPEC \"" << path <<
"/" << filename <<
"\"" << std::endl;
1337 file <<
"#endif" << std::endl;
1343 std::cout <<
"No changes written to " << filename << std::endl << std::endl;
static DUNE_EXPORT Object & instance(Args &&... args)
return singleton instance of given Object type.
Definition: singleton.hh:123
A few common exception classes.
#define DUNE_THROW(E, m)
Definition: exceptions.hh:218
Dune namespace.
Definition: alignedallocator.hh:13
constexpr std::integral_constant< std::size_t, sizeof...(II)> size(std::integer_sequence< T, II... >)
Return the size of the sequence.
Definition: integersequence.hh:75
default code generator methods
Definition: codegen.hh:368