#ifndef DUNE_ISTL_REPARTITION_HH
#define DUNE_ISTL_REPARTITION_HH
namespace Metis
{
#if HAVE_PARMETIS && defined(REALTYPEWIDTH)
  using real_t = ::real_t;
#else
  using real_t = float;
#endif

#if HAVE_PARMETIS && defined(IDXTYPEWIDTH)
  using idx_t = ::idx_t;
#elif HAVE_PARMETIS && defined(HAVE_SCOTCH_NUM_TYPE)
  using idx_t = SCOTCH_Num;
#else
  using idx_t = std::size_t;
#endif
}
  template<class G, class T1, class T2>
  void fillIndexSetHoles(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm)
  {
    typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet;
    typedef typename IndexSet::LocalIndex::Attribute Attribute;

    IndexSet& indexSet = oocomm.indexSet();
    const auto& lookup = oocomm.globalLookup();

    // Count the graph vertices that still lack a global index
    std::size_t sum=0, needed = graph.noVertices()-indexSet.size();
    std::vector<std::size_t> neededall(oocomm.communicator().size(), 0);

    oocomm.communicator().allgather(&needed, 1, &(neededall[0]));
    for(int i=0; i<oocomm.communicator().size(); ++i)
      sum += neededall[i];

    if(sum==0)
      return; // no holes to fill
    // Compute the maximum global index currently in use
    T1 maxgi=0;
    auto end = indexSet.end();
    for(auto it = indexSet.begin(); it != end; ++it)
      maxgi = std::max(maxgi, it->global());
    // Process p creates new, owned global indices consecutively,
    // starting at maxgi+1 plus the sum of neededall[i] for all i<p
    maxgi = oocomm.communicator().max(maxgi);
    ++maxgi; // start with the next free index

    for(int i=0; i<oocomm.communicator().rank(); ++i)
      maxgi = maxgi + neededall[i];
    // Remember the global indices of the remote index lists, as the
    // pointers into the index set become invalid during the resize
    std::map<int,SLList<std::pair<T1,Attribute> > > globalIndices;
    storeGlobalIndicesOfRemoteIndices(globalIndices, oocomm.remoteIndices());
    indexSet.beginResize();
    for(auto vertex = graph.begin(), vend = graph.end(); vertex != vend; ++vertex) {
      const typename IndexSet::IndexPair* pair = lookup.pair(*vertex);
      if(pair==0)
        // This vertex has no global index yet: add an owned, non-public one
        indexSet.add(maxgi++, typename IndexSet::LocalIndex(*vertex, OwnerOverlapCopyAttributeSet::owner, false));
    }
    indexSet.endResize();

    repairLocalIndexPointers(globalIndices, oocomm.remoteIndices(), indexSet);

    oocomm.freeGlobalLookup();
    oocomm.buildGlobalLookup();

    std::cout<<"Holes are filled!"<<std::endl;
    std::cout<<oocomm.communicator().rank()<<": "<<oocomm.indexSet()<<std::endl;
  }
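  // Usage sketch (a minimal, hypothetical setup; `mat` and `BCRSMat` are
  // assumptions, not names from this header):
  //
  //   Dune::OwnerOverlapCopyCommunication<int,int> oocomm(MPI_COMM_WORLD);
  //   Dune::Amg::MatrixGraph<BCRSMat> graph(mat);
  //   oocomm.buildGlobalLookup(graph.noVertices());
  //   Dune::fillIndexSetHoles(graph, oocomm); // now every vertex has a global index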
  class ParmetisDuneIndexMap
  {
  public:
    template<class Graph, class OOComm>
    ParmetisDuneIndexMap(const Graph& graph, const OOComm& com);
    int toParmetis(int i) const
    {
      return duneToParmetis[i];
    }
    int toLocalParmetis(int i) const
    {
      return duneToParmetis[i] - base_;
    }
    int operator[](int i) const
    {
      return duneToParmetis[i];
    }
    int toDune(int i) const
    {
      return parmetisToDune[i];
    }
    std::vector<int>::size_type numOfOwnVtx() const
    {
      return parmetisToDune.size();
    }
    Metis::idx_t* vtxDist()
    {
      return &(vtxDist_[0]);
    }
    int globalOwnerVertices;
  private:
    int base_; // first ParMETIS index owned by this process
    std::vector<int> duneToParmetis;
    std::vector<int> parmetisToDune;
    // The vertices of process i are vtxDist_[i] to vtxDist_[i+1]-1
    std::vector<Metis::idx_t> vtxDist_;
  };
  template<class G, class OOComm>
  ParmetisDuneIndexMap::ParmetisDuneIndexMap(const G& graph, const OOComm& oocomm)
    : duneToParmetis(graph.noVertices(), -1), vtxDist_(oocomm.communicator().size()+1)
  {
    int npes = oocomm.communicator().size(), mype = oocomm.communicator().rank();

    typedef typename OOComm::OwnerSet OwnerSet;

    // Count the vertices owned by this process
    int numOfOwnVtx = 0;
    auto end = oocomm.indexSet().end();
    for(auto index = oocomm.indexSet().begin(); index != end; ++index) {
      if (OwnerSet::contains(index->local().attribute())) {
        ++numOfOwnVtx;
      }
    }
    parmetisToDune.resize(numOfOwnVtx);
    std::vector<int> globalNumOfVtx(npes);
    // Communicate the number of owner vertices of every process
    MPI_Allgather(&numOfOwnVtx, 1, MPI_INT, &(globalNumOfVtx[0]), 1, MPI_INT, oocomm.communicator());

    // Compute the ParMETIS vertex distribution and this process' base index
    int base = 0;
    vtxDist_[0] = 0;
    for(int i=0; i<npes; i++) {
      if (i<mype) {
        base += globalNumOfVtx[i];
      }
      vtxDist_[i+1] = vtxDist_[i] + globalNumOfVtx[i];
    }
    globalOwnerVertices = vtxDist_[npes];
    base_ = base;
    std::cout << oocomm.communicator().rank() << " vtxDist: ";
    for(int i=0; i<=npes; ++i)
      std::cout << vtxDist_[i] << " ";
    std::cout << std::endl;
    // Assign consecutive ParMETIS indices to the owner vertices and record
    // the mapping in both directions
    auto vend = graph.end();
    for(auto vertex = graph.begin(); vertex != vend; ++vertex) {
      const typename OOComm::ParallelIndexSet::IndexPair* index = oocomm.globalLookup().pair(*vertex);
      assert(index);
      if (OwnerSet::contains(index->local().attribute())) {
        parmetisToDune[base-base_] = index->local();
        duneToParmetis[index->local()] = base++;
      }
    }
    std::cout << oocomm.communicator().rank() << ": before ";
    for(std::size_t i=0; i<duneToParmetis.size(); ++i)
      std::cout << duneToParmetis[i] << " ";
    std::cout << std::endl;

    // Make the mapping known on the copy/overlap vertices, too
    oocomm.copyOwnerToAll(duneToParmetis, duneToParmetis);

    std::cout << oocomm.communicator().rank() << ": after ";
    for(std::size_t i=0; i<duneToParmetis.size(); ++i)
      std::cout << duneToParmetis[i] << " ";
    std::cout << std::endl;
  }
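  // Illustration of the mapping (made-up values): with two processes owning
  // 3 and 2 vertices, vtxDist() is {0, 3, 5}; process 1 numbers its owner
  // vertices 3 and 4, so for a local Dune index i on process 1:
  //
  //   toParmetis(i)      -> global ParMETIS index, e.g. 3
  //   toLocalParmetis(i) -> ParMETIS index relative to base_, e.g. 0
  //   toDune(0)          -> the local Dune index of ParMETIS vertex base_+0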
  struct RedistributeInterface
    : public Interface
  {
    void setCommunicator(MPI_Comm comm)
    {
      communicator_ = comm;
    }
    template<class Flags, class IS>
    void buildSendInterface(const std::vector<int>& toPart, const IS& idxset)
    {
      std::map<int,int> sizes;
      // Count the indices sent to each target process
      for(auto i=idxset.begin(), end=idxset.end(); i!=end; ++i)
        if(Flags::contains(i->local().attribute()))
          ++sizes[toPart[i->local()]];

      // Allocate the send interfaces
      for(auto i=sizes.begin(), end=sizes.end(); i!=end; ++i)
        interfaces()[i->first].first.reserve(i->second);

      // Insert the local indices into the interfaces
      for(auto i=idxset.begin(), end=idxset.end(); i!=end; ++i)
        if(Flags::contains(i->local().attribute()))
          interfaces()[toPart[i->local()]].first.add(i->local());
    }
    void reserveSpaceForReceiveInterface(int proc, int size)
    {
      interfaces()[proc].second.reserve(size);
    }
    void addReceiveIndex(int proc, std::size_t idx)
    {
      interfaces()[proc].second.add(idx);
    }
    template<typename TG>
    void buildReceiveInterface(std::vector<std::pair<TG,int> >& indices)
    {
      std::size_t i=0;
      for(auto idx=indices.begin(); idx!=indices.end(); ++idx) {
        interfaces()[idx->second].second.add(i++);
      }
    }
    ~RedistributeInterface()
    {}
  };
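  // Typical call order (a sketch; setPartition maps each local index to its
  // target rank, as computed in buildCommunication further below):
  //
  //   RedistributeInterface redist;
  //   redist.buildSendInterface<OwnerSet>(setPartition, oocomm.indexSet());
  //   redist.reserveSpaceForReceiveInterface(from, size); // once per sender
  //   redist.addReceiveIndex(from, localIndex);           // per received index
  //   redist.setCommunicator(oocomm.communicator());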
  template<class GI>
  void createSendBuf(std::vector<GI>& ownerVec, std::set<GI>& overlapVec, std::set<int>& neighbors,
                     char *sendBuf, int buffersize, MPI_Comm comm)
  {
    // Pack the owner vertices: their count followed by the global indices
    std::size_t s = ownerVec.size();
    int pos = 0;
    MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
    MPI_Pack(&(ownerVec[0]), s, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
    // Pack the overlap vertices
    s = overlapVec.size();
    MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
    for(auto i=overlapVec.begin(), end=overlapVec.end(); i != end; ++i)
      MPI_Pack(const_cast<GI*>(&(*i)), 1, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm);
    // Pack the neighbour ranks
    s = neighbors.size();
    MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm);
    for(auto i=neighbors.begin(), end=neighbors.end(); i != end; ++i)
      MPI_Pack(const_cast<int*>(&(*i)), 1, MPI_INT, sendBuf, buffersize, &pos, comm);
  }
  template<class GI>
  void saveRecvBuf(char *recvBuf, int bufferSize, std::vector<std::pair<GI,int> >& ownerVec,
                   std::set<GI>& overlapVec, std::set<int>& neighbors, RedistributeInterface& inf,
                   int from, MPI_Comm comm)
  {
    std::size_t size;
    int pos = 0;
    // Unpack the owner vertices
    MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
    inf.reserveSpaceForReceiveInterface(from, size);
    ownerVec.reserve(ownerVec.size()+size);
    for(; size!=0; --size) {
      GI gi;
      MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
      ownerVec.push_back(std::make_pair(gi,from));
    }
    // Unpack the overlap vertices
    MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
    typename std::set<GI>::iterator ipos = overlapVec.begin();
    for(; size!=0; --size) {
      GI gi;
      MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm);
      ipos = overlapVec.insert(ipos, gi);
    }
    // Unpack the neighbour ranks
    MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm);
    typename std::set<int>::iterator npos = neighbors.begin();
    for(; size!=0; --size) {
      int n;
      MPI_Unpack(recvBuf, bufferSize, &pos, &n, 1, MPI_INT, comm);
      npos = neighbors.insert(npos, n);
    }
  }
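  // Wire format shared by createSendBuf and saveRecvBuf; the unpack order
  // must mirror the pack order exactly:
  //
  //   [#owner : size_t][owner global indices : GI ...]
  //   [#overlap : size_t][overlap global indices : GI ...]
  //   [#neighbors : size_t][neighbor ranks : int ...]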
  template<typename T>
  void getDomain(const MPI_Comm& comm, T *part, int numOfOwnVtx, int nparts,
                 int *myDomain, std::vector<int> &domainMapping)
  {
    int i, j;
    int npes, mype;
    MPI_Comm_size(comm, &npes);
    MPI_Comm_rank(comm, &mype);
    MPI_Status status;

    std::vector<int> domain(nparts, 0);   // #vertices of each domain on this process
    std::vector<int> assigned(npes, 0);   // flag: process already got a domain
    // Initialize the domain mapping to "unassigned"
    domainMapping.assign(domainMapping.size(), -1);

    // Count the occurrence of domains on this process
    for (i=0; i<numOfOwnVtx; i++) {
      domain[part[i]]++;
    }

    // Each row of this matrix holds the domain counts of one process
    std::vector<int> domainMatrix(npes * nparts, -1);

    // Initialize the send buffer with our own domain counts
    int *buf = new int[nparts];
    for (i=0; i<nparts; i++) {
      buf[i] = domain[i];
      domainMatrix[mype*nparts+i] = domain[i];
    }

    // Exchange the counts in a ring; npes-1 communications suffice
    int pe = 0;
    int src = (mype-1+npes)%npes;
    int dest = (mype+1)%npes;
    for (i=0; i<npes-1; i++) {
      MPI_Sendrecv_replace(buf, nparts, MPI_INT, dest, 0, src, 0, comm, &status);
      // pe is the process the received buffer originates from
      pe = ((mype-1-i)+npes)%npes;
      for(j=0; j<nparts; j++) {
        domainMatrix[pe*nparts+j] = buf[j];
      }
    }
    delete[] buf;

    // Assign each domain to the unassigned process holding most of its vertices
    int maxOccurance = 0;
    pe = -1;
    std::set<std::size_t> unassigned;

    for(i=0; i<nparts; i++) {
      for(j=0; j<npes; j++) {
        if (assigned[j]==0) {
          if (maxOccurance < domainMatrix[j*nparts+i]) {
            maxOccurance = domainMatrix[j*nparts+i];
            pe = j;
          }
        }
      }
      if (pe!=-1) {
        domainMapping[i] = pe;
        assigned[pe] = 1;
        if (mype==pe)
          *myDomain = i;
        pe = -1;
      }
      else
        unassigned.insert(i);
      maxOccurance = 0;
    }

    // Hand the domains that found no process to the still-free processes
    typename std::vector<int>::iterator next_free = assigned.begin();

    for(auto udomain = unassigned.begin(), end = unassigned.end(); udomain != end; ++udomain)
    {
      next_free = std::find_if(next_free, assigned.end(),
                               std::bind(std::less<int>(), std::placeholders::_1, 1));
      assert(next_free != assigned.end());
      domainMapping[*udomain] = next_free-assigned.begin();
      *next_free = 1;
    }
  }
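  // Worked example (made-up numbers): for npes=2, nparts=2 and a gathered
  // domainMatrix of {4,1} on process 0 and {2,3} on process 1, domain 0 is
  // assigned to process 0 (4 > 2) and domain 1 to process 1 (3 > 1), i.e.
  // domainMapping = {0, 1}, and each process keeps most of its data local.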
  struct SortFirst
  {
    template<class T>
    bool operator()(const T& t1, const T& t2) const
    {
      return t1 < t2;
    }
  };
  template<class GI>
  void mergeVec(std::vector<std::pair<GI,int> >& ownerVec, std::set<GI>& overlapSet)
  {
    // Sanity check: the owner vector must not contain duplicate global indices
    if(ownerVec.size()>0)
    {
      auto old = ownerVec.begin();
      for(auto i=old+1, end=ownerVec.end(); i != end; old=i++)
      {
        if(i->first==old->first)
        {
          std::cerr<<"Value at index "<<old-ownerVec.begin()<<" is the same as at index "
                   <<i-ownerVec.begin()<<" ["<<old->first<<","<<old->second<<"]==["
                   <<i->first<<","<<i->second<<"]"<<std::endl;
        }
      }
    }

    // Remove all indices that are already owner vertices from the overlap set
    auto v=ownerVec.begin(), vend=ownerVec.end();
    for(auto s=overlapSet.begin(), send=overlapSet.end(); s!=send;)
    {
      while(v!=vend && v->first<*s) ++v;
      if(v!=vend && v->first==*s) {
        // Advance before erasing to keep the iterator valid
        auto tmp = s;
        ++s;
        overlapSet.erase(tmp);
      }
      else
        ++s;
    }
  }
  template<class OwnerSet, class Graph, class IS, class GI>
  void getNeighbor(const Graph& g, std::vector<int>& part,
                   typename Graph::VertexDescriptor vtx, const IS& indexSet,
                   int toPe, std::set<GI>& neighbor, std::set<int>& neighborProcs)
  {
    for(auto edge=g.beginEdges(vtx), end=g.endEdges(vtx); edge!=end; ++edge)
    {
      const typename IS::IndexPair* pindex = indexSet.pair(edge.target());
      assert(pindex);
      // Targets that end up elsewhere or are not owned become overlap
      if(part[pindex->local()]!=toPe || !OwnerSet::contains(pindex->local().attribute()))
      {
        neighbor.insert(pindex->global());
        neighborProcs.insert(part[pindex->local()]);
      }
    }
  }
  template<class T, class I>
  void my_push_back(std::vector<T>& ownerVec, const I& index, [[maybe_unused]] int proc)
  {
    ownerVec.push_back(index);
  }

  template<class T, class I>
  void my_push_back(std::vector<std::pair<T,int> >& ownerVec, const I& index, int proc)
  {
    ownerVec.push_back(std::make_pair(index,proc));
  }

  template<class T>
  void reserve(std::vector<T>&, RedistributeInterface&, int)
  {}

  template<class T>
  void reserve(std::vector<std::pair<T,int> >& ownerVec, RedistributeInterface& redist, int proc)
  {
    redist.reserveSpaceForReceiveInterface(proc, ownerVec.size());
  }
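  // The my_push_back/reserve overloads above let getOwnerOverlapVec work with
  // both plain global-index vectors (send side, no interface bookkeeping) and
  // (global index, rank) pairs (receive side, which must reserve interface
  // space for the sending process).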
  template<class OwnerSet, class G, class IS, class T, class GI>
  void getOwnerOverlapVec(const G& graph, std::vector<int>& part, IS& indexSet,
                          [[maybe_unused]] int myPe, int toPe, std::vector<T>& ownerVec,
                          std::set<GI>& overlapSet, RedistributeInterface& redist,
                          std::set<int>& neighborProcs)
  {
    for(auto index = indexSet.begin(); index != indexSet.end(); ++index) {
      // Only owner vertices are part of the ParMETIS graph
      if(OwnerSet::contains(index->local().attribute()))
      {
        if(part[index->local()]==toPe)
        {
          getNeighbor<OwnerSet>(graph, part, index->local(), indexSet,
                                toPe, overlapSet, neighborProcs);
          my_push_back(ownerVec, index->global(), toPe);
        }
      }
    }
    reserve(ownerVec, redist, toPe);
  }
  template<class F, class IS>
  inline bool isOwner(IS& indexSet, int index)
  {
    const typename IS::IndexPair* pindex = indexSet.pair(index);
    assert(pindex);
    return F::contains(pindex->local().attribute());
  }
  class BaseEdgeFunctor
  {
  public:
    BaseEdgeFunctor(Metis::idx_t* adj, const ParmetisDuneIndexMap& data)
      : i_(), adj_(adj), data_(data)
    {}

    template<class T>
    void operator()(const T& edge)
    {
      // Store the ParMETIS index of the edge target
      adj_[i_] = data_.toParmetis(edge.target());
      ++i_;
    }

    std::size_t index()
    {
      return i_;
    }

  protected:
    std::size_t i_;
    Metis::idx_t* adj_;
    const ParmetisDuneIndexMap& data_;
  };
  template<class G>
  class EdgeFunctor
    : public BaseEdgeFunctor
  {
  public:
    EdgeFunctor(Metis::idx_t* adj, const ParmetisDuneIndexMap& data, std::size_t)
      : BaseEdgeFunctor(adj, data)
    {}

    Metis::idx_t* getWeights()
    {
      return NULL;
    }
    void free()
    {}
  };
  template<class G, class V, class E, class VM, class EM>
  class EdgeFunctor<Dune::Amg::PropertiesGraph<G,V,E,VM,EM> >
    : public BaseEdgeFunctor
  {
  public:
    EdgeFunctor(Metis::idx_t* adj, const ParmetisDuneIndexMap& data, std::size_t s)
      : BaseEdgeFunctor(adj, data)
    {
      weight_ = new Metis::idx_t[s];
    }

    template<class T>
    void operator()(const T& edge)
    {
      // Edges carrying a strong coupling get a higher weight
      weight_[index()] = edge.properties().depends() ? 3 : 1;
      BaseEdgeFunctor::operator()(edge);
    }
    Metis::idx_t* getWeights()
    {
      return weight_;
    }
    void free()
    {
      if(weight_!=0) {
        delete[] weight_;
        weight_ = 0;
      }
    }
  private:
    Metis::idx_t* weight_;
  };
  template<class F, class G, class IS, class EW>
  void getAdjArrays(G& graph, IS& indexSet, Metis::idx_t *xadj, EW& ew)
  {
    int j = 0;
    auto vend = graph.end();

    for(auto vertex = graph.begin(); vertex != vend; ++vertex) {
      if (isOwner<F>(indexSet,*vertex)) {
        // Record where this vertex' adjacency list starts
        auto eend = vertex.end();
        xadj[j] = ew.index();
        j++;
        for(auto edge = vertex.begin(); edge != eend; ++edge) {
          ew(edge);
        }
      }
    }
    // Sentinel entry: the end of the last adjacency list
    xadj[j] = ew.index();
  }
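  // Resulting CSR layout (a small sketch): for two owner vertices with edge
  // targets {1,2} and {0}, getAdjArrays fills
  //
  //   xadj   = {0, 2, 3}   // where each vertex' adjacency list starts
  //   adjncy = {1, 2, 0}   // ParMETIS indices of the edge targets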
  template<class G, class T1, class T2>
  bool buildCommunication(const G& graph, std::vector<int>& realparts,
                          Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
                          std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2> >& outcomm,
                          RedistributeInterface& redistInf,
                          bool verbose=false);
#ifndef METIS_VER_MAJOR
  // Backwards compatibility to METIS < 5.0: declare the C interface ourselves
  extern "C"
  {
    void METIS_PartGraphKway(int *nvtxs, Metis::idx_t *xadj, Metis::idx_t *adjncy, Metis::idx_t *vwgt,
                             Metis::idx_t *adjwgt, int *wgtflag, int *numflag, int *nparts,
                             int *options, int *edgecut, Metis::idx_t *part);

    void METIS_PartGraphRecursive(int *nvtxs, Metis::idx_t *xadj, Metis::idx_t *adjncy, Metis::idx_t *vwgt,
                                  Metis::idx_t *adjwgt, int *wgtflag, int *numflag, int *nparts,
                                  int *options, int *edgecut, Metis::idx_t *part);
  }
#endif
  template<class S, class T>
  inline void print_carray(S& os, T* array, std::size_t l)
  {
    for(T *cur=array, *end=array+l; cur!=end; ++cur)
      os<<*cur<<" ";
  }
  template<class S, class T>
  inline bool isValidGraph(std::size_t noVtx, std::size_t gnoVtx, S noEdges, T* xadj,
                           T* adjncy, bool checkSymmetry)
  {
    bool correct = true;

    using std::signbit;
    for(Metis::idx_t vtx=0; vtx<(Metis::idx_t)noVtx; ++vtx) {
      if(static_cast<S>(xadj[vtx])>noEdges || signbit(xadj[vtx])) {
        std::cerr <<"Check graph: xadj["<<vtx<<"]="<<xadj[vtx]<<" (>"
                  <<noEdges<<") out of range!"<<std::endl;
        correct = false;
      }
      if(static_cast<S>(xadj[vtx+1])>noEdges || signbit(xadj[vtx+1])) {
        std::cerr <<"Check graph: xadj["<<vtx+1<<"]="<<xadj[vtx+1]<<" (>"
                  <<noEdges<<") out of range!"<<std::endl;
        correct = false;
      }
      // Check that all edge targets are valid vertices
      for(Metis::idx_t i=xadj[vtx]; i<xadj[vtx+1]; ++i) {
        if(signbit(adjncy[i]) || ((std::size_t)adjncy[i])>gnoVtx) {
          std::cerr<<" Edge "<<adjncy[i]<<" out of range [0,"<<gnoVtx<<")"
                   <<std::endl;
          correct = false;
        }
      }
      if(checkSymmetry) {
        // Every edge (vtx,target) must have a reverse edge (target,vtx)
        for(Metis::idx_t i=xadj[vtx]; i<xadj[vtx+1]; ++i) {
          Metis::idx_t target = adjncy[i];
          bool found = false;
          for(Metis::idx_t j=xadj[target]; j<xadj[target+1]; ++j)
            if(adjncy[j]==vtx)
              found = true;
          if(!found) {
            std::cerr<<"Edge ("<<target<<","<<vtx<<") has no symmetric counterpart!"<<std::endl;
            correct = false;
          }
        }
      }
    }
    return correct;
  }
  template<class M, class T1, class T2>
  bool commGraphRepartition(const M& mat, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
                            Metis::idx_t nparts,
                            std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2> >& outcomm,
                            RedistributeInterface& redistInf,
                            bool verbose=false)
  {
    if(verbose && oocomm.communicator().rank()==0)
      std::cout<<"Repartitioning from "<<oocomm.communicator().size()
               <<" to "<<nparts<<" parts"<<std::endl;
    Timer time;
    int rank = oocomm.communicator().rank();
#if !HAVE_PARMETIS
    int* part = new int[1];
    part[0] = 0;
#else
    Metis::idx_t* part = new Metis::idx_t[1]; // where all our data moves to
#endif
      // Build a graph with one vertex per process and edges to all neighbours
      int noNeighbours = oocomm.remoteIndices().neighbours();

      Metis::idx_t *xadj = new Metis::idx_t[2];
      Metis::idx_t *vtxdist = new Metis::idx_t[oocomm.communicator().size()+1];
      Metis::idx_t *adjncy = new Metis::idx_t[noNeighbours];
      Metis::idx_t *vwgt = 0;
      Metis::idx_t *adjwgt = 0;

      // Each process owns exactly one vertex of the communication graph
      for(int i=0; i<oocomm.communicator().size(); ++i)
        vtxdist[i] = i;
      vtxdist[oocomm.communicator().size()] = oocomm.communicator().size();

      xadj[0] = 0;
      xadj[1] = noNeighbours;
      // Fill the adjacency (and weight) arrays from the remote indices
      Metis::idx_t* adjp = adjncy;

#ifdef USE_WEIGHTS
      vwgt = new Metis::idx_t[1];
      vwgt[0] = mat.N(); // weight is the number of rows

      adjwgt = new Metis::idx_t[noNeighbours];
      Metis::idx_t* adjwp = adjwgt;
#endif

      for(auto n = oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end(); ++n)
        if(n->first != rank) {
          *adjp = n->first;
          ++adjp;
        }
      assert(isValidGraph(vtxdist[rank+1]-vtxdist[rank],
                          vtxdist[oocomm.communicator().size()],
                          noNeighbours, xadj, adjncy, false));
      [[maybe_unused]] Metis::idx_t wgtflag=0;
      Metis::idx_t numflag=0;
      Metis::idx_t edgecut;

      Metis::real_t *tpwgts = new Metis::real_t[nparts];
      for(int i=0; i<nparts; ++i)
        tpwgts[i] = 1.0/nparts;
      MPI_Comm comm = oocomm.communicator();
      oocomm.communicator().barrier();
      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Creating comm graph took "<<time.elapsed()<<std::endl;
#ifdef PARALLEL_PARTITION
      Metis::real_t ubvec = 1.15;
      int ncon = 1;
      int options[5] = {0, 1, 15, 0, 0}; // these are not used!?

      // Partition the communication graph in parallel
      ParMETIS_V3_PartKway(vtxdist, xadj, adjncy,
                           vwgt, adjwgt, &wgtflag,
                           &numflag, &ncon, &nparts, tpwgts, &ubvec, options, &edgecut, part,
                           &comm);
      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"ParMETIS took "<<time.elapsed()<<std::endl;
#else
      // Gather the graph on one process and partition it with sequential METIS
      Timer time1;
      std::size_t gnoedges = 0;
      int* noedges = new int[oocomm.communicator().size()];
      Dune::dverb<<"noNeighbours: "<<noNeighbours<<std::endl;
      // Gather the number of edges of every process
      MPI_Allgather(&noNeighbours,1,MPI_INT,noedges,1, MPI_INT,oocomm.communicator());

      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Gathering noedges took "<<time1.elapsed()<<std::endl;
      Metis::idx_t noVertices = vtxdist[oocomm.communicator().size()];
      Metis::idx_t *gxadj = 0;
      Metis::idx_t *gvwgt = 0;
      Metis::idx_t *gadjncy = 0;
      Metis::idx_t *gadjwgt = 0;
      Metis::idx_t *gpart = 0;
      std::size_t localNoVtx = vtxdist[rank+1]-vtxdist[rank];
      // Global xadj length: all local xadj arrays concatenated
      std::size_t gxadjlen = vtxdist[oocomm.communicator().size()]-vtxdist[0]+oocomm.communicator().size();

      // Displacement and length arrays for the gather operations
      int* displ  = new int[oocomm.communicator().size()];
      int* xdispl = new int[oocomm.communicator().size()];
      int* noxs   = new int[oocomm.communicator().size()];
      int* vdispl = new int[oocomm.communicator().size()];
      int* novs   = new int[oocomm.communicator().size()];
      for(int i=0; i < oocomm.communicator().size(); ++i) {
        noxs[i] = vtxdist[i+1]-vtxdist[i]+1;
        novs[i] = vtxdist[i+1]-vtxdist[i];
      }
      // Compute the displacements for the concatenated xadj and vertex arrays
      Metis::idx_t *so = vtxdist;
      int offset = 0;
      for(int *xcurr = xdispl, *vcurr = vdispl, *end = vdispl+oocomm.communicator().size();
          vcurr != end; ++vcurr, ++xcurr, ++so, ++offset) {
        *vcurr = *so;
        *xcurr = offset + *so;
      }
      // Compute the edge displacements and the total number of edges
      int *pdispl = displ;
      *pdispl = 0;
      for(int *curr=noedges, *end=noedges+oocomm.communicator().size()-1;
          curr!=end; ++curr) {
        *(pdispl+1) = *pdispl + *curr;
        ++pdispl;
      }
      gnoedges = 0;
      for(int *curr=noedges, *end=noedges+oocomm.communicator().size();
          curr!=end; ++curr)
        gnoedges += *curr;
      Dune::dinfo<<"gxadjlen: "<<gxadjlen<<" noVertices: "<<noVertices
                 <<" gnoedges: "<<gnoedges<<std::endl;
      gxadj = new Metis::idx_t[gxadjlen];
      gpart = new Metis::idx_t[noVertices];
#ifdef USE_WEIGHTS
      gvwgt = new Metis::idx_t[noVertices];
      gadjwgt = new Metis::idx_t[gnoedges];
#endif
      gadjncy = new Metis::idx_t[gnoedges];

      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Preparing global graph took "<<time1.elapsed()<<std::endl;
      // Gather the local graphs on every process
      MPI_Allgatherv(xadj, 2, MPITraits<Metis::idx_t>::getType(),
                     gxadj, noxs, xdispl, MPITraits<Metis::idx_t>::getType(),
                     comm);
      MPI_Allgatherv(adjncy, noNeighbours, MPITraits<Metis::idx_t>::getType(),
                     gadjncy, noedges, displ, MPITraits<Metis::idx_t>::getType(),
                     comm);
#ifdef USE_WEIGHTS
      MPI_Allgatherv(adjwgt, noNeighbours, MPITraits<Metis::idx_t>::getType(),
                     gadjwgt, noedges, displ, MPITraits<Metis::idx_t>::getType(),
                     comm);
      MPI_Allgatherv(vwgt, localNoVtx, MPITraits<Metis::idx_t>::getType(),
                     gvwgt, novs, vdispl, MPITraits<Metis::idx_t>::getType(),
                     comm);
#endif
      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Gathering global graph data took "<<time1.elapsed()<<std::endl;
      // Merge the gathered xadj arrays: shift each process' offsets so that
      // they continue where the previous process' entries ended
      Metis::idx_t increment = vtxdist[1];
      Metis::idx_t *start = gxadj+1;
      for(int i=1; i<oocomm.communicator().size(); ++i) {
        int offset = 0;
        int lprev = vtxdist[i]-vtxdist[i-1];
        int l = vtxdist[i+1]-vtxdist[i];
        start += lprev;
        assert((start+l+offset)-gxadj <= static_cast<Metis::idx_t>(gxadjlen));
        increment = *(start-1);
        std::transform(start+offset, start+l+offset, start,
                       std::bind(std::plus<Metis::idx_t>(), std::placeholders::_1, increment));
      }
      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Postprocessing global graph data took "<<time1.elapsed()<<std::endl;

      assert(isValidGraph(noVertices, noVertices, gnoedges,
                          gxadj, gadjncy, true));

      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Creating graph on 1 process took "<<time.elapsed()<<std::endl;
#if METIS_VER_MAJOR >= 5
      Metis::idx_t ncon = 1;
      Metis::idx_t moptions[METIS_NOPTIONS];
      METIS_SetDefaultOptions(moptions);
      moptions[METIS_OPTION_NUMBERING] = numflag;
      METIS_PartGraphRecursive(&noVertices, &ncon, gxadj, gadjncy, gvwgt, NULL, gadjwgt,
                               &nparts, NULL, NULL, moptions, &edgecut, gpart);
#else
      int options[5] = {0, 1, 1, 3, 3};
      // Call METIS
      METIS_PartGraphRecursive(&noVertices, gxadj, gadjncy, gvwgt, gadjwgt, &wgtflag,
                               &numflag, &nparts, options, &edgecut, gpart);
#endif
      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"METIS took "<<time.elapsed()<<std::endl;

      // Scatter the computed partition back to all processes
      MPI_Scatter(gpart, 1, MPITraits<Metis::idx_t>::getType(), part, 1,
                  MPITraits<Metis::idx_t>::getType(), 0, comm);
#endif

      Dune::dinfo<<" repart "<<rank<<" -> "<<part[0]<<std::endl;
      std::vector<int> realpart(mat.N(), part[0]);

      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Scattering repartitioning took "<<time.elapsed()<<std::endl;

      oocomm.buildGlobalLookup(mat.N());

      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Filling index set took "<<time.elapsed()<<std::endl;
      noNeighbours = oocomm.communicator().sum(noNeighbours)
                     / oocomm.communicator().size();
      if(oocomm.communicator().rank()==0)
        std::cout<<"Average no neighbours was "<<noNeighbours<<std::endl;

      bool ret = buildCommunication(graph, realpart, oocomm, outcomm, redistInf,
                                    verbose);
      if(verbose && oocomm.communicator().rank()==0)
        std::cout<<"Building index sets took "<<time.elapsed()<<std::endl;
      return ret;
  }
  template<class G, class T1, class T2>
  bool graphRepartition(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
                        Metis::idx_t nparts,
                        std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2> >& outcomm,
                        RedistributeInterface& redistInf,
                        bool verbose=false)
  {
    Timer time;

    MPI_Comm comm = oocomm.communicator();
    oocomm.buildGlobalLookup(graph.noVertices());
    fillIndexSetHoles(graph, oocomm);

    if(verbose && oocomm.communicator().rank()==0)
      std::cout<<"Filling holes took "<<time.elapsed()<<std::endl;
    time.reset();

    double t1=0.0, t2=0.0, t3=0.0, t4=0.0, tSum=0.0;

    int mype = oocomm.communicator().rank();

    assert(nparts <= static_cast<Metis::idx_t>(oocomm.communicator().size()));
    typedef Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm;
    typedef typename OOComm::OwnerSet OwnerSet;

    // Create the mapping between the Dune and the ParMETIS vertex numbering
    ParmetisDuneIndexMap indexMap(graph, oocomm);
    Metis::idx_t *part = new Metis::idx_t[indexMap.numOfOwnVtx()];
    for(std::size_t i=0; i < indexMap.numOfOwnVtx(); ++i)
      part[i] = mype;

#if !HAVE_PARMETIS
    if(oocomm.communicator().rank()==0 && nparts>1)
      std::cerr<<"ParMETIS not activated. Will repartition to 1 domain instead of requested "
               <<nparts<<" domains."<<std::endl;
    // Everything goes to process 0
    for(std::size_t i=0; i<indexMap.numOfOwnVtx(); ++i)
      part[i] = 0;
#else
    // Create the CSR arrays xadj and adjncy expected by ParMETIS
    Metis::idx_t *xadj = new Metis::idx_t[indexMap.numOfOwnVtx()+1];
    Metis::idx_t *adjncy = new Metis::idx_t[graph.noEdges()];
    EdgeFunctor<G> ef(adjncy, indexMap, graph.noEdges());
    getAdjArrays<OwnerSet>(graph, oocomm.globalLookup(), xadj, ef);

    // Set up the remaining ParMETIS parameters
    Metis::idx_t numflag=0, wgtflag=0, options[3], edgecut=0, ncon=1;
    Metis::real_t *tpwgts = new Metis::real_t[nparts];
    for(int i=0; i<nparts; ++i)
      tpwgts[i] = 1.0/nparts;
    Metis::real_t ubvec[1];

    // Use edge weights only if the graph provides them
    wgtflag = (ef.getWeights()!=NULL) ? 1 : 0;
    std::cout<<std::endl;
    std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
             <<options[1]<<" "<<options[2]<<"}, Ncon: "
             <<ncon<<", Nparts: "<<nparts<<std::endl;
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<"Preparing for parmetis took "<<time.elapsed()<<std::endl;
    time.reset();
    ParMETIS_V3_PartKway(indexMap.vtxDist(), xadj, adjncy,
                         NULL, ef.getWeights(), &wgtflag,
                         &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part,
                         &const_cast<MPI_Comm&>(comm));
    std::cout<<std::endl;
    std::cout<<"ParMETIS_V3_PartKway reported a cut of "<<edgecut<<std::endl;
    std::cout<<std::endl;

    std::cout<<mype<<": PARMETIS-Result: ";
    for(int i=0; i < indexMap.vtxDist()[mype+1]-indexMap.vtxDist()[mype]; ++i) {
      std::cout<<part[i]<<" ";
    }
    std::cout<<std::endl;
    std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {"
             <<options[1]<<" "<<options[2]<<"}, Ncon: "
             <<ncon<<", Nparts: "<<nparts<<std::endl;
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<"Parmetis took "<<time.elapsed()<<std::endl;
    time.reset();
#endif
    // Compute a domain-to-process mapping that keeps data local where possible
    int myDomain = -1;
    std::vector<int> domainMapping(nparts);
    getDomain(comm, part, indexMap.numOfOwnVtx(), nparts, &myDomain, domainMapping);

    std::cout<<mype<<": myDomain: "<<myDomain<<std::endl;
    std::cout<<mype<<": DomainMapping: ";
    for(auto j : range(nparts)) {
      std::cout<<" do: "<<j<<" pe: "<<domainMapping[j]<<" ";
    }
    std::cout<<std::endl;
    // Translate the partitioning into target ranks for every local index
    std::vector<int> setPartition(oocomm.indexSet().size(), -1);
    std::size_t i = 0;
    for(auto index = oocomm.indexSet().begin(); index != oocomm.indexSet().end(); ++index)
      if(OwnerSet::contains(index->local().attribute())) {
        setPartition[index->local()] = domainMapping[part[i++]];
      }
    bool ret = buildCommunication(graph, setPartition, oocomm, outcomm, redistInf,
                                  verbose);
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<"Creating indexsets took "<<time.elapsed()<<std::endl;
    return ret;
  }
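  // Usage sketch (hypothetical types; graph must belong to oocomm's index set):
  //
  //   using Comm = Dune::OwnerOverlapCopyCommunication<int,int>;
  //   std::shared_ptr<Comm> newComm;
  //   Dune::RedistributeInterface redist;
  //   bool active = Dune::graphRepartition(graph, oocomm, nparts,
  //                                        newComm, redist, true);
  //   // active is false on ranks that hold no data after repartitioning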
  template<class G, class T1, class T2>
  bool buildCommunication(const G& graph, std::vector<int>& setPartition,
                          Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm,
                          std::shared_ptr<Dune::OwnerOverlapCopyCommunication<T1,T2> >& outcomm,
                          RedistributeInterface& redistInf,
                          bool verbose)
  {
    typedef Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm;
    typedef typename OOComm::OwnerSet OwnerSet;

    Timer time;

    // Build the send interface between the processes from the partitioning
    redistInf.buildSendInterface<OwnerSet>(setPartition, oocomm.indexSet());
    int npes = oocomm.communicator().size();
    // The processes we receive data from
    std::set<int> recvFrom;

    int mype = oocomm.communicator().rank();

    // Calculate the set of processes we send data to
    std::set<int> tsendTo;
    for(auto i=setPartition.begin(), iend = setPartition.end(); i!=iend; ++i)
      tsendTo.insert(*i);

    int noSendTo = tsendTo.size();
    int* sendTo = new int[noSendTo];
    int idx = 0;
    for(auto i=tsendTo.begin(); i != tsendTo.end(); ++i, ++idx)
      sendTo[idx] = *i;
    // Gather the number of target processes of every rank ...
    int* gnoSend = new int[oocomm.communicator().size()];
    int* gsendToDispl = new int[oocomm.communicator().size()+1];

    MPI_Allgather(&noSendTo, 1, MPI_INT, gnoSend, 1,
                  MPI_INT, oocomm.communicator());

    // ... compute the displacements ...
    int totalNoRecv = 0;
    for(int i=0; i<npes; ++i)
      totalNoRecv += gnoSend[i];

    int* gsendTo = new int[totalNoRecv];

    gsendToDispl[0] = 0;
    for(int i=0; i<npes; ++i)
      gsendToDispl[i+1] = gsendToDispl[i]+gnoSend[i];

    // ... and gather the complete send-to lists
    MPI_Allgatherv(sendTo, noSendTo, MPI_INT, gsendTo, gnoSend, gsendToDispl,
                   MPI_INT, oocomm.communicator());
    // Determine which processes will send us data
    for(int proc=0; proc < npes; ++proc)
      for(int i=gsendToDispl[proc]; i < gsendToDispl[proc+1]; ++i)
        if(gsendTo[i]==mype)
          recvFrom.insert(proc);

    bool existentOnNextLevel = recvFrom.size()>0;

    // Free the temporary arrays
    delete[] gnoSend;
    delete[] gsendToDispl;
    delete[] gsendTo;
    if(recvFrom.size()) {
      std::cout<<mype<<": recvFrom: ";
      for(auto i=recvFrom.begin(); i!= recvFrom.end(); ++i) {
        std::cout<<*i<<" ";
      }
    }

    std::cout<<std::endl<<std::endl;
    std::cout<<mype<<": sendTo: ";
    for(int i=0; i<noSendTo; i++) {
      std::cout<<sendTo[i]<<" ";
    }
    std::cout<<std::endl<<std::endl;
    if(oocomm.communicator().rank()==0)
      std::cout<<" Communicating the receive information took "<<
      time.elapsed()<<std::endl;
    time.reset();
    typedef typename OOComm::ParallelIndexSet::GlobalIndex GI;
    typedef std::vector<GI> GlobalVector;
    std::vector<std::pair<GI,int> > myOwnerVec;
    std::set<GI> myOverlapSet;
    GlobalVector sendOwnerVec;
    std::set<GI> sendOverlapSet;
    std::set<int> myNeighbors;
    char **sendBuffers = new char*[noSendTo];
    MPI_Request *requests = new MPI_Request[noSendTo];

    // Create the send buffers and post the sends
    for(int i=0; i < noSendTo; ++i) {
      // Clear the vectors for sending
      sendOwnerVec.clear();
      sendOverlapSet.clear();
      // Get all owner and overlap vertices for process sendTo[i], as global indices
      std::set<int> neighbors;
      getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(),
                                   mype, sendTo[i], sendOwnerVec, sendOverlapSet, redistInf,
                                   neighbors);
      // Compute the size of the send buffer
      int buffersize = 0;
      int tsize;
      MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &buffersize);
      MPI_Pack_size(sendOwnerVec.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize);
      buffersize += tsize;
      MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize);
      buffersize += tsize;
      MPI_Pack_size(sendOverlapSet.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize);
      buffersize += tsize;
      MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize);
      buffersize += tsize;
      MPI_Pack_size(neighbors.size(), MPI_INT, oocomm.communicator(), &tsize);
      buffersize += tsize;

      sendBuffers[i] = new char[buffersize];

      std::cout<<mype<<" sending "<<sendOwnerVec.size()<<" owner and "<<
      sendOverlapSet.size()<<" overlap to "<<sendTo[i]<<" buffersize="<<buffersize<<std::endl;

      createSendBuf(sendOwnerVec, sendOverlapSet, neighbors, sendBuffers[i], buffersize, oocomm.communicator());
      MPI_Issend(sendBuffers[i], buffersize, MPI_PACKED, sendTo[i], 99, oocomm.communicator(), requests+i);
    }
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<" Creating sends took "<<
      time.elapsed()<<std::endl;
    time.reset();
    // Receive the messages
    int noRecv = recvFrom.size();
    int oldbuffersize = 0;
    char* recvBuf = 0;
    while(noRecv>0) {
      // Probe for the next message
      MPI_Status stat;
      MPI_Probe(MPI_ANY_SOURCE, 99, oocomm.communicator(), &stat);
      int buffersize;
      MPI_Get_count(&stat, MPI_PACKED, &buffersize);

      if(oldbuffersize<buffersize) {
        // The receive buffer is too small: reallocate
        delete[] recvBuf;
        recvBuf = new char[buffersize];
        oldbuffersize = buffersize;
      }
      MPI_Recv(recvBuf, buffersize, MPI_PACKED, stat.MPI_SOURCE, 99, oocomm.communicator(), &stat);
      saveRecvBuf(recvBuf, buffersize, myOwnerVec, myOverlapSet, myNeighbors, redistInf,
                  stat.MPI_SOURCE, oocomm.communicator());
      --noRecv;
    }
    delete[] recvBuf;
    // Wait for the completion of all sends and check for errors
    MPI_Status *statuses = new MPI_Status[noSendTo];
    int send = MPI_Waitall(noSendTo, requests, statuses);

    if(send==MPI_ERR_IN_STATUS) {
      std::cerr<<mype<<": Error in sending :"<<std::endl;
      // Search for the error
      for(int i=0; i< noSendTo; i++)
        if(statuses[i].MPI_ERROR!=MPI_SUCCESS) {
          char message[300];
          int messageLength;
          MPI_Error_string(statuses[i].MPI_ERROR, message, &messageLength);
          std::cerr<<" source="<<statuses[i].MPI_SOURCE<<" message: ";
          for(int j = 0; j < messageLength; j++)
            std::cerr<<message[j];
        }
      std::cerr<<std::endl;
    }
    delete[] statuses;
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<" Receiving and saving took "<<
      time.elapsed()<<std::endl;
    time.reset();
    // Free the send buffers
    for(int i=0; i < noSendTo; ++i)
      delete[] sendBuffers[i];
    delete[] sendBuffers;

    redistInf.setCommunicator(oocomm.communicator());
    // Create a new communicator: processes that are not needed on the
    // next level leave with color MPI_UNDEFINED
    int color = 0;
    if (!existentOnNextLevel) {
      // This process is not used anymore
      color = MPI_UNDEFINED;
    }
    MPI_Comm outputComm;
    MPI_Comm_split(oocomm.communicator(), color, oocomm.communicator().rank(), &outputComm);
    // Translate the neighbour ranks into ranks of the new communicator
    int newrank = outcomm->communicator().rank();
    int *newranks = new int[oocomm.communicator().size()];
    std::vector<int> tneighbors;
    tneighbors.reserve(myNeighbors.size());

    typename OOComm::ParallelIndexSet& outputIndexSet = outcomm->indexSet();

    MPI_Allgather(&newrank, 1, MPI_INT, newranks, 1,
                  MPI_INT, oocomm.communicator());
#ifdef DEBUG_REPART
    std::cout<<oocomm.communicator().rank()<<" ";
    for(auto i=myNeighbors.begin(), end=myNeighbors.end();
        i != end; ++i) {
      assert(newranks[*i]>=0);
      std::cout<<*i<<"->"<<newranks[*i]<<" ";
      tneighbors.push_back(newranks[*i]);
    }
    std::cout<<std::endl;
#else
    for(auto i=myNeighbors.begin(), end=myNeighbors.end();
        i != end; ++i)
      tneighbors.push_back(newranks[*i]);
#endif
    delete[] newranks;
    myNeighbors.clear();
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<" Calculating new neighbours ("<<tneighbors.size()<<") took "<<
      time.elapsed()<<std::endl;
    time.reset();
    outputIndexSet.beginResize();
    // 1) Add the owner vertices. They are sorted by global index first, so
    // the new local indices are simply their positions in the vector.
    std::sort(myOwnerVec.begin(), myOwnerVec.end(), SortFirst());

    std::size_t i = 0;
    using LocalIndexT = typename OOComm::ParallelIndexSet::LocalIndex;
    for(auto g=myOwnerVec.begin(), end=myOwnerVec.end(); g!=end; ++g, ++i) {
      outputIndexSet.add(g->first, LocalIndexT(i, OwnerOverlapCopyAttributeSet::owner, true));
      redistInf.addReceiveIndex(g->second, i);
    }
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<" Adding owner indices took "<<
      time.elapsed()<<std::endl;
    time.reset();
    // Vertices we received as overlap may already be owner vertices in the
    // new partition; remove those from the overlap set
    mergeVec(myOwnerVec, myOverlapSet);

    // Free the memory of the owner vector
    myOwnerVec.clear();
    std::vector<std::pair<GI,int> >().swap(myOwnerVec);

    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<" Merging indices took "<<
      time.elapsed()<<std::endl;
    time.reset();
    // 2) Add the overlap vertices
    for(auto g=myOverlapSet.begin(), end=myOverlapSet.end(); g!=end; ++g, i++) {
      outputIndexSet.add(*g, LocalIndexT(i, OwnerOverlapCopyAttributeSet::copy, true));
    }
    myOverlapSet.clear();
    outputIndexSet.endResize();
#ifdef DUNE_ISTL_WITH_CHECKING
    int numOfOwnVtx = 0;
    auto end = outputIndexSet.end();
    for(auto index = outputIndexSet.begin(); index != end; ++index) {
      if (OwnerSet::contains(index->local().attribute())) {
        numOfOwnVtx++;
      }
    }
    numOfOwnVtx = oocomm.communicator().sum(numOfOwnVtx);

    // The index set must be sorted by global index
    assert(std::is_sorted(outputIndexSet.begin(), outputIndexSet.end(),
                          [](const auto& v1, const auto& v2){ return v1.global() < v2.global(); }));
#endif
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<" Adding overlap indices took "<<
      time.elapsed()<<std::endl;
    time.reset();
    if(color != MPI_UNDEFINED) {
      outcomm->remoteIndices().setNeighbours(tneighbors);
      outcomm->remoteIndices().template rebuild<true>();
    }
    oocomm.communicator().barrier();
    if(oocomm.communicator().rank()==0)
      std::cout<<" Storing indexsets took "<<
      time.elapsed()<<std::endl;
#ifdef PERF_REPART
    tSum = t1 + t2 + t3 + t4;
    std::cout<<std::endl
             <<mype<<": WTime for step 1): "<<t1
             <<" 2): "<<t2<<" 3): "<<t3<<" 4): "<<t4
             <<" total: "<<tSum<<std::endl;
#endif

    return color!=MPI_UNDEFINED;
  }
  template<class G, class P, class T1, class T2, class R>
  bool graphRepartition(const G& graph, P& oocomm, int nparts,
                        std::shared_ptr<P>& outcomm,
                        R& redistInf,
                        bool v=false)
  {
    if(nparts!=oocomm.size())
      DUNE_THROW(NotImplemented, "only available for MPI programs");
  }
  template<class G, class P, class T1, class T2, class R>
  bool commGraphRepartition(const G& graph, P& oocomm, int nparts,
                            std::shared_ptr<P>& outcomm,
                            R& redistInf,
                            bool v=false)
  {
    if(nparts!=oocomm.size())
      DUNE_THROW(NotImplemented, "only available for MPI programs");
  }

#endif // DUNE_ISTL_REPARTITION_HH