dune-grid  2.5-git
torus.hh
1 // -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 // vi: set et ts=4 sw=2 sts=2:
3 #ifndef DUNE_GRID_YASPGRID_TORUS_HH
4 #define DUNE_GRID_YASPGRID_TORUS_HH
5 
6 #include <array>
7 #include <bitset>
8 #include <cmath>
9 #include <deque>
10 #include <iostream>
11 #include <vector>
12 
13 #if HAVE_MPI
14 #include <mpi.h>
15 #endif
16 
17 #include <dune/common/binaryfunctions.hh>
18 #include <dune/common/exceptions.hh>
19 
20 #include "partitioning.hh"
21 
26 namespace Dune
27 {
28 
42  template<class CollectiveCommunication, int d>
43  class Torus {
44  public:
46  typedef std::array<int, d> iTupel;
47 
48 
49  private:
50  struct CommPartner {
51  int rank;
52  iTupel delta;
53  int index;
54  };
55 
56  struct CommTask {
57  int rank; // process to send to / receive from
58  void *buffer; // buffer to send / receive
59  int size; // size of buffer
60 #if HAVE_MPI
61  MPI_Request request; // used by MPI to handle request
62 #else
63  int request;
64 #endif
65  int flag; // used by MPI
66  };
67 
68  public:
70  Torus ()
71  {}
72 
74  Torus (CollectiveCommunication comm, int tag, iTupel size, const YLoadBalance<d>* lb)
75  : _comm(comm), _tag(tag)
76  {
77  // determine dimensions
78  lb->loadbalance(size, _comm.size(), _dims);
79 
80  // compute increments for lexicographic ordering
81  int inc = 1;
82  for (int i=0; i<d; i++)
83  {
84  _increment[i] = inc;
85  inc *= _dims[i];
86  }
87 
88  // check whether the load balancing matches the size of the communicator
89  if (inc != _comm.size())
90  DUNE_THROW(Dune::Exception, "Communicator size and result of the given load balancer do not match!");
91 
92  // make full schedule
93  proclists();
94  }
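 // Note (added): _increment[i] is the product of the dimensions below direction i,
 // so for _dims = {2,3,4} the increments are {1,2,6} and the final product 24
 // must equal the number of processes in the communicator.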
95 
97  int rank () const
98  {
99  return _comm.rank();
100  }
101 
103  iTupel coord () const
104  {
105  return rank_to_coord(_comm.rank());
106  }
107 
109  int procs () const
110  {
111  return _comm.size();
112  }
113 
115  const iTupel & dims () const
116  {
117  return _dims;
118  }
119 
121  int dims (int i) const
122  {
123  return _dims[i];
124  }
125 
127  CollectiveCommunication comm () const
128  {
129  return _comm;
130  }
131 
133  int tag () const
134  {
135  return _tag;
136  }
137 
139  bool inside (iTupel c) const
140  {
141  for (int i=d-1; i>=0; i--)
142  if (c[i]<0 || c[i]>=_dims[i]) return false;
143  return true;
144  }
145 
147  iTupel rank_to_coord (int rank) const
148  {
149  iTupel coord;
150  rank = rank%_comm.size();
151  for (int i=d-1; i>=0; i--)
152  {
153  coord[i] = rank/_increment[i];
154  rank = rank%_increment[i];
155  }
156  return coord;
157  }
158 
160  int coord_to_rank (iTupel coord) const
161  {
162  for (int i=0; i<d; i++) coord[i] = coord[i]%_dims[i];
163  int rank = 0;
164  for (int i=0; i<d; i++) rank += coord[i]*_increment[i];
165  return rank;
166  }
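 // Worked example (not in the original source): with _dims = {2,3} the increments
 // are {1,2}. rank_to_coord(5) peels off the highest direction first:
 // coord[1] = 5/2 = 2, remainder 1, then coord[0] = 1, giving {1,2};
 // coord_to_rank({1,2}) recovers 1*1 + 2*2 = 5.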
167 
169  int rank_relative (int rank, int dir, int cnt) const
170  {
171  iTupel coord = rank_to_coord(rank);
172  coord[dir] = (coord[dir]+_dims[dir]+cnt)%_dims[dir];
173  return coord_to_rank(coord);
174  }
175 
177  int color (const iTupel & coord) const
178  {
179  int c = 0;
180  int power = 1;
181 
182  // interior coloring
183  for (int i=0; i<d; i++)
184  {
185  if (coord[i]%2==1) c += power;
186  power *= 2;
187  }
188 
189  // extra colors for boundary processes
190  for (int i=0; i<d; i++)
191  {
192  if (_dims[i]>1 && coord[i]==_dims[i]-1) c += power;
193  power *= 2;
194  }
195 
196  return c;
197  }
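 // Note (added): the parity bits yield up to 2^d interior colors; the extra bits for
 // processes on the upper boundary of a direction double that again, so at most 4^d
 // distinct colors are used. In particular, the boundary bits distinguish coordinate 0
 // from coordinate _dims[i]-1 when _dims[i] is odd and both have the same parity.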
198 
200  int color (int rank) const
201  {
202  return color(rank_to_coord(rank));
203  }
204 
206  int neighbors () const
207  {
208  int n=1;
209  for (int i=0; i<d; ++i)
210  n *= 3;
211  return n-1;
212  }
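 // Note (added): every nonzero offset in {-1,0,1}^d counts, so a d-dimensional
 // torus has 3^d - 1 potential neighbors.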
213 
215  bool is_neighbor (iTupel delta, std::bitset<d> periodic) const
216  {
217  iTupel coord = rank_to_coord(_comm.rank()); // my own coordinate with 0 <= c_i < dims_i
218 
219  for (int i=0; i<d; i++)
220  {
221  if (delta[i]<0)
222  {
223  // if I am on the boundary and domain is not periodic => no neighbor
224  if (coord[i]==0 && periodic[i]==false) return false;
225  }
226  if (delta[i]>0)
227  {
228  // if I am on the boundary and domain is not periodic => no neighbor
229  if (coord[i]==_dims[i]-1 && periodic[i]==false) return false;
230  }
231  }
232  return true;
233  }
234 
242  double partition (int rank, iTupel origin_in, iTupel size_in, iTupel& origin_out, iTupel& size_out) const
243  {
244  iTupel coord = rank_to_coord(rank);
245  double maxsize = 1;
246  double sz = 1;
247 
248  // make a tensor product partition
249  for (int i=0; i<d; i++)
250  {
 251  // determine equal chunk size m and remainder r in this direction
252  int m = size_in[i]/_dims[i];
253  int r = size_in[i]%_dims[i];
254 
255  sz *= size_in[i];
256 
257  if (coord[i]<_dims[i]-r)
258  {
259  origin_out[i] = origin_in[i] + coord[i]*m;
260  size_out[i] = m;
261  maxsize *= m;
262  }
263  else
264  {
265  origin_out[i] = origin_in[i] + (_dims[i]-r)*m + (coord[i]-(_dims[i]-r))*(m+1);
266  size_out[i] = m+1;
267  maxsize *= m+1;
268  }
269  }
270  return maxsize/(sz/_comm.size());
271  }
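 // Worked example (not in the original source): for size_in[i] = 10 cells split over
 // _dims[i] = 3 processes, m = 3 and r = 1; the first two processes in that direction
 // get 3 cells each, the last one gets m+1 = 4. For the process holding the large
 // piece the return value maxsize/(sz/procs) is 4/(10/3) = 1.2, i.e. its piece is
 // 20% larger than the ideal average.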
272 
 279  class ProcListIterator {
 280  public:
282  ProcListIterator (typename std::deque<CommPartner>::const_iterator iter)
283  {
284  i = iter;
285  }
286 
288  int rank () const
289  {
290  return i->rank;
291  }
292 
294  iTupel delta () const
295  {
296  return i->delta;
297  }
298 
300  int index () const
301  {
302  return i->index;
303  }
304 
306  int distance () const
307  {
308  int dist = 0;
309  iTupel delta=i->delta;
310  for (int j=0; j<d; ++j)
311  dist += std::abs(delta[j]);
312  return dist;
313  }
314 
316  bool operator== (const ProcListIterator& iter)
317  {
318  return i == iter.i;
319  }
320 
321 
323  bool operator!= (const ProcListIterator& iter)
324  {
325  return i != iter.i;
326  }
327 
329  ProcListIterator& operator++ ()
330  {
331  ++i;
332  return *this;
333  }
334 
335  private:
336  typename std::deque<CommPartner>::const_iterator i;
337  };
338 
340  ProcListIterator sendbegin () const
341  {
342  return ProcListIterator(_sendlist.begin());
343  }
344 
346  ProcListIterator sendend () const
347  {
348  return ProcListIterator(_sendlist.end());
349  }
350 
352  ProcListIterator recvbegin () const
353  {
354  return ProcListIterator(_recvlist.begin());
355  }
356 
358  ProcListIterator recvend () const
359  {
360  return ProcListIterator(_recvlist.end());
361  }
362 
364  void send (int rank, void* buffer, int size) const
365  {
366  CommTask task;
367  task.rank = rank;
368  task.buffer = buffer;
369  task.size = size;
370  if (rank!=_comm.rank())
371  _sendrequests.push_back(task);
372  else
373  _localsendrequests.push_back(task);
374  }
375 
377  void recv (int rank, void* buffer, int size) const
378  {
379  CommTask task;
380  task.rank = rank;
381  task.buffer = buffer;
382  task.size = size;
383  if (rank!=_comm.rank())
384  _recvrequests.push_back(task);
385  else
386  _localrecvrequests.push_back(task);
387  }
388 
390  void exchange () const
391  {
392  // handle local requests first
393  if (_localsendrequests.size()!=_localrecvrequests.size())
394  {
395  std::cout << "[" << rank() << "]: ERROR: local sends/receives do not match in exchange!" << std::endl;
396  return;
397  }
398  for (unsigned int i=0; i<_localsendrequests.size(); i++)
399  {
400  if (_localsendrequests[i].size!=_localrecvrequests[i].size)
401  {
402  std::cout << "[" << rank() << "]: ERROR: size in local sends/receive does not match in exchange!" << std::endl;
403  return;
404  }
405  memcpy(_localrecvrequests[i].buffer,_localsendrequests[i].buffer,_localsendrequests[i].size);
406  }
407  _localsendrequests.clear();
408  _localrecvrequests.clear();
409 
410 #if HAVE_MPI
411  // handle foreign requests
412  int sends=0;
413  int recvs=0;
414 
415  // issue sends to foreign processes
416  for (unsigned int i=0; i<_sendrequests.size(); i++)
417  if (_sendrequests[i].rank!=rank())
418  {
419  // std::cout << "[" << rank() << "]" << " send " << _sendrequests[i].size << " bytes "
420  // << "to " << _sendrequests[i].rank << " p=" << _sendrequests[i].buffer << std::endl;
421  MPI_Isend(_sendrequests[i].buffer, _sendrequests[i].size, MPI_BYTE,
422  _sendrequests[i].rank, _tag, _comm, &(_sendrequests[i].request));
423  _sendrequests[i].flag = false;
424  sends++;
425  }
426 
427  // issue receives from foreign processes
428  for (unsigned int i=0; i<_recvrequests.size(); i++)
429  if (_recvrequests[i].rank!=rank())
430  {
431  // std::cout << "[" << rank() << "]" << " recv " << _recvrequests[i].size << " bytes "
432  // << "fm " << _recvrequests[i].rank << " p=" << _recvrequests[i].buffer << std::endl;
433  MPI_Irecv(_recvrequests[i].buffer, _recvrequests[i].size, MPI_BYTE,
434  _recvrequests[i].rank, _tag, _comm, &(_recvrequests[i].request));
435  _recvrequests[i].flag = false;
436  recvs++;
437  }
438 
439  // poll sends
440  while (sends>0)
441  {
442  for (unsigned int i=0; i<_sendrequests.size(); i++)
443  if (!_sendrequests[i].flag)
444  {
445  MPI_Status status;
446  MPI_Test( &(_sendrequests[i].request), &(_sendrequests[i].flag), &status);
447  if (_sendrequests[i].flag)
448  {
449  sends--;
450  // std::cout << "[" << rank() << "]" << " send to " << _sendrequests[i].rank << " OK" << std::endl;
451  }
452  }
453  }
454 
455  // poll receives
456  while (recvs>0)
457  {
458  for (unsigned int i=0; i<_recvrequests.size(); i++)
459  if (!_recvrequests[i].flag)
460  {
461  MPI_Status status;
462  MPI_Test( &(_recvrequests[i].request), &(_recvrequests[i].flag), &status);
463  if (_recvrequests[i].flag)
464  {
465  recvs--;
466  // std::cout << "[" << rank() << "]" << " recv fm " << _recvrequests[i].rank << " OK" << std::endl;
467  }
468 
469  }
470  }
471 
472  // clear request buffers
473  _sendrequests.clear();
474  _recvrequests.clear();
475 #endif
476  }
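 // Note (added): exchange() first copies all local (self) messages with memcpy,
 // then posts nonblocking MPI_Isend/MPI_Irecv for the remote ones and polls them
 // with MPI_Test until all have completed. Sends and receives therefore have to be
 // stored consistently on both sides before exchange() is called.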
477 
479  double global_max (double x) const
480  {
481  double res = 0.0;
482  _comm.template allreduce<Dune::Max<double>,double>(&x, &res, 1);
483  return res;
484  }
485 
487  void print (std::ostream& s) const
488  {
489  s << "[" << rank() << "]: Torus " << procs() << " processor(s) arranged as " << dims() << std::endl;
490  for (ProcListIterator i=sendbegin(); i!=sendend(); ++i)
491  {
492  s << "[" << rank() << "]: send to "
493  << "rank=" << i.rank()
494  << " index=" << i.index()
495  << " delta=" << i.delta() << " dist=" << i.distance() << std::endl;
496  }
497  for (ProcListIterator i=recvbegin(); i!=recvend(); ++i)
498  {
499  s << "[" << rank() << "]: recv from "
500  << "rank=" << i.rank()
501  << " index=" << i.index()
502  << " delta=" << i.delta() << " dist=" << i.distance() << std::endl;
503  }
504  }
505 
506  private:
507 
508  void proclists ()
509  {
510  // compile the full neighbor list
511  CommPartner cp;
512  iTupel delta;
513 
514  std::fill(delta.begin(), delta.end(), -1);
515  bool ready = false;
516  iTupel me, nb;
517  me = rank_to_coord(_comm.rank());
518  int index = 0;
519  int last = neighbors()-1;
520  while (!ready)
521  {
 522  // find the neighbor's coordinates
523  for (int i=0; i<d; i++)
524  nb[i] = ( me[i]+_dims[i]+delta[i] ) % _dims[i];
525 
 526  // find the neighbor's rank
527  int nbrank = coord_to_rank(nb);
528 
529  // check if delta is not zero
530  for (int i=0; i<d; i++)
531  if (delta[i]!=0)
532  {
533  cp.rank = nbrank;
534  cp.delta = delta;
535  cp.index = index;
536  _recvlist.push_back(cp);
537  cp.index = last-index;
538  _sendlist.push_front(cp);
539  index++;
540  break;
541  }
542 
543  // next neighbor
544  ready = true;
545  for (int i=0; i<d; i++)
546  if (delta[i]<1)
547  {
548  (delta[i])++;
549  ready=false;
550  break;
551  }
552  else
553  {
554  delta[i] = -1;
555  }
556  }
557 
558  }
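 // Note (added): the while loop walks through all offsets in {-1,0,1}^d like an
 // odometer. Each nonzero offset is appended to the receive list and prepended to
 // the send list (with index last-index), so the send list is traversed in the
 // opposite order of the receive list.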
559 
560  CollectiveCommunication _comm;
561 
562  iTupel _dims;
563  iTupel _increment;
564  int _tag;
565  std::deque<CommPartner> _sendlist;
566  std::deque<CommPartner> _recvlist;
567 
568  mutable std::vector<CommTask> _sendrequests;
569  mutable std::vector<CommTask> _recvrequests;
570  mutable std::vector<CommTask> _localsendrequests;
571  mutable std::vector<CommTask> _localrecvrequests;
572 
573  };
574 
576  template <class CollectiveCommunication, int d>
577  inline std::ostream& operator<< (std::ostream& s, const Torus<CollectiveCommunication, d> & t)
578  {
579  t.print(s);
580  return s;
581  }
582 }
583 
584 #endif
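The listing above is the complete class. What follows is a minimal usage sketch, not part of torus.hh: it assumes an MPI build configured through config.h, the MPIHelper and CollectiveCommunication facilities from dune-common, and a default-constructible partitioner from partitioning.hh (called YLoadBalanceDefault here; check partitioning.hh of your dune-grid version for the exact names). The tag value and the coarse mesh size are arbitrary choices for the example.

#include <config.h>            // build-system config, defines HAVE_MPI

#include <array>
#include <iostream>
#include <vector>

#include <dune/common/parallel/mpihelper.hh>
#include <dune/grid/yaspgrid/torus.hh>

int main(int argc, char** argv)
{
  // wrap the MPI communicator in the Dune collective communication type
  Dune::MPIHelper& helper = Dune::MPIHelper::instance(argc, argv);
  typedef Dune::CollectiveCommunication<Dune::MPIHelper::MPICommunicator> CC;
  CC cc(helper.getCommunicator());

  const int d = 2;
  std::array<int,d> size = {{64, 64}};   // coarse mesh size to be partitioned
  Dune::YLoadBalanceDefault<d> lb;       // assumed name of the default partitioner
  Dune::Torus<CC,d> torus(cc, /*tag=*/77, size, &lb);

  // which part of the coarse mesh does this process own?
  std::array<int,d> origin_in = {{0, 0}}, origin_out, size_out;
  double imbalance = torus.partition(torus.rank(), origin_in, size, origin_out, size_out);
  double maximbalance = torus.global_max(imbalance);   // collective call, do it on all ranks

  // exchange one double with every torus neighbor
  double out = static_cast<double>(torus.rank());
  std::vector<double> in(torus.neighbors(), -1.0);
  for (auto i = torus.sendbegin(); i != torus.sendend(); ++i)
    torus.send(i.rank(), &out, sizeof(double));
  int k = 0;
  for (auto i = torus.recvbegin(); i != torus.recvend(); ++i)
    torus.recv(i.rank(), &in[k++], sizeof(double));
  torus.exchange();

  if (torus.rank() == 0)
    std::cout << torus << "maximal load imbalance: " << maximbalance << std::endl;
  return 0;
}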
Members referenced in this listing (brief description and definition line):

torus.hh:43   class Torus
torus.hh:46   typedef std::array<int,d> iTupel - type used to pass tupels in and out
torus.hh:70   Torus() - constructor making uninitialized object
torus.hh:74   Torus(CollectiveCommunication comm, int tag, iTupel size, const YLoadBalance<d>* lb) - make partitioner from communicator and coarse mesh size
torus.hh:97   int rank() const - return own rank
torus.hh:103  iTupel coord() const - return own coordinates
torus.hh:109  int procs() const - return number of processes
torus.hh:115  const iTupel& dims() const - return dimensions of torus
torus.hh:121  int dims(int i) const - return dimension of torus in direction i
torus.hh:127  CollectiveCommunication comm() const - return communicator
torus.hh:133  int tag() const - return tag used by torus
torus.hh:139  bool inside(iTupel c) const - return true if coordinate is inside torus
torus.hh:147  iTupel rank_to_coord(int rank) const - map rank to coordinate in torus using lexicographic ordering
torus.hh:160  int coord_to_rank(iTupel coord) const - map coordinate in torus to rank using lexicographic ordering
torus.hh:169  int rank_relative(int rank, int dir, int cnt) const - return rank of the process whose coordinate in direction dir is offset by cnt (handles the periodic case)
torus.hh:177  int color(const iTupel& coord) const - assign color to given coordinate
torus.hh:200  int color(int rank) const - assign color to given rank
torus.hh:206  int neighbors() const - return the number of neighbors, which is 3^d-1
torus.hh:215  bool is_neighbor(iTupel delta, std::bitset<d> periodic) const - return true if the process at the given delta is a neighbor under the given periodicity
torus.hh:242  double partition(int rank, iTupel origin_in, iTupel size_in, iTupel& origin_out, iTupel& size_out) const - partition the given grid onto the torus and return the piece of the process with given rank
torus.hh:279  class Torus::ProcListIterator
torus.hh:282  ProcListIterator(typename std::deque<CommPartner>::const_iterator iter) - make an iterator
torus.hh:288  int rank() const - return rank of neighboring process
torus.hh:294  iTupel delta() const - return distance vector
torus.hh:300  int index() const - return index in proclist
torus.hh:306  int distance() const - return 1-norm of distance vector
torus.hh:340  ProcListIterator sendbegin() const - first process in send list
torus.hh:346  ProcListIterator sendend() const - end of send list
torus.hh:352  ProcListIterator recvbegin() const - first process in receive list
torus.hh:358  ProcListIterator recvend() const - end of receive list
torus.hh:364  void send(int rank, void* buffer, int size) const - store a send request; buffers are sent in order; local requests are handled with memcpy
torus.hh:377  void recv(int rank, void* buffer, int size) const - store a receive request; buffers are received in order; local requests are handled with memcpy
torus.hh:390  void exchange() const - exchange messages stored in request buffers; clear request buffers afterwards
torus.hh:479  double global_max(double x) const - global max over all processes
torus.hh:487  void print(std::ostream& s) const - print contents of torus object

partitioning.hh - provides tools to partition YaspGrids; to write your own partitioner, inherit from YLoadBalance and implement the loadbalance() method; ready-made partitioners such as YaspFixedSizePartitioner are also available there
partitioning.hh:23  class YLoadBalance<d> - base class for the YaspGrid partitioning strategy; declares the pure virtual loadbalance(const iTupel&, int, iTupel&) method