00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "FieldLayout/BinaryBalancer.h"
00028 #include "FieldLayout/FieldLayout.h"
00029 #include "Field/BareField.h"
00030 #include "Utility/PAssert.h"
00031 #include "Profile/Profiler.h"
00032
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00075
00076
00077
00078
00079
00080 template <unsigned Dim>
00081 static int
00082 FindCutAxis(const NDIndex<Dim> &domain, const FieldLayout<Dim> &layout)
00083 {
00084 TAU_TYPE_STRING(taustr, "int (" + CT(domain) + "," + CT(layout) + " )");
00085 TAU_PROFILE("FindCutAxis()", taustr, TAU_LAYOUT);
00086
00087
00088 int cutAxis=-1;
00089
00090 int maxLength=0;
00091
00092 for (int d=0; d<Dim; ++d) {
00093 if (layout.getDistribution(d) != SERIAL ||
00094 layout.getRequestedDistribution(d) != SERIAL) {
00095
00096 int length = domain[d].length();
00097 if ( maxLength < length ) {
00098
00099 cutAxis = d;
00100 maxLength = length;
00101 }
00102 }
00103 }
00104
00105
00106 PAssert(cutAxis>=0);
00107
00108
00109 return cutAxis;
00110 }
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120 template<class RandomIterator, class T>
00121 static RandomIterator
00122 FindMedian(int nprocs,RandomIterator begin, RandomIterator end, T)
00123 {
00124
00125 T w = 0;
00126
00127 TAU_TYPE_STRING(taustr, CT(begin) + " (" + CT(begin) + ", "
00128 + CT(end) + ", " + CT(w) + " )");
00129 TAU_PROFILE("FindMedian()", taustr, TAU_LAYOUT);
00130
00131
00132 if ( nprocs == 1 )
00133 return begin;
00134
00135 int lprocs = nprocs/2;
00136 RandomIterator rp, median;
00137 for (rp=begin; rp!=end; ++rp)
00138 w += *rp;
00139
00140
00141 if ( w==0 )
00142 {
00143
00144
00145 median = begin + ((end-begin)*lprocs)/nprocs;
00146 }
00147 else
00148 {
00149
00150
00151 T w2 = (w*lprocs)/nprocs;
00152
00153 bool found = false;
00154 w = T(0);
00155 for (rp=begin; (rp!=end)&&(!found); ++rp) {
00156
00157 w += *rp;
00158 if (w>=w2) {
00159 found = true;
00160 if ( (w-w2) > (*rp/T(2)) )
00161 median = rp;
00162 else
00163 median = (rp+1 != end) ? rp+1 : rp;
00164 }
00165 }
00166 }
00167
00168 return median;
00169 }
00170
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186 static inline double
00187 PerpReduce(BrickIterator<double,3>& data, int i, int cutAxis)
00188 {
00189 double r=0;
00190 if (cutAxis==0)
00191 {
00192 int l1 = data.size(1);
00193 int l2 = data.size(2);
00194 if ( (l1>0) && (l2>0) )
00195 for (int j2=0; j2<l2; ++j2)
00196 for (int j1=0; j1<l1; ++j1)
00197 r += data.offset(i,j1,j2);
00198 }
00199 else if (cutAxis==1)
00200 {
00201 int l0 = data.size(0);
00202 int l2 = data.size(2);
00203 if ( (l0>0) && (l2>0) )
00204 for (int j2=0; j2<l2; ++j2)
00205 for (int j0=0; j0<l0; ++j0)
00206 r += data.offset(j0,i,j2);
00207 }
00208 else if (cutAxis==2)
00209 {
00210 int l0 = data.size(0);
00211 int l1 = data.size(1);
00212 if ( (l0>0) && (l1>0) )
00213 for (int j1=0; j1<l1; ++j1)
00214 for (int j0=0; j0<l0; ++j0)
00215 r += data.offset(j0,j1,i);
00216 }
00217 return r;
00218 }
00219
00220
00221
00222
00223
00224 static inline double
00225 PerpReduce(BrickIterator<double,2>& data, int i, int cutAxis)
00226 {
00227 double r=0;
00228 if (cutAxis==0)
00229 {
00230 int length = data.size(1);
00231 for (int j=0; j<length; ++j)
00232 r += data.offset(i,j);
00233 }
00234 else
00235 {
00236 int length = data.size(0);
00237 for (int j=0; j<length; ++j)
00238 r += data.offset(j,i);
00239 }
00240 return r;
00241 }
00242
00243
00244
00245
00246
00247
00248
00249 static inline double
00250 PerpReduce(BrickIterator<double,1>& data, int i, int cutAxis)
00251 {
00252 return data.offset(i);
00253 }
00254
00255
00256
00257
00258
00259
00260 template<unsigned Dim>
00261 static void
00262 LocalReduce(double *reduced, int cutAxis, BrickIterator<double,Dim> data)
00263 {
00264 TAU_TYPE_STRING(taustr, "double (double *, int, " + CT(data) + " )");
00265 TAU_PROFILE("LocalReduce()", taustr, TAU_LAYOUT);
00266
00267 int length = data.size(cutAxis);
00268 for (int i=0; i<length; ++i)
00269 reduced[i] = PerpReduce(data,i,cutAxis);
00270 }
00271
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284 template<class IndexIterator, unsigned Dim>
00285 static void
00286 SendReduce(IndexIterator domainsBegin, IndexIterator domainsEnd,
00287 BareField<double,Dim>& weights, int tag)
00288 {
00289 TAU_TYPE_STRING(taustr, "void (" + CT(domainsBegin) + ", "
00290 + CT(domainsEnd) + ", " + CT(weights) + ", int )");
00291 TAU_PROFILE("SendReduce()", taustr, TAU_LAYOUT);
00292
00293
00294 vector<double*> reducedBuffer;
00295 vector<Index> domainBuffer;
00296
00297 int di;
00298 IndexIterator dp;
00299
00300 for (dp=domainsBegin, di=0; dp!=domainsEnd; ++dp, ++di)
00301 {
00302
00303
00304
00305 int cutAxis = FindCutAxis(*dp, weights.getLayout());
00306
00307 typename BareField<double,Dim>::iterator_if lf_p;
00308 for (lf_p=weights.begin_if(); lf_p != weights.end_if(); ++lf_p)
00309 if ( (*dp).touches( (*lf_p).second->getOwned() ) )
00310 {
00311
00312 NDIndex<Dim> intersection =
00313 (*dp).intersect( (*lf_p).second->getOwned() );
00314
00315 int length = intersection[cutAxis].length();
00316 double *reduced = new double[length];
00317
00318
00319 LocalReduce(reduced,cutAxis,(*lf_p).second->begin(intersection));
00320
00321 reducedBuffer.push_back(reduced);
00322 domainBuffer.push_back(intersection[cutAxis]);
00323 }
00324
00325
00326 int nrdomains = reducedBuffer.size();
00327
00328 if ( nrdomains>0 )
00329 {
00330
00331 Message *mess = new Message;
00332
00333 mess->put(nrdomains);
00334
00335 vector<Index>::iterator dbp = domainBuffer.begin();
00336 vector<double*>::iterator rbp = reducedBuffer.begin();
00337 for (int i=0; i<nrdomains; ++i, ++dbp, ++rbp)
00338 {
00339
00340
00341 putMessage(*mess,*dbp);
00342
00343
00344 double *p = *rbp;
00345 mess->setCopy(false).setDelete(true).put(p,p+(*dbp).length());
00346 }
00347
00348 DEBUGMSG("Comm->Send to Node " << di << ", tag=" << tag << endl);
00349 Ippl::Comm->send(mess, di, tag);
00350 }
00351
00352 domainBuffer.erase( domainBuffer.begin(), domainBuffer.end() );
00353 reducedBuffer.erase( reducedBuffer.begin(), reducedBuffer.end() );
00354
00355 }
00356 }
00357
00359
00360
00361
00362
00363
00364
00365
00366 template<unsigned Dim>
00367 static void
00368 ReceiveReduce(NDIndex<Dim>& domain, BareField<double,Dim>& weights,
00369 int reduce_tag, int nprocs,
00370 int& cutLoc, int& cutAxis)
00371 {
00372 TAU_TYPE_STRING(taustr, "void (" + CT(domain)
00373 + ", " + CT(weights) + ", int, int, int )");
00374 TAU_PROFILE("ReceiveReduce()", taustr, TAU_LAYOUT);
00375
00376
00377 cutAxis = FindCutAxis(domain, weights.getLayout());
00378
00379 int i, length = domain[cutAxis].length();
00380 int offset = domain[cutAxis].first();
00381 vector<double> reduced(length);
00382 vector<double> subReduced(length);
00383 for (i=0; i<length; ++i)
00384 reduced[i] = 0;
00385
00386
00387
00388 int expected = 0;
00389 int nodes = Ippl::getNodes();
00390 int mynode = Ippl::myNode();
00391 bool* found_touch = new bool[nodes];
00392 for (i=0; i<nodes; ++i) found_touch[i] = false;
00393
00394 typename BareField<double,Dim>::iterator_if lf_p, lf_end = weights.end_if();
00395 for (lf_p = weights.begin_if();
00396 lf_p != lf_end && !(found_touch[mynode]); ++lf_p) {
00397
00398 if ( (*lf_p).second->getOwned().touches(domain) )
00399 found_touch[mynode] = true;
00400 }
00401
00402 typename FieldLayout<Dim>::touch_iterator_dv rf_p;
00403
00404 typename FieldLayout<Dim>::touch_range_dv range =
00405 weights.getLayout().touch_range_rdv( domain );
00406
00407 for (rf_p = range.first; rf_p != range.second ; ++rf_p) {
00408 int owner = (*((*rf_p).second)).getNode();
00409 found_touch[owner] = true;
00410 }
00411
00412 for (i=0; i<nodes; ++i)
00413 if (found_touch[i]) expected++;
00414 delete [] found_touch;
00415
00416 DEBUGMSG("ReceiveReduce, msgs expected=" << expected << endl);
00417
00418 while ( --expected >= 0 )
00419 {
00420
00421 int any_node = COMM_ANY_NODE;
00422 Message *mess = Ippl::Comm->receive_block(any_node,reduce_tag);
00423 PAssert(mess != 0);
00424 DEBUGMSG("ReceiveReduce: Comm->Receive from Node " << any_node << ", tag=" << reduce_tag << endl);
00425
00426 int received_domains;
00427 mess->get(received_domains);
00428 while ( --received_domains>=0 )
00429 {
00430
00431 Index rdomain;
00432 getMessage( *mess, rdomain );
00433
00434
00435 int rfirst = rdomain.first() - offset;
00436 mess->get(subReduced[rfirst]);
00437
00438 int rlast = rdomain.last() - offset;
00439 for (int i=rfirst; i<=rlast; ++i)
00440 reduced[i] += subReduced[i];
00441 }
00442
00443 delete mess;
00444 }
00445
00446
00447 cutLoc =
00448 FindMedian(nprocs,reduced.begin(),reduced.begin()+length,double())
00449 -reduced.begin() + domain[cutAxis].first();
00450
00451 }
00452
00454
00455
00456
00457
00458
00459
00460 static void
00461 BcastCuts(int cutLoc, int cutAxis, int bcast_tag)
00462 {
00463 TAU_PROFILE("BcastCuts()", "void (int, int, int)", TAU_LAYOUT);
00464
00465
00466 Message *mess = new Message();
00467
00468 DEBUGMSG("Broadcast cutLoc=" << cutLoc << ", cutAxis=" << cutAxis << endl);
00469 mess->put(cutLoc);
00470 mess->put(cutAxis);
00471
00472 Ippl::Comm->broadcast_all(mess,bcast_tag);
00473 }
00474
00476
00477
00478
00479
00480
00481
00482 template<unsigned Dim>
00483 static void
00484 ReceiveCuts(vector< NDIndex<Dim> > &domains,
00485 vector< int >& nprocs,
00486 int bcast_tag)
00487 {
00488 TAU_TYPE_STRING(taustr, "void (" + CT(domains) + ", int)");
00489 TAU_PROFILE("ReceiveCuts()", taustr, TAU_LAYOUT);
00490
00491
00492 int nDomains = domains.size();
00493 vector< NDIndex<Dim> > cutDomains(nDomains*2);
00494 vector<int> cutProcs(vector<int>::size_type(nDomains*2));
00495
00496
00497
00498 for (int expected = 0; expected < nDomains; ++expected)
00499 {
00500
00501
00502
00503 int whichDomain = COMM_ANY_NODE;
00504 int cutLocation, cutAxis;
00505 Message *mess = Ippl::Comm->receive_block(whichDomain,bcast_tag);
00506 PAssert(mess != 0);
00507 DEBUGMSG("ReceiveCuts: received bcast " << expected << endl);
00508 mess->get(cutLocation);
00509 mess->get(cutAxis);
00510 delete mess;
00511
00512
00513 const NDIndex<Dim>& domain = domains[whichDomain];
00514 NDIndex<Dim>& left = cutDomains[ whichDomain*2 ];
00515 NDIndex<Dim>& right = cutDomains[ whichDomain*2+1 ];
00516
00517 left = domain ;
00518 right = domain ;
00519
00520
00521
00522
00523
00524 left[ cutAxis ] = Index( domain[cutAxis].first(), cutLocation-1 );
00525 right[ cutAxis ] = Index( cutLocation, domain[cutAxis].last() );
00526
00527 int procs = nprocs[whichDomain];
00528 cutProcs[ whichDomain*2 ] = procs/2;
00529 cutProcs[ whichDomain*2+1 ] = procs - procs/2;
00530 }
00531
00532
00533
00534 domains.clear();
00535 nprocs.clear();
00536 PAssert(cutProcs.size() == cutDomains.size());
00537 for (int i=0; i<cutProcs.size(); ++i)
00538 {
00539 if ( cutProcs[i] != 0 )
00540 {
00541 domains.push_back(cutDomains[i]);
00542 nprocs.push_back(cutProcs[i]);
00543 }
00544 else
00545 {
00546 PAssert(cutDomains[i].size() == 0);
00547 }
00548 }
00549 }
00550
00552
00553
00554
00555
00556
00557
00558 template<unsigned Dim>
00559 static void
00560 CutEach(vector< NDIndex<Dim> >& domains,
00561 vector< int >& nprocs,
00562 BareField<double,Dim>& weights)
00563 {
00564 TAU_TYPE_STRING(taustr, "void (" + CT(domains) + ", "
00565 + CT(weights) + " )");
00566 TAU_PROFILE("CutEach()", taustr, TAU_LAYOUT);
00567
00568
00569 int reduce_tag = Ippl::Comm->next_tag( F_REDUCE_PERP_TAG , F_TAG_CYCLE );
00570 int bcast_tag = Ippl::Comm->next_tag( F_REDUCE_PERP_TAG , F_TAG_CYCLE );
00571
00572
00573
00574
00575 DEBUGMSG("Do SendReduce" << endl);
00576 SendReduce(domains.begin(),domains.end(),weights,reduce_tag);
00577 DEBUGMSG("Did SendReduce" << endl);
00578
00579
00580
00581 int mynode = Ippl::Comm->myNode();
00582 if ( mynode < domains.size() )
00583 {
00584
00585 int cutAxis, cutLoc;
00586 DEBUGMSG("Do ReceiveReduce" << endl);
00587 ReceiveReduce(domains[mynode],weights,reduce_tag,
00588 nprocs[mynode],cutLoc,cutAxis);
00589 DEBUGMSG("Did ReceiveReduce" << endl);
00590
00591 DEBUGMSG("Do BcastCuts" << endl);
00592 BcastCuts(cutLoc,cutAxis,bcast_tag);
00593 DEBUGMSG("Did BcastCuts" << endl);
00594 }
00595
00596
00597 DEBUGMSG("Do ReceiveCuts" << endl);
00598 ReceiveCuts(domains,nprocs,bcast_tag);
00599 DEBUGMSG("Did ReceiveCuts" << endl);
00600 }
00601
00603
00604 template<unsigned Dim>
00605 NDIndex<Dim>
00606 CalcBinaryRepartition(FieldLayout<Dim>& layout, BareField<double,Dim>& weights)
00607 {
00608
00609 vector< NDIndex<Dim> > domains;
00610 vector<int> procs;
00611
00612 TAU_TYPE_STRING(taustr, CT(domains) + " (" + CT(layout) + ", "
00613 + CT(weights) + " )");
00614 TAU_PROFILE("CalcBinaryRepartition()", taustr, TAU_LAYOUT);
00615
00616
00617
00618
00619
00620
00621 int nprocs = Ippl::Comm->getNodes();
00622 int myproc = Ippl::Comm->myNode();
00623 domains.reserve(nprocs);
00624 procs.reserve(nprocs);
00625
00626 domains.push_back( layout.getDomain() );
00627 procs.push_back( nprocs );
00628
00629
00630 int mprocs=nprocs;
00631
00632
00633 while ( mprocs>1 )
00634 {
00635
00636 DEBUGMSG("Do Cut " << mprocs << endl);
00637 CutEach(domains,procs,weights);
00638 DEBUGMSG("Did Cut " << mprocs << endl);
00639
00640
00641 mprocs = 0;
00642 for (int i=0; i<procs.size(); ++i)
00643 if (mprocs<procs[i]) mprocs = procs[i];
00644 }
00645
00646 return domains.begin()[myproc];
00647 }
00648
00650
00651
00652
00653
00654
00655
00656