00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "FFT/FFT.h"
00027 #include "FieldLayout/FieldLayout.h"
00028 #include "Field/BareField.h"
00029 #include "Utility/IpplStats.h"
00030
00031 #ifdef IPPL_PRINTDEBUG
00032 #define FFTDBG(x) x
00033 #else
00034 #define FFTDBG(x)
00035 #endif
00036
00037
00043
00044
00045
00046
00052 template <unsigned Dim, class T>
00053 FFT<CCTransform,Dim,T>::FFT(
00054 const typename FFT<CCTransform,Dim,T>::Domain_t& cdomain,
00055 const bool transformTheseDims[Dim],
00056 const bool& compressTemps)
00057 : FFTBase<Dim,T>(FFT<CCTransform,Dim,T>::ccFFT, cdomain,
00058 transformTheseDims, compressTemps)
00059 {
00060
00061
00062 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00063 TAU_TYPE_STRING(tautype,
00064 "(" + CT(cdomain) + ", const bool [], const bool&)");
00065 TAU_PROFILE(tauname, tautype, TAU_FFT);
00066
00067
00068 unsigned nTransformDims = this->numTransformDims();
00069 int* lengths = new int[nTransformDims];
00070 unsigned d;
00071 for (d=0; d<nTransformDims; ++d)
00072 lengths[d] = cdomain[this->activeDimension(d)].length();
00073
00074
00075 int* transformTypes = new int[nTransformDims];
00076 T& normFact = this->getNormFact();
00077 normFact = 1.0;
00078 for (d=0; d<nTransformDims; ++d) {
00079 transformTypes[d] = FFTBase<Dim,T>::ccFFT;
00080 normFact /= lengths[d];
00081 }
00082
00083
00084 this->getEngine().setup(nTransformDims, transformTypes, lengths);
00085 delete [] transformTypes;
00086 delete [] lengths;
00087
00088 setup();
00089 }
00090
00091
00092
00097 template <unsigned Dim, class T>
00098 void
00099 FFT<CCTransform,Dim,T>::setup(void)
00100 {
00101
00102 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00103 TAU_TYPE_STRING(tautype, "(void)");
00104 TAU_PROFILE(tauname, tautype, TAU_FFT);
00105
00106 unsigned d, activeDim;
00107 unsigned nTransformDims = this->numTransformDims();
00108
00109 e_dim_tag serialParallel[Dim];
00110
00111 serialParallel[0] = SERIAL;
00112
00113 for (d=1; d<Dim; ++d)
00114 serialParallel[d] = PARALLEL;
00115
00116 tempLayouts_m = new Layout_t*[nTransformDims];
00117 tempFields_m = new ComplexField_t*[nTransformDims];
00118
00119
00120 for (unsigned dim=0; dim<nTransformDims; ++dim) {
00121
00122 activeDim = this->activeDimension(dim);
00123
00124 const Domain_t& ndic = this->getDomain();
00125
00126 Domain_t ndip;
00127 ndip[0] = ndic[activeDim];
00128 for (d=1; d<Dim; ++d) {
00129 int nextDim = activeDim + d;
00130 if (nextDim >= Dim) nextDim -= Dim;
00131 ndip[d] = ndic[nextDim];
00132 }
00133
00134 tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
00135
00136 tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
00137
00138 if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
00139 }
00140
00141 return;
00142 }
00143
00144
00145
00146
00147
00148 template <unsigned Dim, class T>
00149 FFT<CCTransform,Dim,T>::~FFT(void) {
00150
00151
00152 TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
00153 TAU_TYPE_STRING(tautype, "(void)");
00154 TAU_PROFILE(tauname, tautype, TAU_FFT);
00155
00156
00157 unsigned nTransformDims = this->numTransformDims();
00158 for (unsigned d=0; d<nTransformDims; ++d) {
00159 delete tempFields_m[d];
00160 delete tempLayouts_m[d];
00161 }
00162 delete [] tempFields_m;
00163 delete [] tempLayouts_m;
00164 }
00165
00166
00167
00168
00169
00170
00171 template <unsigned Dim, class T>
00172 void
00173 FFT<CCTransform,Dim,T>::transform(
00174 int direction,
00175 typename FFT<CCTransform,Dim,T>::ComplexField_t& f,
00176 typename FFT<CCTransform,Dim,T>::ComplexField_t& g,
00177 const bool& constInput)
00178 {
00179
00180 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00181 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
00182 TAU_PROFILE(tauname, tautype, TAU_FFT);
00183 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00184 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00185
00186
00187
00188
00189
00190 const Layout_t& in_layout = f.getLayout();
00191 const Domain_t& in_dom = in_layout.getDomain();
00192 const Layout_t& out_layout = g.getLayout();
00193 const Domain_t& out_dom = out_layout.getDomain();
00194 PAssert( checkDomain(this->getDomain(),in_dom) &&
00195 checkDomain(this->getDomain(),out_dom) );
00196
00197
00198 unsigned d;
00199 int idim;
00200 int begdim, enddim;
00201 unsigned nTransformDims = this->numTransformDims();
00202
00203 ComplexField_t* temp = &f;
00204
00205 Complex_t* localdata;
00206
00207
00208 begdim = (direction == +1) ? 0 : (nTransformDims-1);
00209 enddim = (direction == +1) ? nTransformDims : -1;
00210 for (idim = begdim; idim != enddim; idim += direction) {
00211 TAU_PROFILE_START(timer_swap);
00212
00213
00214 bool skipTranspose = false;
00215
00216
00217 if (idim == begdim && !constInput) {
00218
00219 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
00220
00221
00222 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
00223 (in_dom[0].length() == first_dom[0].length()) &&
00224 (in_layout.getDistribution(0) == SERIAL) &&
00225 (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00226 }
00227
00228
00229
00230 if (idim == enddim-direction) {
00231
00232 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
00233
00234
00235 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
00236 (out_dom[0].length() == last_dom[0].length()) &&
00237 (out_layout.getDistribution(0) == SERIAL) &&
00238 (g.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00239 }
00240
00241 if (!skipTranspose) {
00242
00243 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
00244 (*temp)[temp->getLayout().getDomain()];
00245
00246
00247 if (this->compressTemps() && temp != &f) *temp = 0;
00248 temp = tempFields_m[idim];
00249 }
00250 else if (idim == enddim-direction && temp != &g) {
00251
00252
00253
00254
00255 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
00256
00257
00258 if (this->compressTemps() && temp != &f) *temp = 0;
00259 temp = &g;
00260 }
00261 TAU_PROFILE_STOP(timer_swap);
00262
00263 TAU_PROFILE_START(timer_fft);
00264
00265 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
00266 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
00267
00268
00269 ComplexLField_t* ldf = (*l_i).second.get();
00270
00271 ldf->Uncompress();
00272
00273 localdata = ldf->getP();
00274
00275
00276 int nstrips = 1, length = ldf->size(0);
00277 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
00278 for (int istrip=0; istrip<nstrips; ++istrip) {
00279
00280 this->getEngine().callFFT(idim, direction, localdata);
00281
00282 localdata += length;
00283 }
00284 }
00285 TAU_PROFILE_STOP(timer_fft);
00286 }
00287
00288
00289 if (temp != &g) {
00290 TAU_PROFILE_START(timer_swap);
00291
00292 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
00293 if (this->compressTemps() && temp != &f) *temp = 0;
00294 TAU_PROFILE_STOP(timer_swap);
00295 }
00296
00297
00298 if (direction == +1)
00299 g *= Complex_t(this->getNormFact(), 0.0);
00300
00301 return;
00302 }
00303
00304
00305
00306
00307
00308 template <unsigned Dim, class T>
00309 void
00310 FFT<CCTransform,Dim,T>::transform(
00311 int direction,
00312 typename FFT<CCTransform,Dim,T>::ComplexField_t& f)
00313 {
00314
00315 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00316 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
00317 TAU_PROFILE(tauname, tautype, TAU_FFT);
00318 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00319 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00320
00321
00322
00323
00324
00325 const Layout_t& in_layout = f.getLayout();
00326 const Domain_t& in_dom = in_layout.getDomain();
00327 PAssert(checkDomain(this->getDomain(),in_dom));
00328
00329
00330 unsigned d;
00331 int idim;
00332 int begdim, enddim;
00333 unsigned nTransformDims = this->numTransformDims();
00334
00335 ComplexField_t* temp = &f;
00336
00337 Complex_t* localdata;
00338
00339
00340 begdim = (direction == +1) ? 0 : (nTransformDims-1);
00341 enddim = (direction == +1) ? nTransformDims : -1;
00342 for (idim = begdim; idim != enddim; idim += direction) {
00343 TAU_PROFILE_START(timer_swap);
00344
00345
00346 bool skipTranspose = false;
00347
00348
00349 if (idim == begdim) {
00350
00351 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
00352
00353
00354 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
00355 (in_dom[0].length() == first_dom[0].length()) &&
00356 (in_layout.getDistribution(0) == SERIAL) &&
00357 (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00358 }
00359
00360
00361
00362 if (idim == enddim-direction) {
00363
00364 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
00365
00366
00367 skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
00368 (in_dom[0].length() == last_dom[0].length()) &&
00369 (in_layout.getDistribution(0) == SERIAL) &&
00370 (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00371 }
00372
00373 if (!skipTranspose) {
00374
00375 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
00376 (*temp)[temp->getLayout().getDomain()];
00377
00378
00379 if (this->compressTemps() && temp != &f) *temp = 0;
00380 temp = tempFields_m[idim];
00381 }
00382 else if (idim == enddim-direction && temp != &f) {
00383
00384
00385
00386
00387 f[in_dom] = (*temp)[temp->getLayout().getDomain()];
00388
00389
00390 if (this->compressTemps()) *temp = 0;
00391 temp = &f;
00392 }
00393 TAU_PROFILE_STOP(timer_swap);
00394
00395 TAU_PROFILE_START(timer_fft);
00396
00397 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
00398 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
00399
00400
00401 ComplexLField_t* ldf = (*l_i).second.get();
00402
00403 ldf->Uncompress();
00404
00405 localdata = ldf->getP();
00406
00407
00408 int nstrips = 1, length = ldf->size(0);
00409 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
00410 for (int istrip=0; istrip<nstrips; ++istrip) {
00411
00412 this->getEngine().callFFT(idim, direction, localdata);
00413
00414 localdata += length;
00415 }
00416 }
00417 TAU_PROFILE_STOP(timer_fft);
00418
00419 }
00420
00421
00422 if (temp != &f) {
00423 TAU_PROFILE_START(timer_swap);
00424
00425 f[in_dom] = (*temp)[temp->getLayout().getDomain()];
00426 if (this->compressTemps()) *temp = 0;
00427 TAU_PROFILE_STOP(timer_swap);
00428 }
00429
00430
00431 if (direction == +1)
00432 f *= Complex_t(this->getNormFact(), 0.0);
00433
00434 return;
00435 }
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447 template <class T>
00448 FFT<CCTransform,1U,T>::FFT(
00449 const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
00450 const bool transformTheseDims[1U], const bool& compressTemps)
00451 : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain,
00452 transformTheseDims, compressTemps)
00453 {
00454
00455
00456 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00457 TAU_TYPE_STRING(tautype, "(" + CT(cdomain) + ", const bool [], const bool&)");
00458 TAU_PROFILE(tauname, tautype, TAU_FFT);
00459
00460 unsigned nTransformDims = 1U;
00461
00462 int length;
00463 length = cdomain[0].length();
00464
00465
00466 int transformType;
00467 transformType = FFTBase<1U,T>::ccFFT;
00468 T& normFact = this->getNormFact();
00469 normFact = 1.0 / length;
00470
00471
00472 this->getEngine().setup(nTransformDims, &transformType, &length);
00473
00474 setup();
00475 }
00476
00477
00478
00479
00480
00481
00482 template <class T>
00483 FFT<CCTransform,1U,T>::FFT(
00484 const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
00485 const bool& compressTemps)
00486 : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain, compressTemps)
00487 {
00488
00489
00490 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00491 TAU_TYPE_STRING(tautype, "(" + CT(cdomain) + ", const bool&)");
00492 TAU_PROFILE(tauname, tautype, TAU_FFT);
00493
00494
00495 int length;
00496 length = cdomain[0].length();
00497
00498
00499 int transformType;
00500 transformType = FFTBase<1U,T>::ccFFT;
00501 T& normFact = this->getNormFact();
00502 normFact = 1.0 / length;
00503
00504
00505 this->getEngine().setup(1U, &transformType, &length);
00506
00507 setup();
00508 }
00509
00510
00511
00512
00513
00514
00515 template <class T>
00516 void
00517 FFT<CCTransform,1U,T>::setup(void)
00518 {
00519
00520 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00521 TAU_TYPE_STRING(tautype, "(void)");
00522 TAU_PROFILE(tauname, tautype, TAU_FFT);
00523
00524
00525 const Domain_t& ndic = this->getDomain();
00526
00527 tempLayouts_m = new Layout_t(ndic[0], PARALLEL, 1);
00528
00529 tempFields_m = new ComplexField_t(*tempLayouts_m);
00530
00531 if (!this->compressTemps()) tempFields_m->Uncompress();
00532
00533 return;
00534 }
00535
00536
00537
00538
00539
00540 template <class T>
00541 FFT<CCTransform,1U,T>::~FFT(void) {
00542
00543
00544 TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
00545 TAU_TYPE_STRING(tautype, "(void)");
00546 TAU_PROFILE(tauname, tautype, TAU_FFT);
00547
00548
00549 delete tempFields_m;
00550 delete tempLayouts_m;
00551 }
00552
00553
00554
00555
00556
00557
00558 template <class T>
00559 void
00560 FFT<CCTransform,1U,T>::transform(
00561 int direction,
00562 typename FFT<CCTransform,1U,T>::ComplexField_t& f,
00563 typename FFT<CCTransform,1U,T>::ComplexField_t& g,
00564 const bool& constInput)
00565 {
00566
00567 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00568 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
00569 TAU_PROFILE(tauname, tautype, TAU_FFT);
00570 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00571 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00572
00573
00574
00575
00576
00577 const Layout_t& in_layout = f.getLayout();
00578 const Domain_t& in_dom = in_layout.getDomain();
00579 const Layout_t& out_layout = g.getLayout();
00580 const Domain_t& out_dom = out_layout.getDomain();
00581 PAssert( checkDomain(this->getDomain(),in_dom) &&
00582 checkDomain(this->getDomain(),out_dom) );
00583
00584
00585 ComplexField_t* temp = &f;
00586
00587 Complex_t* localdata;
00588
00589 TAU_PROFILE_START(timer_swap);
00590
00591
00592
00593 const Domain_t& temp_dom = tempLayouts_m->getDomain();
00594
00595 bool skipTranspose = false;
00596
00597
00598 if (!constInput) {
00599
00600
00601 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
00602 (in_dom[0].length() == temp_dom[0].length()) &&
00603 (in_layout.numVnodes() == 1) &&
00604 (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00605 }
00606
00607 bool skipFinal;
00608
00609
00610
00611
00612
00613 skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
00614 (out_dom[0].length() == temp_dom[0].length()) &&
00615 (out_layout.numVnodes() == 1) &&
00616 (g.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00617
00618 if (!skipTranspose) {
00619
00620 (*tempFields_m) = (*temp);
00621 temp = tempFields_m;
00622 }
00623 if (skipFinal) {
00624
00625
00626
00627
00628 g = (*temp);
00629
00630
00631 if (this->compressTemps() && temp != &f) *temp = 0;
00632 temp = &g;
00633 }
00634 TAU_PROFILE_STOP(timer_swap);
00635
00636 TAU_PROFILE_START(timer_fft);
00637
00638
00639 typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
00640 if (l_i != temp->end_if()) {
00641
00642 ComplexLField_t* ldf = (*l_i).second.get();
00643
00644 ldf->Uncompress();
00645
00646 localdata = ldf->getP();
00647
00648
00649 int length = ldf->size(0);
00650
00651 this->getEngine().callFFT(0, direction, localdata);
00652 }
00653
00654 TAU_PROFILE_STOP(timer_fft);
00655
00656
00657 if (temp != &g) {
00658 TAU_PROFILE_START(timer_swap);
00659
00660 g = (*temp);
00661 if (this->compressTemps() && temp != &f) *temp = 0;
00662 TAU_PROFILE_STOP(timer_swap);
00663 }
00664
00665
00666 if (direction == +1)
00667 g *= Complex_t(this->getNormFact(), 0.0);
00668
00669 return;
00670 }
00671
00672
00673
00674
00675
00676 template <class T>
00677 void
00678 FFT<CCTransform,1U,T>::transform(
00679 int direction,
00680 typename FFT<CCTransform,1U,T>::ComplexField_t& f)
00681 {
00682
00683 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00684 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
00685 TAU_PROFILE(tauname, tautype, TAU_FFT);
00686 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00687 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00688
00689
00690
00691
00692
00693 const Layout_t& in_layout = f.getLayout();
00694 const Domain_t& in_dom = in_layout.getDomain();
00695 PAssert(checkDomain(this->getDomain(),in_dom));
00696
00697
00698 ComplexField_t* temp = &f;
00699
00700 Complex_t* localdata;
00701
00702 TAU_PROFILE_START(timer_swap);
00703
00704
00705
00706 const Domain_t& temp_dom = tempLayouts_m->getDomain();
00707
00708 bool skipTranspose;
00709
00710
00711
00712
00713
00714 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
00715 (in_dom[0].length() == temp_dom[0].length()) &&
00716 (in_layout.numVnodes() == 1) &&
00717 (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00718
00719 if (!skipTranspose) {
00720
00721 (*tempFields_m) = (*temp);
00722 temp = tempFields_m;
00723 }
00724 TAU_PROFILE_STOP(timer_swap);
00725
00726 TAU_PROFILE_START(timer_fft);
00727
00728
00729 typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
00730 if (l_i != temp->end_if()) {
00731
00732 ComplexLField_t* ldf = (*l_i).second.get();
00733
00734 ldf->Uncompress();
00735
00736 localdata = ldf->getP();
00737
00738
00739 int length = ldf->size(0);
00740
00741 this->getEngine().callFFT(0, direction, localdata);
00742 }
00743
00744 TAU_PROFILE_STOP(timer_fft);
00745
00746
00747 if (temp != &f) {
00748 TAU_PROFILE_START(timer_swap);
00749
00750 f = (*temp);
00751 if (this->compressTemps()) *temp = 0;
00752 TAU_PROFILE_STOP(timer_swap);
00753 }
00754
00755
00756 if (direction == +1)
00757 f *= Complex_t(this->getNormFact(), 0.0);
00758
00759 return;
00760 }
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775 template <unsigned Dim, class T>
00776 FFT<RCTransform,Dim,T>::FFT(
00777 const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
00778 const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
00779 const bool transformTheseDims[Dim], const bool& compressTemps)
00780 : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain,
00781 transformTheseDims, compressTemps),
00782 complexDomain_m(cdomain), serialAxes_m(1)
00783 {
00784
00785 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00786 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
00787 ", const bool [], const bool&)");
00788 TAU_PROFILE(tauname, tautype, TAU_FFT);
00789
00790
00791 unsigned nTransformDims = this->numTransformDims();
00792 int* lengths = new int[nTransformDims];
00793 unsigned d;
00794 for (d=0; d<nTransformDims; ++d)
00795 lengths[d] = rdomain[this->activeDimension(d)].length();
00796
00797
00798 int* transformTypes = new int[nTransformDims];
00799 T& normFact = this->getNormFact();
00800 normFact = 1.0;
00801 transformTypes[0] = FFTBase<Dim,T>::rcFFT;
00802 normFact /= lengths[0];
00803 for (d=1; d<nTransformDims; ++d) {
00804 transformTypes[d] = FFTBase<Dim,T>::ccFFT;
00805 normFact /= lengths[d];
00806 }
00807
00808
00809 this->getEngine().setup(nTransformDims, transformTypes, lengths);
00810 delete [] transformTypes;
00811 delete [] lengths;
00812
00813
00814 setup();
00815 }
00816
00817
00818
00819
00820
00821
00822 template <unsigned Dim, class T>
00823 FFT<RCTransform,Dim,T>::FFT(
00824 const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
00825 const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
00826 const bool& compressTemps,
00827 int serialAxes)
00828 : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain, compressTemps),
00829 complexDomain_m(cdomain), serialAxes_m(serialAxes)
00830 {
00831
00832 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00833 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) + ", const bool&)");
00834 TAU_PROFILE(tauname, tautype, TAU_FFT);
00835
00836
00837 int lengths[Dim];
00838 unsigned d;
00839 for (d=0; d<Dim; ++d)
00840 lengths[d] = rdomain[d].length();
00841
00842
00843 int transformTypes[Dim];
00844 T& normFact = this->getNormFact();
00845 normFact = 1.0;
00846 transformTypes[0] = FFTBase<Dim,T>::rcFFT;
00847 normFact /= lengths[0];
00848 for (d=1; d<Dim; ++d) {
00849 transformTypes[d] = FFTBase<Dim,T>::ccFFT;
00850 normFact /= lengths[d];
00851 }
00852
00853
00854 this->getEngine().setup(Dim, transformTypes, lengths);
00855
00856
00857 setup();
00858 }
00859
00860
00861
00862
00863
00864
00865 template <unsigned Dim, class T>
00866 void
00867 FFT<RCTransform,Dim,T>::setup(void) {
00868
00869
00870 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00871 TAU_TYPE_STRING(tautype, "(void)");
00872 TAU_PROFILE(tauname, tautype, TAU_FFT);
00873
00874 PAssert(serialAxes_m > 0 && serialAxes_m < Dim);
00875
00876 unsigned d, d2, activeDim;
00877 unsigned nTransformDims = this->numTransformDims();
00878
00879
00880
00881
00882
00883 e_dim_tag serialParallel[Dim];
00884 e_dim_tag NserialParallel[Dim];
00885 for (d=0; d < Dim; ++d) {
00886 serialParallel[d] = (d == 0 ? SERIAL : PARALLEL);
00887 NserialParallel[d] = (d < serialAxes_m ? SERIAL : PARALLEL);
00888 }
00889
00890
00891 const Domain_t& domain = this->getDomain();
00892 activeDim = this->activeDimension(0);
00893 bool match = true;
00894 for (d=0; d<Dim; ++d) {
00895 if (d == activeDim) {
00896
00897 if ( complexDomain_m[d].length() !=
00898 (domain[d].length()/2 + 1) ) match = false;
00899 }
00900 else {
00901
00902 if (complexDomain_m[d].length() != domain[d].length()) match = false;
00903 }
00904 }
00905 PInsist(match,
00906 "Domains provided for real and complex Fields are incompatible!");
00907
00908
00909 tempLayouts_m = new Layout_t*[nTransformDims];
00910 tempFields_m = new ComplexField_t*[nTransformDims];
00911
00912
00913
00914
00915 Domain_t ndip;
00916 Domain_t ndipc;
00917 ndip[0] = domain[activeDim];
00918 ndipc[0] = complexDomain_m[activeDim];
00919 for (d=1; d<Dim; ++d) {
00920 int nextDim = activeDim + d;
00921 if (nextDim >= Dim) nextDim -= Dim;
00922 ndip[d] = domain[nextDim];
00923 ndipc[d] = complexDomain_m[nextDim];
00924 }
00925
00926
00927 tempRLayout_m = new Layout_t(ndip, serialParallel, this->transVnodes());
00928 tempRField_m = new RealField_t(*tempRLayout_m);
00929
00930
00931 tempLayouts_m[0] = new Layout_t(ndipc, serialParallel, this->transVnodes());
00932 tempFields_m[0] = new ComplexField_t(*tempLayouts_m[0]);
00933
00934
00935
00936 int fftorder[Dim], tmporder[Dim];
00937 int nofft = nTransformDims;
00938 for (d=0; d < nTransformDims; ++d)
00939 fftorder[d] = this->activeDimension(d);
00940 for (d=0; d < Dim; ++d) {
00941
00942 bool active = false;
00943 for (d2=0; d2 < nTransformDims; ++d2) {
00944 if (this->activeDimension(d2) == d) {
00945 active = true;
00946 break;
00947 }
00948 }
00949
00950 if (!active)
00951
00952 fftorder[nofft++] = d;
00953 }
00954
00955
00956
00957 nofft = fftorder[0];
00958 for (d=0; d < (Dim - 1); ++d)
00959 fftorder[d] = fftorder[d+1];
00960 fftorder[Dim-1] = nofft;
00961
00962
00963
00964
00965 int dim = 1;
00966 while (dim < nTransformDims) {
00967
00968 int sp;
00969 for (sp=0; sp < serialAxes_m && dim < nTransformDims; ++sp, ++dim) {
00970
00971
00972 for (d=0; d < Dim; ++d)
00973 ndip[d] = complexDomain_m[fftorder[d]];
00974
00975
00976 tempLayouts_m[dim] = new Layout_t(ndip, NserialParallel, this->transVnodes());
00977 tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
00978
00979
00980 if (serialAxes_m > 1) {
00981 tmporder[0] = fftorder[0];
00982 for (d=0; d < (serialAxes_m-1); ++d)
00983 fftorder[d] = fftorder[d+1];
00984 fftorder[serialAxes_m - 1] = tmporder[0];
00985 }
00986 }
00987
00988
00989
00990 for (d=0; d < Dim; ++d)
00991 tmporder[d] = fftorder[d];
00992 for (d=0; d < Dim; ++d)
00993 fftorder[d] = tmporder[(d + serialAxes_m) % Dim];
00994 }
00995 }
00996
00997
00998
00999
01000
01001
01002 template <unsigned Dim, class T>
01003 FFT<RCTransform,Dim,T>::~FFT(void) {
01004
01005
01006 TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
01007 TAU_TYPE_STRING(tautype, "(void)");
01008 TAU_PROFILE(tauname, tautype, TAU_FFT);
01009
01010
01011 unsigned nTransformDims = this->numTransformDims();
01012 for (unsigned d=0; d<nTransformDims; ++d) {
01013 delete tempFields_m[d];
01014 delete tempLayouts_m[d];
01015 }
01016 delete [] tempFields_m;
01017 delete [] tempLayouts_m;
01018 delete tempRField_m;
01019 delete tempRLayout_m;
01020 }
01021
01022
01023
01024
01025
01026 template <unsigned Dim, class T>
01027 void
01028 FFT<RCTransform,Dim,T>::transform(
01029 int direction,
01030 typename FFT<RCTransform,Dim,T>::RealField_t& f,
01031 typename FFT<RCTransform,Dim,T>::ComplexField_t& g,
01032 const bool& constInput)
01033 {
01034
01035 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01036 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01037 TAU_PROFILE(tauname, tautype, TAU_FFT);
01038 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01039 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01040
01041
01042 static IpplTimings::TimerRef tr1=IpplTimings::getTimer("RC-RRtranspose");
01043 static IpplTimings::TimerRef tr2=IpplTimings::getTimer("RC-CCtranspose");
01044 static IpplTimings::TimerRef ffttr1=IpplTimings::getTimer("RC-RC1Dfft");
01045 static IpplTimings::TimerRef ffttr2=IpplTimings::getTimer("RC-CC1Dfft");
01046 static IpplTimings::TimerRef tottimer=IpplTimings::getTimer("RC-total");
01047 FFTDBG(Inform tmsg("FFT-RC-forward"));
01048
01049
01050 IpplTimings::startTimer(tottimer);
01051
01052
01053
01054
01055
01056 const Layout_t& in_layout = f.getLayout();
01057 const Domain_t& in_dom = in_layout.getDomain();
01058 const Layout_t& out_layout = g.getLayout();
01059 const Domain_t& out_dom = out_layout.getDomain();
01060 PAssert( checkDomain(this->getDomain(),in_dom) &&
01061 checkDomain(complexDomain_m,out_dom) );
01062
01063
01064 unsigned d;
01065 int idim;
01066 unsigned nTransformDims = this->numTransformDims();
01067
01068
01069 idim = 0;
01070
01071 RealField_t* tempR = tempRField_m;
01072 if (!constInput) {
01073
01074 bool skipTemp = true;
01075
01076
01077 if ( !(in_layout == *tempRLayout_m) ) {
01078 skipTemp = false;
01079 } else {
01080
01081 for (d=0; d<Dim; ++d)
01082 if (in_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
01083 skipTemp = false;
01084
01085
01086 if (in_layout.numVnodes() != tempRLayout_m->numVnodes())
01087 skipTemp = false;
01088
01089
01090 if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01091 skipTemp = false;
01092 }
01093
01094
01095
01096
01097
01098 if (skipTemp)
01099 tempR = &f;
01100 }
01101
01102
01103 if (tempR != &f) {
01104 TAU_PROFILE_START(timer_swap);
01105
01106
01107 FFTDBG(tmsg << "Doing transpose of real field into temporary ");
01108 FFTDBG(tmsg << "with layout = " << tempR->getLayout() << endl);
01109 IpplTimings::startTimer(tr1);
01110 (*tempR)[tempR->getDomain()] = f[in_dom];
01111 IpplTimings::stopTimer(tr1);
01112
01113 TAU_PROFILE_STOP(timer_swap);
01114 }
01115
01116
01117 ComplexField_t* temp = tempFields_m[0];
01118
01119
01120
01121 if (nTransformDims == 1) {
01122 bool skipTemp = true;
01123
01124
01125 if (!(out_layout == *tempLayouts_m[0])) {
01126 skipTemp = false;
01127 } else {
01128 for (d=0; d<Dim; ++d)
01129 if (out_layout.getDistribution(d) !=
01130 tempLayouts_m[0]->getDistribution(d))
01131 skipTemp = false;
01132
01133 if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
01134 skipTemp = false;
01135
01136
01137 if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01138 skipTemp = false;
01139
01140
01141
01142 if (skipTemp)
01143 temp = &g;
01144 }
01145 }
01146
01147 FFTDBG(tmsg << "Doing real->complex fft of first dimension ..." << endl);
01148 TAU_PROFILE_START(timer_fft);
01149 IpplTimings::startTimer(ffttr1);
01150
01151
01152 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
01153 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01154 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
01155
01156 RealLField_t* rldf = (*rl_i).second.get();
01157 ComplexLField_t* cldf = (*cl_i).second.get();
01158
01159
01160 rldf->Uncompress();
01161 cldf->Uncompress();
01162
01163
01164 T* localreal = rldf->getP();
01165 Complex_t* localcomp = cldf->getP();
01166
01167
01168 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
01169 for (d=1; d<Dim; ++d)
01170 nstrips *= rldf->size(d);
01171
01172 for (int istrip=0; istrip<nstrips; ++istrip) {
01173
01174 for (int ilen=0; ilen<lengthreal; ilen+=2)
01175 localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
01176
01177
01178
01179 this->getEngine().callFFT(idim, +1, localcomp);
01180
01181
01182 localreal += lengthreal;
01183 localcomp += lengthcomp;
01184 }
01185 }
01186
01187 IpplTimings::stopTimer(ffttr1);
01188 TAU_PROFILE_STOP(timer_fft);
01189
01190
01191 if (this->compressTemps() && tempR != &f)
01192 *tempR = 0;
01193
01194
01195
01196
01197 Complex_t* localdata;
01198
01199
01200 for (idim = 1; idim < nTransformDims; ++idim) {
01201 TAU_PROFILE_START(timer_swap);
01202 bool skipTranspose = false;
01203
01204
01205
01206 if (idim == nTransformDims-1) {
01207
01208 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
01209
01210
01211
01212
01213 skipTranspose = (g.getGC() == FFT<RCTransform,Dim,T>::nullGC &&
01214 out_dom[0].sameBase(last_dom[0]) &&
01215 out_dom[0].length() == last_dom[0].length() &&
01216 out_layout.getDistribution(0) == SERIAL);
01217 }
01218
01219 if (!skipTranspose) {
01220
01221 FFTDBG(tmsg << "Doing complex->complex transpose into field ");
01222 FFTDBG(tmsg << "with layout = " << tempFields_m[idim]->getLayout());
01223 FFTDBG(tmsg << endl);
01224 IpplTimings::startTimer(tr2);
01225 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
01226 (*temp)[temp->getLayout().getDomain()];
01227 IpplTimings::stopTimer(tr2);
01228
01229
01230 if (this->compressTemps())
01231 *temp = 0;
01232 temp = tempFields_m[idim];
01233
01234 } else if (idim == nTransformDims-1) {
01235
01236
01237
01238
01239 FFTDBG(tmsg << "Doing final complex->complex transpose ");
01240 FFTDBG(tmsg << "into return ");
01241 FFTDBG(tmsg << "with layout = " << g.getLayout());
01242 FFTDBG(tmsg << endl);
01243 IpplTimings::startTimer(tr2);
01244 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
01245 IpplTimings::stopTimer(tr2);
01246
01247
01248 if (this->compressTemps())
01249 *temp = 0;
01250 temp = &g;
01251 }
01252 TAU_PROFILE_STOP(timer_swap);
01253
01254 FFTDBG(tmsg << "Doing complex->complex fft of other dimension .." << endl);
01255 TAU_PROFILE_START(timer_fft);
01256 IpplTimings::startTimer(ffttr2);
01257
01258
01259 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
01260 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
01261
01262 ComplexLField_t* ldf = (*l_i).second.get();
01263
01264
01265 ldf->Uncompress();
01266
01267
01268 localdata = ldf->getP();
01269
01270
01271 int nstrips = 1, length = ldf->size(0);
01272 for (d=1; d<Dim; ++d)
01273 nstrips *= ldf->size(d);
01274
01275 for (int istrip=0; istrip<nstrips; ++istrip) {
01276
01277 this->getEngine().callFFT(idim, direction, localdata);
01278
01279
01280 localdata += length;
01281 }
01282 }
01283
01284 IpplTimings::stopTimer(ffttr2);
01285 TAU_PROFILE_STOP(timer_fft);
01286 }
01287
01288
01289 if (temp != &g) {
01290 TAU_PROFILE_START(timer_swap);
01291
01292
01293 FFTDBG(tmsg << "Doing cleanup complex->complex transpose ");
01294 FFTDBG(tmsg << "into return ");
01295 FFTDBG(tmsg << "with layout = " << g.getLayout());
01296 FFTDBG(tmsg << endl);
01297 IpplTimings::startTimer(tr2);
01298 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
01299 IpplTimings::stopTimer(tr2);
01300
01301 if (this->compressTemps()) *temp = 0;
01302 TAU_PROFILE_STOP(timer_swap);
01303 }
01304
01305
01306 if (direction == +1) g = g * this->getNormFact();
01307
01308
01309 IpplTimings::stopTimer(tottimer);
01310 }
01311
01312
01313
01314
01315
01316 template <unsigned Dim, class T>
01317 void
01318 FFT<RCTransform,Dim,T>::transform(
01319 int direction,
01320 typename FFT<RCTransform,Dim,T>::ComplexField_t& f,
01321 typename FFT<RCTransform,Dim,T>::RealField_t& g,
01322 const bool& constInput)
01323 {
01324
01325 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01326 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01327 TAU_PROFILE(tauname, tautype, TAU_FFT);
01328 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01329 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01330
01331
01332 static IpplTimings::TimerRef tr1=IpplTimings::getTimer("RC-RRtranspose");
01333 static IpplTimings::TimerRef tr2=IpplTimings::getTimer("RC-CCtranspose");
01334 static IpplTimings::TimerRef ffttr1=IpplTimings::getTimer("RC-RC1Dfft");
01335 static IpplTimings::TimerRef ffttr2=IpplTimings::getTimer("RC-CC1Dfft");
01336 static IpplTimings::TimerRef tottimer=IpplTimings::getTimer("RC-total");
01337 FFTDBG(Inform tmsg("FFT-RC-reverse"));
01338
01339
01340 IpplTimings::startTimer(tottimer);
01341
01342
01343
01344
01345
01346 const Layout_t& in_layout = f.getLayout();
01347 const Domain_t& in_dom = in_layout.getDomain();
01348 const Layout_t& out_layout = g.getLayout();
01349 const Domain_t& out_dom = out_layout.getDomain();
01350 PAssert( checkDomain(complexDomain_m,in_dom) &&
01351 checkDomain(this->getDomain(),out_dom) );
01352
01353
01354 unsigned d;
01355 int idim;
01356 unsigned nTransformDims = this->numTransformDims();
01357
01358
01359
01360
01361 ComplexField_t* temp = &f;
01362
01363
01364 Complex_t* localdata;
01365
01366
01367 for (idim = nTransformDims-1; idim != 0; --idim) {
01368 TAU_PROFILE_START(timer_swap);
01369
01370
01371 bool skipTranspose = false;
01372
01373
01374 if (idim == nTransformDims-1 && !constInput) {
01375
01376 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
01377
01378
01379 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
01380 (in_dom[0].length() == first_dom[0].length()) &&
01381 (in_layout.getDistribution(0) == SERIAL) &&
01382 (f.getGC() == FFT<RCTransform,Dim,T>::nullGC) );
01383 }
01384
01385 if (!skipTranspose) {
01386
01387 FFTDBG(tmsg << "Doing complex->complex transpose into field ");
01388 FFTDBG(tmsg << "with layout = "<<tempFields_m[idim]->getLayout()<<endl);
01389 IpplTimings::startTimer(tr2);
01390 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
01391 (*temp)[temp->getLayout().getDomain()];
01392 IpplTimings::stopTimer(tr2);
01393
01394
01395 if (this->compressTemps() && temp != &f)
01396 *temp = 0;
01397 temp = tempFields_m[idim];
01398 }
01399 TAU_PROFILE_STOP(timer_swap);
01400
01401 FFTDBG(tmsg << "Doing complex->complex fft of other dimension .." << endl);
01402 TAU_PROFILE_START(timer_fft);
01403 IpplTimings::startTimer(ffttr2);
01404
01405
01406 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
01407 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
01408
01409
01410 ComplexLField_t* ldf = (*l_i).second.get();
01411
01412
01413 ldf->Uncompress();
01414
01415
01416 localdata = ldf->getP();
01417
01418
01419 int nstrips = 1, length = ldf->size(0);
01420 for (d=1; d<Dim; ++d)
01421 nstrips *= ldf->size(d);
01422
01423 for (int istrip=0; istrip<nstrips; ++istrip) {
01424
01425 this->getEngine().callFFT(idim, direction, localdata);
01426
01427
01428 localdata += length;
01429 }
01430 }
01431
01432 IpplTimings::stopTimer(ffttr2);
01433 TAU_PROFILE_STOP(timer_fft);
01434 }
01435
01436
01437 idim = 0;
01438
01439
01440 RealField_t* tempR;
01441 bool skipTemp = true;
01442
01443
01444 if (!(out_layout == *tempRLayout_m)) {
01445 skipTemp = false;
01446 } else {
01447 for (d=0; d<Dim; ++d)
01448 if (out_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
01449 skipTemp = false;
01450
01451 if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
01452 skipTemp = false;
01453
01454
01455 if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01456 skipTemp = false;
01457 }
01458
01459 if (skipTemp)
01460 tempR = &g;
01461 else
01462 tempR = tempRField_m;
01463
01464 skipTemp = true;
01465 if (nTransformDims == 1 && !constInput) {
01466
01467
01468
01469 if (!(in_layout == *tempLayouts_m[0])) {
01470 skipTemp = false;
01471 } else {
01472 for (d=0; d<Dim; ++d)
01473 if (in_layout.getDistribution(d) !=
01474 tempLayouts_m[0]->getDistribution(d))
01475 skipTemp = false;
01476
01477 if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
01478 skipTemp = false;
01479
01480
01481 if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01482 skipTemp = false;
01483 }
01484 } else {
01485 skipTemp = false;
01486 }
01487
01488 TAU_PROFILE_START(timer_swap);
01489 if (!skipTemp) {
01490
01491 FFTDBG(tmsg << "Doing final complex->complex transpose into field ");
01492 FFTDBG(tmsg << "with layout = "<<tempFields_m[0]->getLayout()<<endl);
01493 IpplTimings::startTimer(tr2);
01494 (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
01495 (*temp)[temp->getLayout().getDomain()];
01496 IpplTimings::stopTimer(tr2);
01497
01498
01499 if (this->compressTemps() && temp != &f)
01500 *temp = 0;
01501 temp = tempFields_m[0];
01502 }
01503 TAU_PROFILE_STOP(timer_swap);
01504
01505 FFTDBG(tmsg << "Doing complex->real fft of final dimension ..." << endl);
01506 TAU_PROFILE_START(timer_fft);
01507 IpplTimings::startTimer(ffttr1);
01508
01509
01510 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
01511 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01512 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
01513
01514 RealLField_t* rldf = (*rl_i).second.get();
01515 ComplexLField_t* cldf = (*cl_i).second.get();
01516
01517
01518 rldf->Uncompress();
01519 cldf->Uncompress();
01520
01521
01522 T* localreal = rldf->getP();
01523 Complex_t* localcomp = cldf->getP();
01524
01525
01526 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
01527 for (d=1; d<Dim; ++d)
01528 nstrips *= rldf->size(d);
01529
01530 for (int istrip=0; istrip<nstrips; ++istrip) {
01531
01532
01533 this->getEngine().callFFT(idim, -1, localcomp);
01534
01535
01536 for (int ilen=0; ilen<lengthreal; ilen+=2) {
01537 localreal[ilen] = real(localcomp[ilen/2]);
01538 localreal[ilen+1] = imag(localcomp[ilen/2]);
01539 }
01540
01541
01542 localreal += lengthreal;
01543 localcomp += lengthcomp;
01544 }
01545 }
01546
01547 IpplTimings::stopTimer(ffttr1);
01548 TAU_PROFILE_STOP(timer_fft);
01549
01550
01551 if (this->compressTemps() && temp != &f)
01552 *temp = 0;
01553
01554
01555 if (tempR != &g) {
01556 TAU_PROFILE_START(timer_swap);
01557
01558
01559 FFTDBG(tmsg << "Doing cleanup real->real transpose into return ");
01560 FFTDBG(tmsg << "with layout = " << g.getLayout() << endl);
01561 IpplTimings::startTimer(tr1);
01562 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
01563 IpplTimings::stopTimer(tr1);
01564
01565 if (this->compressTemps())
01566 *tempR = 0;
01567 TAU_PROFILE_STOP(timer_swap);
01568 }
01569
01570
01571 if (direction == +1) g = g * this->getNormFact();
01572
01573
01574 IpplTimings::stopTimer(tottimer);
01575 }
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589 template <class T>
01590 FFT<RCTransform,1U,T>::FFT(
01591 const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
01592 const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
01593 const bool transformTheseDims[1U], const bool& compressTemps)
01594 : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain,
01595 transformTheseDims, compressTemps),
01596 complexDomain_m(cdomain)
01597 {
01598
01599 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01600 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
01601 ", const bool [], const bool&)");
01602 TAU_PROFILE(tauname, tautype, TAU_FFT);
01603
01604 unsigned nTransformDims = 1U;
01605
01606 int length;
01607 length = rdomain[0].length();
01608
01609
01610 int transformType;
01611 transformType = FFTBase<1U,T>::rcFFT;
01612 T& normFact = this->getNormFact();
01613 normFact = 1.0 / length;
01614
01615
01616 this->getEngine().setup(nTransformDims, &transformType, &length);
01617
01618 setup();
01619 }
01620
01621
01622
01623
01624
01625
01626 template <class T>
01627 FFT<RCTransform,1U,T>::FFT(
01628 const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
01629 const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
01630 const bool& compressTemps)
01631 : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain, compressTemps),
01632 complexDomain_m(cdomain)
01633 {
01634
01635 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01636 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) + ", const bool&)");
01637 TAU_PROFILE(tauname, tautype, TAU_FFT);
01638
01639
01640 int length;
01641 length = rdomain[0].length();
01642
01643
01644 int transformType;
01645 transformType = FFTBase<1U,T>::rcFFT;
01646 T& normFact = this->getNormFact();
01647 normFact = 1.0 / length;
01648
01649
01650 this->getEngine().setup(1U, &transformType, &length);
01651
01652 setup();
01653 }
01654
01655
01656
01657
01658
01659
01660 template <class T>
01661 void
01662 FFT<RCTransform,1U,T>::setup(void) {
01663
01664
01665 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
01666 TAU_TYPE_STRING(tautype, "(void)");
01667 TAU_PROFILE(tauname, tautype, TAU_FFT);
01668
01669
01670 const Domain_t& domain = this->getDomain();
01671 bool match = true;
01672
01673 if ( complexDomain_m[0].length() !=
01674 (domain[0].length()/2 + 1) ) match = false;
01675 PInsist(match,
01676 "Domains provided for real and complex Fields are incompatible!");
01677
01678
01679 tempRLayout_m = new Layout_t(domain[0], PARALLEL, 1);
01680
01681 tempRField_m = new RealField_t(*tempRLayout_m);
01682
01683 if (!this->compressTemps()) tempRField_m->Uncompress();
01684
01685
01686 tempLayouts_m = new Layout_t(complexDomain_m[0], PARALLEL, 1);
01687
01688 tempFields_m = new ComplexField_t(*tempLayouts_m);
01689
01690 if (!this->compressTemps()) tempFields_m->Uncompress();
01691
01692 return;
01693 }
01694
01695
01696
01697
01698
01699 template <class T>
01700 FFT<RCTransform,1U,T>::~FFT(void) {
01701
01702
01703 TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
01704 TAU_TYPE_STRING(tautype, "(void)");
01705 TAU_PROFILE(tauname, tautype, TAU_FFT);
01706
01707
01708 delete tempFields_m;
01709 delete tempLayouts_m;
01710 delete tempRField_m;
01711 delete tempRLayout_m;
01712 }
01713
01714
01715
01716
01717
01718 template <class T>
01719 void
01720 FFT<RCTransform,1U,T>::transform(
01721 int direction,
01722 typename FFT<RCTransform,1U,T>::RealField_t& f,
01723 typename FFT<RCTransform,1U,T>::ComplexField_t& g,
01724 const bool& constInput)
01725 {
01726
01727 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01728 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01729 TAU_PROFILE(tauname, tautype, TAU_FFT);
01730 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01731 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01732
01733
01734
01735
01736
01737 const Layout_t& in_layout = f.getLayout();
01738 const Domain_t& in_dom = in_layout.getDomain();
01739 const Layout_t& out_layout = g.getLayout();
01740 const Domain_t& out_dom = out_layout.getDomain();
01741 PAssert( checkDomain(this->getDomain(),in_dom) &&
01742 checkDomain(complexDomain_m,out_dom) );
01743
01744
01745 RealField_t* tempR = tempRField_m;
01746 if (!constInput) {
01747
01748 bool skipTemp = true;
01749
01750 if ( !(in_layout == *tempRLayout_m) ) skipTemp = false;
01751 if ( in_layout.getDistribution(0) !=
01752 tempRLayout_m->getDistribution(0) ) skipTemp = false;
01753 if ( in_layout.numVnodes() != tempRLayout_m->numVnodes() )
01754 skipTemp = false;
01755
01756 if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
01757 if (skipTemp) tempR = &f;
01758 }
01759
01760 if (tempR != &f) {
01761 TAU_PROFILE_START(timer_swap);
01762
01763 (*tempR) = f;
01764 TAU_PROFILE_STOP(timer_swap);
01765 }
01766
01767
01768 ComplexField_t* temp = tempFields_m;
01769
01770 bool skipFinal = true;
01771
01772 if ( !(out_layout == *tempLayouts_m) ) skipFinal = false;
01773 if ( out_layout.getDistribution(0) !=
01774 tempLayouts_m->getDistribution(0) ) skipFinal = false;
01775 if ( out_layout.numVnodes() != tempLayouts_m->numVnodes() )
01776 skipFinal = false;
01777
01778 if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
01779 if (skipFinal) temp = &g;
01780
01781 TAU_PROFILE_START(timer_fft);
01782
01783 typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
01784 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01785 if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
01786
01787 RealLField_t* rldf = (*rl_i).second.get();
01788 ComplexLField_t* cldf = (*cl_i).second.get();
01789
01790 rldf->Uncompress();
01791 cldf->Uncompress();
01792
01793 T* localreal = rldf->getP();
01794 Complex_t* localcomp = cldf->getP();
01795
01796 int lengthreal = rldf->size(0);
01797
01798 for (int ilen=0; ilen<lengthreal; ilen+=2)
01799 localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
01800
01801
01802 this->getEngine().callFFT(0, +1, localcomp);
01803 }
01804 TAU_PROFILE_STOP(timer_fft);
01805
01806 TAU_PROFILE_START(timer_swap);
01807
01808 if (this->compressTemps() && tempR != &f) *tempR = 0;
01809 TAU_PROFILE_STOP(timer_swap);
01810
01811
01812 if (temp != &g) {
01813 TAU_PROFILE_START(timer_swap);
01814
01815 g = (*temp);
01816 if (this->compressTemps()) *temp = 0;
01817 TAU_PROFILE_STOP(timer_swap);
01818 }
01819
01820
01821 if (direction == +1) g = g * this->getNormFact();
01822
01823 return;
01824 }
01825
01826
01827
01828
01829
01830 template <class T>
01831 void
01832 FFT<RCTransform,1U,T>::transform(
01833 int direction,
01834 typename FFT<RCTransform,1U,T>::ComplexField_t& f,
01835 typename FFT<RCTransform,1U,T>::RealField_t& g,
01836 const bool& constInput)
01837 {
01838
01839 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01840 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01841 TAU_PROFILE(tauname, tautype, TAU_FFT);
01842 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01843 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01844
01845
01846
01847
01848
01849 const Layout_t& in_layout = f.getLayout();
01850 const Domain_t& in_dom = in_layout.getDomain();
01851 const Layout_t& out_layout = g.getLayout();
01852 const Domain_t& out_dom = out_layout.getDomain();
01853 PAssert( checkDomain(complexDomain_m,in_dom) &&
01854 checkDomain(this->getDomain(),out_dom) );
01855
01856
01857 ComplexField_t* temp = &f;
01858
01859
01860 RealField_t* tempR;
01861 bool skipFinal = true;
01862
01863 if ( !(out_layout == *tempRLayout_m) ) skipFinal = false;
01864 if ( out_layout.getDistribution(0) !=
01865 tempRLayout_m->getDistribution(0) ) skipFinal = false;
01866 if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
01867 skipFinal = false;
01868
01869 if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
01870 if (skipFinal)
01871 tempR = &g;
01872 else
01873 tempR = tempRField_m;
01874
01875 bool skipTemp = true;
01876 if (!constInput) {
01877
01878
01879
01880 if ( !(in_layout == *tempLayouts_m) ) skipTemp = false;
01881 if ( in_layout.getDistribution(0) !=
01882 tempLayouts_m->getDistribution(0) ) skipTemp = false;
01883 if ( in_layout.numVnodes() != tempLayouts_m->numVnodes() )
01884 skipTemp = false;
01885
01886 if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
01887 }
01888 else {
01889 skipTemp = false;
01890 }
01891
01892 TAU_PROFILE_START(timer_swap);
01893 if (!skipTemp) {
01894
01895 (*tempFields_m) = (*temp);
01896
01897 if (this->compressTemps() && temp != &f) *temp = 0;
01898 temp = tempFields_m;
01899 }
01900 TAU_PROFILE_STOP(timer_swap);
01901
01902 TAU_PROFILE_START(timer_fft);
01903
01904 typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
01905 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01906 if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
01907
01908 RealLField_t* rldf = (*rl_i).second.get();
01909 ComplexLField_t* cldf = (*cl_i).second.get();
01910
01911 rldf->Uncompress();
01912 cldf->Uncompress();
01913
01914 T* localreal = rldf->getP();
01915 Complex_t* localcomp = cldf->getP();
01916
01917 int lengthreal = rldf->size(0);
01918
01919
01920 this->getEngine().callFFT(0, -1, localcomp);
01921
01922 for (int ilen=0; ilen<lengthreal; ilen+=2) {
01923 localreal[ilen] = real(localcomp[ilen/2]);
01924 localreal[ilen+1] = imag(localcomp[ilen/2]);
01925 }
01926 }
01927 TAU_PROFILE_STOP(timer_fft);
01928
01929 TAU_PROFILE_START(timer_swap);
01930
01931 if (this->compressTemps() && temp != &f) *temp = 0;
01932 TAU_PROFILE_STOP(timer_swap);
01933
01934
01935 if (tempR != &g) {
01936 TAU_PROFILE_START(timer_swap);
01937
01938 g = (*tempR);
01939 if (this->compressTemps()) *tempR = 0;
01940 TAU_PROFILE_STOP(timer_swap);
01941 }
01942
01943
01944 if (direction == +1) g = g * this->getNormFact();
01945
01946 return;
01947 }
01948
01949
01950
01951
01952
01953
01954
01955
01956
01957
01958
01959
01960
01961
01962 template <unsigned Dim, class T>
01963 FFT<SineTransform,Dim,T>::FFT(
01964 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
01965 const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
01966 const bool transformTheseDims[Dim], const bool sineTransformDims[Dim],
01967 const bool& compressTemps)
01968 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
01969 transformTheseDims, compressTemps),
01970 complexDomain_m(&cdomain)
01971 {
01972
01973 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01974 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
01975 ", const bool [], const bool [], const bool&)");
01976 TAU_PROFILE(tauname, tautype, TAU_FFT);
01977
01978 unsigned d;
01979
01980 numSineTransforms_m = 0;
01981 for (d=0; d<Dim; ++d) {
01982 sineTransformDims_m[d] = sineTransformDims[d];
01983 if (sineTransformDims[d]) {
01984 PAssert(transformTheseDims[d]);
01985 ++numSineTransforms_m;
01986 }
01987 }
01988
01989
01990 unsigned nTransformDims = this->numTransformDims();
01991 int* lengths = new int[nTransformDims];
01992 for (d=0; d<nTransformDims; ++d)
01993 lengths[d] = rdomain[this->activeDimension(d)].length();
01994
01995
01996 int* transformTypes = new int[nTransformDims];
01997 T& normFact = this->getNormFact();
01998 normFact = 1.0;
01999 bool foundRC = false;
02000 for (d=0; d<nTransformDims; ++d) {
02001 if (sineTransformDims_m[this->activeDimension(d)]) {
02002 transformTypes[d] = FFTBase<Dim,T>::sineFFT;
02003 normFact /= (2.0 * (lengths[d] + 1));
02004 }
02005 else if (!foundRC) {
02006 transformTypes[d] = FFTBase<Dim,T>::rcFFT;
02007 normFact /= lengths[d];
02008 foundRC = true;
02009 }
02010 else {
02011 transformTypes[d] = FFTBase<Dim,T>::ccFFT;
02012 normFact /= lengths[d];
02013 }
02014 }
02015
02016
02017 this->getEngine().setup(nTransformDims, transformTypes, lengths);
02018 delete [] transformTypes;
02019 delete [] lengths;
02020
02021 setup();
02022 }
02023
02024
02025
02026
02027
02028
02029 template <unsigned Dim, class T>
02030 FFT<SineTransform,Dim,T>::FFT(
02031 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
02032 const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
02033 const bool sineTransformDims[Dim], const bool& compressTemps)
02034 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps),
02035 complexDomain_m(&cdomain)
02036 {
02037
02038 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02039 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
02040 ", const bool [], const bool&)");
02041 TAU_PROFILE(tauname, tautype, TAU_FFT);
02042
02043 unsigned d;
02044
02045 numSineTransforms_m = 0;
02046 for (d=0; d<Dim; ++d) {
02047 sineTransformDims_m[d] = sineTransformDims[d];
02048 if (sineTransformDims[d]) ++numSineTransforms_m;
02049 }
02050
02051
02052 int lengths[Dim];
02053 for (d=0; d<Dim; ++d)
02054 lengths[d] = rdomain[d].length();
02055
02056
02057 int transformTypes[Dim];
02058 T& normFact = this->getNormFact();
02059 normFact = 1.0;
02060 bool foundRC = false;
02061 for (d=0; d<Dim; ++d) {
02062 if (sineTransformDims_m[d]) {
02063 transformTypes[d] = FFTBase<Dim,T>::sineFFT;
02064 normFact /= (2.0 * (lengths[d] + 1));
02065 }
02066 else if (!foundRC) {
02067 transformTypes[d] = FFTBase<Dim,T>::rcFFT;
02068 normFact /= lengths[d];
02069 foundRC = true;
02070 }
02071 else {
02072 transformTypes[d] = FFTBase<Dim,T>::ccFFT;
02073 normFact /= lengths[d];
02074 }
02075 }
02076
02077
02078 this->getEngine().setup(Dim, transformTypes, lengths);
02079
02080 setup();
02081 }
02082
02083
02084
02085
02086
02087
02088
02089 template <unsigned Dim, class T>
02090 FFT<SineTransform,Dim,T>::FFT(
02091 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
02092 const bool sineTransformDims[Dim], const bool& compressTemps)
02093 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
02094 sineTransformDims, compressTemps)
02095 {
02096
02097 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02098 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool [], const bool&)");
02099 TAU_PROFILE(tauname, tautype, TAU_FFT);
02100
02101
02102 numSineTransforms_m = this->numTransformDims();
02103 unsigned d;
02104 for (d=0; d<Dim; ++d)
02105 sineTransformDims_m[d] = sineTransformDims[d];
02106
02107
02108 int* lengths = new int[numSineTransforms_m];
02109 for (d=0; d<numSineTransforms_m; ++d)
02110 lengths[d] = rdomain[this->activeDimension(d)].length();
02111
02112
02113 int* transformTypes = new int[numSineTransforms_m];
02114 T& normFact = this->getNormFact();
02115 normFact = 1.0;
02116 for (d=0; d<numSineTransforms_m; ++d) {
02117 transformTypes[d] = FFTBase<Dim,T>::sineFFT;
02118 normFact /= (2.0 * (lengths[d] + 1));
02119 }
02120
02121
02122 this->getEngine().setup(numSineTransforms_m, transformTypes, lengths);
02123 delete [] transformTypes;
02124 delete [] lengths;
02125
02126 setup();
02127 }
02128
02129
02130
02131
02132
02133
02134 template <unsigned Dim, class T>
02135 FFT<SineTransform,Dim,T>::FFT(
02136 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain, const bool& compressTemps)
02137 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps)
02138 {
02139
02140 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02141 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool&)");
02142 TAU_PROFILE(tauname, tautype, TAU_FFT);
02143
02144 unsigned d;
02145
02146 numSineTransforms_m = this->numTransformDims();
02147 for (d=0; d<Dim; ++d)
02148 sineTransformDims_m[d] = true;
02149
02150
02151 int lengths[Dim];
02152 for (d=0; d<Dim; ++d)
02153 lengths[d] = rdomain[d].length();
02154
02155
02156 int transformTypes[Dim];
02157 T& normFact = this->getNormFact();
02158 normFact = 1.0;
02159 for (d=0; d<Dim; ++d) {
02160 transformTypes[d] = FFTBase<Dim,T>::sineFFT;
02161 normFact /= (2.0 * (lengths[d] + 1));
02162 }
02163
02164
02165 this->getEngine().setup(Dim, transformTypes, lengths);
02166
02167 setup();
02168 }
02169
02170
02171
02172
02173
02174
02175 template <unsigned Dim, class T>
02176 void
02177 FFT<SineTransform,Dim,T>::setup(void) {
02178
02179
02180 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
02181 TAU_TYPE_STRING(tautype, "(void)");
02182 TAU_PROFILE(tauname, tautype, TAU_FFT);
02183
02184 unsigned d, dim, activeDim;
02185 int icount;
02186 Domain_t ndip;
02187 unsigned nTransformDims = this->numTransformDims();
02188 const Domain_t& domain = this->getDomain();
02189
02190
02191 e_dim_tag serialParallel[Dim];
02192
02193 serialParallel[0] = SERIAL;
02194
02195 for (d=1; d<Dim; ++d)
02196 serialParallel[d] = PARALLEL;
02197
02198
02199 if (nTransformDims > numSineTransforms_m) {
02200
02201 PAssert(complexDomain_m);
02202
02203 bool match = false;
02204 d=0;
02205 while (d<Dim && !match) {
02206 if (this->transformDim(d) && !sineTransformDims_m[d]) {
02207 activeDim = this->activeDimension(d);
02208 match = true;
02209 }
02210 ++d;
02211 }
02212 PAssert(match);
02213
02214 for (d=0; d<Dim; ++d) {
02215 if (d == activeDim) {
02216
02217 if ( (*complexDomain_m)[d].length() !=
02218 (domain[d].length()/2 + 1) ) match = false;
02219 }
02220 else {
02221
02222 if ( (*complexDomain_m)[d].length() !=
02223 domain[d].length() ) match = false;
02224 }
02225 }
02226 PInsist(match,
02227 "Domains provided for real and complex Fields are incompatible!");
02228
02229
02230
02231 tempRLayouts_m = new Layout_t*[numSineTransforms_m+1];
02232 tempRFields_m = new RealField_t*[numSineTransforms_m+1];
02233
02234 icount=0;
02235 for (dim=0; dim<numSineTransforms_m; ++dim, ++icount) {
02236
02237 while (!sineTransformDims_m[icount]) ++icount;
02238 PAssert(icount<Dim);
02239
02240 ndip[0] = domain[icount];
02241 for (d=1; d<Dim; ++d) {
02242 int nextDim = icount + d;
02243 if (nextDim >= Dim) nextDim -= Dim;
02244 ndip[d] = domain[nextDim];
02245 }
02246
02247 tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02248
02249 tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
02250
02251 if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
02252 }
02253
02254
02255 ndip[0] = domain[activeDim];
02256 for (d=1; d<Dim; ++d) {
02257 int nextDim = activeDim + d;
02258 if (nextDim >= Dim) nextDim -= Dim;
02259 ndip[d] = domain[nextDim];
02260 }
02261
02262 tempRLayouts_m[numSineTransforms_m] = new Layout_t(ndip, serialParallel,
02263 this->transVnodes());
02264
02265 tempRFields_m[numSineTransforms_m] =
02266 new RealField_t(*tempRLayouts_m[numSineTransforms_m]);
02267
02268 if (!this->compressTemps()) (*tempRFields_m[numSineTransforms_m]).Uncompress();
02269
02270
02271 int numComplex = nTransformDims - numSineTransforms_m;
02272
02273 tempLayouts_m = new Layout_t*[numComplex];
02274 tempFields_m = new ComplexField_t*[numComplex];
02275 icount=0;
02276 for (dim=0; dim<numComplex; ++dim, ++icount) {
02277
02278 while (!this->transformDim(icount) || sineTransformDims_m[icount]) ++icount;
02279 PAssert(icount<Dim);
02280
02281 ndip[0] = (*complexDomain_m)[icount];
02282 for (d=1; d<Dim; ++d) {
02283 int nextDim = icount + d;
02284 if (nextDim >= Dim) nextDim -= Dim;
02285 ndip[d] = (*complexDomain_m)[nextDim];
02286 }
02287
02288 tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02289
02290 tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
02291
02292 if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
02293 }
02294
02295 }
02296 else {
02297
02298
02299
02300 tempRLayouts_m = new Layout_t*[numSineTransforms_m];
02301 tempRFields_m = new RealField_t*[numSineTransforms_m];
02302
02303 for (dim=0; dim<numSineTransforms_m; ++dim) {
02304
02305 activeDim = this->activeDimension(dim);
02306
02307 ndip[0] = domain[activeDim];
02308 for (d=1; d<Dim; ++d) {
02309 int nextDim = activeDim + d;
02310 if (nextDim >= Dim) nextDim -= Dim;
02311 ndip[d] = domain[nextDim];
02312 }
02313
02314 tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02315
02316 tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
02317
02318 if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
02319 }
02320
02321 }
02322
02323 return;
02324 }
02325
02326
02327
02328
02329
02330 template <unsigned Dim, class T>
02331 FFT<SineTransform,Dim,T>::~FFT(void) {
02332
02333
02334 TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
02335 TAU_TYPE_STRING(tautype, "(void)");
02336 TAU_PROFILE(tauname, tautype, TAU_FFT);
02337
02338
02339 unsigned d;
02340 unsigned nTransformDims = this->numTransformDims();
02341 if (nTransformDims > numSineTransforms_m) {
02342 for (d=0; d<numSineTransforms_m+1; ++d) {
02343 delete tempRFields_m[d];
02344 delete tempRLayouts_m[d];
02345 }
02346 int numComplex = nTransformDims - numSineTransforms_m;
02347 for (d=0; d<numComplex; ++d) {
02348 delete tempFields_m[d];
02349 delete tempLayouts_m[d];
02350 }
02351 delete [] tempFields_m;
02352 delete [] tempLayouts_m;
02353 }
02354 else {
02355 for (d=0; d<numSineTransforms_m; ++d) {
02356 delete tempRFields_m[d];
02357 delete tempRLayouts_m[d];
02358 }
02359 }
02360 delete [] tempRFields_m;
02361 delete [] tempRLayouts_m;
02362 }
02363
02364
02365
02366
02367
02368 template <unsigned Dim, class T>
02369 void
02370 FFT<SineTransform,Dim,T>::transform(
02371 int direction,
02372 FFT<SineTransform,Dim,T>::RealField_t& f,
02373 FFT<SineTransform,Dim,T>::ComplexField_t& g,
02374 const bool& constInput)
02375 {
02376
02377 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02378 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02379 TAU_PROFILE(tauname, tautype, TAU_FFT);
02380 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02381 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02382
02383
02384
02385
02386
02387 const Layout_t& in_layout = f.getLayout();
02388 const Domain_t& in_dom = in_layout.getDomain();
02389 const Layout_t& out_layout = g.getLayout();
02390 const Domain_t& out_dom = out_layout.getDomain();
02391 PAssert( checkDomain(this->getDomain(),in_dom) &&
02392 checkDomain(*complexDomain_m,out_dom) );
02393
02394
02395 unsigned d;
02396 int icount, activeDim;
02397 int idim;
02398 unsigned nTransformDims = this->numTransformDims();
02399
02400 PInsist(nTransformDims>numSineTransforms_m,
02401 "Wrong output Field type for real-to-real transform!!");
02402
02403
02404
02405
02406 RealField_t* tempR = &f;
02407
02408 T* localdataR;
02409
02410
02411 icount = 0;
02412 activeDim = 0;
02413 for (idim = 0; idim != numSineTransforms_m; ++idim, ++icount, ++activeDim) {
02414 TAU_PROFILE_START(timer_swap);
02415
02416 while (!sineTransformDims_m[icount]) {
02417 if (this->transformDim(icount)) ++activeDim;
02418 ++icount;
02419 }
02420 PAssert(activeDim<Dim);
02421
02422
02423 bool skipTranspose = false;
02424
02425
02426 if (idim == 0 && !constInput) {
02427
02428 const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
02429
02430
02431 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02432 (in_dom[0].length() == first_dom[0].length()) &&
02433 (in_layout.getDistribution(0) == SERIAL) &&
02434 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02435 }
02436
02437 if (!skipTranspose) {
02438
02439 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
02440 (*tempR)[tempR->getLayout().getDomain()];
02441
02442
02443 if (this->compressTemps() && tempR != &f) *tempR = 0;
02444 tempR = tempRFields_m[idim];
02445 }
02446 TAU_PROFILE_STOP(timer_swap);
02447
02448 TAU_PROFILE_START(timer_fft);
02449
02450 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
02451 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
02452
02453
02454 RealLField_t* ldf = (*l_i).second.get();
02455
02456 ldf->Uncompress();
02457
02458 localdataR = ldf->getP();
02459
02460
02461 int nstrips = 1, length = ldf->size(0);
02462 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02463 for (int istrip=0; istrip<nstrips; ++istrip) {
02464
02465 this->getEngine().callFFT(activeDim, direction, localdataR);
02466
02467 localdataR += length;
02468 }
02469 }
02470 TAU_PROFILE_STOP(timer_fft);
02471 }
02472
02473
02474
02475
02476 icount = 0;
02477 activeDim = 0;
02478 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02479 if (sineTransformDims_m[icount]) ++activeDim;
02480 ++icount;
02481 }
02482 PAssert(activeDim<Dim);
02483
02484 TAU_PROFILE_START(timer_swap);
02485
02486 int last = numSineTransforms_m;
02487 (*tempRFields_m[last])[tempRLayouts_m[last]->getDomain()] =
02488 (*tempR)[tempR->getLayout().getDomain()];
02489
02490
02491 if (this->compressTemps() && tempR != &f) *tempR = 0;
02492 tempR = tempRFields_m[last];
02493 TAU_PROFILE_STOP(timer_swap);
02494
02495
02496 ComplexField_t* temp = tempFields_m[0];
02497
02498 int numComplex = nTransformDims-numSineTransforms_m;
02499 if (numComplex == 1) {
02500 bool skipTemp = true;
02501
02502 if ( !(out_layout == *tempLayouts_m[0]) ) skipTemp = false;
02503 for (d=0; d<Dim; ++d) {
02504 if ( out_layout.getDistribution(d) !=
02505 tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
02506 }
02507 if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
02508 skipTemp = false;
02509
02510 if (!(g.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
02511 if (skipTemp) temp = &g;
02512 }
02513
02514
02515 TAU_PROFILE_START(timer_fft);
02516
02517 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
02518 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
02519 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
02520
02521 RealLField_t* rldf = (*rl_i).second.get();
02522 ComplexLField_t* cldf = (*cl_i).second.get();
02523
02524 rldf->Uncompress();
02525 cldf->Uncompress();
02526
02527 T* localreal = rldf->getP();
02528 Complex_t* localcomp = cldf->getP();
02529
02530 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
02531
02532 for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
02533 for (int istrip=0; istrip<nstrips; ++istrip) {
02534
02535 for (int ilen=0; ilen<lengthreal; ilen+=2)
02536 localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
02537
02538
02539 this->getEngine().callFFT(activeDim, +1, localcomp);
02540
02541 localreal += lengthreal;
02542 localcomp += lengthcomp;
02543 }
02544 }
02545 TAU_PROFILE_STOP(timer_fft);
02546
02547
02548
02549
02550 Complex_t* localdata;
02551
02552
02553 ++icount;
02554 ++activeDim;
02555 for (idim = 1; idim != numComplex; ++idim, ++icount, ++activeDim) {
02556 TAU_PROFILE_START(timer_swap);
02557
02558 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02559 if (sineTransformDims_m[icount]) ++activeDim;
02560 ++icount;
02561 }
02562 PAssert(activeDim<Dim);
02563
02564
02565 bool skipTranspose = false;
02566
02567
02568 if (idim == numComplex-1) {
02569
02570 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
02571
02572
02573 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02574 (out_dom[0].length() == last_dom[0].length()) &&
02575 (out_layout.getDistribution(0) == SERIAL) &&
02576 (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02577 }
02578
02579 if (!skipTranspose) {
02580
02581 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
02582 (*temp)[temp->getLayout().getDomain()];
02583
02584
02585 if (this->compressTemps()) *temp = 0;
02586 temp = tempFields_m[idim];
02587 }
02588 else if (idim == numComplex-1) {
02589
02590
02591
02592
02593 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
02594
02595
02596 if (this->compressTemps()) *temp = 0;
02597 temp = &g;
02598 }
02599 TAU_PROFILE_STOP(timer_swap);
02600
02601 TAU_PROFILE_START(timer_fft);
02602
02603 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
02604 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
02605
02606
02607 ComplexLField_t* ldf = (*l_i).second.get();
02608
02609 ldf->Uncompress();
02610
02611 localdata = ldf->getP();
02612
02613
02614 int nstrips = 1, length = ldf->size(0);
02615 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02616 for (int istrip=0; istrip<nstrips; ++istrip) {
02617
02618 this->getEngine().callFFT(activeDim, direction, localdata);
02619
02620 localdata += length;
02621 }
02622 }
02623 TAU_PROFILE_STOP(timer_fft);
02624 }
02625
02626
02627 if (temp != &g) {
02628 TAU_PROFILE_START(timer_swap);
02629
02630 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
02631 if (this->compressTemps()) *temp = 0;
02632 TAU_PROFILE_STOP(timer_swap);
02633 }
02634
02635
02636 if (direction == +1) g = g * this->getNormFact();
02637
02638 return;
02639 }
02640
02641
02642
02643
02644
02645 template <unsigned Dim, class T>
02646 void
02647 FFT<SineTransform,Dim,T>::transform(
02648 int direction,
02649 FFT<SineTransform,Dim,T>::ComplexField_t& f,
02650 FFT<SineTransform,Dim,T>::RealField_t& g,
02651 const bool& constInput)
02652 {
02653
02654 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02655 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02656 TAU_PROFILE(tauname, tautype, TAU_FFT);
02657 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02658 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02659
02660
02661
02662
02663
02664 const Layout_t& in_layout = f.getLayout();
02665 const Domain_t& in_dom = in_layout.getDomain();
02666 const Layout_t& out_layout = g.getLayout();
02667 const Domain_t& out_dom = out_layout.getDomain();
02668 PAssert( checkDomain(*complexDomain_m,in_dom) &&
02669 checkDomain(this->getDomain(),out_dom) );
02670
02671
02672 unsigned d;
02673 int icount, activeDim;
02674 int idim;
02675 unsigned nTransformDims = this->numTransformDims();
02676
02677
02678
02679
02680 ComplexField_t* temp = &f;
02681
02682 Complex_t* localdata;
02683
02684
02685 int numComplex = nTransformDims - numSineTransforms_m;
02686 icount = Dim-1;
02687 activeDim = nTransformDims-1;
02688 for (idim = numComplex-1; idim != 0; --idim, --icount, --activeDim) {
02689 TAU_PROFILE_START(timer_swap);
02690
02691 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02692 if (sineTransformDims_m[icount]) --activeDim;
02693 --icount;
02694 }
02695 PAssert(activeDim>=0);
02696
02697
02698 bool skipTranspose = false;
02699
02700
02701 if (idim == numComplex-1 && !constInput) {
02702
02703 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
02704
02705
02706 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02707 (in_dom[0].length() == first_dom[0].length()) &&
02708 (in_layout.getDistribution(0) == SERIAL) &&
02709 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02710 }
02711
02712 if (!skipTranspose) {
02713
02714 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
02715 (*temp)[temp->getLayout().getDomain()];
02716
02717
02718 if (this->compressTemps() && temp != &f) *temp = 0;
02719 temp = tempFields_m[idim];
02720 }
02721 TAU_PROFILE_STOP(timer_swap);
02722
02723 TAU_PROFILE_START(timer_fft);
02724
02725 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
02726 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
02727
02728
02729 ComplexLField_t* ldf = (*l_i).second.get();
02730
02731 ldf->Uncompress();
02732
02733 localdata = ldf->getP();
02734
02735
02736 int nstrips = 1, length = ldf->size(0);
02737 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02738 for (int istrip=0; istrip<nstrips; ++istrip) {
02739
02740 this->getEngine().callFFT(activeDim, direction, localdata);
02741
02742 localdata += length;
02743 }
02744 }
02745 TAU_PROFILE_STOP(timer_fft);
02746 }
02747
02748
02749
02750
02751 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02752 if (sineTransformDims_m[icount]) --activeDim;
02753 --icount;
02754 }
02755 PAssert(activeDim>=0);
02756
02757
02758 RealField_t* tempR = tempRFields_m[numSineTransforms_m];
02759
02760 bool skipTemp = true;
02761 if (numComplex == 1 && !constInput) {
02762
02763
02764
02765 if ( !(in_layout == *tempLayouts_m[0]) ) skipTemp = false;
02766 for (d=0; d<Dim; ++d) {
02767 if ( in_layout.getDistribution(d) !=
02768 tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
02769 }
02770 if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
02771 skipTemp = false;
02772
02773 if (!(f.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
02774 }
02775 else {
02776 skipTemp = false;
02777 }
02778
02779 if (!skipTemp) {
02780 TAU_PROFILE_START(timer_swap);
02781
02782 (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
02783 (*temp)[temp->getLayout().getDomain()];
02784
02785 if (this->compressTemps() && temp != &f) *temp = 0;
02786 temp = tempFields_m[0];
02787 TAU_PROFILE_STOP(timer_swap);
02788 }
02789
02790 TAU_PROFILE_START(timer_fft);
02791
02792 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
02793 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
02794 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
02795
02796 RealLField_t* rldf = (*rl_i).second.get();
02797 ComplexLField_t* cldf = (*cl_i).second.get();
02798
02799 rldf->Uncompress();
02800 cldf->Uncompress();
02801
02802 T* localreal = rldf->getP();
02803 Complex_t* localcomp = cldf->getP();
02804
02805 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
02806
02807 for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
02808 for (int istrip=0; istrip<nstrips; ++istrip) {
02809
02810
02811 this->getEngine().callFFT(activeDim, -1, localcomp);
02812
02813 for (int ilen=0; ilen<lengthreal; ilen+=2) {
02814 localreal[ilen] = real(localcomp[ilen/2]);
02815 localreal[ilen+1] = imag(localcomp[ilen/2]);
02816 }
02817
02818 localreal += lengthreal;
02819 localcomp += lengthcomp;
02820 }
02821 }
02822 TAU_PROFILE_STOP(timer_fft);
02823
02824 TAU_PROFILE_START(timer_swap);
02825
02826 if (this->compressTemps() && temp != &f) *temp = 0;
02827 TAU_PROFILE_STOP(timer_swap);
02828
02829
02830
02831
02832 T* localdataR;
02833
02834
02835 icount = Dim-1;
02836 activeDim = nTransformDims - 1;
02837 for (idim = numSineTransforms_m-1; idim != -1;
02838 --idim, --icount, --activeDim) {
02839 TAU_PROFILE_START(timer_swap);
02840
02841 while (!sineTransformDims_m[icount]) {
02842 if (this->transformDim(icount)) --activeDim;
02843 --icount;
02844 }
02845 PAssert(activeDim>=0);
02846
02847
02848 bool skipTranspose = false;
02849
02850
02851 if (idim == 0) {
02852
02853 const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
02854
02855
02856 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02857 (out_dom[0].length() == last_dom[0].length()) &&
02858 (out_layout.getDistribution(0) == SERIAL) &&
02859 (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02860 }
02861
02862 if (!skipTranspose) {
02863
02864 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
02865 (*tempR)[tempR->getLayout().getDomain()];
02866
02867
02868 if (this->compressTemps()) *tempR = 0;
02869 tempR = tempRFields_m[idim];
02870 }
02871 else if (idim == 0) {
02872
02873
02874
02875
02876 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
02877
02878
02879 if (this->compressTemps()) *tempR = 0;
02880 tempR = &g;
02881 }
02882 TAU_PROFILE_STOP(timer_swap);
02883
02884 TAU_PROFILE_START(timer_fft);
02885
02886 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
02887 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
02888
02889
02890 RealLField_t* ldf = (*l_i).second.get();
02891
02892 ldf->Uncompress();
02893
02894 localdataR = ldf->getP();
02895
02896
02897 int nstrips = 1, length = ldf->size(0);
02898 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02899 for (int istrip=0; istrip<nstrips; ++istrip) {
02900
02901 this->getEngine().callFFT(activeDim, direction, localdataR);
02902
02903 localdataR += length;
02904 }
02905 }
02906 TAU_PROFILE_STOP(timer_fft);
02907 }
02908
02909
02910 if (tempR != &g) {
02911 TAU_PROFILE_START(timer_swap);
02912
02913 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
02914 if (this->compressTemps()) *tempR = 0;
02915 TAU_PROFILE_STOP(timer_swap);
02916 }
02917
02918
02919 if (direction == +1) g = g * this->getNormFact();
02920
02921 return;
02922 }
02923
02924
02925
02926
02927
02928 template <unsigned Dim, class T>
02929 void
02930 FFT<SineTransform,Dim,T>::transform(
02931 int direction,
02932 FFT<SineTransform,Dim,T>::RealField_t& f,
02933 FFT<SineTransform,Dim,T>::RealField_t& g,
02934 const bool& constInput)
02935 {
02936
02937 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02938 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02939 TAU_PROFILE(tauname, tautype, TAU_FFT);
02940 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02941 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02942
02943
02944
02945
02946
02947 const Layout_t& in_layout = f.getLayout();
02948 const Domain_t& in_dom = in_layout.getDomain();
02949 const Layout_t& out_layout = g.getLayout();
02950 const Domain_t& out_dom = out_layout.getDomain();
02951 PAssert( checkDomain(this->getDomain(),in_dom) &&
02952 checkDomain(this->getDomain(),out_dom) );
02953
02954
02955 unsigned d;
02956 int idim;
02957 int begdim, enddim;
02958 unsigned nTransformDims = this->numTransformDims();
02959
02960 PInsist(nTransformDims==numSineTransforms_m,
02961 "Wrong output Field type for real-to-complex transform!!");
02962
02963
02964
02965
02966 RealField_t* tempR = &f;
02967
02968 T* localdataR;
02969
02970
02971 begdim = (direction == +1) ? 0 : nTransformDims-1;
02972 enddim = (direction == +1) ? nTransformDims : -1;
02973 for (idim = begdim; idim != enddim; idim+=direction) {
02974 TAU_PROFILE_START(timer_swap);
02975
02976
02977 bool skipTranspose = false;
02978
02979
02980 if (idim == begdim && !constInput) {
02981
02982 const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
02983
02984
02985 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02986 (in_dom[0].length() == first_dom[0].length()) &&
02987 (in_layout.getDistribution(0) == SERIAL) &&
02988 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02989 }
02990
02991
02992
02993 if (idim == enddim-direction) {
02994
02995 const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
02996
02997
02998 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02999 (out_dom[0].length() == last_dom[0].length()) &&
03000 (out_layout.getDistribution(0) == SERIAL) &&
03001 (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03002 }
03003
03004 if (!skipTranspose) {
03005
03006 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
03007 (*tempR)[tempR->getLayout().getDomain()];
03008
03009
03010 if (this->compressTemps() && tempR != &f) *tempR = 0;
03011 tempR = tempRFields_m[idim];
03012 }
03013 else if (idim == enddim-direction && tempR != &g) {
03014
03015
03016
03017
03018 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
03019
03020
03021 if (this->compressTemps() && tempR != &f) *tempR = 0;
03022 tempR = &g;
03023 }
03024 TAU_PROFILE_STOP(timer_swap);
03025
03026 TAU_PROFILE_START(timer_fft);
03027
03028 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
03029 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
03030
03031
03032 RealLField_t* ldf = (*l_i).second.get();
03033
03034 ldf->Uncompress();
03035
03036 localdataR = ldf->getP();
03037
03038
03039 int nstrips = 1, length = ldf->size(0);
03040 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
03041 for (int istrip=0; istrip<nstrips; ++istrip) {
03042
03043 this->getEngine().callFFT(idim, direction, localdataR);
03044
03045 localdataR += length;
03046 }
03047 }
03048 TAU_PROFILE_STOP(timer_fft);
03049 }
03050
03051
03052 if (tempR != &g) {
03053 TAU_PROFILE_START(timer_swap);
03054
03055 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
03056 if (this->compressTemps() && tempR != &f) *tempR = 0;
03057 TAU_PROFILE_STOP(timer_swap);
03058 }
03059
03060
03061 if (direction == +1) g = g * this->getNormFact();
03062
03063 return;
03064 }
03065
03066
03067
03068
03069
03070 template <unsigned Dim, class T>
03071 void
03072 FFT<SineTransform,Dim,T>::transform(
03073 int direction,
03074 FFT<SineTransform,Dim,T>::RealField_t& f)
03075 {
03076
03077 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03078 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
03079 TAU_PROFILE(tauname, tautype, TAU_FFT);
03080 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03081 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03082
03083
03084
03085
03086
03087 const Layout_t& in_layout = f.getLayout();
03088 const Domain_t& in_dom = in_layout.getDomain();
03089 PAssert(checkDomain(this->getDomain(),in_dom));
03090
03091
03092 unsigned d;
03093 int idim;
03094 int begdim, enddim;
03095 unsigned nTransformDims = this->numTransformDims();
03096
03097 PInsist(nTransformDims==numSineTransforms_m,
03098 "Cannot perform real-to-complex transform in-place!!");
03099
03100
03101
03102
03103 RealField_t* tempR = &f;
03104
03105 T* localdataR;
03106
03107
03108 begdim = (direction == +1) ? 0 : nTransformDims-1;
03109 enddim = (direction == +1) ? nTransformDims : -1;
03110 for (idim = begdim; idim != enddim; idim+=direction) {
03111 TAU_PROFILE_START(timer_swap);
03112
03113
03114 bool skipTranspose = false;
03115
03116
03117 if (idim == begdim) {
03118
03119 const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
03120
03121
03122 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
03123 (in_dom[0].length() == first_dom[0].length()) &&
03124 (in_layout.getDistribution(0) == SERIAL) &&
03125 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03126 }
03127
03128
03129
03130 if (idim == enddim-direction) {
03131
03132 const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
03133
03134
03135 skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
03136 (in_dom[0].length() == last_dom[0].length()) &&
03137 (in_layout.getDistribution(0) == SERIAL) &&
03138 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03139 }
03140
03141 if (!skipTranspose) {
03142
03143 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
03144 (*tempR)[tempR->getLayout().getDomain()];
03145
03146
03147 if (this->compressTemps() && tempR != &f) *tempR = 0;
03148 tempR = tempRFields_m[idim];
03149 }
03150 else if (idim == enddim-direction && tempR != &f) {
03151
03152
03153
03154
03155 f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
03156
03157
03158 if (this->compressTemps() && tempR != &f) *tempR = 0;
03159 tempR = &f;
03160 }
03161 TAU_PROFILE_STOP(timer_swap);
03162
03163 TAU_PROFILE_START(timer_fft);
03164
03165 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
03166 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
03167
03168
03169 RealLField_t* ldf = (*l_i).second.get();
03170
03171 ldf->Uncompress();
03172
03173 localdataR = ldf->getP();
03174
03175
03176 int nstrips = 1, length = ldf->size(0);
03177 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
03178 for (int istrip=0; istrip<nstrips; ++istrip) {
03179
03180 this->getEngine().callFFT(idim, direction, localdataR);
03181
03182 localdataR += length;
03183 }
03184 }
03185 TAU_PROFILE_STOP(timer_fft);
03186 }
03187
03188
03189 if (tempR != &f) {
03190 TAU_PROFILE_START(timer_swap);
03191
03192 f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
03193 if (this->compressTemps()) *tempR = 0;
03194 TAU_PROFILE_STOP(timer_swap);
03195 }
03196
03197
03198 if (direction == +1) f = f * this->getNormFact();
03199
03200 return;
03201 }
03202
03203
03204
03205
03206
03207
03208
03209
03210
03211
03212
03213
03214 template <class T>
03215 FFT<SineTransform,1U,T>::FFT(
03216 const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
03217 const bool sineTransformDims[1U], const bool& compressTemps)
03218 : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain,
03219 sineTransformDims, compressTemps)
03220 {
03221
03222 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
03223 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool [], const bool&)");
03224 TAU_PROFILE(tauname, tautype, TAU_FFT);
03225
03226
03227 int length;
03228 length = rdomain[0].length();
03229
03230
03231 int transformType;
03232 transformType = FFTBase<1U,T>::sineFFT;
03233 T& normFact = this->getNormFact();
03234 normFact = 1.0 / (2.0 * (length + 1));
03235
03236
03237 this->getEngine().setup(1U, &transformType, &length);
03238
03239 setup();
03240 }
03241
03242
03243
03244
03245
03246
03247 template <class T>
03248 FFT<SineTransform,1U,T>::FFT(
03249 const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
03250 const bool& compressTemps)
03251 : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain, compressTemps)
03252 {
03253
03254 TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
03255 TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool&)");
03256 TAU_PROFILE(tauname, tautype, TAU_FFT);
03257
03258
03259 int length;
03260 length = rdomain[0].length();
03261
03262
03263 int transformType;
03264 transformType = FFTBase<1U,T>::sineFFT;
03265 T& normFact = this->getNormFact();
03266 normFact = 1.0 / (2.0 * (length + 1));
03267
03268
03269 this->getEngine().setup(1U, &transformType, &length);
03270
03271 setup();
03272 }
03273
03274
03275
03276
03277
03278
03279 template <class T>
03280 void
03281 FFT<SineTransform,1U,T>::setup(void) {
03282
03283
03284 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
03285 TAU_TYPE_STRING(tautype, "(void)");
03286 TAU_PROFILE(tauname, tautype, TAU_FFT);
03287
03288 const Domain_t& domain = this->getDomain();
03289
03290
03291 tempRLayouts_m = new Layout_t(domain[0], PARALLEL, 1);
03292
03293 tempRFields_m = new RealField_t(*tempRLayouts_m);
03294
03295 if (!this->compressTemps()) tempRFields_m->Uncompress();
03296
03297 return;
03298 }
03299
03300
03301
03302
03303
03304 template <class T>
03305 FFT<SineTransform,1U,T>::~FFT(void) {
03306
03307
03308 TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
03309 TAU_TYPE_STRING(tautype, "(void)");
03310 TAU_PROFILE(tauname, tautype, TAU_FFT);
03311
03312
03313 delete tempRFields_m;
03314 delete tempRLayouts_m;
03315 }
03316
03317
03318
03319
03320
03321 template <class T>
03322 void
03323 FFT<SineTransform,1U,T>::transform(
03324 int direction,
03325 FFT<SineTransform,1U,T>::RealField_t& f,
03326 FFT<SineTransform,1U,T>::RealField_t& g,
03327 const bool& constInput)
03328 {
03329
03330 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03331 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
03332 TAU_PROFILE(tauname, tautype, TAU_FFT);
03333 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03334 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03335
03336
03337
03338
03339
03340 const Layout_t& in_layout = f.getLayout();
03341 const Domain_t& in_dom = in_layout.getDomain();
03342 const Layout_t& out_layout = g.getLayout();
03343 const Domain_t& out_dom = out_layout.getDomain();
03344 PAssert( checkDomain(this->getDomain(),in_dom) &&
03345 checkDomain(this->getDomain(),out_dom) );
03346
03347
03348 RealField_t* tempR = &f;
03349
03350 T* localdataR;
03351
03352 TAU_PROFILE_START(timer_swap);
03353
03354
03355
03356 const Domain_t& temp_dom = tempRLayouts_m->getDomain();
03357 bool skipTranspose = false;
03358
03359
03360 if (!constInput) {
03361
03362
03363 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
03364 (in_dom[0].length() == temp_dom[0].length()) &&
03365 (in_layout.numVnodes() == 1) &&
03366 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03367 }
03368
03369 bool skipFinal = false;
03370
03371
03372
03373
03374
03375 skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
03376 (out_dom[0].length() == temp_dom[0].length()) &&
03377 (out_layout.numVnodes() == 1) &&
03378 (g.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03379
03380 if (!skipTranspose) {
03381
03382 (*tempRFields_m) = (*tempR);
03383 tempR = tempRFields_m;
03384 }
03385 if (skipFinal) {
03386
03387
03388
03389
03390 g = (*tempR);
03391
03392
03393 if (this->compressTemps() && tempR != &f) *tempR = 0;
03394 tempR = &g;
03395 }
03396 TAU_PROFILE_STOP(timer_swap);
03397
03398 TAU_PROFILE_START(timer_fft);
03399
03400 typename RealField_t::const_iterator_if l_i = tempR->begin_if();
03401 if (l_i != tempR->end_if()) {
03402
03403
03404 RealLField_t* ldf = (*l_i).second.get();
03405
03406 ldf->Uncompress();
03407
03408 localdataR = ldf->getP();
03409
03410
03411 this->getEngine().callFFT(0, direction, localdataR);
03412 }
03413 TAU_PROFILE_STOP(timer_fft);
03414
03415
03416 if (tempR != &g) {
03417 TAU_PROFILE_START(timer_swap);
03418
03419 g = (*tempR);
03420 if (this->compressTemps() && tempR != &f) *tempR = 0;
03421 TAU_PROFILE_STOP(timer_swap);
03422 }
03423
03424
03425 if (direction == +1) g = g * this->getNormFact();
03426
03427 return;
03428 }
03429
03430
03431
03432
03433
03434 template <class T>
03435 void
03436 FFT<SineTransform,1U,T>::transform(
03437 int direction,
03438 FFT<SineTransform,1U,T>::RealField_t& f)
03439 {
03440
03441 TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03442 TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
03443 TAU_PROFILE(tauname, tautype, TAU_FFT);
03444 TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03445 TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03446
03447
03448
03449
03450
03451 const Layout_t& in_layout = f.getLayout();
03452 const Domain_t& in_dom = in_layout.getDomain();
03453 PAssert(checkDomain(this->getDomain(),in_dom));
03454
03455
03456 RealField_t* tempR = &f;
03457
03458 T* localdataR;
03459
03460 TAU_PROFILE_START(timer_swap);
03461
03462
03463
03464 const Domain_t& temp_dom = tempRLayouts_m->getDomain();
03465 bool skipTranspose = false;
03466
03467
03468
03469
03470
03471 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
03472 (in_dom[0].length() == temp_dom[0].length()) &&
03473 (in_layout.numVnodes() == 1) &&
03474 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03475
03476 bool skipFinal = false;
03477
03478
03479
03480
03481
03482 skipFinal = ( (in_dom[0].sameBase(temp_dom[0])) &&
03483 (in_dom[0].length() == temp_dom[0].length()) &&
03484 (in_layout.numVnodes() == 1) &&
03485 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03486
03487 if (!skipTranspose) {
03488
03489 (*tempRFields_m) = (*tempR);
03490
03491 tempR = tempRFields_m;
03492 }
03493 if (skipFinal) {
03494
03495
03496
03497
03498 f = (*tempR);
03499
03500
03501 if (this->compressTemps() && tempR != &f) *tempR = 0;
03502 tempR = &f;
03503 }
03504 TAU_PROFILE_STOP(timer_swap);
03505
03506 TAU_PROFILE_START(timer_fft);
03507
03508 typename RealField_t::const_iterator_if l_i = tempR->begin_if();
03509 if (l_i != tempR->end_if()) {
03510
03511
03512 RealLField_t* ldf = (*l_i).second.get();
03513
03514 ldf->Uncompress();
03515
03516 localdataR = ldf->getP();
03517
03518
03519 this->getEngine().callFFT(0, direction, localdataR);
03520 }
03521 TAU_PROFILE_STOP(timer_fft);
03522
03523
03524 if (tempR != &f) {
03525 TAU_PROFILE_START(timer_swap);
03526
03527 f = (*tempR);
03528 if (this->compressTemps()) *tempR = 0;
03529 TAU_PROFILE_STOP(timer_swap);
03530 }
03531
03532
03533 if (direction == +1) f = f * this->getNormFact();
03534
03535 return;
03536 }
03537
03538
03539
03540
03541
03542
03543
03544