00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 #include "FFT/FFT.h"
00027 #include "FieldLayout/FieldLayout.h"
00028 #include "Field/BareField.h"
00029 #include "Utility/IpplStats.h"
00030 
00031 #ifdef IPPL_PRINTDEBUG
00032 #define FFTDBG(x) x
00033 #else
00034 #define FFTDBG(x)
00035 #endif
00036 
00037 
00043 
00044 
00045 
00046 
00052 template <unsigned Dim, class T>
00053 FFT<CCTransform,Dim,T>::FFT(
00054   const typename FFT<CCTransform,Dim,T>::Domain_t& cdomain, 
00055   const bool transformTheseDims[Dim],
00056   const bool& compressTemps)
00057   : FFTBase<Dim,T>(FFT<CCTransform,Dim,T>::ccFFT, cdomain,
00058                    transformTheseDims, compressTemps)
00059 {
00060 
00061   
00062   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00063   TAU_TYPE_STRING(tautype,
00064                   "(" + CT(cdomain) + ", const bool [], const bool&)");
00065   TAU_PROFILE(tauname, tautype, TAU_FFT);
00066 
00067   
00068   unsigned nTransformDims = this->numTransformDims();
00069   int* lengths = new int[nTransformDims];
00070   unsigned d;
00071   for (d=0; d<nTransformDims; ++d)
00072     lengths[d] = cdomain[this->activeDimension(d)].length();
00073 
00074   
00075   int* transformTypes = new int[nTransformDims];
00076   T& normFact = this->getNormFact();
00077   normFact = 1.0;
00078   for (d=0; d<nTransformDims; ++d) {
00079       transformTypes[d] = FFTBase<Dim,T>::ccFFT;  
00080       normFact /= lengths[d];
00081   }
00082 
00083   
00084   this->getEngine().setup(nTransformDims, transformTypes, lengths);
00085   delete [] transformTypes;
00086   delete [] lengths;
00087   
00088   setup();
00089 }
00090 
00091 
00092 
00097 template <unsigned Dim, class T>
00098 void
00099 FFT<CCTransform,Dim,T>::setup(void)
00100 {
00101   
00102   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00103   TAU_TYPE_STRING(tautype, "(void)");
00104   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00105 
00106   unsigned d, activeDim;
00107   unsigned nTransformDims = this->numTransformDims();
00108   
00109   e_dim_tag serialParallel[Dim];  
00110   
00111   serialParallel[0] = SERIAL;
00112   
00113   for (d=1; d<Dim; ++d)
00114     serialParallel[d] = PARALLEL;
00115 
00116   tempLayouts_m = new Layout_t*[nTransformDims];
00117   tempFields_m = new ComplexField_t*[nTransformDims];
00118 
00119   
00120   for (unsigned dim=0; dim<nTransformDims; ++dim) {
00121     
00122     activeDim = this->activeDimension(dim);
00123     
00124     const Domain_t& ndic = this->getDomain();
00125     
00126     Domain_t ndip;
00127     ndip[0] = ndic[activeDim];
00128     for (d=1; d<Dim; ++d) {
00129       int nextDim = activeDim + d;
00130       if (nextDim >= Dim) nextDim -= Dim;
00131       ndip[d] = ndic[nextDim];
00132     }
00133     
00134     tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
00135     
00136     tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
00137     
00138     if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
00139   }
00140 
00141   return;
00142 }
00143 
00144 
00145 
00146 
00147 
00148 template <unsigned Dim, class T>
00149 FFT<CCTransform,Dim,T>::~FFT(void) {
00150 
00151   
00152   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
00153   TAU_TYPE_STRING(tautype, "(void)");
00154   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00155 
00156   
00157   unsigned nTransformDims = this->numTransformDims();
00158   for (unsigned d=0; d<nTransformDims; ++d) {
00159     delete tempFields_m[d];
00160     delete tempLayouts_m[d];
00161   }
00162   delete [] tempFields_m;
00163   delete [] tempLayouts_m;
00164 }
00165 
00166 
00167 
00168 
00169 
00170 
00171 template <unsigned Dim, class T>
00172 void
00173 FFT<CCTransform,Dim,T>::transform(
00174   int direction,
00175   typename FFT<CCTransform,Dim,T>::ComplexField_t& f,
00176   typename FFT<CCTransform,Dim,T>::ComplexField_t& g,
00177   const bool& constInput)
00178 {
00179   
00180   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00181   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
00182   TAU_PROFILE(tauname, tautype, TAU_FFT);
00183   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00184   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00185 
00186   
00187   
00188 
00189   
00190   const Layout_t& in_layout = f.getLayout();
00191   const Domain_t& in_dom = in_layout.getDomain();
00192   const Layout_t& out_layout = g.getLayout();
00193   const Domain_t& out_dom = out_layout.getDomain();
00194   PAssert( checkDomain(this->getDomain(),in_dom) &&
00195            checkDomain(this->getDomain(),out_dom) );
00196 
00197   
00198   unsigned d;
00199   int idim;            
00200   int begdim, enddim;  
00201   unsigned nTransformDims = this->numTransformDims();
00202   
00203   ComplexField_t* temp = &f;
00204   
00205   Complex_t* localdata;
00206       
00207   
00208   begdim = (direction == +1) ? 0 : (nTransformDims-1);
00209   enddim = (direction == +1) ? nTransformDims : -1;
00210   for (idim = begdim; idim != enddim; idim += direction) {
00211     TAU_PROFILE_START(timer_swap);
00212     
00213 
00214     bool skipTranspose = false;
00215     
00216     
00217     if (idim == begdim && !constInput) {
00218       
00219       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
00220       
00221       
00222       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
00223                         (in_dom[0].length() == first_dom[0].length()) &&
00224                         (in_layout.getDistribution(0) == SERIAL) &&
00225                         (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00226     }
00227 
00228     
00229     
00230     if (idim == enddim-direction) {
00231       
00232       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
00233       
00234       
00235       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
00236                         (out_dom[0].length() == last_dom[0].length()) &&
00237                         (out_layout.getDistribution(0) == SERIAL) &&
00238                         (g.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00239     }
00240 
00241     if (!skipTranspose) {
00242       
00243       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
00244         (*temp)[temp->getLayout().getDomain()];
00245 
00246       
00247       if (this->compressTemps() && temp != &f) *temp = 0;
00248       temp = tempFields_m[idim];  
00249     }
00250     else if (idim == enddim-direction && temp != &g) {
00251       
00252       
00253 
00254       
00255       g[out_dom] = (*temp)[temp->getLayout().getDomain()];
00256 
00257       
00258       if (this->compressTemps() && temp != &f) *temp = 0;
00259       temp = &g;  
00260     }
00261     TAU_PROFILE_STOP(timer_swap);
00262       
00263     TAU_PROFILE_START(timer_fft);
00264     
00265     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
00266     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
00267 
00268       
00269       ComplexLField_t* ldf = (*l_i).second.get();
00270       
00271       ldf->Uncompress();
00272       
00273       localdata = ldf->getP();
00274 
00275       
00276       int nstrips = 1, length = ldf->size(0);
00277       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
00278       for (int istrip=0; istrip<nstrips; ++istrip) {
00279         
00280         this->getEngine().callFFT(idim, direction, localdata);
00281         
00282         localdata += length;
00283       } 
00284     } 
00285     TAU_PROFILE_STOP(timer_fft);
00286   } 
00287 
00288   
00289   if (temp != &g) {
00290     TAU_PROFILE_START(timer_swap);
00291     
00292     g[out_dom] = (*temp)[temp->getLayout().getDomain()];
00293     if (this->compressTemps() && temp != &f) *temp = 0;
00294     TAU_PROFILE_STOP(timer_swap);
00295   }
00296 
00297   
00298   if (direction == +1)
00299     g *= Complex_t(this->getNormFact(), 0.0);
00300 
00301   return;
00302 }
00303 
00304 
00305 
00306 
00307 
00308 template <unsigned Dim, class T>
00309 void
00310 FFT<CCTransform,Dim,T>::transform(
00311   int direction,
00312   typename FFT<CCTransform,Dim,T>::ComplexField_t& f)
00313 {
00314   
00315   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00316   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
00317   TAU_PROFILE(tauname, tautype, TAU_FFT);
00318   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00319   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00320 
00321   
00322   
00323 
00324   
00325   const Layout_t& in_layout = f.getLayout();
00326   const Domain_t& in_dom = in_layout.getDomain();
00327   PAssert(checkDomain(this->getDomain(),in_dom));
00328 
00329   
00330   unsigned d;
00331   int idim;            
00332   int begdim, enddim;  
00333   unsigned nTransformDims = this->numTransformDims();
00334   
00335   ComplexField_t* temp = &f;
00336   
00337   Complex_t* localdata;
00338       
00339   
00340   begdim = (direction == +1) ? 0 : (nTransformDims-1);
00341   enddim = (direction == +1) ? nTransformDims : -1;
00342   for (idim = begdim; idim != enddim; idim += direction) {
00343     TAU_PROFILE_START(timer_swap);
00344     
00345 
00346     bool skipTranspose = false;
00347     
00348     
00349     if (idim == begdim) {
00350       
00351       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
00352       
00353       
00354       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
00355                         (in_dom[0].length() == first_dom[0].length()) &&
00356                         (in_layout.getDistribution(0) == SERIAL) &&
00357                         (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00358     }
00359 
00360     
00361     
00362     if (idim == enddim-direction) {
00363       
00364       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
00365       
00366       
00367       skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
00368                         (in_dom[0].length() == last_dom[0].length()) &&
00369                         (in_layout.getDistribution(0) == SERIAL) &&
00370                         (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00371     }
00372 
00373     if (!skipTranspose) {
00374       
00375       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
00376         (*temp)[temp->getLayout().getDomain()];
00377 
00378       
00379       if (this->compressTemps() && temp != &f) *temp = 0;
00380       temp = tempFields_m[idim];  
00381     }
00382     else if (idim == enddim-direction && temp != &f) {
00383       
00384       
00385 
00386       
00387       f[in_dom] = (*temp)[temp->getLayout().getDomain()];
00388 
00389       
00390       if (this->compressTemps()) *temp = 0;
00391       temp = &f;  
00392     }
00393     TAU_PROFILE_STOP(timer_swap);
00394       
00395     TAU_PROFILE_START(timer_fft);
00396     
00397     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
00398     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
00399 
00400       
00401       ComplexLField_t* ldf = (*l_i).second.get();
00402       
00403       ldf->Uncompress();
00404       
00405       localdata = ldf->getP();
00406 
00407       
00408       int nstrips = 1, length = ldf->size(0);
00409       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
00410       for (int istrip=0; istrip<nstrips; ++istrip) {
00411         
00412         this->getEngine().callFFT(idim, direction, localdata);
00413         
00414         localdata += length;
00415       } 
00416     } 
00417     TAU_PROFILE_STOP(timer_fft);
00418 
00419   } 
00420 
00421   
00422   if (temp != &f) {
00423     TAU_PROFILE_START(timer_swap);
00424     
00425     f[in_dom] = (*temp)[temp->getLayout().getDomain()];
00426     if (this->compressTemps()) *temp = 0;
00427     TAU_PROFILE_STOP(timer_swap);
00428   }
00429 
00430   
00431   if (direction == +1)
00432     f *= Complex_t(this->getNormFact(), 0.0);
00433 
00434   return;
00435 }
00436 
00437 
00438 
00439 
00440 
00441 
00442 
00443 
00444 
00445 
00446 
00447 template <class T>
00448 FFT<CCTransform,1U,T>::FFT(
00449   const typename FFT<CCTransform,1U,T>::Domain_t& cdomain, 
00450   const bool transformTheseDims[1U], const bool& compressTemps)
00451   : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain,
00452                   transformTheseDims, compressTemps)
00453 {
00454 
00455   
00456   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00457   TAU_TYPE_STRING(tautype, "(" + CT(cdomain) + ", const bool [], const bool&)");
00458   TAU_PROFILE(tauname, tautype, TAU_FFT);
00459 
00460   unsigned nTransformDims = 1U;
00461   
00462   int length;
00463   length = cdomain[0].length();
00464 
00465   
00466   int transformType;
00467   transformType = FFTBase<1U,T>::ccFFT;  
00468   T& normFact = this->getNormFact();
00469   normFact = 1.0 / length;
00470 
00471   
00472   this->getEngine().setup(nTransformDims, &transformType, &length);
00473   
00474   setup();
00475 }
00476 
00477 
00478 
00479 
00480 
00481 
00482 template <class T>
00483 FFT<CCTransform,1U,T>::FFT(
00484   const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
00485   const bool& compressTemps) 
00486   : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain, compressTemps)
00487 {
00488 
00489   
00490   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00491   TAU_TYPE_STRING(tautype, "(" + CT(cdomain) + ", const bool&)");
00492   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00493 
00494   
00495   int length;
00496   length = cdomain[0].length();
00497 
00498   
00499   int transformType;
00500   transformType = FFTBase<1U,T>::ccFFT;  
00501   T& normFact = this->getNormFact();
00502   normFact = 1.0 / length;
00503 
00504   
00505   this->getEngine().setup(1U, &transformType, &length);
00506   
00507   setup();
00508 }
00509 
00510 
00511 
00512 
00513 
00514 
00515 template <class T>
00516 void
00517 FFT<CCTransform,1U,T>::setup(void)
00518 {
00519   
00520   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00521   TAU_TYPE_STRING(tautype, "(void)");
00522   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00523 
00524   
00525   const Domain_t& ndic = this->getDomain();
00526   
00527   tempLayouts_m = new Layout_t(ndic[0], PARALLEL, 1);
00528   
00529   tempFields_m = new ComplexField_t(*tempLayouts_m);
00530   
00531   if (!this->compressTemps()) tempFields_m->Uncompress();
00532 
00533   return;
00534 }
00535 
00536 
00537 
00538 
00539 
00540 template <class T>
00541 FFT<CCTransform,1U,T>::~FFT(void) {
00542 
00543   
00544   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
00545   TAU_TYPE_STRING(tautype, "(void)");
00546   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00547 
00548   
00549   delete tempFields_m;
00550   delete tempLayouts_m;
00551 }
00552 
00553 
00554 
00555 
00556 
00557 
00558 template <class T>
00559 void
00560 FFT<CCTransform,1U,T>::transform(
00561   int direction,
00562   typename FFT<CCTransform,1U,T>::ComplexField_t& f,
00563   typename FFT<CCTransform,1U,T>::ComplexField_t& g,
00564   const bool& constInput)
00565 {
00566   
00567   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00568   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
00569   TAU_PROFILE(tauname, tautype, TAU_FFT);
00570   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00571   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00572 
00573   
00574   
00575 
00576   
00577   const Layout_t& in_layout = f.getLayout();
00578   const Domain_t& in_dom = in_layout.getDomain();
00579   const Layout_t& out_layout = g.getLayout();
00580   const Domain_t& out_dom = out_layout.getDomain();
00581   PAssert( checkDomain(this->getDomain(),in_dom) &&
00582            checkDomain(this->getDomain(),out_dom) );
00583 
00584   
00585   ComplexField_t* temp = &f;
00586   
00587   Complex_t* localdata;
00588       
00589   TAU_PROFILE_START(timer_swap);
00590   
00591 
00592   
00593   const Domain_t& temp_dom = tempLayouts_m->getDomain();
00594 
00595   bool skipTranspose = false;
00596   
00597   
00598   if (!constInput) {
00599     
00600     
00601     skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
00602                       (in_dom[0].length() == temp_dom[0].length()) &&
00603                       (in_layout.numVnodes() == 1) &&
00604                       (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00605   }
00606 
00607   bool skipFinal;
00608   
00609   
00610 
00611   
00612   
00613   skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
00614                 (out_dom[0].length() == temp_dom[0].length()) &&
00615                 (out_layout.numVnodes() == 1) &&
00616                 (g.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00617 
00618   if (!skipTranspose) {
00619     
00620     (*tempFields_m) = (*temp);
00621     temp = tempFields_m;  
00622   }
00623   if (skipFinal) {
00624     
00625     
00626 
00627     
00628     g = (*temp);
00629 
00630     
00631     if (this->compressTemps() && temp != &f) *temp = 0;
00632     temp = &g;  
00633   }
00634   TAU_PROFILE_STOP(timer_swap);
00635       
00636   TAU_PROFILE_START(timer_fft);
00637 
00638   
00639   typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
00640   if (l_i != temp->end_if()) {
00641     
00642     ComplexLField_t* ldf = (*l_i).second.get();
00643     
00644     ldf->Uncompress();
00645     
00646     localdata = ldf->getP();
00647 
00648     
00649     int length = ldf->size(0);
00650     
00651     this->getEngine().callFFT(0, direction, localdata);
00652   }
00653 
00654   TAU_PROFILE_STOP(timer_fft);
00655 
00656   
00657   if (temp != &g) {
00658     TAU_PROFILE_START(timer_swap);
00659     
00660     g = (*temp);
00661     if (this->compressTemps() && temp != &f) *temp = 0;
00662     TAU_PROFILE_STOP(timer_swap);
00663   }
00664 
00665   
00666   if (direction == +1)
00667     g *= Complex_t(this->getNormFact(), 0.0);
00668 
00669   return;
00670 }
00671 
00672 
00673 
00674 
00675 
00676 template <class T>
00677 void
00678 FFT<CCTransform,1U,T>::transform(
00679   int direction,
00680   typename FFT<CCTransform,1U,T>::ComplexField_t& f)
00681 {
00682   
00683   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00684   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
00685   TAU_PROFILE(tauname, tautype, TAU_FFT);
00686   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00687   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00688 
00689   
00690   
00691 
00692   
00693   const Layout_t& in_layout = f.getLayout();
00694   const Domain_t& in_dom = in_layout.getDomain();
00695   PAssert(checkDomain(this->getDomain(),in_dom));
00696 
00697   
00698   ComplexField_t* temp = &f;
00699   
00700   Complex_t* localdata;
00701       
00702   TAU_PROFILE_START(timer_swap);
00703   
00704 
00705   
00706   const Domain_t& temp_dom = tempLayouts_m->getDomain();
00707 
00708   bool skipTranspose;
00709   
00710   
00711 
00712   
00713   
00714   skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
00715                     (in_dom[0].length() == temp_dom[0].length()) &&
00716                     (in_layout.numVnodes() == 1) &&
00717                     (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00718 
00719   if (!skipTranspose) {
00720     
00721     (*tempFields_m) = (*temp);
00722     temp = tempFields_m;  
00723   }
00724   TAU_PROFILE_STOP(timer_swap);
00725       
00726   TAU_PROFILE_START(timer_fft);
00727 
00728   
00729   typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
00730   if (l_i != temp->end_if()) {
00731     
00732     ComplexLField_t* ldf = (*l_i).second.get();
00733     
00734     ldf->Uncompress();
00735     
00736     localdata = ldf->getP();
00737 
00738     
00739     int length = ldf->size(0);
00740     
00741     this->getEngine().callFFT(0, direction, localdata);
00742   }
00743 
00744   TAU_PROFILE_STOP(timer_fft);
00745 
00746   
00747   if (temp != &f) {
00748     TAU_PROFILE_START(timer_swap);
00749     
00750     f = (*temp);
00751     if (this->compressTemps()) *temp = 0;
00752     TAU_PROFILE_STOP(timer_swap);
00753   }
00754 
00755   
00756   if (direction == +1)
00757     f *= Complex_t(this->getNormFact(), 0.0);
00758 
00759   return;
00760 }
00761 
00762 
00763 
00764 
00765 
00766 
00767 
00768 
00769 
00770 
00771 
00772 
00773 
00774 
00775 template <unsigned Dim, class T>
00776 FFT<RCTransform,Dim,T>::FFT(
00777   const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
00778   const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
00779   const bool transformTheseDims[Dim], const bool& compressTemps)
00780   : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain,
00781                    transformTheseDims, compressTemps), 
00782     complexDomain_m(cdomain), serialAxes_m(1)
00783 {
00784   
00785   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00786   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
00787                   ", const bool [], const bool&)");
00788   TAU_PROFILE(tauname, tautype, TAU_FFT);
00789 
00790   
00791   unsigned nTransformDims = this->numTransformDims();
00792   int* lengths = new int[nTransformDims];
00793   unsigned d;
00794   for (d=0; d<nTransformDims; ++d)
00795     lengths[d] = rdomain[this->activeDimension(d)].length();
00796 
00797   
00798   int* transformTypes = new int[nTransformDims];
00799   T& normFact = this->getNormFact();
00800   normFact = 1.0;
00801   transformTypes[0] = FFTBase<Dim,T>::rcFFT;    
00802   normFact /= lengths[0];
00803   for (d=1; d<nTransformDims; ++d) {
00804     transformTypes[d] = FFTBase<Dim,T>::ccFFT;  
00805     normFact /= lengths[d];
00806   }
00807 
00808   
00809   this->getEngine().setup(nTransformDims, transformTypes, lengths);
00810   delete [] transformTypes;
00811   delete [] lengths;
00812 
00813   
00814   setup();
00815 }
00816 
00817 
00818 
00819 
00820 
00821 
00822 template <unsigned Dim, class T>
00823 FFT<RCTransform,Dim,T>::FFT(
00824   const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
00825   const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
00826   const bool& compressTemps,
00827   int serialAxes)
00828   : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain, compressTemps),
00829     complexDomain_m(cdomain), serialAxes_m(serialAxes)
00830 {
00831   
00832   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00833   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) + ", const bool&)");
00834   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00835 
00836   
00837   int lengths[Dim];
00838   unsigned d;
00839   for (d=0; d<Dim; ++d)
00840     lengths[d] = rdomain[d].length();
00841 
00842   
00843   int transformTypes[Dim];
00844   T& normFact = this->getNormFact();
00845   normFact = 1.0;
00846   transformTypes[0] = FFTBase<Dim,T>::rcFFT;    
00847   normFact /= lengths[0];
00848   for (d=1; d<Dim; ++d) {
00849     transformTypes[d] = FFTBase<Dim,T>::ccFFT;  
00850     normFact /= lengths[d];
00851   }
00852 
00853   
00854   this->getEngine().setup(Dim, transformTypes, lengths);
00855 
00856   
00857   setup();
00858 }
00859 
00860 
00861 
00862 
00863 
00864 
00865 template <unsigned Dim, class T>
00866 void
00867 FFT<RCTransform,Dim,T>::setup(void) {
00868 
00869   
00870   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00871   TAU_TYPE_STRING(tautype, "(void)");
00872   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00873 
00874   PAssert(serialAxes_m > 0 && serialAxes_m < Dim);
00875 
00876   unsigned d, d2, activeDim;
00877   unsigned nTransformDims = this->numTransformDims();
00878 
00879   
00880 
00881   
00882   
00883   e_dim_tag serialParallel[Dim];
00884   e_dim_tag NserialParallel[Dim];
00885   for (d=0; d < Dim; ++d) {
00886     serialParallel[d] = (d == 0 ? SERIAL : PARALLEL);
00887     NserialParallel[d] = (d < serialAxes_m ? SERIAL : PARALLEL);
00888   }
00889 
00890   
00891   const Domain_t& domain = this->getDomain();
00892   activeDim = this->activeDimension(0);
00893   bool match = true;
00894   for (d=0; d<Dim; ++d) {
00895     if (d == activeDim) {
00896       
00897       if ( complexDomain_m[d].length() !=
00898            (domain[d].length()/2 + 1) ) match = false;
00899     }
00900     else {
00901       
00902       if (complexDomain_m[d].length() != domain[d].length()) match = false;
00903     }
00904   }
00905   PInsist(match,
00906           "Domains provided for real and complex Fields are incompatible!");
00907 
00908   
00909   tempLayouts_m = new Layout_t*[nTransformDims];
00910   tempFields_m = new ComplexField_t*[nTransformDims];
00911 
00912   
00913 
00914   
00915   Domain_t ndip;
00916   Domain_t ndipc;
00917   ndip[0] = domain[activeDim];
00918   ndipc[0] = complexDomain_m[activeDim];
00919   for (d=1; d<Dim; ++d) {
00920     int nextDim = activeDim + d;
00921     if (nextDim >= Dim) nextDim -= Dim;
00922     ndip[d] = domain[nextDim];
00923     ndipc[d] = complexDomain_m[nextDim];
00924   }
00925 
00926   
00927   tempRLayout_m = new Layout_t(ndip, serialParallel, this->transVnodes());
00928   tempRField_m = new RealField_t(*tempRLayout_m);
00929 
00930   
00931   tempLayouts_m[0] = new Layout_t(ndipc, serialParallel, this->transVnodes());
00932   tempFields_m[0] = new ComplexField_t(*tempLayouts_m[0]);
00933 
00934   
00935   
00936   int fftorder[Dim], tmporder[Dim];
00937   int nofft = nTransformDims;
00938   for (d=0; d < nTransformDims; ++d)
00939     fftorder[d] = this->activeDimension(d);
00940   for (d=0; d < Dim; ++d) {
00941     
00942     bool active = false;
00943     for (d2=0; d2 < nTransformDims; ++d2) {
00944       if (this->activeDimension(d2) == d) {
00945         active = true;
00946         break;
00947       }
00948     }
00949 
00950     if (!active)
00951       
00952       fftorder[nofft++] = d;
00953   }
00954 
00955   
00956   
00957   nofft = fftorder[0];
00958   for (d=0; d < (Dim - 1); ++d)
00959     fftorder[d] = fftorder[d+1];
00960   fftorder[Dim-1] = nofft;
00961 
00962   
00963 
00964   
00965   int dim = 1;                  
00966   while (dim < nTransformDims) {
00967 
00968     int sp;
00969     for (sp=0; sp < serialAxes_m && dim < nTransformDims; ++sp, ++dim) {
00970 
00971       
00972       for (d=0; d < Dim; ++d)
00973         ndip[d] = complexDomain_m[fftorder[d]];
00974 
00975       
00976       tempLayouts_m[dim] = new Layout_t(ndip, NserialParallel, this->transVnodes());
00977       tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
00978 
00979       
00980       if (serialAxes_m > 1) {
00981         tmporder[0] = fftorder[0];
00982         for (d=0; d < (serialAxes_m-1); ++d)
00983           fftorder[d] = fftorder[d+1];
00984         fftorder[serialAxes_m - 1] = tmporder[0];
00985       }
00986     }
00987 
00988     
00989     
00990     for (d=0; d < Dim; ++d)
00991       tmporder[d] = fftorder[d];
00992     for (d=0; d < Dim; ++d)
00993       fftorder[d] = tmporder[(d + serialAxes_m) % Dim];
00994   }
00995 }
00996 
00997 
00998 
00999 
01000 
01001 
01002 template <unsigned Dim, class T>
01003 FFT<RCTransform,Dim,T>::~FFT(void) {
01004 
01005   
01006   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
01007   TAU_TYPE_STRING(tautype, "(void)");
01008   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01009 
01010   
01011   unsigned nTransformDims = this->numTransformDims();
01012   for (unsigned d=0; d<nTransformDims; ++d) {
01013     delete tempFields_m[d];
01014     delete tempLayouts_m[d];
01015   }
01016   delete [] tempFields_m;
01017   delete [] tempLayouts_m;
01018   delete tempRField_m;
01019   delete tempRLayout_m;
01020 }
01021 
01022 
01023 
01024 
01025 
01026 template <unsigned Dim, class T>
01027 void
01028 FFT<RCTransform,Dim,T>::transform(
01029   int direction,
01030   typename FFT<RCTransform,Dim,T>::RealField_t& f,
01031   typename FFT<RCTransform,Dim,T>::ComplexField_t& g,
01032   const bool& constInput)
01033 {
01034   
01035   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01036   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01037   TAU_PROFILE(tauname, tautype, TAU_FFT);
01038   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01039   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01040 
01041   
01042   static IpplTimings::TimerRef tr1=IpplTimings::getTimer("RC-RRtranspose");
01043   static IpplTimings::TimerRef tr2=IpplTimings::getTimer("RC-CCtranspose");
01044   static IpplTimings::TimerRef ffttr1=IpplTimings::getTimer("RC-RC1Dfft");
01045   static IpplTimings::TimerRef ffttr2=IpplTimings::getTimer("RC-CC1Dfft");
01046   static IpplTimings::TimerRef tottimer=IpplTimings::getTimer("RC-total");
01047   FFTDBG(Inform tmsg("FFT-RC-forward"));
01048 
01049   
01050   IpplTimings::startTimer(tottimer);
01051 
01052   
01053   
01054 
01055   
01056   const Layout_t& in_layout = f.getLayout();
01057   const Domain_t& in_dom = in_layout.getDomain();
01058   const Layout_t& out_layout = g.getLayout();
01059   const Domain_t& out_dom = out_layout.getDomain();
01060   PAssert( checkDomain(this->getDomain(),in_dom) &&
01061            checkDomain(complexDomain_m,out_dom) );
01062 
01063   
01064   unsigned d;
01065   int idim;      
01066   unsigned nTransformDims = this->numTransformDims();
01067 
01068   
01069   idim = 0;
01070 
01071   RealField_t* tempR = tempRField_m;  
01072   if (!constInput) {
01073     
01074     bool skipTemp = true;
01075 
01076     
01077     if ( !(in_layout == *tempRLayout_m) ) {
01078       skipTemp = false;
01079     } else {
01080       
01081       for (d=0; d<Dim; ++d)
01082         if (in_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
01083           skipTemp = false;
01084 
01085       
01086       if (in_layout.numVnodes() != tempRLayout_m->numVnodes())
01087         skipTemp = false;
01088 
01089       
01090       if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01091         skipTemp = false;
01092     }
01093 
01094     
01095     
01096     
01097     
01098     if (skipTemp)
01099       tempR = &f;
01100   }
01101 
01102   
01103   if (tempR != &f) {
01104     TAU_PROFILE_START(timer_swap);
01105 
01106     
01107     FFTDBG(tmsg << "Doing transpose of real field into temporary ");
01108     FFTDBG(tmsg << "with layout = " << tempR->getLayout() << endl);
01109     IpplTimings::startTimer(tr1);
01110     (*tempR)[tempR->getDomain()] = f[in_dom];
01111     IpplTimings::stopTimer(tr1);
01112 
01113     TAU_PROFILE_STOP(timer_swap);
01114   }
01115 
01116   
01117   ComplexField_t* temp = tempFields_m[0];
01118 
01119   
01120   
01121   if (nTransformDims == 1) {  
01122     bool skipTemp = true;
01123 
01124     
01125     if (!(out_layout == *tempLayouts_m[0])) {
01126       skipTemp = false;
01127     } else {
01128       for (d=0; d<Dim; ++d)
01129         if (out_layout.getDistribution(d) !=
01130             tempLayouts_m[0]->getDistribution(d))
01131           skipTemp = false;
01132 
01133       if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
01134         skipTemp = false;
01135 
01136       
01137       if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01138         skipTemp = false;
01139 
01140       
01141       
01142       if (skipTemp)
01143         temp = &g;
01144     }
01145   }
01146 
01147   FFTDBG(tmsg << "Doing real->complex fft of first dimension ..." << endl);
01148   TAU_PROFILE_START(timer_fft);
01149   IpplTimings::startTimer(ffttr1);
01150 
01151   
01152   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
01153   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01154   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
01155     
01156     RealLField_t* rldf = (*rl_i).second.get();
01157     ComplexLField_t* cldf = (*cl_i).second.get();
01158 
01159     
01160     rldf->Uncompress();
01161     cldf->Uncompress();
01162 
01163     
01164     T* localreal = rldf->getP();
01165     Complex_t* localcomp = cldf->getP();
01166 
01167     
01168     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
01169     for (d=1; d<Dim; ++d)
01170       nstrips *= rldf->size(d);
01171 
01172     for (int istrip=0; istrip<nstrips; ++istrip) {
01173       
01174       for (int ilen=0; ilen<lengthreal; ilen+=2) 
01175         localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
01176 
01177       
01178       
01179       this->getEngine().callFFT(idim, +1, localcomp);
01180 
01181       
01182       localreal += lengthreal;
01183       localcomp += lengthcomp;
01184     } 
01185   } 
01186 
01187   IpplTimings::stopTimer(ffttr1);
01188   TAU_PROFILE_STOP(timer_fft);
01189 
01190   
01191   if (this->compressTemps() && tempR != &f)
01192     *tempR = 0;
01193 
01194   
01195 
01196   
01197   Complex_t* localdata;
01198       
01199   
01200   for (idim = 1; idim < nTransformDims; ++idim) {
01201     TAU_PROFILE_START(timer_swap);
01202     bool skipTranspose = false;
01203 
01204     
01205     
01206     if (idim == nTransformDims-1) {
01207       
01208       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
01209 
01210       
01211       
01212       
01213       skipTranspose = (g.getGC() == FFT<RCTransform,Dim,T>::nullGC &&
01214                        out_dom[0].sameBase(last_dom[0]) &&
01215                        out_dom[0].length() == last_dom[0].length() &&
01216                        out_layout.getDistribution(0) == SERIAL);
01217     }
01218 
01219     if (!skipTranspose) {
01220       
01221       FFTDBG(tmsg << "Doing complex->complex transpose into field ");
01222       FFTDBG(tmsg << "with layout = " << tempFields_m[idim]->getLayout());
01223       FFTDBG(tmsg << endl);
01224       IpplTimings::startTimer(tr2);
01225       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
01226         (*temp)[temp->getLayout().getDomain()];
01227       IpplTimings::stopTimer(tr2);
01228 
01229       
01230       if (this->compressTemps())
01231         *temp = 0;
01232       temp = tempFields_m[idim];  
01233 
01234     } else if (idim == nTransformDims-1) {
01235       
01236       
01237 
01238       
01239       FFTDBG(tmsg << "Doing final complex->complex transpose ");
01240       FFTDBG(tmsg << "into return ");
01241       FFTDBG(tmsg << "with layout = " << g.getLayout());
01242       FFTDBG(tmsg << endl);
01243       IpplTimings::startTimer(tr2);
01244       g[out_dom] = (*temp)[temp->getLayout().getDomain()];
01245       IpplTimings::stopTimer(tr2);
01246 
01247       
01248       if (this->compressTemps())
01249         *temp = 0;
01250       temp = &g;  
01251     }
01252     TAU_PROFILE_STOP(timer_swap);
01253 
01254     FFTDBG(tmsg << "Doing complex->complex fft of other dimension .." << endl);
01255     TAU_PROFILE_START(timer_fft);
01256     IpplTimings::startTimer(ffttr2);
01257 
01258     
01259     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
01260     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
01261       
01262       ComplexLField_t* ldf = (*l_i).second.get();
01263 
01264       
01265       ldf->Uncompress();
01266 
01267       
01268       localdata = ldf->getP();
01269 
01270       
01271       int nstrips = 1, length = ldf->size(0);
01272       for (d=1; d<Dim; ++d)
01273         nstrips *= ldf->size(d);
01274 
01275       for (int istrip=0; istrip<nstrips; ++istrip) {
01276         
01277         this->getEngine().callFFT(idim, direction, localdata);
01278 
01279         
01280         localdata += length;
01281       } 
01282     } 
01283 
01284     IpplTimings::stopTimer(ffttr2);
01285     TAU_PROFILE_STOP(timer_fft);
01286   } 
01287 
01288   
01289   if (temp != &g) {
01290     TAU_PROFILE_START(timer_swap);
01291 
01292     
01293     FFTDBG(tmsg << "Doing cleanup complex->complex transpose ");
01294     FFTDBG(tmsg << "into return ");
01295     FFTDBG(tmsg << "with layout = " << g.getLayout());
01296     FFTDBG(tmsg << endl);
01297     IpplTimings::startTimer(tr2);
01298     g[out_dom] = (*temp)[temp->getLayout().getDomain()];
01299     IpplTimings::stopTimer(tr2);
01300 
01301     if (this->compressTemps()) *temp = 0;
01302     TAU_PROFILE_STOP(timer_swap);
01303   }
01304 
01305   
01306   if (direction == +1) g = g * this->getNormFact();
01307 
01308   
01309   IpplTimings::stopTimer(tottimer);
01310 }
01311 
01312 
01313 
01314 
01315 
01316 template <unsigned Dim, class T>
01317 void
01318 FFT<RCTransform,Dim,T>::transform(
01319   int direction,
01320   typename FFT<RCTransform,Dim,T>::ComplexField_t& f,
01321   typename FFT<RCTransform,Dim,T>::RealField_t& g,
01322   const bool& constInput)
01323 {
01324   
01325   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01326   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01327   TAU_PROFILE(tauname, tautype, TAU_FFT);
01328   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01329   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01330 
01331   
01332   static IpplTimings::TimerRef tr1=IpplTimings::getTimer("RC-RRtranspose");
01333   static IpplTimings::TimerRef tr2=IpplTimings::getTimer("RC-CCtranspose");
01334   static IpplTimings::TimerRef ffttr1=IpplTimings::getTimer("RC-RC1Dfft");
01335   static IpplTimings::TimerRef ffttr2=IpplTimings::getTimer("RC-CC1Dfft");
01336   static IpplTimings::TimerRef tottimer=IpplTimings::getTimer("RC-total");
01337   FFTDBG(Inform tmsg("FFT-RC-reverse"));
01338 
01339   
01340   IpplTimings::startTimer(tottimer);
01341 
01342   
01343   
01344 
01345   
01346   const Layout_t& in_layout = f.getLayout();
01347   const Domain_t& in_dom = in_layout.getDomain();
01348   const Layout_t& out_layout = g.getLayout();
01349   const Domain_t& out_dom = out_layout.getDomain();
01350   PAssert( checkDomain(complexDomain_m,in_dom) &&
01351            checkDomain(this->getDomain(),out_dom) );
01352 
01353   
01354   unsigned d;
01355   int idim;      
01356   unsigned nTransformDims = this->numTransformDims();
01357 
01358   
01359 
01360   
01361   ComplexField_t* temp = &f;
01362 
01363   
01364   Complex_t* localdata;
01365       
01366   
01367   for (idim = nTransformDims-1; idim != 0; --idim) {
01368     TAU_PROFILE_START(timer_swap);
01369     
01370 
01371     bool skipTranspose = false;
01372     
01373     
01374     if (idim == nTransformDims-1 && !constInput) {
01375       
01376       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
01377       
01378       
01379       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
01380                         (in_dom[0].length() == first_dom[0].length()) &&
01381                         (in_layout.getDistribution(0) == SERIAL) &&
01382                         (f.getGC() == FFT<RCTransform,Dim,T>::nullGC) );
01383     }
01384 
01385     if (!skipTranspose) {
01386       
01387       FFTDBG(tmsg << "Doing complex->complex transpose into field ");
01388       FFTDBG(tmsg << "with layout = "<<tempFields_m[idim]->getLayout()<<endl);
01389       IpplTimings::startTimer(tr2);
01390       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
01391         (*temp)[temp->getLayout().getDomain()];
01392       IpplTimings::stopTimer(tr2);
01393 
01394       
01395       if (this->compressTemps() && temp != &f)
01396         *temp = 0;
01397       temp = tempFields_m[idim];  
01398     }
01399     TAU_PROFILE_STOP(timer_swap);
01400 
01401     FFTDBG(tmsg << "Doing complex->complex fft of other dimension .." << endl);
01402     TAU_PROFILE_START(timer_fft);
01403     IpplTimings::startTimer(ffttr2);
01404 
01405     
01406     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
01407     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
01408 
01409       
01410       ComplexLField_t* ldf = (*l_i).second.get();
01411 
01412       
01413       ldf->Uncompress();
01414 
01415       
01416       localdata = ldf->getP();
01417 
01418       
01419       int nstrips = 1, length = ldf->size(0);
01420       for (d=1; d<Dim; ++d)
01421         nstrips *= ldf->size(d);
01422 
01423       for (int istrip=0; istrip<nstrips; ++istrip) {
01424         
01425         this->getEngine().callFFT(idim, direction, localdata);
01426 
01427         
01428         localdata += length;
01429       } 
01430     } 
01431 
01432     IpplTimings::stopTimer(ffttr2);
01433     TAU_PROFILE_STOP(timer_fft);
01434   } 
01435 
01436   
01437   idim = 0;
01438 
01439   
01440   RealField_t* tempR;
01441   bool skipTemp = true;
01442 
01443   
01444   if (!(out_layout == *tempRLayout_m)) {
01445     skipTemp = false;
01446   } else {
01447     for (d=0; d<Dim; ++d)
01448       if (out_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
01449         skipTemp = false;
01450 
01451     if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
01452       skipTemp = false;
01453 
01454     
01455     if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01456       skipTemp = false;
01457   }
01458 
01459   if (skipTemp)
01460     tempR = &g;
01461   else 
01462     tempR = tempRField_m;
01463 
01464   skipTemp = true;
01465   if (nTransformDims == 1 && !constInput) {
01466     
01467     
01468     
01469     if (!(in_layout == *tempLayouts_m[0])) {
01470       skipTemp = false;
01471     } else {
01472       for (d=0; d<Dim; ++d)
01473         if (in_layout.getDistribution(d) !=
01474             tempLayouts_m[0]->getDistribution(d))
01475           skipTemp = false;
01476 
01477       if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
01478         skipTemp = false;
01479 
01480       
01481       if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01482         skipTemp = false;
01483     }
01484   } else {  
01485     skipTemp = false;
01486   }
01487 
01488   TAU_PROFILE_START(timer_swap);
01489   if (!skipTemp) {
01490     
01491     FFTDBG(tmsg << "Doing final complex->complex transpose into field ");
01492     FFTDBG(tmsg << "with layout = "<<tempFields_m[0]->getLayout()<<endl);
01493     IpplTimings::startTimer(tr2);
01494     (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
01495       (*temp)[temp->getLayout().getDomain()];
01496     IpplTimings::stopTimer(tr2);
01497 
01498     
01499     if (this->compressTemps() && temp != &f)
01500       *temp = 0;
01501     temp = tempFields_m[0];
01502   }
01503   TAU_PROFILE_STOP(timer_swap);
01504 
01505   FFTDBG(tmsg << "Doing complex->real fft of final dimension ..." << endl);
01506   TAU_PROFILE_START(timer_fft);
01507   IpplTimings::startTimer(ffttr1);
01508 
01509   
01510   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
01511   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01512   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
01513     
01514     RealLField_t* rldf = (*rl_i).second.get();
01515     ComplexLField_t* cldf = (*cl_i).second.get();
01516 
01517     
01518     rldf->Uncompress();
01519     cldf->Uncompress();
01520 
01521     
01522     T* localreal = rldf->getP();
01523     Complex_t* localcomp = cldf->getP();
01524 
01525     
01526     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
01527     for (d=1; d<Dim; ++d)
01528       nstrips *= rldf->size(d);
01529 
01530     for (int istrip=0; istrip<nstrips; ++istrip) {
01531       
01532       
01533       this->getEngine().callFFT(idim, -1, localcomp);
01534 
01535       
01536       for (int ilen=0; ilen<lengthreal; ilen+=2) {
01537         localreal[ilen] = real(localcomp[ilen/2]);
01538         localreal[ilen+1] = imag(localcomp[ilen/2]);
01539       }
01540 
01541       
01542       localreal += lengthreal;
01543       localcomp += lengthcomp;
01544     } 
01545   } 
01546 
01547   IpplTimings::stopTimer(ffttr1);
01548   TAU_PROFILE_STOP(timer_fft);
01549 
01550   
01551   if (this->compressTemps() && temp != &f)
01552     *temp = 0;
01553 
01554   
01555   if (tempR != &g) {
01556     TAU_PROFILE_START(timer_swap);
01557 
01558     
01559     FFTDBG(tmsg << "Doing cleanup real->real transpose into return ");
01560     FFTDBG(tmsg << "with layout = " << g.getLayout() << endl);
01561     IpplTimings::startTimer(tr1);
01562     g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
01563     IpplTimings::stopTimer(tr1);
01564 
01565     if (this->compressTemps())
01566       *tempR = 0;
01567     TAU_PROFILE_STOP(timer_swap);
01568   }
01569 
01570   
01571   if (direction == +1) g = g * this->getNormFact();
01572 
01573   
01574   IpplTimings::stopTimer(tottimer);
01575 }
01576 
01577 
01578 
01579 
01580 
01581 
01582 
01583 
01584 
01585 
01586 
01587 
01588 
01589 template <class T>
01590 FFT<RCTransform,1U,T>::FFT(
01591   const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
01592   const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
01593   const bool transformTheseDims[1U], const bool& compressTemps)
01594   : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain,
01595                   transformTheseDims, compressTemps), 
01596     complexDomain_m(cdomain)
01597 {
01598   
01599   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01600   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
01601                   ", const bool [], const bool&)");
01602   TAU_PROFILE(tauname, tautype, TAU_FFT);
01603 
01604   unsigned nTransformDims = 1U;
01605   
01606   int length;
01607   length = rdomain[0].length();
01608 
01609   
01610   int transformType;
01611   transformType = FFTBase<1U,T>::rcFFT;    
01612   T& normFact = this->getNormFact();
01613   normFact = 1.0 / length;
01614 
01615   
01616   this->getEngine().setup(nTransformDims, &transformType, &length);
01617   
01618   setup();
01619 }
01620 
01621 
01622 
01623 
01624 
01625 
01626 template <class T>
01627 FFT<RCTransform,1U,T>::FFT(
01628   const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
01629   const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
01630   const bool& compressTemps) 
01631   : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain, compressTemps),
01632     complexDomain_m(cdomain)
01633 {
01634   
01635   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01636   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) + ", const bool&)");
01637   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01638 
01639   
01640   int length;
01641   length = rdomain[0].length();
01642 
01643   
01644   int transformType;
01645   transformType = FFTBase<1U,T>::rcFFT;    
01646   T& normFact = this->getNormFact();
01647   normFact = 1.0 / length;
01648 
01649   
01650   this->getEngine().setup(1U, &transformType, &length);
01651   
01652   setup();
01653 }
01654 
01655 
01656 
01657 
01658 
01659 
01660 template <class T>
01661 void
01662 FFT<RCTransform,1U,T>::setup(void) {
01663 
01664   
01665   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
01666   TAU_TYPE_STRING(tautype, "(void)");
01667   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01668 
01669   
01670   const Domain_t& domain = this->getDomain();
01671   bool match = true;
01672   
01673   if ( complexDomain_m[0].length() !=
01674        (domain[0].length()/2 + 1) ) match = false;
01675   PInsist(match,
01676           "Domains provided for real and complex Fields are incompatible!");
01677 
01678   
01679   tempRLayout_m = new Layout_t(domain[0], PARALLEL, 1);
01680   
01681   tempRField_m = new RealField_t(*tempRLayout_m);
01682   
01683   if (!this->compressTemps()) tempRField_m->Uncompress();
01684 
01685   
01686   tempLayouts_m = new Layout_t(complexDomain_m[0], PARALLEL, 1);
01687   
01688   tempFields_m = new ComplexField_t(*tempLayouts_m);
01689   
01690   if (!this->compressTemps()) tempFields_m->Uncompress();
01691 
01692   return;
01693 }
01694 
01695 
01696 
01697 
01698 
01699 template <class T>
01700 FFT<RCTransform,1U,T>::~FFT(void) {
01701 
01702   
01703   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
01704   TAU_TYPE_STRING(tautype, "(void)");
01705   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01706 
01707   
01708   delete tempFields_m;
01709   delete tempLayouts_m;
01710   delete tempRField_m;
01711   delete tempRLayout_m;
01712 }
01713 
01714 
01715 
01716 
01717 
01718 template <class T>
01719 void
01720 FFT<RCTransform,1U,T>::transform(
01721   int direction,
01722   typename FFT<RCTransform,1U,T>::RealField_t& f,
01723   typename FFT<RCTransform,1U,T>::ComplexField_t& g,
01724   const bool& constInput)
01725 {
01726   
01727   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01728   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01729   TAU_PROFILE(tauname, tautype, TAU_FFT);
01730   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01731   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01732 
01733   
01734   
01735 
01736   
01737   const Layout_t& in_layout = f.getLayout();
01738   const Domain_t& in_dom = in_layout.getDomain();
01739   const Layout_t& out_layout = g.getLayout();
01740   const Domain_t& out_dom = out_layout.getDomain();
01741   PAssert( checkDomain(this->getDomain(),in_dom) &&
01742            checkDomain(complexDomain_m,out_dom) );
01743 
01744   
01745   RealField_t* tempR = tempRField_m;  
01746   if (!constInput) {
01747     
01748     bool skipTemp = true;
01749     
01750     if ( !(in_layout == *tempRLayout_m) ) skipTemp = false;
01751     if ( in_layout.getDistribution(0) !=
01752          tempRLayout_m->getDistribution(0) ) skipTemp = false;
01753     if ( in_layout.numVnodes() != tempRLayout_m->numVnodes() )
01754       skipTemp = false;
01755     
01756     if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
01757     if (skipTemp) tempR = &f;
01758   }
01759 
01760   if (tempR != &f) {  
01761     TAU_PROFILE_START(timer_swap);
01762     
01763     (*tempR) = f;
01764     TAU_PROFILE_STOP(timer_swap);
01765   }
01766 
01767   
01768   ComplexField_t* temp = tempFields_m;
01769   
01770   bool skipFinal = true;
01771   
01772   if ( !(out_layout == *tempLayouts_m) ) skipFinal = false;
01773   if ( out_layout.getDistribution(0) !=
01774        tempLayouts_m->getDistribution(0) ) skipFinal = false;
01775   if ( out_layout.numVnodes() != tempLayouts_m->numVnodes() )
01776     skipFinal = false;
01777   
01778   if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
01779   if (skipFinal) temp = &g;
01780 
01781   TAU_PROFILE_START(timer_fft);
01782   
01783   typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
01784   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01785   if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
01786     
01787     RealLField_t* rldf = (*rl_i).second.get();
01788     ComplexLField_t* cldf = (*cl_i).second.get();
01789     
01790     rldf->Uncompress();
01791     cldf->Uncompress();
01792     
01793     T* localreal = rldf->getP();
01794     Complex_t* localcomp = cldf->getP();
01795 
01796     int lengthreal = rldf->size(0);
01797     
01798     for (int ilen=0; ilen<lengthreal; ilen+=2) 
01799       localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
01800     
01801     
01802     this->getEngine().callFFT(0, +1, localcomp);
01803   } 
01804   TAU_PROFILE_STOP(timer_fft);
01805 
01806   TAU_PROFILE_START(timer_swap);
01807   
01808   if (this->compressTemps() && tempR != &f) *tempR = 0;
01809   TAU_PROFILE_STOP(timer_swap);
01810 
01811   
01812   if (temp != &g) {
01813     TAU_PROFILE_START(timer_swap);
01814     
01815     g = (*temp);
01816     if (this->compressTemps()) *temp = 0;
01817     TAU_PROFILE_STOP(timer_swap);
01818   }
01819 
01820   
01821   if (direction == +1) g = g * this->getNormFact();
01822 
01823   return;
01824 }
01825 
01826 
01827 
01828 
01829 
01830 template <class T>
01831 void
01832 FFT<RCTransform,1U,T>::transform(
01833   int direction,
01834   typename FFT<RCTransform,1U,T>::ComplexField_t& f,
01835   typename FFT<RCTransform,1U,T>::RealField_t& g,
01836   const bool& constInput)
01837 {
01838   
01839   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01840   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01841   TAU_PROFILE(tauname, tautype, TAU_FFT);
01842   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01843   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01844 
01845   
01846   
01847 
01848   
01849   const Layout_t& in_layout = f.getLayout();
01850   const Domain_t& in_dom = in_layout.getDomain();
01851   const Layout_t& out_layout = g.getLayout();
01852   const Domain_t& out_dom = out_layout.getDomain();
01853   PAssert( checkDomain(complexDomain_m,in_dom) &&
01854            checkDomain(this->getDomain(),out_dom) );
01855 
01856   
01857   ComplexField_t* temp = &f;
01858       
01859   
01860   RealField_t* tempR;
01861   bool skipFinal = true;
01862   
01863   if ( !(out_layout == *tempRLayout_m) ) skipFinal = false;
01864   if ( out_layout.getDistribution(0) !=
01865        tempRLayout_m->getDistribution(0) ) skipFinal = false;
01866   if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
01867     skipFinal = false;
01868   
01869   if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
01870   if (skipFinal)
01871     tempR = &g;
01872   else 
01873     tempR = tempRField_m;
01874 
01875   bool skipTemp = true;
01876   if (!constInput) {
01877     
01878     
01879     
01880     if ( !(in_layout == *tempLayouts_m) ) skipTemp = false;
01881     if ( in_layout.getDistribution(0) !=
01882         tempLayouts_m->getDistribution(0) ) skipTemp = false;
01883     if ( in_layout.numVnodes() != tempLayouts_m->numVnodes() )
01884       skipTemp = false;
01885     
01886     if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
01887   }
01888   else {  
01889     skipTemp = false;
01890   }
01891 
01892   TAU_PROFILE_START(timer_swap);
01893   if (!skipTemp) {
01894     
01895     (*tempFields_m) = (*temp);
01896     
01897     if (this->compressTemps() && temp != &f) *temp = 0;
01898     temp = tempFields_m;
01899   }
01900   TAU_PROFILE_STOP(timer_swap);
01901 
01902   TAU_PROFILE_START(timer_fft);
01903   
01904   typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
01905   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01906   if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
01907     
01908     RealLField_t* rldf = (*rl_i).second.get();
01909     ComplexLField_t* cldf = (*cl_i).second.get();
01910     
01911     rldf->Uncompress();
01912     cldf->Uncompress();
01913     
01914     T* localreal = rldf->getP();
01915     Complex_t* localcomp = cldf->getP();
01916 
01917     int lengthreal = rldf->size(0);
01918     
01919     
01920     this->getEngine().callFFT(0, -1, localcomp);
01921     
01922     for (int ilen=0; ilen<lengthreal; ilen+=2) {
01923       localreal[ilen] = real(localcomp[ilen/2]);
01924       localreal[ilen+1] = imag(localcomp[ilen/2]);
01925     }
01926   }
01927   TAU_PROFILE_STOP(timer_fft);
01928 
01929   TAU_PROFILE_START(timer_swap);
01930   
01931   if (this->compressTemps() && temp != &f) *temp = 0;
01932   TAU_PROFILE_STOP(timer_swap);
01933 
01934   
01935   if (tempR != &g) {
01936     TAU_PROFILE_START(timer_swap);
01937     
01938     g = (*tempR);
01939     if (this->compressTemps()) *tempR = 0;
01940     TAU_PROFILE_STOP(timer_swap);
01941   }
01942 
01943   
01944   if (direction == +1) g = g * this->getNormFact();
01945 
01946   return;
01947 }
01948 
01949 
01950 
01951 
01952 
01953 
01954 
01955 
01956 
01957 
01958 
01959 
01960 
01961 
01962 template <unsigned Dim, class T>
01963 FFT<SineTransform,Dim,T>::FFT(
01964   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
01965   const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
01966   const bool transformTheseDims[Dim], const bool sineTransformDims[Dim],
01967   const bool& compressTemps)
01968   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
01969                    transformTheseDims, compressTemps), 
01970     complexDomain_m(&cdomain)
01971 {
01972   
01973   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01974   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
01975                   ", const bool [], const bool [], const bool&)");
01976   TAU_PROFILE(tauname, tautype, TAU_FFT);
01977 
01978   unsigned d;
01979   
01980   numSineTransforms_m = 0;
01981   for (d=0; d<Dim; ++d) {
01982     sineTransformDims_m[d] = sineTransformDims[d];
01983     if (sineTransformDims[d]) {
01984       PAssert(transformTheseDims[d]);  
01985       ++numSineTransforms_m;
01986     }
01987   }
01988 
01989   
01990   unsigned nTransformDims = this->numTransformDims();
01991   int* lengths = new int[nTransformDims];
01992   for (d=0; d<nTransformDims; ++d)
01993     lengths[d] = rdomain[this->activeDimension(d)].length();
01994 
01995   
01996   int* transformTypes = new int[nTransformDims];
01997   T& normFact = this->getNormFact();
01998   normFact = 1.0;
01999   bool foundRC = false;
02000   for (d=0; d<nTransformDims; ++d) {
02001     if (sineTransformDims_m[this->activeDimension(d)]) {
02002       transformTypes[d] = FFTBase<Dim,T>::sineFFT;  
02003       normFact /= (2.0 * (lengths[d] + 1));
02004     }
02005     else if (!foundRC) {
02006       transformTypes[d] = FFTBase<Dim,T>::rcFFT;    
02007       normFact /= lengths[d];
02008       foundRC = true;
02009     }
02010     else {
02011       transformTypes[d] = FFTBase<Dim,T>::ccFFT;    
02012       normFact /= lengths[d];
02013     }
02014   }
02015 
02016   
02017   this->getEngine().setup(nTransformDims, transformTypes, lengths);
02018   delete [] transformTypes;
02019   delete [] lengths;
02020   
02021   setup();
02022 }
02023 
02024 
02025 
02026 
02027 
02028 
02029 template <unsigned Dim, class T>
02030 FFT<SineTransform,Dim,T>::FFT(
02031   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
02032   const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
02033   const bool sineTransformDims[Dim], const bool& compressTemps) 
02034   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps),
02035     complexDomain_m(&cdomain)
02036 {
02037   
02038   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02039   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
02040                   ", const bool [], const bool&)");
02041   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02042 
02043   unsigned d;
02044   
02045   numSineTransforms_m = 0;
02046   for (d=0; d<Dim; ++d) {
02047     sineTransformDims_m[d] = sineTransformDims[d];
02048     if (sineTransformDims[d]) ++numSineTransforms_m;
02049   }
02050 
02051   
02052   int lengths[Dim];
02053   for (d=0; d<Dim; ++d)
02054     lengths[d] = rdomain[d].length();
02055 
02056   
02057   int transformTypes[Dim];
02058   T& normFact = this->getNormFact();
02059   normFact = 1.0;
02060   bool foundRC = false;
02061   for (d=0; d<Dim; ++d) {
02062     if (sineTransformDims_m[d]) {
02063       transformTypes[d] = FFTBase<Dim,T>::sineFFT;  
02064       normFact /= (2.0 * (lengths[d] + 1));
02065     }
02066     else if (!foundRC) {
02067       transformTypes[d] = FFTBase<Dim,T>::rcFFT;    
02068       normFact /= lengths[d];
02069       foundRC = true;
02070     }
02071     else {
02072       transformTypes[d] = FFTBase<Dim,T>::ccFFT;    
02073       normFact /= lengths[d];
02074     }
02075   }
02076 
02077   
02078   this->getEngine().setup(Dim, transformTypes, lengths);
02079   
02080   setup();
02081 }
02082 
02083 
02084 
02085 
02086 
02087 
02088 
02089 template <unsigned Dim, class T>
02090 FFT<SineTransform,Dim,T>::FFT(
02091   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
02092   const bool sineTransformDims[Dim], const bool& compressTemps)
02093   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
02094                    sineTransformDims, compressTemps)
02095 {
02096   
02097   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02098   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool [], const bool&)");
02099   TAU_PROFILE(tauname, tautype, TAU_FFT);
02100 
02101   
02102   numSineTransforms_m = this->numTransformDims();
02103   unsigned d;
02104   for (d=0; d<Dim; ++d)
02105     sineTransformDims_m[d] = sineTransformDims[d];
02106 
02107   
02108   int* lengths = new int[numSineTransforms_m];
02109   for (d=0; d<numSineTransforms_m; ++d)
02110     lengths[d] = rdomain[this->activeDimension(d)].length();
02111 
02112   
02113   int* transformTypes = new int[numSineTransforms_m];
02114   T& normFact = this->getNormFact();
02115   normFact = 1.0;
02116   for (d=0; d<numSineTransforms_m; ++d) {
02117     transformTypes[d] = FFTBase<Dim,T>::sineFFT;  
02118     normFact /= (2.0 * (lengths[d] + 1));
02119   }
02120 
02121   
02122   this->getEngine().setup(numSineTransforms_m, transformTypes, lengths);
02123   delete [] transformTypes;
02124   delete [] lengths;
02125   
02126   setup();
02127 }
02128 
02129 
02130 
02131 
02132 
02133 
02134 template <unsigned Dim, class T>
02135 FFT<SineTransform,Dim,T>::FFT(
02136   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain, const bool& compressTemps) 
02137   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps)
02138 {
02139   
02140   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02141   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool&)");
02142   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02143 
02144   unsigned d;
02145   
02146   numSineTransforms_m = this->numTransformDims();
02147   for (d=0; d<Dim; ++d)
02148     sineTransformDims_m[d] = true;
02149 
02150   
02151   int lengths[Dim];
02152   for (d=0; d<Dim; ++d)
02153     lengths[d] = rdomain[d].length();
02154 
02155   
02156   int transformTypes[Dim];
02157   T& normFact = this->getNormFact();
02158   normFact = 1.0;
02159   for (d=0; d<Dim; ++d) {
02160     transformTypes[d] = FFTBase<Dim,T>::sineFFT;  
02161     normFact /= (2.0 * (lengths[d] + 1));
02162   }
02163 
02164   
02165   this->getEngine().setup(Dim, transformTypes, lengths);
02166   
02167   setup();
02168 }
02169 
02170 
02171 
02172 
02173 
02174 
02175 template <unsigned Dim, class T>
02176 void
02177 FFT<SineTransform,Dim,T>::setup(void) {
02178 
02179   
02180   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
02181   TAU_TYPE_STRING(tautype, "(void)");
02182   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02183 
02184   unsigned d, dim, activeDim;
02185   int icount;
02186   Domain_t ndip;
02187   unsigned nTransformDims = this->numTransformDims();  
02188   const Domain_t& domain = this->getDomain();          
02189 
02190   
02191   e_dim_tag serialParallel[Dim];  
02192   
02193   serialParallel[0] = SERIAL;
02194   
02195   for (d=1; d<Dim; ++d)
02196     serialParallel[d] = PARALLEL;
02197 
02198   
02199   if (nTransformDims > numSineTransforms_m) {  
02200 
02201     PAssert(complexDomain_m);  
02202     
02203     bool match = false;
02204     d=0;
02205     while (d<Dim && !match) {
02206       if (this->transformDim(d) && !sineTransformDims_m[d]) {
02207         activeDim = this->activeDimension(d);
02208         match = true;
02209       }
02210       ++d;
02211     }
02212     PAssert(match);  
02213     
02214     for (d=0; d<Dim; ++d) {
02215       if (d == activeDim) {
02216         
02217         if ( (*complexDomain_m)[d].length() !=
02218              (domain[d].length()/2 + 1) ) match = false;
02219       }
02220       else {
02221         
02222         if ( (*complexDomain_m)[d].length() !=
02223              domain[d].length() ) match = false;
02224       }
02225     }
02226     PInsist(match,
02227             "Domains provided for real and complex Fields are incompatible!");
02228 
02229     
02230     
02231     tempRLayouts_m = new Layout_t*[numSineTransforms_m+1];
02232     tempRFields_m = new RealField_t*[numSineTransforms_m+1];
02233     
02234     icount=0;
02235     for (dim=0; dim<numSineTransforms_m; ++dim, ++icount) {
02236       
02237       while (!sineTransformDims_m[icount]) ++icount;
02238       PAssert(icount<Dim);  
02239       
02240       ndip[0] = domain[icount];
02241       for (d=1; d<Dim; ++d) {
02242         int nextDim = icount + d;
02243         if (nextDim >= Dim) nextDim -= Dim;
02244         ndip[d] = domain[nextDim];
02245       }
02246       
02247       tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02248       
02249       tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
02250       
02251       if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
02252     }
02253 
02254     
02255     ndip[0] = domain[activeDim];
02256     for (d=1; d<Dim; ++d) {
02257       int nextDim = activeDim + d;
02258       if (nextDim >= Dim) nextDim -= Dim;
02259       ndip[d] = domain[nextDim];
02260     }
02261     
02262     tempRLayouts_m[numSineTransforms_m] = new Layout_t(ndip, serialParallel,
02263                                                        this->transVnodes());
02264     
02265     tempRFields_m[numSineTransforms_m] =
02266       new RealField_t(*tempRLayouts_m[numSineTransforms_m]);
02267     
02268     if (!this->compressTemps()) (*tempRFields_m[numSineTransforms_m]).Uncompress();
02269 
02270     
02271     int numComplex = nTransformDims - numSineTransforms_m;
02272     
02273     tempLayouts_m = new Layout_t*[numComplex];
02274     tempFields_m = new ComplexField_t*[numComplex];
02275     icount=0;  
02276     for (dim=0; dim<numComplex; ++dim, ++icount) {
02277       
02278       while (!this->transformDim(icount) || sineTransformDims_m[icount]) ++icount;
02279       PAssert(icount<Dim);  
02280       
02281       ndip[0] = (*complexDomain_m)[icount];
02282       for (d=1; d<Dim; ++d) {
02283         int nextDim = icount + d;
02284         if (nextDim >= Dim) nextDim -= Dim;
02285         ndip[d] = (*complexDomain_m)[nextDim];
02286       }
02287       
02288       tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02289       
02290       tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
02291       
02292       if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
02293     }
02294 
02295   }
02296   else {  
02297 
02298     
02299     
02300     tempRLayouts_m = new Layout_t*[numSineTransforms_m];
02301     tempRFields_m = new RealField_t*[numSineTransforms_m];
02302     
02303     for (dim=0; dim<numSineTransforms_m; ++dim) {
02304       
02305       activeDim = this->activeDimension(dim);
02306       
02307       ndip[0] = domain[activeDim];
02308       for (d=1; d<Dim; ++d) {
02309         int nextDim = activeDim + d;
02310         if (nextDim >= Dim) nextDim -= Dim;
02311         ndip[d] = domain[nextDim];
02312       }
02313       
02314       tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02315       
02316       tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
02317       
02318       if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
02319     }
02320 
02321   }
02322 
02323   return;
02324 }
02325 
02326 
02327 
02328 
02329 
02330 template <unsigned Dim, class T>
02331 FFT<SineTransform,Dim,T>::~FFT(void) {
02332 
02333   
02334   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
02335   TAU_TYPE_STRING(tautype, "(void)");
02336   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02337 
02338   
02339   unsigned d;
02340   unsigned nTransformDims = this->numTransformDims();
02341   if (nTransformDims > numSineTransforms_m) {
02342     for (d=0; d<numSineTransforms_m+1; ++d) {
02343       delete tempRFields_m[d];
02344       delete tempRLayouts_m[d];
02345     }
02346     int numComplex = nTransformDims - numSineTransforms_m;
02347     for (d=0; d<numComplex; ++d) {
02348       delete tempFields_m[d];
02349       delete tempLayouts_m[d];
02350     }
02351     delete [] tempFields_m;
02352     delete [] tempLayouts_m;
02353   }
02354   else {
02355     for (d=0; d<numSineTransforms_m; ++d) {
02356       delete tempRFields_m[d];
02357       delete tempRLayouts_m[d];
02358     }
02359   }
02360   delete [] tempRFields_m;
02361   delete [] tempRLayouts_m;
02362 }
02363 
02364 
02365 
02366 
02367 
02368 template <unsigned Dim, class T>
02369 void
02370 FFT<SineTransform,Dim,T>::transform(
02371   int direction,
02372   FFT<SineTransform,Dim,T>::RealField_t& f,
02373   FFT<SineTransform,Dim,T>::ComplexField_t& g,
02374   const bool& constInput)
02375 {
02376   
02377   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02378   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02379   TAU_PROFILE(tauname, tautype, TAU_FFT);
02380   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02381   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02382 
02383   
02384   
02385 
02386   
02387   const Layout_t& in_layout = f.getLayout();
02388   const Domain_t& in_dom = in_layout.getDomain();
02389   const Layout_t& out_layout = g.getLayout();
02390   const Domain_t& out_dom = out_layout.getDomain();
02391   PAssert( checkDomain(this->getDomain(),in_dom) &&
02392            checkDomain(*complexDomain_m,out_dom) );
02393 
02394   
02395   unsigned d;
02396   int icount, activeDim;
02397   int idim;      
02398   unsigned nTransformDims = this->numTransformDims();
02399   
02400   PInsist(nTransformDims>numSineTransforms_m,
02401           "Wrong output Field type for real-to-real transform!!");
02402 
02403   
02404 
02405   
02406   RealField_t* tempR = &f;
02407   
02408   T* localdataR;
02409       
02410   
02411   icount = 0;
02412   activeDim = 0;
02413   for (idim = 0; idim != numSineTransforms_m; ++idim, ++icount, ++activeDim) {
02414     TAU_PROFILE_START(timer_swap);
02415     
02416     while (!sineTransformDims_m[icount]) {
02417       if (this->transformDim(icount)) ++activeDim;
02418       ++icount;
02419     }
02420     PAssert(activeDim<Dim);  
02421     
02422 
02423     bool skipTranspose = false;
02424     
02425     
02426     if (idim == 0 && !constInput) {
02427       
02428       const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
02429       
02430       
02431       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02432                         (in_dom[0].length() == first_dom[0].length()) &&
02433                         (in_layout.getDistribution(0) == SERIAL) &&
02434                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02435     }
02436 
02437     if (!skipTranspose) {
02438       
02439       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
02440         (*tempR)[tempR->getLayout().getDomain()];
02441 
02442       
02443       if (this->compressTemps() && tempR != &f) *tempR = 0;
02444       tempR = tempRFields_m[idim];  
02445     }
02446     TAU_PROFILE_STOP(timer_swap);
02447       
02448     TAU_PROFILE_START(timer_fft);
02449     
02450     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
02451     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
02452 
02453       
02454       RealLField_t* ldf = (*l_i).second.get();
02455       
02456       ldf->Uncompress();
02457       
02458       localdataR = ldf->getP();
02459 
02460       
02461       int nstrips = 1, length = ldf->size(0);
02462       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02463       for (int istrip=0; istrip<nstrips; ++istrip) {
02464         
02465         this->getEngine().callFFT(activeDim, direction, localdataR);
02466         
02467         localdataR += length;
02468       } 
02469     } 
02470     TAU_PROFILE_STOP(timer_fft);
02471   } 
02472 
02473   
02474 
02475   
02476   icount = 0;
02477   activeDim = 0;
02478   while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02479     if (sineTransformDims_m[icount]) ++activeDim;
02480     ++icount;
02481   }
02482   PAssert(activeDim<Dim);  
02483 
02484   TAU_PROFILE_START(timer_swap);
02485   
02486   int last = numSineTransforms_m;
02487   (*tempRFields_m[last])[tempRLayouts_m[last]->getDomain()] =
02488     (*tempR)[tempR->getLayout().getDomain()];
02489 
02490   
02491   if (this->compressTemps() && tempR != &f) *tempR = 0;
02492   tempR = tempRFields_m[last];  
02493   TAU_PROFILE_STOP(timer_swap);
02494 
02495   
02496   ComplexField_t* temp = tempFields_m[0];
02497   
02498   int numComplex = nTransformDims-numSineTransforms_m;
02499   if (numComplex == 1) {  
02500     bool skipTemp = true;
02501     
02502     if ( !(out_layout == *tempLayouts_m[0]) ) skipTemp = false;
02503     for (d=0; d<Dim; ++d) {
02504       if ( out_layout.getDistribution(d) !=
02505            tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
02506     }
02507     if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
02508       skipTemp = false;
02509     
02510     if (!(g.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
02511     if (skipTemp) temp = &g;
02512   }
02513 
02514 
02515   TAU_PROFILE_START(timer_fft);
02516   
02517   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
02518   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
02519   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
02520     
02521     RealLField_t* rldf = (*rl_i).second.get();
02522     ComplexLField_t* cldf = (*cl_i).second.get();
02523     
02524     rldf->Uncompress();
02525     cldf->Uncompress();
02526     
02527     T* localreal = rldf->getP();
02528     Complex_t* localcomp = cldf->getP();
02529 
02530     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
02531     
02532     for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
02533     for (int istrip=0; istrip<nstrips; ++istrip) {
02534       
02535       for (int ilen=0; ilen<lengthreal; ilen+=2) 
02536         localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
02537       
02538       
02539       this->getEngine().callFFT(activeDim, +1, localcomp);
02540       
02541       localreal += lengthreal;
02542       localcomp += lengthcomp;
02543     } 
02544   } 
02545   TAU_PROFILE_STOP(timer_fft);
02546 
02547   
02548 
02549   
02550   Complex_t* localdata;
02551       
02552   
02553   ++icount;
02554   ++activeDim;
02555   for (idim = 1; idim != numComplex; ++idim, ++icount, ++activeDim) {
02556     TAU_PROFILE_START(timer_swap);
02557     
02558     while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02559       if (sineTransformDims_m[icount]) ++activeDim;
02560       ++icount;
02561     }
02562     PAssert(activeDim<Dim);  
02563     
02564 
02565     bool skipTranspose = false;
02566     
02567     
02568     if (idim == numComplex-1) {
02569       
02570       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
02571       
02572       
02573       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02574                         (out_dom[0].length() == last_dom[0].length()) &&
02575                         (out_layout.getDistribution(0) == SERIAL) &&
02576                         (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02577     }
02578 
02579     if (!skipTranspose) {
02580       
02581       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
02582         (*temp)[temp->getLayout().getDomain()];
02583 
02584       
02585       if (this->compressTemps()) *temp = 0;
02586       temp = tempFields_m[idim];  
02587     }
02588     else if (idim == numComplex-1) {
02589       
02590       
02591 
02592       
02593       g[out_dom] = (*temp)[temp->getLayout().getDomain()];
02594 
02595       
02596       if (this->compressTemps()) *temp = 0;
02597       temp = &g;  
02598     }
02599     TAU_PROFILE_STOP(timer_swap);
02600       
02601     TAU_PROFILE_START(timer_fft);
02602     
02603     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
02604     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
02605 
02606       
02607       ComplexLField_t* ldf = (*l_i).second.get();
02608       
02609       ldf->Uncompress();
02610       
02611       localdata = ldf->getP();
02612 
02613       
02614       int nstrips = 1, length = ldf->size(0);
02615       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02616       for (int istrip=0; istrip<nstrips; ++istrip) {
02617         
02618         this->getEngine().callFFT(activeDim, direction, localdata);
02619         
02620         localdata += length;
02621       } 
02622     } 
02623     TAU_PROFILE_STOP(timer_fft);
02624   } 
02625 
02626   
02627   if (temp != &g) {
02628     TAU_PROFILE_START(timer_swap);
02629     
02630     g[out_dom] = (*temp)[temp->getLayout().getDomain()];
02631     if (this->compressTemps()) *temp = 0;
02632     TAU_PROFILE_STOP(timer_swap);
02633   }
02634 
02635   
02636   if (direction == +1) g = g * this->getNormFact();
02637 
02638   return;
02639 }
02640 
02641 
02642 
02643 
02644 
02645 template <unsigned Dim, class T>
02646 void
02647 FFT<SineTransform,Dim,T>::transform(
02648   int direction,
02649   FFT<SineTransform,Dim,T>::ComplexField_t& f,
02650   FFT<SineTransform,Dim,T>::RealField_t& g,
02651   const bool& constInput)
02652 {
02653   
02654   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02655   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02656   TAU_PROFILE(tauname, tautype, TAU_FFT);
02657   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02658   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02659 
02660   
02661   
02662 
02663   
02664   const Layout_t& in_layout = f.getLayout();
02665   const Domain_t& in_dom = in_layout.getDomain();
02666   const Layout_t& out_layout = g.getLayout();
02667   const Domain_t& out_dom = out_layout.getDomain();
02668   PAssert( checkDomain(*complexDomain_m,in_dom) &&
02669            checkDomain(this->getDomain(),out_dom) );
02670 
02671   
02672   unsigned d;
02673   int icount, activeDim;
02674   int idim;      
02675   unsigned nTransformDims = this->numTransformDims();
02676 
02677   
02678 
02679   
02680   ComplexField_t* temp = &f;
02681   
02682   Complex_t* localdata;
02683       
02684   
02685   int numComplex = nTransformDims - numSineTransforms_m;
02686   icount = Dim-1;  
02687   activeDim = nTransformDims-1;
02688   for (idim = numComplex-1; idim != 0; --idim, --icount, --activeDim) {
02689     TAU_PROFILE_START(timer_swap);
02690     
02691     while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02692       if (sineTransformDims_m[icount]) --activeDim;
02693       --icount;
02694     }
02695     PAssert(activeDim>=0);  
02696     
02697 
02698     bool skipTranspose = false;
02699     
02700     
02701     if (idim == numComplex-1 && !constInput) {
02702       
02703       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
02704       
02705       
02706       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02707                         (in_dom[0].length() == first_dom[0].length()) &&
02708                         (in_layout.getDistribution(0) == SERIAL) &&
02709                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02710     }
02711 
02712     if (!skipTranspose) {
02713       
02714       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
02715         (*temp)[temp->getLayout().getDomain()];
02716 
02717       
02718       if (this->compressTemps() && temp != &f) *temp = 0;
02719       temp = tempFields_m[idim];  
02720     }
02721     TAU_PROFILE_STOP(timer_swap);
02722       
02723     TAU_PROFILE_START(timer_fft);
02724     
02725     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
02726     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
02727 
02728       
02729       ComplexLField_t* ldf = (*l_i).second.get();
02730       
02731       ldf->Uncompress();
02732       
02733       localdata = ldf->getP();
02734 
02735       
02736       int nstrips = 1, length = ldf->size(0);
02737       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02738       for (int istrip=0; istrip<nstrips; ++istrip) {
02739         
02740         this->getEngine().callFFT(activeDim, direction, localdata);
02741         
02742         localdata += length;
02743       } 
02744     } 
02745     TAU_PROFILE_STOP(timer_fft);
02746   } 
02747 
02748   
02749 
02750   
02751   while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02752     if (sineTransformDims_m[icount]) --activeDim;
02753     --icount;
02754   }
02755   PAssert(activeDim>=0);  
02756 
02757   
02758   RealField_t* tempR = tempRFields_m[numSineTransforms_m];
02759 
02760   bool skipTemp = true;
02761   if (numComplex == 1 && !constInput) {
02762     
02763     
02764     
02765     if ( !(in_layout == *tempLayouts_m[0]) ) skipTemp = false;
02766     for (d=0; d<Dim; ++d) {
02767       if ( in_layout.getDistribution(d) !=
02768           tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
02769     }
02770     if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
02771       skipTemp = false;
02772     
02773     if (!(f.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
02774   }
02775   else {  
02776     skipTemp = false;
02777   }
02778 
02779   if (!skipTemp) {
02780     TAU_PROFILE_START(timer_swap);
02781     
02782     (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
02783       (*temp)[temp->getLayout().getDomain()];
02784     
02785     if (this->compressTemps() && temp != &f) *temp = 0;
02786     temp = tempFields_m[0];
02787     TAU_PROFILE_STOP(timer_swap);
02788   }
02789 
02790   TAU_PROFILE_START(timer_fft);
02791   
02792   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
02793   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
02794   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
02795     
02796     RealLField_t* rldf = (*rl_i).second.get();
02797     ComplexLField_t* cldf = (*cl_i).second.get();
02798     
02799     rldf->Uncompress();
02800     cldf->Uncompress();
02801     
02802     T* localreal = rldf->getP();
02803     Complex_t* localcomp = cldf->getP();
02804 
02805     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
02806     
02807     for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
02808     for (int istrip=0; istrip<nstrips; ++istrip) {
02809       
02810       
02811       this->getEngine().callFFT(activeDim, -1, localcomp);
02812       
02813       for (int ilen=0; ilen<lengthreal; ilen+=2) {
02814         localreal[ilen] = real(localcomp[ilen/2]);
02815         localreal[ilen+1] = imag(localcomp[ilen/2]);
02816       }
02817       
02818       localreal += lengthreal;
02819       localcomp += lengthcomp;
02820     } 
02821   } 
02822   TAU_PROFILE_STOP(timer_fft);
02823 
02824   TAU_PROFILE_START(timer_swap);
02825   
02826   if (this->compressTemps() && temp != &f) *temp = 0;
02827   TAU_PROFILE_STOP(timer_swap);
02828 
02829   
02830 
02831   
02832   T* localdataR;
02833       
02834   
02835   icount = Dim-1;  
02836   activeDim = nTransformDims - 1;
02837   for (idim = numSineTransforms_m-1; idim != -1;
02838        --idim, --icount, --activeDim) {
02839     TAU_PROFILE_START(timer_swap);
02840     
02841     while (!sineTransformDims_m[icount]) {
02842       if (this->transformDim(icount)) --activeDim;
02843       --icount;
02844     }
02845     PAssert(activeDim>=0);  
02846     
02847 
02848     bool skipTranspose = false;
02849     
02850     
02851     if (idim == 0) {
02852       
02853       const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
02854       
02855       
02856       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02857                         (out_dom[0].length() == last_dom[0].length()) &&
02858                         (out_layout.getDistribution(0) == SERIAL) &&
02859                         (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02860     }
02861 
02862     if (!skipTranspose) {
02863       
02864       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
02865         (*tempR)[tempR->getLayout().getDomain()];
02866 
02867       
02868       if (this->compressTemps()) *tempR = 0;
02869       tempR = tempRFields_m[idim];  
02870     }
02871     else if (idim == 0) {
02872       
02873       
02874 
02875       
02876       g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
02877 
02878       
02879       if (this->compressTemps()) *tempR = 0;
02880       tempR = &g;  
02881     }
02882     TAU_PROFILE_STOP(timer_swap);
02883       
02884     TAU_PROFILE_START(timer_fft);
02885     
02886     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
02887     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
02888 
02889       
02890       RealLField_t* ldf = (*l_i).second.get();
02891       
02892       ldf->Uncompress();
02893       
02894       localdataR = ldf->getP();
02895 
02896       
02897       int nstrips = 1, length = ldf->size(0);
02898       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02899       for (int istrip=0; istrip<nstrips; ++istrip) {
02900         
02901         this->getEngine().callFFT(activeDim, direction, localdataR);
02902         
02903         localdataR += length;
02904       } 
02905     } 
02906     TAU_PROFILE_STOP(timer_fft);
02907   } 
02908 
02909   
02910   if (tempR != &g) {
02911     TAU_PROFILE_START(timer_swap);
02912     
02913     g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
02914     if (this->compressTemps()) *tempR = 0;
02915     TAU_PROFILE_STOP(timer_swap);
02916   }
02917 
02918   
02919   if (direction == +1) g = g * this->getNormFact();
02920 
02921   return;
02922 }
02923 
02924 
02925 
02926 
02927 
02928 template <unsigned Dim, class T>
02929 void
02930 FFT<SineTransform,Dim,T>::transform(
02931   int direction,
02932   FFT<SineTransform,Dim,T>::RealField_t& f,
02933   FFT<SineTransform,Dim,T>::RealField_t& g,
02934   const bool& constInput)
02935 {
02936   
02937   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02938   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02939   TAU_PROFILE(tauname, tautype, TAU_FFT);
02940   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02941   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02942 
02943   
02944   
02945 
02946   
02947   const Layout_t& in_layout = f.getLayout();
02948   const Domain_t& in_dom = in_layout.getDomain();
02949   const Layout_t& out_layout = g.getLayout();
02950   const Domain_t& out_dom = out_layout.getDomain();
02951   PAssert( checkDomain(this->getDomain(),in_dom) &&
02952            checkDomain(this->getDomain(),out_dom) );
02953 
02954   
02955   unsigned d;
02956   int idim;      
02957   int begdim, enddim;
02958   unsigned nTransformDims = this->numTransformDims();
02959   
02960   PInsist(nTransformDims==numSineTransforms_m,
02961           "Wrong output Field type for real-to-complex transform!!");
02962 
02963   
02964 
02965   
02966   RealField_t* tempR = &f;
02967   
02968   T* localdataR;
02969       
02970   
02971   begdim = (direction == +1) ? 0 : nTransformDims-1;
02972   enddim = (direction == +1) ? nTransformDims : -1;
02973   for (idim = begdim; idim != enddim; idim+=direction) {
02974     TAU_PROFILE_START(timer_swap);
02975     
02976 
02977     bool skipTranspose = false;
02978     
02979     
02980     if (idim == begdim && !constInput) {
02981       
02982       const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
02983       
02984       
02985       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02986                         (in_dom[0].length() == first_dom[0].length()) &&
02987                         (in_layout.getDistribution(0) == SERIAL) &&
02988                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02989     }
02990 
02991     
02992     
02993     if (idim == enddim-direction) {
02994       
02995       const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
02996       
02997       
02998       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02999                         (out_dom[0].length() == last_dom[0].length()) &&
03000                         (out_layout.getDistribution(0) == SERIAL) &&
03001                         (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03002     }
03003 
03004     if (!skipTranspose) {
03005       
03006       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
03007         (*tempR)[tempR->getLayout().getDomain()];
03008 
03009       
03010       if (this->compressTemps() && tempR != &f) *tempR = 0;
03011       tempR = tempRFields_m[idim];  
03012     }
03013     else if (idim == enddim-direction && tempR != &g) {
03014       
03015       
03016 
03017       
03018       g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
03019 
03020       
03021       if (this->compressTemps() && tempR != &f) *tempR = 0;
03022       tempR = &g;  
03023     }
03024     TAU_PROFILE_STOP(timer_swap);
03025       
03026     TAU_PROFILE_START(timer_fft);
03027     
03028     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
03029     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
03030 
03031       
03032       RealLField_t* ldf = (*l_i).second.get();
03033       
03034       ldf->Uncompress();
03035       
03036       localdataR = ldf->getP();
03037 
03038       
03039       int nstrips = 1, length = ldf->size(0);
03040       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
03041       for (int istrip=0; istrip<nstrips; ++istrip) {
03042         
03043         this->getEngine().callFFT(idim, direction, localdataR);
03044         
03045         localdataR += length;
03046       } 
03047     } 
03048     TAU_PROFILE_STOP(timer_fft);
03049   } 
03050 
03051   
03052   if (tempR != &g) {
03053     TAU_PROFILE_START(timer_swap);
03054     
03055     g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
03056     if (this->compressTemps() && tempR != &f) *tempR = 0;
03057     TAU_PROFILE_STOP(timer_swap);
03058   }
03059 
03060   
03061   if (direction == +1) g = g * this->getNormFact();
03062 
03063   return;
03064 }
03065 
03066 
03067 
03068 
03069 
03070 template <unsigned Dim, class T>
03071 void
03072 FFT<SineTransform,Dim,T>::transform(
03073   int direction,
03074   FFT<SineTransform,Dim,T>::RealField_t& f)
03075 {
03076   
03077   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03078   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
03079   TAU_PROFILE(tauname, tautype, TAU_FFT);
03080   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03081   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03082 
03083   
03084   
03085 
03086   
03087   const Layout_t& in_layout = f.getLayout();
03088   const Domain_t& in_dom = in_layout.getDomain();
03089   PAssert(checkDomain(this->getDomain(),in_dom));
03090 
03091   
03092   unsigned d;
03093   int idim;      
03094   int begdim, enddim;
03095   unsigned nTransformDims = this->numTransformDims();
03096   
03097   PInsist(nTransformDims==numSineTransforms_m,
03098           "Cannot perform real-to-complex transform in-place!!");
03099 
03100   
03101 
03102   
03103   RealField_t* tempR = &f;
03104   
03105   T* localdataR;
03106       
03107   
03108   begdim = (direction == +1) ? 0 : nTransformDims-1;
03109   enddim = (direction == +1) ? nTransformDims : -1;
03110   for (idim = begdim; idim != enddim; idim+=direction) {
03111     TAU_PROFILE_START(timer_swap);
03112     
03113 
03114     bool skipTranspose = false;
03115     
03116     
03117     if (idim == begdim) {
03118       
03119       const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
03120       
03121       
03122       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
03123                         (in_dom[0].length() == first_dom[0].length()) &&
03124                         (in_layout.getDistribution(0) == SERIAL) &&
03125                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03126     }
03127 
03128     
03129     
03130     if (idim == enddim-direction) {
03131       
03132       const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
03133       
03134       
03135       skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
03136                         (in_dom[0].length() == last_dom[0].length()) &&
03137                         (in_layout.getDistribution(0) == SERIAL) &&
03138                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03139     }
03140 
03141     if (!skipTranspose) {
03142       
03143       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
03144         (*tempR)[tempR->getLayout().getDomain()];
03145 
03146       
03147       if (this->compressTemps() && tempR != &f) *tempR = 0;
03148       tempR = tempRFields_m[idim];  
03149     }
03150     else if (idim == enddim-direction && tempR != &f) {
03151       
03152       
03153 
03154       
03155       f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
03156 
03157       
03158       if (this->compressTemps() && tempR != &f) *tempR = 0;
03159       tempR = &f;  
03160     }
03161     TAU_PROFILE_STOP(timer_swap);
03162       
03163     TAU_PROFILE_START(timer_fft);
03164     
03165     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
03166     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
03167 
03168       
03169       RealLField_t* ldf = (*l_i).second.get();
03170       
03171       ldf->Uncompress();
03172       
03173       localdataR = ldf->getP();
03174 
03175       
03176       int nstrips = 1, length = ldf->size(0);
03177       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
03178       for (int istrip=0; istrip<nstrips; ++istrip) {
03179         
03180         this->getEngine().callFFT(idim, direction, localdataR);
03181         
03182         localdataR += length;
03183       } 
03184     } 
03185     TAU_PROFILE_STOP(timer_fft);
03186   } 
03187 
03188   
03189   if (tempR != &f) {
03190     TAU_PROFILE_START(timer_swap);
03191     
03192     f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
03193     if (this->compressTemps()) *tempR = 0;
03194     TAU_PROFILE_STOP(timer_swap);
03195   }
03196 
03197   
03198   if (direction == +1) f = f * this->getNormFact();
03199 
03200   return;
03201 }
03202 
03203 
03204 
03205 
03206 
03207 
03208 
03209 
03210 
03211 
03212 
03213 
03214 template <class T>
03215 FFT<SineTransform,1U,T>::FFT(
03216   const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
03217   const bool sineTransformDims[1U], const bool& compressTemps)
03218   : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain,
03219                   sineTransformDims, compressTemps)
03220 {
03221   
03222   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
03223   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool [], const bool&)");
03224   TAU_PROFILE(tauname, tautype, TAU_FFT);
03225 
03226   
03227   int length;
03228   length = rdomain[0].length();
03229 
03230   
03231   int transformType;
03232   transformType = FFTBase<1U,T>::sineFFT;  
03233   T& normFact = this->getNormFact();
03234   normFact = 1.0 / (2.0 * (length + 1));
03235 
03236   
03237   this->getEngine().setup(1U, &transformType, &length);
03238   
03239   setup();
03240 }
03241 
03242 
03243 
03244 
03245 
03246 
03247 template <class T>
03248 FFT<SineTransform,1U,T>::FFT(
03249   const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
03250   const bool& compressTemps) 
03251   : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain, compressTemps)
03252 {
03253   
03254   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
03255   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool&)");
03256   TAU_PROFILE(tauname, tautype, TAU_FFT); 
03257 
03258   
03259   int length;
03260   length = rdomain[0].length();
03261 
03262   
03263   int transformType;
03264   transformType = FFTBase<1U,T>::sineFFT;  
03265   T& normFact = this->getNormFact();
03266   normFact = 1.0 / (2.0 * (length + 1));
03267 
03268   
03269   this->getEngine().setup(1U, &transformType, &length);
03270   
03271   setup();
03272 }
03273 
03274 
03275 
03276 
03277 
03278 
03279 template <class T>
03280 void
03281 FFT<SineTransform,1U,T>::setup(void) {
03282 
03283   
03284   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
03285   TAU_TYPE_STRING(tautype, "(void)");
03286   TAU_PROFILE(tauname, tautype, TAU_FFT); 
03287 
03288   const Domain_t& domain = this->getDomain();          
03289 
03290   
03291   tempRLayouts_m = new Layout_t(domain[0], PARALLEL, 1);
03292   
03293   tempRFields_m = new RealField_t(*tempRLayouts_m);
03294   
03295   if (!this->compressTemps()) tempRFields_m->Uncompress();
03296 
03297   return;
03298 }
03299 
03300 
03301 
03302 
03303 
03304 template <class T>
03305 FFT<SineTransform,1U,T>::~FFT(void) {
03306 
03307   
03308   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
03309   TAU_TYPE_STRING(tautype, "(void)");
03310   TAU_PROFILE(tauname, tautype, TAU_FFT); 
03311 
03312   
03313   delete tempRFields_m;
03314   delete tempRLayouts_m;
03315 }
03316 
03317 
03318 
03319 
03320 
03321 template <class T>
03322 void
03323 FFT<SineTransform,1U,T>::transform(
03324   int direction,
03325   FFT<SineTransform,1U,T>::RealField_t& f,
03326   FFT<SineTransform,1U,T>::RealField_t& g,
03327   const bool& constInput)
03328 {
03329   
03330   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03331   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
03332   TAU_PROFILE(tauname, tautype, TAU_FFT);
03333   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03334   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03335 
03336   
03337   
03338 
03339   
03340   const Layout_t& in_layout = f.getLayout();
03341   const Domain_t& in_dom = in_layout.getDomain();
03342   const Layout_t& out_layout = g.getLayout();
03343   const Domain_t& out_dom = out_layout.getDomain();
03344   PAssert( checkDomain(this->getDomain(),in_dom) &&
03345            checkDomain(this->getDomain(),out_dom) );
03346 
03347   
03348   RealField_t* tempR = &f;
03349   
03350   T* localdataR;
03351       
03352   TAU_PROFILE_START(timer_swap);
03353   
03354 
03355   
03356   const Domain_t& temp_dom = tempRLayouts_m->getDomain();
03357   bool skipTranspose = false;
03358   
03359   
03360   if (!constInput) {
03361     
03362     
03363     skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
03364                       (in_dom[0].length() == temp_dom[0].length()) &&
03365                       (in_layout.numVnodes() == 1) &&
03366                       (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03367   }
03368 
03369   bool skipFinal = false;
03370   
03371   
03372 
03373   
03374   
03375   skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
03376                 (out_dom[0].length() == temp_dom[0].length()) &&
03377                 (out_layout.numVnodes() == 1) &&
03378                 (g.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03379 
03380   if (!skipTranspose) {
03381     
03382     (*tempRFields_m) = (*tempR);
03383     tempR = tempRFields_m;  
03384   }
03385   if (skipFinal) {
03386     
03387     
03388 
03389     
03390     g = (*tempR);
03391 
03392     
03393     if (this->compressTemps() && tempR != &f) *tempR = 0;
03394     tempR = &g;  
03395   }
03396   TAU_PROFILE_STOP(timer_swap);
03397       
03398   TAU_PROFILE_START(timer_fft);
03399   
03400   typename RealField_t::const_iterator_if l_i = tempR->begin_if();
03401   if (l_i != tempR->end_if()) {
03402 
03403     
03404     RealLField_t* ldf = (*l_i).second.get();
03405     
03406     ldf->Uncompress();
03407     
03408     localdataR = ldf->getP();
03409 
03410     
03411     this->getEngine().callFFT(0, direction, localdataR);
03412   } 
03413   TAU_PROFILE_STOP(timer_fft);
03414 
03415   
03416   if (tempR != &g) {
03417     TAU_PROFILE_START(timer_swap);
03418     
03419     g = (*tempR);
03420     if (this->compressTemps() && tempR != &f) *tempR = 0;
03421     TAU_PROFILE_STOP(timer_swap);
03422   }
03423 
03424   
03425   if (direction == +1) g = g * this->getNormFact();
03426 
03427   return;
03428 }
03429 
03430 
03431 
03432 
03433 
03434 template <class T>
03435 void
03436 FFT<SineTransform,1U,T>::transform(
03437   int direction,
03438   FFT<SineTransform,1U,T>::RealField_t& f)
03439 {
03440   
03441   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03442   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
03443   TAU_PROFILE(tauname, tautype, TAU_FFT);
03444   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03445   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03446 
03447   
03448   
03449 
03450   
03451   const Layout_t& in_layout = f.getLayout();
03452   const Domain_t& in_dom = in_layout.getDomain();
03453   PAssert(checkDomain(this->getDomain(),in_dom));
03454 
03455   
03456   RealField_t* tempR = &f;
03457   
03458   T* localdataR;
03459       
03460   TAU_PROFILE_START(timer_swap);
03461   
03462 
03463   
03464   const Domain_t& temp_dom = tempRLayouts_m->getDomain();
03465   bool skipTranspose = false;
03466   
03467   
03468 
03469   
03470   
03471   skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
03472                     (in_dom[0].length() == temp_dom[0].length()) &&
03473                     (in_layout.numVnodes() == 1) &&
03474                     (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03475 
03476   bool skipFinal = false;
03477   
03478   
03479 
03480   
03481   
03482   skipFinal = ( (in_dom[0].sameBase(temp_dom[0])) &&
03483                 (in_dom[0].length() == temp_dom[0].length()) &&
03484                 (in_layout.numVnodes() == 1) &&
03485                 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03486 
03487   if (!skipTranspose) {
03488     
03489     (*tempRFields_m) = (*tempR);
03490 
03491     tempR = tempRFields_m;  
03492   }
03493   if (skipFinal) {
03494     
03495     
03496 
03497     
03498     f = (*tempR);
03499 
03500     
03501     if (this->compressTemps() && tempR != &f) *tempR = 0;
03502     tempR = &f;  
03503   }
03504   TAU_PROFILE_STOP(timer_swap);
03505       
03506   TAU_PROFILE_START(timer_fft);
03507   
03508   typename RealField_t::const_iterator_if l_i = tempR->begin_if();
03509   if (l_i != tempR->end_if()) {
03510 
03511     
03512     RealLField_t* ldf = (*l_i).second.get();
03513     
03514     ldf->Uncompress();
03515     
03516     localdataR = ldf->getP();
03517 
03518     
03519     this->getEngine().callFFT(0, direction, localdataR);
03520   }
03521   TAU_PROFILE_STOP(timer_fft);
03522 
03523   
03524   if (tempR != &f) {
03525     TAU_PROFILE_START(timer_swap);
03526     
03527     f = (*tempR);
03528     if (this->compressTemps()) *tempR = 0;
03529     TAU_PROFILE_STOP(timer_swap);
03530   }
03531 
03532   
03533   if (direction == +1) f = f * this->getNormFact();
03534 
03535   return;
03536 }
03537 
03538 
03539 
03540 
03541 
03542 
03543 
03544