IPPL: src/FFT/FFT.cpp Source File

00001 // -*- C++ -*-
00002 /***************************************************************************
00003  *
00004  * The IPPL Framework
00005  * 
00006  * This program was prepared by PSI. 
00007  * All rights in the program are reserved by PSI.
00008  * Neither PSI nor the author(s)
00009  * makes any warranty, express or implied, or assumes any liability or
00010  * responsibility for the use of this software
00011  *
00012  *
00013  ***************************************************************************/
00014 
00015 // -*- C++ -*-
00016 /***************************************************************************
00017  *
00018  * The IPPL Framework
00019  * 
00020  *
00021  * Visit http://people.web.psi.ch/adelmann/ for more details
00022  *
00023  ***************************************************************************/
00024 
00025 // include files
00026 #include "FFT/FFT.h"
00027 #include "FieldLayout/FieldLayout.h"
00028 #include "Field/BareField.h"
00029 #include "Utility/IpplStats.h"
00030 
00031 #ifdef IPPL_PRINTDEBUG
00032 #define FFTDBG(x) x
00033 #else
00034 #define FFTDBG(x)
00035 #endif
00036 
00037 
00043 //=============================================================================
00044 // FFT CCTransform Constructors
00045 //=============================================================================
00046 
00052 template <unsigned Dim, class T>
00053 FFT<CCTransform,Dim,T>::FFT(
00054   const typename FFT<CCTransform,Dim,T>::Domain_t& cdomain, 
00055   const bool transformTheseDims[Dim],
00056   const bool& compressTemps)
00057   : FFTBase<Dim,T>(FFT<CCTransform,Dim,T>::ccFFT, cdomain,
00058                    transformTheseDims, compressTemps)
00059 {
00060 
00061   // Tau profiling
00062   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00063   TAU_TYPE_STRING(tautype,
00064                   "(" + CT(cdomain) + ", const bool [], const bool&)");
00065   TAU_PROFILE(tauname, tautype, TAU_FFT);
00066 
00067   // construct array of axis lengths
00068   unsigned nTransformDims = this->numTransformDims();
00069   int* lengths = new int[nTransformDims];
00070   unsigned d;
00071   for (d=0; d<nTransformDims; ++d)
00072     lengths[d] = cdomain[this->activeDimension(d)].length();
00073 
00074   // construct array of transform types for FFT Engine, compute normalization
00075   int* transformTypes = new int[nTransformDims];
00076   T& normFact = this->getNormFact();
00077   normFact = 1.0;
00078   for (d=0; d<nTransformDims; ++d) {
00079       transformTypes[d] = FFTBase<Dim,T>::ccFFT;  // all transforms are complex-to-complex
00080       normFact /= lengths[d];
00081   }
00082 
00083   // set up FFT Engine
00084   this->getEngine().setup(nTransformDims, transformTypes, lengths);
00085   delete [] transformTypes;
00086   delete [] lengths;
00087   // set up the temporary fields
00088   setup();
00089 }
00090 
00091 
00092 
00097 template <unsigned Dim, class T>
00098 void
00099 FFT<CCTransform,Dim,T>::setup(void)
00100 {
00101   // Tau profiling
00102   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00103   TAU_TYPE_STRING(tautype, "(void)");
00104   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00105 
00106   unsigned d, activeDim;
00107   unsigned nTransformDims = this->numTransformDims();
00108   // Set up the arrays of temporary Fields and FieldLayouts:
00109   e_dim_tag serialParallel[Dim];  // Specifies SERIAL, PARALLEL dims in temp
00110   // make zeroth dimension always SERIAL
00111   serialParallel[0] = SERIAL;
00112   // all other dimensions parallel
00113   for (d=1; d<Dim; ++d)
00114     serialParallel[d] = PARALLEL;
00115 
00116   tempLayouts_m = new Layout_t*[nTransformDims];
00117   tempFields_m = new ComplexField_t*[nTransformDims];
00118 
00119   // loop over transform dimensions
00120   for (unsigned dim=0; dim<nTransformDims; ++dim) {
00121     // get number of dimension to be transformed
00122     activeDim = this->activeDimension(dim);
00123     // Get input Field's domain
00124     const Domain_t& ndic = this->getDomain();
00125     // make new domain with permuted Indexes, activeDim first
00126     Domain_t ndip;
00127     ndip[0] = ndic[activeDim];
00128     for (d=1; d<Dim; ++d) {
00129       int nextDim = activeDim + d;
00130       if (nextDim >= Dim) nextDim -= Dim;
00131       ndip[d] = ndic[nextDim];
00132     }
00133     // generate temporary field layout
00134     tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
00135     // generate temporary Field
00136     tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
00137     // If user requests no intermediate compression, uncompress right now:
00138     if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
00139   }
00140 
00141   return;
00142 }
00143 
00144 //-----------------------------------------------------------------------------
00145 // Destructor
00146 //-----------------------------------------------------------------------------
00147 
00148 template <unsigned Dim, class T>
00149 FFT<CCTransform,Dim,T>::~FFT(void) {
00150 
00151   // Tau profiling
00152   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
00153   TAU_TYPE_STRING(tautype, "(void)");
00154   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00155 
00156   // delete arrays of temporary fields and field layouts
00157   unsigned nTransformDims = this->numTransformDims();
00158   for (unsigned d=0; d<nTransformDims; ++d) {
00159     delete tempFields_m[d];
00160     delete tempLayouts_m[d];
00161   }
00162   delete [] tempFields_m;
00163   delete [] tempLayouts_m;
00164 }
00165 
00166 
00167 //-----------------------------------------------------------------------------
00168 // do the CC FFT; separate input and output fields
00169 //-----------------------------------------------------------------------------
00170 
00171 template <unsigned Dim, class T>
00172 void
00173 FFT<CCTransform,Dim,T>::transform(
00174   int direction,
00175   typename FFT<CCTransform,Dim,T>::ComplexField_t& f,
00176   typename FFT<CCTransform,Dim,T>::ComplexField_t& g,
00177   const bool& constInput)
00178 {
00179   // Tau profiling
00180   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00181   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
00182   TAU_PROFILE(tauname, tautype, TAU_FFT);
00183   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00184   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00185 
00186   // indicate we're doing another FFT
00187   //INCIPPLSTAT(incFFTs);
00188 
00189   // Check domain of incoming Fields
00190   const Layout_t& in_layout = f.getLayout();
00191   const Domain_t& in_dom = in_layout.getDomain();
00192   const Layout_t& out_layout = g.getLayout();
00193   const Domain_t& out_dom = out_layout.getDomain();
00194   PAssert( checkDomain(this->getDomain(),in_dom) &&
00195            checkDomain(this->getDomain(),out_dom) );
00196 
00197   // Common loop iterate and other vars:
00198   unsigned d;
00199   int idim;            // idim loops over the number of transform dims.
00200   int begdim, enddim;  // beginning and end of transform dim loop
00201   unsigned nTransformDims = this->numTransformDims();
00202   // Field* for temp Field management:
00203   ComplexField_t* temp = &f;
00204   // Local work array passed to FFT:
00205   Complex_t* localdata;
00206       
00207   // Loop over the dimensions be transformed:
00208   begdim = (direction == +1) ? 0 : (nTransformDims-1);
00209   enddim = (direction == +1) ? nTransformDims : -1;
00210   for (idim = begdim; idim != enddim; idim += direction) {
00211     TAU_PROFILE_START(timer_swap);
00212     // Now do the serial transforms along this dimension:
00213 
00214     bool skipTranspose = false;
00215     // if this is the first transform dimension, we might be able
00216     // to skip the transpose into the first temporary Field
00217     if (idim == begdim && !constInput) {
00218       // get domain for comparison
00219       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
00220       // check that zeroth axis is the same and is serial
00221       // and that there are no guard cells
00222       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
00223                         (in_dom[0].length() == first_dom[0].length()) &&
00224                         (in_layout.getDistribution(0) == SERIAL) &&
00225                         (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00226     }
00227 
00228     // if this is the last transform dimension, we might be able
00229     // to skip the last temporary and transpose right into g
00230     if (idim == enddim-direction) {
00231       // get the domain for comparison
00232       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
00233       // check that zeroth axis is the same and is serial
00234       // and that there are no guard cells
00235       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
00236                         (out_dom[0].length() == last_dom[0].length()) &&
00237                         (out_layout.getDistribution(0) == SERIAL) &&
00238                         (g.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00239     }
00240 
00241     if (!skipTranspose) {
00242       // transpose and permute to Field with transform dim first
00243       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
00244         (*temp)[temp->getLayout().getDomain()];
00245 
00246       // Compress out previous iterate's storage:
00247       if (this->compressTemps() && temp != &f) *temp = 0;
00248       temp = tempFields_m[idim];  // Field* management aid
00249     }
00250     else if (idim == enddim-direction && temp != &g) {
00251       // last transform and we can skip the last temporary field
00252       // so do the transpose here using g instead
00253 
00254       // transpose and permute to Field with transform dim first
00255       g[out_dom] = (*temp)[temp->getLayout().getDomain()];
00256 
00257       // Compress out previous iterate's storage:
00258       if (this->compressTemps() && temp != &f) *temp = 0;
00259       temp = &g;  // Field* management aid
00260     }
00261     TAU_PROFILE_STOP(timer_swap);
00262       
00263     TAU_PROFILE_START(timer_fft);
00264     // Loop over all the Vnodes, working on the LField in each.
00265     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
00266     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
00267 
00268       // Get the LField
00269       ComplexLField_t* ldf = (*l_i).second.get();
00270       // make sure we are uncompressed
00271       ldf->Uncompress();
00272       // get the raw data pointer
00273       localdata = ldf->getP();
00274 
00275       // Do 1D complex-to-complex FFT's on all the strips in the LField:
00276       int nstrips = 1, length = ldf->size(0);
00277       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
00278       for (int istrip=0; istrip<nstrips; ++istrip) {
00279         // Do the 1D FFT:
00280         this->getEngine().callFFT(idim, direction, localdata);
00281         // advance the data pointer
00282         localdata += length;
00283       } // loop over 1D strips
00284     } // loop over all the LFields
00285     TAU_PROFILE_STOP(timer_fft);
00286   } // loop over all transformed dimensions 
00287 
00288   // skip final assignment and compress if we used g as final temporary
00289   if (temp != &g) {
00290     TAU_PROFILE_START(timer_swap);
00291     // Now assign into output Field, and compress last temp's storage:
00292     g[out_dom] = (*temp)[temp->getLayout().getDomain()];
00293     if (this->compressTemps() && temp != &f) *temp = 0;
00294     TAU_PROFILE_STOP(timer_swap);
00295   }
00296 
00297   // Normalize:
00298   if (direction == +1)
00299     g *= Complex_t(this->getNormFact(), 0.0);
00300 
00301   return;
00302 }
00303 
00304 //-----------------------------------------------------------------------------
00305 // "in-place" FFT; specify +1 or -1 to indicate forward or inverse transform.
00306 //-----------------------------------------------------------------------------
00307 
00308 template <unsigned Dim, class T>
00309 void
00310 FFT<CCTransform,Dim,T>::transform(
00311   int direction,
00312   typename FFT<CCTransform,Dim,T>::ComplexField_t& f)
00313 {
00314   // Tau profiling
00315   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00316   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
00317   TAU_PROFILE(tauname, tautype, TAU_FFT);
00318   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00319   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00320 
00321   // indicate we're doing another FFT
00322   // INCIPPLSTAT(incFFTs);
00323 
00324   // Check domain of incoming Field
00325   const Layout_t& in_layout = f.getLayout();
00326   const Domain_t& in_dom = in_layout.getDomain();
00327   PAssert(checkDomain(this->getDomain(),in_dom));
00328 
00329   // Common loop iterate and other vars:
00330   unsigned d;
00331   int idim;            // idim loops over the number of transform dims.
00332   int begdim, enddim;  // beginning and end of transform dim loop
00333   unsigned nTransformDims = this->numTransformDims();
00334   // Field* for temp Field management:
00335   ComplexField_t* temp = &f;
00336   // Local work array passed to FFT:
00337   Complex_t* localdata;
00338       
00339   // Loop over the dimensions be transformed:
00340   begdim = (direction == +1) ? 0 : (nTransformDims-1);
00341   enddim = (direction == +1) ? nTransformDims : -1;
00342   for (idim = begdim; idim != enddim; idim += direction) {
00343     TAU_PROFILE_START(timer_swap);
00344     // Now do the serial transforms along this dimension:
00345 
00346     bool skipTranspose = false;
00347     // if this is the first transform dimension, we might be able
00348     // to skip the transpose into the first temporary Field
00349     if (idim == begdim) {
00350       // get domain for comparison
00351       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
00352       // check that zeroth axis is the same and is serial
00353       // and that there are no guard cells
00354       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
00355                         (in_dom[0].length() == first_dom[0].length()) &&
00356                         (in_layout.getDistribution(0) == SERIAL) &&
00357                         (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00358     }
00359 
00360     // if this is the last transform dimension, we might be able
00361     // to skip the last temporary and transpose right into f
00362     if (idim == enddim-direction) {
00363       // get domain for comparison
00364       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
00365       // check that zeroth axis is the same and is serial
00366       // and that there are no guard cells
00367       skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
00368                         (in_dom[0].length() == last_dom[0].length()) &&
00369                         (in_layout.getDistribution(0) == SERIAL) &&
00370                         (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
00371     }
00372 
00373     if (!skipTranspose) {
00374       // transpose and permute to Field with transform dim first
00375       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
00376         (*temp)[temp->getLayout().getDomain()];
00377 
00378       // Compress out previous iterate's storage:
00379       if (this->compressTemps() && temp != &f) *temp = 0;
00380       temp = tempFields_m[idim];  // Field* management aid
00381     }
00382     else if (idim == enddim-direction && temp != &f) {
00383       // last transform and we can skip the last temporary field
00384       // so do the transpose here using f instead
00385 
00386       // transpose and permute to Field with transform dim first
00387       f[in_dom] = (*temp)[temp->getLayout().getDomain()];
00388 
00389       // Compress out previous iterate's storage:
00390       if (this->compressTemps()) *temp = 0;
00391       temp = &f;  // Field* management aid
00392     }
00393     TAU_PROFILE_STOP(timer_swap);
00394       
00395     TAU_PROFILE_START(timer_fft);
00396     // Loop over all the Vnodes, working on the LField in each.
00397     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
00398     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
00399 
00400       // Get the LField
00401       ComplexLField_t* ldf = (*l_i).second.get();
00402       // make sure we are uncompressed
00403       ldf->Uncompress();
00404       // get the raw data pointer
00405       localdata = ldf->getP();
00406 
00407       // Do 1D complex-to-complex FFT's on all the strips in the LField:
00408       int nstrips = 1, length = ldf->size(0);
00409       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
00410       for (int istrip=0; istrip<nstrips; ++istrip) {
00411         // Do the 1D FFT:
00412         this->getEngine().callFFT(idim, direction, localdata);
00413         // advance the data pointer
00414         localdata += length;
00415       } // loop over 1D strips
00416     } // loop over all the LFields
00417     TAU_PROFILE_STOP(timer_fft);
00418 
00419   } // loop over all transformed dimensions 
00420 
00421   // skip final assignment and compress if we used f as final temporary
00422   if (temp != &f) {
00423     TAU_PROFILE_START(timer_swap);
00424     // Now assign back into original Field, and compress last temp's storage:
00425     f[in_dom] = (*temp)[temp->getLayout().getDomain()];
00426     if (this->compressTemps()) *temp = 0;
00427     TAU_PROFILE_STOP(timer_swap);
00428   }
00429 
00430   // Normalize:
00431   if (direction == +1)
00432     f *= Complex_t(this->getNormFact(), 0.0);
00433 
00434   return;
00435 }
00436 
00437 
00438 //=============================================================================
00439 // 1D FFT CCTransform Constructors
00440 //=============================================================================
00441 
00442 //-----------------------------------------------------------------------------
00443 // Create a new FFT object of type CCTransform, with a
00444 // given domain. Also specify which dimensions to transform along.
00445 //-----------------------------------------------------------------------------
00446 
00447 template <class T>
00448 FFT<CCTransform,1U,T>::FFT(
00449   const typename FFT<CCTransform,1U,T>::Domain_t& cdomain, 
00450   const bool transformTheseDims[1U], const bool& compressTemps)
00451   : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain,
00452                   transformTheseDims, compressTemps)
00453 {
00454 
00455   // Tau profiling
00456   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00457   TAU_TYPE_STRING(tautype, "(" + CT(cdomain) + ", const bool [], const bool&)");
00458   TAU_PROFILE(tauname, tautype, TAU_FFT);
00459 
00460   unsigned nTransformDims = 1U;
00461   // get axis length
00462   int length;
00463   length = cdomain[0].length();
00464 
00465   // get transform type for FFT Engine, compute normalization
00466   int transformType;
00467   transformType = FFTBase<1U,T>::ccFFT;  // all transforms are complex-to-complex
00468   T& normFact = this->getNormFact();
00469   normFact = 1.0 / length;
00470 
00471   // set up FFT Engine
00472   this->getEngine().setup(nTransformDims, &transformType, &length);
00473   // set up the temporary fields
00474   setup();
00475 }
00476 
00477 //-----------------------------------------------------------------------------
00478 // Create a new FFT object of type CCTransform, with a
00479 // given domain. Default case of transforming along all dimensions.
00480 //-----------------------------------------------------------------------------
00481 
00482 template <class T>
00483 FFT<CCTransform,1U,T>::FFT(
00484   const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
00485   const bool& compressTemps) 
00486   : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain, compressTemps)
00487 {
00488 
00489   // Tau profiling
00490   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00491   TAU_TYPE_STRING(tautype, "(" + CT(cdomain) + ", const bool&)");
00492   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00493 
00494   // get axis length
00495   int length;
00496   length = cdomain[0].length();
00497 
00498   // get transform type for FFT Engine, compute normalization
00499   int transformType;
00500   transformType = FFTBase<1U,T>::ccFFT;  // all transforms are complex-to-complex
00501   T& normFact = this->getNormFact();
00502   normFact = 1.0 / length;
00503 
00504   // set up FFT Engine
00505   this->getEngine().setup(1U, &transformType, &length);
00506   // set up the temporary fields
00507   setup();
00508 }
00509 
00510 //-----------------------------------------------------------------------------
00511 // setup performs all the initializations necessary after the transform
00512 // directions have been specified.
00513 //-----------------------------------------------------------------------------
00514 
00515 template <class T>
00516 void
00517 FFT<CCTransform,1U,T>::setup(void)
00518 {
00519   // Tau profiling
00520   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00521   TAU_TYPE_STRING(tautype, "(void)");
00522   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00523 
00524   // Get input Field's domain
00525   const Domain_t& ndic = this->getDomain();
00526   // generate temporary field layout
00527   tempLayouts_m = new Layout_t(ndic[0], PARALLEL, 1);
00528   // generate temporary Field
00529   tempFields_m = new ComplexField_t(*tempLayouts_m);
00530   // If user requests no intermediate compression, uncompress right now:
00531   if (!this->compressTemps()) tempFields_m->Uncompress();
00532 
00533   return;
00534 }
00535 
00536 //-----------------------------------------------------------------------------
00537 // Destructor
00538 //-----------------------------------------------------------------------------
00539 
00540 template <class T>
00541 FFT<CCTransform,1U,T>::~FFT(void) {
00542 
00543   // Tau profiling
00544   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
00545   TAU_TYPE_STRING(tautype, "(void)");
00546   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00547 
00548   // delete temporary fields and field layouts
00549   delete tempFields_m;
00550   delete tempLayouts_m;
00551 }
00552 
00553 
00554 //-----------------------------------------------------------------------------
00555 // do the CC FFT; separate input and output fields
00556 //-----------------------------------------------------------------------------
00557 
00558 template <class T>
00559 void
00560 FFT<CCTransform,1U,T>::transform(
00561   int direction,
00562   typename FFT<CCTransform,1U,T>::ComplexField_t& f,
00563   typename FFT<CCTransform,1U,T>::ComplexField_t& g,
00564   const bool& constInput)
00565 {
00566   // Tau profiling
00567   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00568   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
00569   TAU_PROFILE(tauname, tautype, TAU_FFT);
00570   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00571   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00572 
00573   // indicate we're doing another FFT
00574   // INCIPPLSTAT(incFFTs);
00575 
00576   // Check domain of incoming Fields
00577   const Layout_t& in_layout = f.getLayout();
00578   const Domain_t& in_dom = in_layout.getDomain();
00579   const Layout_t& out_layout = g.getLayout();
00580   const Domain_t& out_dom = out_layout.getDomain();
00581   PAssert( checkDomain(this->getDomain(),in_dom) &&
00582            checkDomain(this->getDomain(),out_dom) );
00583 
00584   // Field* for temp Field management:
00585   ComplexField_t* temp = &f;
00586   // Local work array passed to FFT:
00587   Complex_t* localdata;
00588       
00589   TAU_PROFILE_START(timer_swap);
00590   // Now do the serial transforms along this dimension:
00591 
00592   // get temp domain for comparison
00593   const Domain_t& temp_dom = tempLayouts_m->getDomain();
00594 
00595   bool skipTranspose = false;
00596   // if this is the first transform dimension, we might be able
00597   // to skip the transpose into the first temporary Field
00598   if (!constInput) {
00599     // check that zeroth axis is the same, has one vnode,
00600     // and that there are no guard cells
00601     skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
00602                       (in_dom[0].length() == temp_dom[0].length()) &&
00603                       (in_layout.numVnodes() == 1) &&
00604                       (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00605   }
00606 
00607   bool skipFinal;
00608   // we might be able
00609   // to skip the last temporary and transpose right into g
00610 
00611   // check that zeroth axis is the same, has one vnode
00612   // and that there are no guard cells
00613   skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
00614                 (out_dom[0].length() == temp_dom[0].length()) &&
00615                 (out_layout.numVnodes() == 1) &&
00616                 (g.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00617 
00618   if (!skipTranspose) {
00619     // assign to Field with proper layout
00620     (*tempFields_m) = (*temp);
00621     temp = tempFields_m;  // Field* management aid
00622   }
00623   if (skipFinal) {
00624     // we can skip the last temporary field
00625     // so do the transpose here using g instead
00626 
00627     // assign to Field with proper layout
00628     g = (*temp);
00629 
00630     // Compress out previous iterate's storage:
00631     if (this->compressTemps() && temp != &f) *temp = 0;
00632     temp = &g;  // Field* management aid
00633   }
00634   TAU_PROFILE_STOP(timer_swap);
00635       
00636   TAU_PROFILE_START(timer_fft);
00637 
00638   // should be only one LField!
00639   typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
00640   if (l_i != temp->end_if()) {
00641     // Get the LField
00642     ComplexLField_t* ldf = (*l_i).second.get();
00643     // make sure we are uncompressed
00644     ldf->Uncompress();
00645     // get the raw data pointer
00646     localdata = ldf->getP();
00647 
00648     // Do 1D complex-to-complex FFT on the strip
00649     int length = ldf->size(0);
00650     // Do the 1D FFT:
00651     this->getEngine().callFFT(0, direction, localdata);
00652   }
00653 
00654   TAU_PROFILE_STOP(timer_fft);
00655 
00656   // skip final assignment and compress if we used g as final temporary
00657   if (temp != &g) {
00658     TAU_PROFILE_START(timer_swap);
00659     // Now assign into output Field, and compress last temp's storage:
00660     g = (*temp);
00661     if (this->compressTemps() && temp != &f) *temp = 0;
00662     TAU_PROFILE_STOP(timer_swap);
00663   }
00664 
00665   // Normalize:
00666   if (direction == +1)
00667     g *= Complex_t(this->getNormFact(), 0.0);
00668 
00669   return;
00670 }
00671 
00672 //-----------------------------------------------------------------------------
00673 // "in-place" FFT; specify +1 or -1 to indicate forward or inverse transform.
00674 //-----------------------------------------------------------------------------
00675 
00676 template <class T>
00677 void
00678 FFT<CCTransform,1U,T>::transform(
00679   int direction,
00680   typename FFT<CCTransform,1U,T>::ComplexField_t& f)
00681 {
00682   // Tau profiling
00683   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
00684   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
00685   TAU_PROFILE(tauname, tautype, TAU_FFT);
00686   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
00687   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
00688 
00689   // indicate we're doing another FFT
00690   // INCIPPLSTAT(incFFTs);
00691 
00692   // Check domain of incoming Field
00693   const Layout_t& in_layout = f.getLayout();
00694   const Domain_t& in_dom = in_layout.getDomain();
00695   PAssert(checkDomain(this->getDomain(),in_dom));
00696 
00697   // Field* for temp Field management:
00698   ComplexField_t* temp = &f;
00699   // Local work array passed to FFT:
00700   Complex_t* localdata;
00701       
00702   TAU_PROFILE_START(timer_swap);
00703   // Now do the serial transforms along this dimension:
00704 
00705   // get domain for comparison
00706   const Domain_t& temp_dom = tempLayouts_m->getDomain();
00707 
00708   bool skipTranspose;
00709   // we might be able
00710   // to skip the transpose into the first temporary Field
00711 
00712   // check that zeroth axis is the same, has one vnode,
00713   // and that there are no guard cells
00714   skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
00715                     (in_dom[0].length() == temp_dom[0].length()) &&
00716                     (in_layout.numVnodes() == 1) &&
00717                     (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
00718 
00719   if (!skipTranspose) {
00720     // assign to Field with proper layout
00721     (*tempFields_m) = (*temp);
00722     temp = tempFields_m;  // Field* management aid
00723   }
00724   TAU_PROFILE_STOP(timer_swap);
00725       
00726   TAU_PROFILE_START(timer_fft);
00727 
00728   // should be only one LField!
00729   typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
00730   if (l_i != temp->end_if()) {
00731     // Get the LField
00732     ComplexLField_t* ldf = (*l_i).second.get();
00733     // make sure we are uncompressed
00734     ldf->Uncompress();
00735     // get the raw data pointer
00736     localdata = ldf->getP();
00737 
00738     // Do 1D complex-to-complex FFT on the strip
00739     int length = ldf->size(0);
00740     // Do the 1D FFT:
00741     this->getEngine().callFFT(0, direction, localdata);
00742   }
00743 
00744   TAU_PROFILE_STOP(timer_fft);
00745 
00746   // skip final assignment and compress if we used f as final temporary
00747   if (temp != &f) {
00748     TAU_PROFILE_START(timer_swap);
00749     // Now assign back into original Field, and compress last temp's storage:
00750     f = (*temp);
00751     if (this->compressTemps()) *temp = 0;
00752     TAU_PROFILE_STOP(timer_swap);
00753   }
00754 
00755   // Normalize:
00756   if (direction == +1)
00757     f *= Complex_t(this->getNormFact(), 0.0);
00758 
00759   return;
00760 }
00761 
00762 
00763 
00764 //=============================================================================
00765 // FFT RCTransform Constructors
00766 //=============================================================================
00767 
00768 //-----------------------------------------------------------------------------
00769 // Create a new FFT object of type RCTransform, with a
00770 // given domain. Also specify which dimensions to transform along.
00771 // Note that RC transform of a real array of length n results in a
00772 // complex array of length n/2+1.
00773 //-----------------------------------------------------------------------------
00774 
00775 template <unsigned Dim, class T>
00776 FFT<RCTransform,Dim,T>::FFT(
00777   const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
00778   const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
00779   const bool transformTheseDims[Dim], const bool& compressTemps)
00780   : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain,
00781                    transformTheseDims, compressTemps), 
00782     complexDomain_m(cdomain), serialAxes_m(1)
00783 {
00784   // Tau profiling
00785   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00786   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
00787                   ", const bool [], const bool&)");
00788   TAU_PROFILE(tauname, tautype, TAU_FFT);
00789 
00790   // construct array of axis lengths
00791   unsigned nTransformDims = this->numTransformDims();
00792   int* lengths = new int[nTransformDims];
00793   unsigned d;
00794   for (d=0; d<nTransformDims; ++d)
00795     lengths[d] = rdomain[this->activeDimension(d)].length();
00796 
00797   // construct array of transform types for FFT Engine, compute normalization
00798   int* transformTypes = new int[nTransformDims];
00799   T& normFact = this->getNormFact();
00800   normFact = 1.0;
00801   transformTypes[0] = FFTBase<Dim,T>::rcFFT;    // first transform is real-to-complex
00802   normFact /= lengths[0];
00803   for (d=1; d<nTransformDims; ++d) {
00804     transformTypes[d] = FFTBase<Dim,T>::ccFFT;  // all other transforms are complex-to-complex
00805     normFact /= lengths[d];
00806   }
00807 
00808   // set up FFT Engine
00809   this->getEngine().setup(nTransformDims, transformTypes, lengths);
00810   delete [] transformTypes;
00811   delete [] lengths;
00812 
00813   // set up the temporary fields
00814   setup();
00815 }
00816 
00817 //-----------------------------------------------------------------------------
00818 // Create a new FFT object of type RCTransform, with
00819 // given real and complex domains. Default: transform along all dimensions.
00820 //-----------------------------------------------------------------------------
00821 
00822 template <unsigned Dim, class T>
00823 FFT<RCTransform,Dim,T>::FFT(
00824   const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
00825   const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
00826   const bool& compressTemps,
00827   int serialAxes)
00828   : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain, compressTemps),
00829     complexDomain_m(cdomain), serialAxes_m(serialAxes)
00830 {
00831   // Tau profiling
00832   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
00833   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) + ", const bool&)");
00834   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00835 
00836   // construct array of axis lengths
00837   int lengths[Dim];
00838   unsigned d;
00839   for (d=0; d<Dim; ++d)
00840     lengths[d] = rdomain[d].length();
00841 
00842   // construct array of transform types for FFT Engine, compute normalization
00843   int transformTypes[Dim];
00844   T& normFact = this->getNormFact();
00845   normFact = 1.0;
00846   transformTypes[0] = FFTBase<Dim,T>::rcFFT;    // first transform is real-to-complex
00847   normFact /= lengths[0];
00848   for (d=1; d<Dim; ++d) {
00849     transformTypes[d] = FFTBase<Dim,T>::ccFFT;  // all other transforms are complex-to-complex
00850     normFact /= lengths[d];
00851   }
00852 
00853   // set up FFT Engine
00854   this->getEngine().setup(Dim, transformTypes, lengths);
00855 
00856   // set up the temporary fields
00857   setup();
00858 }
00859 
00860 //-----------------------------------------------------------------------------
00861 // setup performs all the initializations necessary after the transform
00862 // directions have been specified.
00863 //-----------------------------------------------------------------------------
00864 
00865 template <unsigned Dim, class T>
00866 void
00867 FFT<RCTransform,Dim,T>::setup(void) {
00868 
00869   // Tau profiling
00870   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
00871   TAU_TYPE_STRING(tautype, "(void)");
00872   TAU_PROFILE(tauname, tautype, TAU_FFT); 
00873 
00874   PAssert(serialAxes_m > 0 && serialAxes_m < Dim);
00875 
00876   unsigned d, d2, activeDim;
00877   unsigned nTransformDims = this->numTransformDims();
00878 
00879   // Set up the arrays of temporary Fields and FieldLayouts:
00880 
00881   // make first dimension(s) always SERIAL, all other dimensions parallel
00882   // for the real FFT; make first serialAxes_m axes serial for others
00883   e_dim_tag serialParallel[Dim];
00884   e_dim_tag NserialParallel[Dim];
00885   for (d=0; d < Dim; ++d) {
00886     serialParallel[d] = (d == 0 ? SERIAL : PARALLEL);
00887     NserialParallel[d] = (d < serialAxes_m ? SERIAL : PARALLEL);
00888   }
00889 
00890   // check that domain lengths agree between real and complex domains
00891   const Domain_t& domain = this->getDomain();
00892   activeDim = this->activeDimension(0);
00893   bool match = true;
00894   for (d=0; d<Dim; ++d) {
00895     if (d == activeDim) {
00896       // real array length n, complex array length n/2+1
00897       if ( complexDomain_m[d].length() !=
00898            (domain[d].length()/2 + 1) ) match = false;
00899     }
00900     else {
00901       // real and complex arrays should be same length for all other dims
00902       if (complexDomain_m[d].length() != domain[d].length()) match = false;
00903     }
00904   }
00905   PInsist(match,
00906           "Domains provided for real and complex Fields are incompatible!");
00907 
00908   // allocate arrays of temp fields and layouts for complex fields
00909   tempLayouts_m = new Layout_t*[nTransformDims];
00910   tempFields_m = new ComplexField_t*[nTransformDims];
00911 
00912   // set up the single temporary real field, with first dim serial, others par
00913 
00914   // make new domains with permuted Indexes, activeDim first
00915   Domain_t ndip;
00916   Domain_t ndipc;
00917   ndip[0] = domain[activeDim];
00918   ndipc[0] = complexDomain_m[activeDim];
00919   for (d=1; d<Dim; ++d) {
00920     int nextDim = activeDim + d;
00921     if (nextDim >= Dim) nextDim -= Dim;
00922     ndip[d] = domain[nextDim];
00923     ndipc[d] = complexDomain_m[nextDim];
00924   }
00925 
00926   // generate layout and object for temporary real field
00927   tempRLayout_m = new Layout_t(ndip, serialParallel, this->transVnodes());
00928   tempRField_m = new RealField_t(*tempRLayout_m);
00929 
00930   // generate layout and object for first temporary complex Field
00931   tempLayouts_m[0] = new Layout_t(ndipc, serialParallel, this->transVnodes());
00932   tempFields_m[0] = new ComplexField_t(*tempLayouts_m[0]);
00933 
00934   // determine the order in which dimensions will be transposed.  Put
00935   // the transposed dims first, and the others at the end.
00936   int fftorder[Dim], tmporder[Dim];
00937   int nofft = nTransformDims;
00938   for (d=0; d < nTransformDims; ++d)
00939     fftorder[d] = this->activeDimension(d);
00940   for (d=0; d < Dim; ++d) {
00941     // see if the dth dimension is one to transform
00942     bool active = false;
00943     for (d2=0; d2 < nTransformDims; ++d2) {
00944       if (this->activeDimension(d2) == d) {
00945         active = true;
00946         break;
00947       }
00948     }
00949 
00950     if (!active)
00951       // no it is not; put it at the bottom of list
00952       fftorder[nofft++] = d;
00953   }
00954 
00955   // But since the first FFT is done on a S,[P,P,...] field, permute
00956   // the order of this to get the first activeDimension at the end.
00957   nofft = fftorder[0];
00958   for (d=0; d < (Dim - 1); ++d)
00959     fftorder[d] = fftorder[d+1];
00960   fftorder[Dim-1] = nofft;
00961 
00962   // now construct the remaining temporary complex fields
00963 
00964   // loop through and create actual permuted layouts, and also fields
00965   int dim = 1;                  // already have one temp field
00966   while (dim < nTransformDims) {
00967 
00968     int sp;
00969     for (sp=0; sp < serialAxes_m && dim < nTransformDims; ++sp, ++dim) {
00970 
00971       // make new domain with permuted Indexes
00972       for (d=0; d < Dim; ++d)
00973         ndip[d] = complexDomain_m[fftorder[d]];
00974 
00975       // generate layout and object for temporary complex Field
00976       tempLayouts_m[dim] = new Layout_t(ndip, NserialParallel, this->transVnodes());
00977       tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
00978 
00979       // permute the fft order for the first 'serialAxes_m' axes
00980       if (serialAxes_m > 1) {
00981         tmporder[0] = fftorder[0];
00982         for (d=0; d < (serialAxes_m-1); ++d)
00983           fftorder[d] = fftorder[d+1];
00984         fftorder[serialAxes_m - 1] = tmporder[0];
00985       }
00986     }
00987 
00988     // now, permute ALL the axes by serialAxes_m steps, to get the next
00989     // set of axes in the first n serial slots
00990     for (d=0; d < Dim; ++d)
00991       tmporder[d] = fftorder[d];
00992     for (d=0; d < Dim; ++d)
00993       fftorder[d] = tmporder[(d + serialAxes_m) % Dim];
00994   }
00995 }
00996 
00997 
00998 //-----------------------------------------------------------------------------
00999 // Destructor
01000 //-----------------------------------------------------------------------------
01001 
01002 template <unsigned Dim, class T>
01003 FFT<RCTransform,Dim,T>::~FFT(void) {
01004 
01005   // Tau profiling
01006   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
01007   TAU_TYPE_STRING(tautype, "(void)");
01008   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01009 
01010   // delete temporary fields and layouts
01011   unsigned nTransformDims = this->numTransformDims();
01012   for (unsigned d=0; d<nTransformDims; ++d) {
01013     delete tempFields_m[d];
01014     delete tempLayouts_m[d];
01015   }
01016   delete [] tempFields_m;
01017   delete [] tempLayouts_m;
01018   delete tempRField_m;
01019   delete tempRLayout_m;
01020 }
01021 
01022 //-----------------------------------------------------------------------------
01023 // real-to-complex FFT; direction is +1 or -1
01024 //-----------------------------------------------------------------------------
01025 
01026 template <unsigned Dim, class T>
01027 void
01028 FFT<RCTransform,Dim,T>::transform(
01029   int direction,
01030   typename FFT<RCTransform,Dim,T>::RealField_t& f,
01031   typename FFT<RCTransform,Dim,T>::ComplexField_t& g,
01032   const bool& constInput)
01033 {
01034   // Tau profiling
01035   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01036   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01037   TAU_PROFILE(tauname, tautype, TAU_FFT);
01038   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01039   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01040 
01041   // IPPL timers
01042   static IpplTimings::TimerRef tr1=IpplTimings::getTimer("RC-RRtranspose");
01043   static IpplTimings::TimerRef tr2=IpplTimings::getTimer("RC-CCtranspose");
01044   static IpplTimings::TimerRef ffttr1=IpplTimings::getTimer("RC-RC1Dfft");
01045   static IpplTimings::TimerRef ffttr2=IpplTimings::getTimer("RC-CC1Dfft");
01046   static IpplTimings::TimerRef tottimer=IpplTimings::getTimer("RC-total");
01047   FFTDBG(Inform tmsg("FFT-RC-forward"));
01048 
01049   // time the whole mess
01050   IpplTimings::startTimer(tottimer);
01051 
01052   // indicate we're doing another FFT
01053   // INCIPPLSTAT(incFFTs);
01054 
01055   // Check domain of incoming Fields
01056   const Layout_t& in_layout = f.getLayout();
01057   const Domain_t& in_dom = in_layout.getDomain();
01058   const Layout_t& out_layout = g.getLayout();
01059   const Domain_t& out_dom = out_layout.getDomain();
01060   PAssert( checkDomain(this->getDomain(),in_dom) &&
01061            checkDomain(complexDomain_m,out_dom) );
01062 
01063   // Common loop iterate and other vars:
01064   unsigned d;
01065   int idim;      // idim loops over the number of transform dims.
01066   unsigned nTransformDims = this->numTransformDims();
01067 
01068   // handle first RC transform separately
01069   idim = 0;
01070 
01071   RealField_t* tempR = tempRField_m;  // Field* management aid
01072   if (!constInput) {
01073     // see if we can use input field f as a temporary
01074     bool skipTemp = true;
01075 
01076     // more rigorous match required here; check that layouts are identical
01077     if ( !(in_layout == *tempRLayout_m) ) {
01078       skipTemp = false;
01079     } else {
01080       // make sure distributions match
01081       for (d=0; d<Dim; ++d)
01082         if (in_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
01083           skipTemp = false;
01084 
01085       // make sure vnode counts match
01086       if (in_layout.numVnodes() != tempRLayout_m->numVnodes())
01087         skipTemp = false;
01088 
01089       // also make sure there are no guard cells
01090       if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01091         skipTemp = false;
01092     }
01093 
01094     // if we can skip using this temporary, set the tempR pointer to the
01095     // original incoming field.  Otherwise, it will stay pointing at the
01096     // temporary real field, and we'll need to do a transpose of the data
01097     // from the original into the temporary.
01098     if (skipTemp)
01099       tempR = &f;
01100   }
01101 
01102   // if we're not using input as a temporary ...
01103   if (tempR != &f) {
01104     TAU_PROFILE_START(timer_swap);
01105 
01106     // transpose and permute to real Field with transform dim first
01107     FFTDBG(tmsg << "Doing transpose of real field into temporary ");
01108     FFTDBG(tmsg << "with layout = " << tempR->getLayout() << endl);
01109     IpplTimings::startTimer(tr1);
01110     (*tempR)[tempR->getDomain()] = f[in_dom];
01111     IpplTimings::stopTimer(tr1);
01112 
01113     TAU_PROFILE_STOP(timer_swap);
01114   }
01115 
01116   // Field* for temp Field management:
01117   ComplexField_t* temp = tempFields_m[0];
01118 
01119   // see if we can put final result directly into g.  This is useful if
01120   // we're doing just a 1D FFT of one dimension of a multi-dimensional field.
01121   if (nTransformDims == 1) {  // only a single RC transform
01122     bool skipTemp = true;
01123 
01124     // more rigorous match required here; check that layouts are identical
01125     if (!(out_layout == *tempLayouts_m[0])) {
01126       skipTemp = false;
01127     } else {
01128       for (d=0; d<Dim; ++d)
01129         if (out_layout.getDistribution(d) !=
01130             tempLayouts_m[0]->getDistribution(d))
01131           skipTemp = false;
01132 
01133       if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
01134         skipTemp = false;
01135 
01136       // also make sure there are no guard cells
01137       if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01138         skipTemp = false;
01139 
01140       // if we can skip using the temporary, set the pointer to the output
01141       // field for the first FFT to the second provided field (g)
01142       if (skipTemp)
01143         temp = &g;
01144     }
01145   }
01146 
01147   FFTDBG(tmsg << "Doing real->complex fft of first dimension ..." << endl);
01148   TAU_PROFILE_START(timer_fft);
01149   IpplTimings::startTimer(ffttr1);
01150 
01151   // Loop over all the Vnodes, working on the LField in each.
01152   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
01153   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01154   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
01155     // Get the LFields
01156     RealLField_t* rldf = (*rl_i).second.get();
01157     ComplexLField_t* cldf = (*cl_i).second.get();
01158 
01159     // make sure we are uncompressed
01160     rldf->Uncompress();
01161     cldf->Uncompress();
01162 
01163     // get the raw data pointers
01164     T* localreal = rldf->getP();
01165     Complex_t* localcomp = cldf->getP();
01166 
01167     // number of strips should be the same for real and complex LFields!
01168     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
01169     for (d=1; d<Dim; ++d)
01170       nstrips *= rldf->size(d);
01171 
01172     for (int istrip=0; istrip<nstrips; ++istrip) {
01173       // move the data into the complex strip, which is two reals longer
01174       for (int ilen=0; ilen<lengthreal; ilen+=2) 
01175         localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
01176 
01177       // Do the 1D real-to-complex FFT:
01178       // note that real-to-complex FFT direction is always +1
01179       this->getEngine().callFFT(idim, +1, localcomp);
01180 
01181       // advance the data pointers
01182       localreal += lengthreal;
01183       localcomp += lengthcomp;
01184     } // loop over 1D strips
01185   } // loop over all the LFields
01186 
01187   IpplTimings::stopTimer(ffttr1);
01188   TAU_PROFILE_STOP(timer_fft);
01189 
01190   // compress temporary storage
01191   if (this->compressTemps() && tempR != &f)
01192     *tempR = 0;
01193 
01194   // now proceed with the other complex-to-complex transforms
01195 
01196   // Local work array passed to FFT:
01197   Complex_t* localdata;
01198       
01199   // Loop over the remaining dimensions to be transformed:
01200   for (idim = 1; idim < nTransformDims; ++idim) {
01201     TAU_PROFILE_START(timer_swap);
01202     bool skipTranspose = false;
01203 
01204     // if this is the last transform dimension, we might be able
01205     // to skip the last temporary and transpose right into g
01206     if (idim == nTransformDims-1) {
01207       // get the domain for comparison
01208       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
01209 
01210       // make sure there are no guard cells, and that the first
01211       // axis matches what we expect and is serial.  Only need to
01212       // check first axis since we're just FFT'ing that one dimension.
01213       skipTranspose = (g.getGC() == FFT<RCTransform,Dim,T>::nullGC &&
01214                        out_dom[0].sameBase(last_dom[0]) &&
01215                        out_dom[0].length() == last_dom[0].length() &&
01216                        out_layout.getDistribution(0) == SERIAL);
01217     }
01218 
01219     if (!skipTranspose) {
01220       // transpose and permute to Field with transform dim first
01221       FFTDBG(tmsg << "Doing complex->complex transpose into field ");
01222       FFTDBG(tmsg << "with layout = " << tempFields_m[idim]->getLayout());
01223       FFTDBG(tmsg << endl);
01224       IpplTimings::startTimer(tr2);
01225       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
01226         (*temp)[temp->getLayout().getDomain()];
01227       IpplTimings::stopTimer(tr2);
01228 
01229       // Compress out previous iterate's storage:
01230       if (this->compressTemps())
01231         *temp = 0;
01232       temp = tempFields_m[idim];  // Field* management aid
01233 
01234     } else if (idim == nTransformDims-1) {
01235       // last transform and we can skip the last temporary field
01236       // so do the transpose here using g instead
01237 
01238       // transpose and permute to Field with transform dim first
01239       FFTDBG(tmsg << "Doing final complex->complex transpose ");
01240       FFTDBG(tmsg << "into return ");
01241       FFTDBG(tmsg << "with layout = " << g.getLayout());
01242       FFTDBG(tmsg << endl);
01243       IpplTimings::startTimer(tr2);
01244       g[out_dom] = (*temp)[temp->getLayout().getDomain()];
01245       IpplTimings::stopTimer(tr2);
01246 
01247       // Compress out previous iterate's storage:
01248       if (this->compressTemps())
01249         *temp = 0;
01250       temp = &g;  // Field* management aid
01251     }
01252     TAU_PROFILE_STOP(timer_swap);
01253 
01254     FFTDBG(tmsg << "Doing complex->complex fft of other dimension .." << endl);
01255     TAU_PROFILE_START(timer_fft);
01256     IpplTimings::startTimer(ffttr2);
01257 
01258     // Loop over all the Vnodes, working on the LField in each.
01259     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
01260     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
01261       // Get the LField
01262       ComplexLField_t* ldf = (*l_i).second.get();
01263 
01264       // make sure we are uncompressed
01265       ldf->Uncompress();
01266 
01267       // get the raw data pointer
01268       localdata = ldf->getP();
01269 
01270       // Do 1D complex-to-complex FFT's on all the strips in the LField:
01271       int nstrips = 1, length = ldf->size(0);
01272       for (d=1; d<Dim; ++d)
01273         nstrips *= ldf->size(d);
01274 
01275       for (int istrip=0; istrip<nstrips; ++istrip) {
01276         // Do the 1D FFT:
01277         this->getEngine().callFFT(idim, direction, localdata);
01278 
01279         // advance the data pointer
01280         localdata += length;
01281       } // loop over 1D strips
01282     } // loop over all the LFields
01283 
01284     IpplTimings::stopTimer(ffttr2);
01285     TAU_PROFILE_STOP(timer_fft);
01286   } // loop over all transformed dimensions 
01287 
01288   // skip final assignment and compress if we used g as final temporary
01289   if (temp != &g) {
01290     TAU_PROFILE_START(timer_swap);
01291 
01292     // Now assign into output Field, and compress last temp's storage:
01293     FFTDBG(tmsg << "Doing cleanup complex->complex transpose ");
01294     FFTDBG(tmsg << "into return ");
01295     FFTDBG(tmsg << "with layout = " << g.getLayout());
01296     FFTDBG(tmsg << endl);
01297     IpplTimings::startTimer(tr2);
01298     g[out_dom] = (*temp)[temp->getLayout().getDomain()];
01299     IpplTimings::stopTimer(tr2);
01300 
01301     if (this->compressTemps()) *temp = 0;
01302     TAU_PROFILE_STOP(timer_swap);
01303   }
01304 
01305   // Normalize:
01306   if (direction == +1) g = g * this->getNormFact();
01307 
01308   // finish timing the whole mess
01309   IpplTimings::stopTimer(tottimer);
01310 }
01311 
01312 //-----------------------------------------------------------------------------
01313 // RC FFT; opposite direction, from complex to real
01314 //-----------------------------------------------------------------------------
01315 
01316 template <unsigned Dim, class T>
01317 void
01318 FFT<RCTransform,Dim,T>::transform(
01319   int direction,
01320   typename FFT<RCTransform,Dim,T>::ComplexField_t& f,
01321   typename FFT<RCTransform,Dim,T>::RealField_t& g,
01322   const bool& constInput)
01323 {
01324   // Tau profiling
01325   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01326   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01327   TAU_PROFILE(tauname, tautype, TAU_FFT);
01328   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01329   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01330 
01331   // IPPL timers
01332   static IpplTimings::TimerRef tr1=IpplTimings::getTimer("RC-RRtranspose");
01333   static IpplTimings::TimerRef tr2=IpplTimings::getTimer("RC-CCtranspose");
01334   static IpplTimings::TimerRef ffttr1=IpplTimings::getTimer("RC-RC1Dfft");
01335   static IpplTimings::TimerRef ffttr2=IpplTimings::getTimer("RC-CC1Dfft");
01336   static IpplTimings::TimerRef tottimer=IpplTimings::getTimer("RC-total");
01337   FFTDBG(Inform tmsg("FFT-RC-reverse"));
01338 
01339   // time the whole mess
01340   IpplTimings::startTimer(tottimer);
01341 
01342   // indicate we're doing another FFT
01343   // INCIPPLSTAT(incFFTs);
01344 
01345   // Check domain of incoming Fields
01346   const Layout_t& in_layout = f.getLayout();
01347   const Domain_t& in_dom = in_layout.getDomain();
01348   const Layout_t& out_layout = g.getLayout();
01349   const Domain_t& out_dom = out_layout.getDomain();
01350   PAssert( checkDomain(complexDomain_m,in_dom) &&
01351            checkDomain(this->getDomain(),out_dom) );
01352 
01353   // Common loop iterate and other vars:
01354   unsigned d;
01355   int idim;      // idim loops over the number of transform dims.
01356   unsigned nTransformDims = this->numTransformDims();
01357 
01358   // proceed with the complex-to-complex transforms
01359 
01360   // Field* for temp Field management:
01361   ComplexField_t* temp = &f;
01362 
01363   // Local work array passed to FFT:
01364   Complex_t* localdata;
01365       
01366   // Loop over all dimensions to be transformed except last one:
01367   for (idim = nTransformDims-1; idim != 0; --idim) {
01368     TAU_PROFILE_START(timer_swap);
01369     // Now do the serial transforms along this dimension:
01370 
01371     bool skipTranspose = false;
01372     // if this is the first transform dimension, we might be able
01373     // to skip the transpose into the first temporary Field
01374     if (idim == nTransformDims-1 && !constInput) {
01375       // get domain for comparison
01376       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
01377       // check that zeroth axis is the same and is serial
01378       // and that there are no guard cells
01379       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
01380                         (in_dom[0].length() == first_dom[0].length()) &&
01381                         (in_layout.getDistribution(0) == SERIAL) &&
01382                         (f.getGC() == FFT<RCTransform,Dim,T>::nullGC) );
01383     }
01384 
01385     if (!skipTranspose) {
01386       // transpose and permute to Field with transform dim first
01387       FFTDBG(tmsg << "Doing complex->complex transpose into field ");
01388       FFTDBG(tmsg << "with layout = "<<tempFields_m[idim]->getLayout()<<endl);
01389       IpplTimings::startTimer(tr2);
01390       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
01391         (*temp)[temp->getLayout().getDomain()];
01392       IpplTimings::stopTimer(tr2);
01393 
01394       // Compress out previous iterate's storage:
01395       if (this->compressTemps() && temp != &f)
01396         *temp = 0;
01397       temp = tempFields_m[idim];  // Field* management aid
01398     }
01399     TAU_PROFILE_STOP(timer_swap);
01400 
01401     FFTDBG(tmsg << "Doing complex->complex fft of other dimension .." << endl);
01402     TAU_PROFILE_START(timer_fft);
01403     IpplTimings::startTimer(ffttr2);
01404 
01405     // Loop over all the Vnodes, working on the LField in each.
01406     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
01407     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
01408 
01409       // Get the LField
01410       ComplexLField_t* ldf = (*l_i).second.get();
01411 
01412       // make sure we are uncompressed
01413       ldf->Uncompress();
01414 
01415       // get the raw data pointer
01416       localdata = ldf->getP();
01417 
01418       // Do 1D complex-to-complex FFT's on all the strips in the LField:
01419       int nstrips = 1, length = ldf->size(0);
01420       for (d=1; d<Dim; ++d)
01421         nstrips *= ldf->size(d);
01422 
01423       for (int istrip=0; istrip<nstrips; ++istrip) {
01424         // Do the 1D FFT:
01425         this->getEngine().callFFT(idim, direction, localdata);
01426 
01427         // advance the data pointer
01428         localdata += length;
01429       } // loop over 1D strips
01430     } // loop over all the LFields
01431 
01432     IpplTimings::stopTimer(ffttr2);
01433     TAU_PROFILE_STOP(timer_fft);
01434   } // loop over all transformed dimensions 
01435 
01436   // handle last CR transform separately
01437   idim = 0;
01438 
01439   // see if we can put final result directly into g
01440   RealField_t* tempR;
01441   bool skipTemp = true;
01442 
01443   // more rigorous match required here; check that layouts are identical
01444   if (!(out_layout == *tempRLayout_m)) {
01445     skipTemp = false;
01446   } else {
01447     for (d=0; d<Dim; ++d)
01448       if (out_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
01449         skipTemp = false;
01450 
01451     if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
01452       skipTemp = false;
01453 
01454     // also make sure there are no guard cells
01455     if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01456       skipTemp = false;
01457   }
01458 
01459   if (skipTemp)
01460     tempR = &g;
01461   else 
01462     tempR = tempRField_m;
01463 
01464   skipTemp = true;
01465   if (nTransformDims == 1 && !constInput) {
01466     // only one CR transform
01467     // see if we really need to transpose input data
01468     // more rigorous match required here; check that layouts are identical
01469     if (!(in_layout == *tempLayouts_m[0])) {
01470       skipTemp = false;
01471     } else {
01472       for (d=0; d<Dim; ++d)
01473         if (in_layout.getDistribution(d) !=
01474             tempLayouts_m[0]->getDistribution(d))
01475           skipTemp = false;
01476 
01477       if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
01478         skipTemp = false;
01479 
01480       // also make sure there are no guard cells
01481       if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
01482         skipTemp = false;
01483     }
01484   } else {  // cannot skip transpose
01485     skipTemp = false;
01486   }
01487 
01488   TAU_PROFILE_START(timer_swap);
01489   if (!skipTemp) {
01490     // transpose and permute to complex Field with transform dim first
01491     FFTDBG(tmsg << "Doing final complex->complex transpose into field ");
01492     FFTDBG(tmsg << "with layout = "<<tempFields_m[0]->getLayout()<<endl);
01493     IpplTimings::startTimer(tr2);
01494     (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
01495       (*temp)[temp->getLayout().getDomain()];
01496     IpplTimings::stopTimer(tr2);
01497 
01498     // compress previous iterates storage
01499     if (this->compressTemps() && temp != &f)
01500       *temp = 0;
01501     temp = tempFields_m[0];
01502   }
01503   TAU_PROFILE_STOP(timer_swap);
01504 
01505   FFTDBG(tmsg << "Doing complex->real fft of final dimension ..." << endl);
01506   TAU_PROFILE_START(timer_fft);
01507   IpplTimings::startTimer(ffttr1);
01508 
01509   // Loop over all the Vnodes, working on the LField in each.
01510   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
01511   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01512   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
01513     // Get the LFields
01514     RealLField_t* rldf = (*rl_i).second.get();
01515     ComplexLField_t* cldf = (*cl_i).second.get();
01516 
01517     // make sure we are uncompressed
01518     rldf->Uncompress();
01519     cldf->Uncompress();
01520 
01521     // get the raw data pointers
01522     T* localreal = rldf->getP();
01523     Complex_t* localcomp = cldf->getP();
01524 
01525     // number of strips should be the same for real and complex LFields!
01526     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
01527     for (d=1; d<Dim; ++d)
01528       nstrips *= rldf->size(d);
01529 
01530     for (int istrip=0; istrip<nstrips; ++istrip) {
01531       // Do the 1D complex-to-real FFT:
01532       // note that complex-to-real FFT direction is always -1
01533       this->getEngine().callFFT(idim, -1, localcomp);
01534 
01535       // move the data into the real strip, which is two reals shorter
01536       for (int ilen=0; ilen<lengthreal; ilen+=2) {
01537         localreal[ilen] = real(localcomp[ilen/2]);
01538         localreal[ilen+1] = imag(localcomp[ilen/2]);
01539       }
01540 
01541       // advance the data pointers
01542       localreal += lengthreal;
01543       localcomp += lengthcomp;
01544     } // loop over 1D strips
01545   } // loop over all the LFields
01546 
01547   IpplTimings::stopTimer(ffttr1);
01548   TAU_PROFILE_STOP(timer_fft);
01549 
01550   // compress previous iterates storage
01551   if (this->compressTemps() && temp != &f)
01552     *temp = 0;
01553 
01554   // skip final assignment and compress if we used g as final temporary
01555   if (tempR != &g) {
01556     TAU_PROFILE_START(timer_swap);
01557 
01558     // Now assign into output Field, and compress last temp's storage:
01559     FFTDBG(tmsg << "Doing cleanup real->real transpose into return ");
01560     FFTDBG(tmsg << "with layout = " << g.getLayout() << endl);
01561     IpplTimings::startTimer(tr1);
01562     g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
01563     IpplTimings::stopTimer(tr1);
01564 
01565     if (this->compressTemps())
01566       *tempR = 0;
01567     TAU_PROFILE_STOP(timer_swap);
01568   }
01569 
01570   // Normalize:
01571   if (direction == +1) g = g * this->getNormFact();
01572 
01573   // finish up timing the whole mess
01574   IpplTimings::stopTimer(tottimer);
01575 }
01576 
01577 
01578 //=============================================================================
01579 // 1D FFT RCTransform Constructors
01580 //=============================================================================
01581 
01582 //-----------------------------------------------------------------------------
01583 // Create a new FFT object of type RCTransform, with a
01584 // given domain. Also specify which dimensions to transform along.
01585 // Note that RC transform of a real array of length n results in a
01586 // complex array of length n/2+1.
01587 //-----------------------------------------------------------------------------
01588 
01589 template <class T>
01590 FFT<RCTransform,1U,T>::FFT(
01591   const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
01592   const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
01593   const bool transformTheseDims[1U], const bool& compressTemps)
01594   : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain,
01595                   transformTheseDims, compressTemps), 
01596     complexDomain_m(cdomain)
01597 {
01598   // Tau profiling
01599   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01600   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
01601                   ", const bool [], const bool&)");
01602   TAU_PROFILE(tauname, tautype, TAU_FFT);
01603 
01604   unsigned nTransformDims = 1U;
01605   // get axis length
01606   int length;
01607   length = rdomain[0].length();
01608 
01609   // get transform type for FFT Engine, compute normalization
01610   int transformType;
01611   transformType = FFTBase<1U,T>::rcFFT;    // first transform is real-to-complex
01612   T& normFact = this->getNormFact();
01613   normFact = 1.0 / length;
01614 
01615   // set up FFT Engine
01616   this->getEngine().setup(nTransformDims, &transformType, &length);
01617   // set up the temporary fields
01618   setup();
01619 }
01620 
01621 //-----------------------------------------------------------------------------
01622 // Create a new FFT object of type RCTransform, with
01623 // given real and complex domains. Default: transform along all dimensions.
01624 //-----------------------------------------------------------------------------
01625 
01626 template <class T>
01627 FFT<RCTransform,1U,T>::FFT(
01628   const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
01629   const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
01630   const bool& compressTemps) 
01631   : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain, compressTemps),
01632     complexDomain_m(cdomain)
01633 {
01634   // Tau profiling
01635   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01636   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) + ", const bool&)");
01637   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01638 
01639   // get axis length
01640   int length;
01641   length = rdomain[0].length();
01642 
01643   // get transform type for FFT Engine, compute normalization
01644   int transformType;
01645   transformType = FFTBase<1U,T>::rcFFT;    // first transform is real-to-complex
01646   T& normFact = this->getNormFact();
01647   normFact = 1.0 / length;
01648 
01649   // set up FFT Engine
01650   this->getEngine().setup(1U, &transformType, &length);
01651   // set up the temporary fields
01652   setup();
01653 }
01654 
01655 //-----------------------------------------------------------------------------
01656 // setup performs all the initializations necessary after the transform
01657 // directions have been specified.
01658 //-----------------------------------------------------------------------------
01659 
01660 template <class T>
01661 void
01662 FFT<RCTransform,1U,T>::setup(void) {
01663 
01664   // Tau profiling
01665   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
01666   TAU_TYPE_STRING(tautype, "(void)");
01667   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01668 
01669   // check that domain lengths agree between real and complex domains
01670   const Domain_t& domain = this->getDomain();
01671   bool match = true;
01672   // real array length n, complex array length n/2+1
01673   if ( complexDomain_m[0].length() !=
01674        (domain[0].length()/2 + 1) ) match = false;
01675   PInsist(match,
01676           "Domains provided for real and complex Fields are incompatible!");
01677 
01678   // generate layout for temporary real Field
01679   tempRLayout_m = new Layout_t(domain[0], PARALLEL, 1);
01680   // create temporary real Field
01681   tempRField_m = new RealField_t(*tempRLayout_m);
01682   // If user requests no intermediate compression, uncompress right now:
01683   if (!this->compressTemps()) tempRField_m->Uncompress();
01684 
01685   // generate temporary field layout
01686   tempLayouts_m = new Layout_t(complexDomain_m[0], PARALLEL, 1);
01687   // create temporary complex Field
01688   tempFields_m = new ComplexField_t(*tempLayouts_m);
01689   // If user requests no intermediate compression, uncompress right now:
01690   if (!this->compressTemps()) tempFields_m->Uncompress();
01691 
01692   return;
01693 }
01694 
01695 //-----------------------------------------------------------------------------
01696 // Destructor
01697 //-----------------------------------------------------------------------------
01698 
01699 template <class T>
01700 FFT<RCTransform,1U,T>::~FFT(void) {
01701 
01702   // Tau profiling
01703   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
01704   TAU_TYPE_STRING(tautype, "(void)");
01705   TAU_PROFILE(tauname, tautype, TAU_FFT); 
01706 
01707   // delete temporary fields and layouts
01708   delete tempFields_m;
01709   delete tempLayouts_m;
01710   delete tempRField_m;
01711   delete tempRLayout_m;
01712 }
01713 
01714 //-----------------------------------------------------------------------------
01715 // real-to-complex FFT; direction is +1 or -1
01716 //-----------------------------------------------------------------------------
01717 
01718 template <class T>
01719 void
01720 FFT<RCTransform,1U,T>::transform(
01721   int direction,
01722   typename FFT<RCTransform,1U,T>::RealField_t& f,
01723   typename FFT<RCTransform,1U,T>::ComplexField_t& g,
01724   const bool& constInput)
01725 {
01726   // Tau profiling
01727   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01728   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01729   TAU_PROFILE(tauname, tautype, TAU_FFT);
01730   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01731   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01732 
01733   // indicate we're doing another FFT
01734   //INCIPPLSTAT(incFFTs);
01735 
01736   // Check domain of incoming Fields
01737   const Layout_t& in_layout = f.getLayout();
01738   const Domain_t& in_dom = in_layout.getDomain();
01739   const Layout_t& out_layout = g.getLayout();
01740   const Domain_t& out_dom = out_layout.getDomain();
01741   PAssert( checkDomain(this->getDomain(),in_dom) &&
01742            checkDomain(complexDomain_m,out_dom) );
01743 
01744   // Common loop iterate and other vars:
01745   RealField_t* tempR = tempRField_m;  // Field* management aid
01746   if (!constInput) {
01747     // see if we can use input field f as a temporary
01748     bool skipTemp = true;
01749     // more rigorous match required here; check that layouts are identical
01750     if ( !(in_layout == *tempRLayout_m) ) skipTemp = false;
01751     if ( in_layout.getDistribution(0) !=
01752          tempRLayout_m->getDistribution(0) ) skipTemp = false;
01753     if ( in_layout.numVnodes() != tempRLayout_m->numVnodes() )
01754       skipTemp = false;
01755     // also make sure there are no guard cells
01756     if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
01757     if (skipTemp) tempR = &f;
01758   }
01759 
01760   if (tempR != &f) {  // not using input as a temporary
01761     TAU_PROFILE_START(timer_swap);
01762     // assign to real Field with proper layout
01763     (*tempR) = f;
01764     TAU_PROFILE_STOP(timer_swap);
01765   }
01766 
01767   // Field* for temp Field management:
01768   ComplexField_t* temp = tempFields_m;
01769   // see if we can put final result directly into g
01770   bool skipFinal = true;
01771   // more rigorous match required here; check that layouts are identical
01772   if ( !(out_layout == *tempLayouts_m) ) skipFinal = false;
01773   if ( out_layout.getDistribution(0) !=
01774        tempLayouts_m->getDistribution(0) ) skipFinal = false;
01775   if ( out_layout.numVnodes() != tempLayouts_m->numVnodes() )
01776     skipFinal = false;
01777   // also make sure there are no guard cells
01778   if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
01779   if (skipFinal) temp = &g;
01780 
01781   TAU_PROFILE_START(timer_fft);
01782   // There should be just one vnode!
01783   typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
01784   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01785   if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
01786     // Get the LFields
01787     RealLField_t* rldf = (*rl_i).second.get();
01788     ComplexLField_t* cldf = (*cl_i).second.get();
01789     // make sure we are uncompressed
01790     rldf->Uncompress();
01791     cldf->Uncompress();
01792     // get the raw data pointers
01793     T* localreal = rldf->getP();
01794     Complex_t* localcomp = cldf->getP();
01795 
01796     int lengthreal = rldf->size(0);
01797     // move the data into the complex strip, which is two reals longer
01798     for (int ilen=0; ilen<lengthreal; ilen+=2) 
01799       localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
01800     // Do the 1D real-to-complex FFT:
01801     // note that real-to-complex FFT direction is always +1
01802     this->getEngine().callFFT(0, +1, localcomp);
01803   } 
01804   TAU_PROFILE_STOP(timer_fft);
01805 
01806   TAU_PROFILE_START(timer_swap);
01807   // compress temporary storage
01808   if (this->compressTemps() && tempR != &f) *tempR = 0;
01809   TAU_PROFILE_STOP(timer_swap);
01810 
01811   // skip final assignment and compress if we used g as final temporary
01812   if (temp != &g) {
01813     TAU_PROFILE_START(timer_swap);
01814     // Now assign into output Field, and compress last temp's storage:
01815     g = (*temp);
01816     if (this->compressTemps()) *temp = 0;
01817     TAU_PROFILE_STOP(timer_swap);
01818   }
01819 
01820   // Normalize:
01821   if (direction == +1) g = g * this->getNormFact();
01822 
01823   return;
01824 }
01825 
01826 //-----------------------------------------------------------------------------
01827 // RC FFT; opposite direction, from complex to real
01828 //-----------------------------------------------------------------------------
01829 
01830 template <class T>
01831 void
01832 FFT<RCTransform,1U,T>::transform(
01833   int direction,
01834   typename FFT<RCTransform,1U,T>::ComplexField_t& f,
01835   typename FFT<RCTransform,1U,T>::RealField_t& g,
01836   const bool& constInput)
01837 {
01838   // Tau profiling
01839   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
01840   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
01841   TAU_PROFILE(tauname, tautype, TAU_FFT);
01842   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
01843   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
01844 
01845   // indicate we're doing another FFT
01846   //INCIPPLSTAT(incFFTs);
01847 
01848   // Check domain of incoming Fields
01849   const Layout_t& in_layout = f.getLayout();
01850   const Domain_t& in_dom = in_layout.getDomain();
01851   const Layout_t& out_layout = g.getLayout();
01852   const Domain_t& out_dom = out_layout.getDomain();
01853   PAssert( checkDomain(complexDomain_m,in_dom) &&
01854            checkDomain(this->getDomain(),out_dom) );
01855 
01856   // Field* for temp Field management:
01857   ComplexField_t* temp = &f;
01858       
01859   // see if we can put final result directly into g
01860   RealField_t* tempR;
01861   bool skipFinal = true;
01862   // more rigorous match required here; check that layouts are identical
01863   if ( !(out_layout == *tempRLayout_m) ) skipFinal = false;
01864   if ( out_layout.getDistribution(0) !=
01865        tempRLayout_m->getDistribution(0) ) skipFinal = false;
01866   if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
01867     skipFinal = false;
01868   // also make sure there are no guard cells
01869   if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
01870   if (skipFinal)
01871     tempR = &g;
01872   else 
01873     tempR = tempRField_m;
01874 
01875   bool skipTemp = true;
01876   if (!constInput) {
01877     // only one CR transform
01878     // see if we really need to transpose input data
01879     // more rigorous match required here; check that layouts are identical
01880     if ( !(in_layout == *tempLayouts_m) ) skipTemp = false;
01881     if ( in_layout.getDistribution(0) !=
01882         tempLayouts_m->getDistribution(0) ) skipTemp = false;
01883     if ( in_layout.numVnodes() != tempLayouts_m->numVnodes() )
01884       skipTemp = false;
01885     // also make sure there are no guard cells
01886     if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
01887   }
01888   else {  // cannot skip transpose
01889     skipTemp = false;
01890   }
01891 
01892   TAU_PROFILE_START(timer_swap);
01893   if (!skipTemp) {
01894     // assign to complex Field with proper layout
01895     (*tempFields_m) = (*temp);
01896     // compress previous iterates storage
01897     if (this->compressTemps() && temp != &f) *temp = 0;
01898     temp = tempFields_m;
01899   }
01900   TAU_PROFILE_STOP(timer_swap);
01901 
01902   TAU_PROFILE_START(timer_fft);
01903   // There should be just one vnode!
01904   typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
01905   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
01906   if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
01907     // Get the LFields
01908     RealLField_t* rldf = (*rl_i).second.get();
01909     ComplexLField_t* cldf = (*cl_i).second.get();
01910     // make sure we are uncompressed
01911     rldf->Uncompress();
01912     cldf->Uncompress();
01913     // get the raw data pointers
01914     T* localreal = rldf->getP();
01915     Complex_t* localcomp = cldf->getP();
01916 
01917     int lengthreal = rldf->size(0);
01918     // Do the 1D complex-to-real FFT:
01919     // note that complex-to-real FFT direction is always -1
01920     this->getEngine().callFFT(0, -1, localcomp);
01921     // move the data into the real strip, which is two reals shorter
01922     for (int ilen=0; ilen<lengthreal; ilen+=2) {
01923       localreal[ilen] = real(localcomp[ilen/2]);
01924       localreal[ilen+1] = imag(localcomp[ilen/2]);
01925     }
01926   }
01927   TAU_PROFILE_STOP(timer_fft);
01928 
01929   TAU_PROFILE_START(timer_swap);
01930   // compress previous iterates storage
01931   if (this->compressTemps() && temp != &f) *temp = 0;
01932   TAU_PROFILE_STOP(timer_swap);
01933 
01934   // skip final assignment and compress if we used g as final temporary
01935   if (tempR != &g) {
01936     TAU_PROFILE_START(timer_swap);
01937     // Now assign into output Field, and compress last temp's storage:
01938     g = (*tempR);
01939     if (this->compressTemps()) *tempR = 0;
01940     TAU_PROFILE_STOP(timer_swap);
01941   }
01942 
01943   // Normalize:
01944   if (direction == +1) g = g * this->getNormFact();
01945 
01946   return;
01947 }
01948 
01949 
01950 //=============================================================================
01951 // FFT SineTransform Constructors
01952 //=============================================================================
01953 
01954 //-----------------------------------------------------------------------------
01955 // Create a new FFT object of type SineTransform, with given real and
01956 // complex field domains. Also specify which dimensions to transform along,
01957 // and which of these are sine transforms.
01958 // Note that RC transform of a real array of length n results in a
01959 // complex array of length n/2+1.
01960 //-----------------------------------------------------------------------------
01961 
01962 template <unsigned Dim, class T>
01963 FFT<SineTransform,Dim,T>::FFT(
01964   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
01965   const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
01966   const bool transformTheseDims[Dim], const bool sineTransformDims[Dim],
01967   const bool& compressTemps)
01968   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
01969                    transformTheseDims, compressTemps), 
01970     complexDomain_m(&cdomain)
01971 {
01972   // Tau profiling
01973   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
01974   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
01975                   ", const bool [], const bool [], const bool&)");
01976   TAU_PROFILE(tauname, tautype, TAU_FFT);
01977 
01978   unsigned d;
01979   // store which dimensions get sine transforms and count how many
01980   numSineTransforms_m = 0;
01981   for (d=0; d<Dim; ++d) {
01982     sineTransformDims_m[d] = sineTransformDims[d];
01983     if (sineTransformDims[d]) {
01984       PAssert(transformTheseDims[d]);  // should be marked as a transform dim
01985       ++numSineTransforms_m;
01986     }
01987   }
01988 
01989   // construct array of axis lengths for all transform dims
01990   unsigned nTransformDims = this->numTransformDims();
01991   int* lengths = new int[nTransformDims];
01992   for (d=0; d<nTransformDims; ++d)
01993     lengths[d] = rdomain[this->activeDimension(d)].length();
01994 
01995   // construct array of transform types for FFT Engine, compute normalization
01996   int* transformTypes = new int[nTransformDims];
01997   T& normFact = this->getNormFact();
01998   normFact = 1.0;
01999   bool foundRC = false;
02000   for (d=0; d<nTransformDims; ++d) {
02001     if (sineTransformDims_m[this->activeDimension(d)]) {
02002       transformTypes[d] = FFTBase<Dim,T>::sineFFT;  // sine transform
02003       normFact /= (2.0 * (lengths[d] + 1));
02004     }
02005     else if (!foundRC) {
02006       transformTypes[d] = FFTBase<Dim,T>::rcFFT;    // real-to-complex FFT
02007       normFact /= lengths[d];
02008       foundRC = true;
02009     }
02010     else {
02011       transformTypes[d] = FFTBase<Dim,T>::ccFFT;    // complex-to-complex FFT
02012       normFact /= lengths[d];
02013     }
02014   }
02015 
02016   // set up FFT Engine
02017   this->getEngine().setup(nTransformDims, transformTypes, lengths);
02018   delete [] transformTypes;
02019   delete [] lengths;
02020   // set up the temporary fields
02021   setup();
02022 }
02023 
02024 //-----------------------------------------------------------------------------
02025 // Create a new FFT object of type SineTransform, with
02026 // given real and complex domains. Default: transform along all dimensions.
02027 //-----------------------------------------------------------------------------
02028 
02029 template <unsigned Dim, class T>
02030 FFT<SineTransform,Dim,T>::FFT(
02031   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
02032   const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
02033   const bool sineTransformDims[Dim], const bool& compressTemps) 
02034   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps),
02035     complexDomain_m(&cdomain)
02036 {
02037   // Tau profiling
02038   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02039   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", " + CT(cdomain) +
02040                   ", const bool [], const bool&)");
02041   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02042 
02043   unsigned d;
02044   // store which dimensions get sine transforms and count how many
02045   numSineTransforms_m = 0;
02046   for (d=0; d<Dim; ++d) {
02047     sineTransformDims_m[d] = sineTransformDims[d];
02048     if (sineTransformDims[d]) ++numSineTransforms_m;
02049   }
02050 
02051   // construct array of axis lengths for all transform dims
02052   int lengths[Dim];
02053   for (d=0; d<Dim; ++d)
02054     lengths[d] = rdomain[d].length();
02055 
02056   // construct array of transform types for FFT Engine, compute normalization
02057   int transformTypes[Dim];
02058   T& normFact = this->getNormFact();
02059   normFact = 1.0;
02060   bool foundRC = false;
02061   for (d=0; d<Dim; ++d) {
02062     if (sineTransformDims_m[d]) {
02063       transformTypes[d] = FFTBase<Dim,T>::sineFFT;  // sine transform
02064       normFact /= (2.0 * (lengths[d] + 1));
02065     }
02066     else if (!foundRC) {
02067       transformTypes[d] = FFTBase<Dim,T>::rcFFT;    // real-to-complex FFT
02068       normFact /= lengths[d];
02069       foundRC = true;
02070     }
02071     else {
02072       transformTypes[d] = FFTBase<Dim,T>::ccFFT;    // complex-to-complex FFT
02073       normFact /= lengths[d];
02074     }
02075   }
02076 
02077   // set up FFT Engine
02078   this->getEngine().setup(Dim, transformTypes, lengths);
02079   // set up the temporary fields
02080   setup();
02081 }
02082 
02083 //-----------------------------------------------------------------------------
02084 // Constructor for doing only sine transforms.
02085 // Create a new FFT object of type SineTransform, with given real field
02086 // domain. Also specify which dimensions to sine transform along.
02087 //-----------------------------------------------------------------------------
02088 
02089 template <unsigned Dim, class T>
02090 FFT<SineTransform,Dim,T>::FFT(
02091   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
02092   const bool sineTransformDims[Dim], const bool& compressTemps)
02093   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
02094                    sineTransformDims, compressTemps)
02095 {
02096   // Tau profiling
02097   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02098   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool [], const bool&)");
02099   TAU_PROFILE(tauname, tautype, TAU_FFT);
02100 
02101   // store which dimensions get sine transforms and how many
02102   numSineTransforms_m = this->numTransformDims();
02103   unsigned d;
02104   for (d=0; d<Dim; ++d)
02105     sineTransformDims_m[d] = sineTransformDims[d];
02106 
02107   // construct array of axis lengths
02108   int* lengths = new int[numSineTransforms_m];
02109   for (d=0; d<numSineTransforms_m; ++d)
02110     lengths[d] = rdomain[this->activeDimension(d)].length();
02111 
02112   // construct array of transform types for FFT Engine, compute normalization
02113   int* transformTypes = new int[numSineTransforms_m];
02114   T& normFact = this->getNormFact();
02115   normFact = 1.0;
02116   for (d=0; d<numSineTransforms_m; ++d) {
02117     transformTypes[d] = FFTBase<Dim,T>::sineFFT;  // sine transform
02118     normFact /= (2.0 * (lengths[d] + 1));
02119   }
02120 
02121   // set up FFT Engine
02122   this->getEngine().setup(numSineTransforms_m, transformTypes, lengths);
02123   delete [] transformTypes;
02124   delete [] lengths;
02125   // set up the temporary fields
02126   setup();
02127 }
02128 
02129 //-----------------------------------------------------------------------------
02130 // Create a new FFT object of type SineTransform, with
02131 // given real domain. Default: sine transform along all dimensions.
02132 //-----------------------------------------------------------------------------
02133 
02134 template <unsigned Dim, class T>
02135 FFT<SineTransform,Dim,T>::FFT(
02136   const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain, const bool& compressTemps) 
02137   : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps)
02138 {
02139   // Tau profiling
02140   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
02141   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool&)");
02142   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02143 
02144   unsigned d;
02145   // store which dimensions get sine transforms and how many
02146   numSineTransforms_m = this->numTransformDims();
02147   for (d=0; d<Dim; ++d)
02148     sineTransformDims_m[d] = true;
02149 
02150   // construct array of axis lengths
02151   int lengths[Dim];
02152   for (d=0; d<Dim; ++d)
02153     lengths[d] = rdomain[d].length();
02154 
02155   // construct array of transform types for FFT Engine, compute normalization
02156   int transformTypes[Dim];
02157   T& normFact = this->getNormFact();
02158   normFact = 1.0;
02159   for (d=0; d<Dim; ++d) {
02160     transformTypes[d] = FFTBase<Dim,T>::sineFFT;  // sine transform
02161     normFact /= (2.0 * (lengths[d] + 1));
02162   }
02163 
02164   // set up FFT Engine
02165   this->getEngine().setup(Dim, transformTypes, lengths);
02166   // set up the temporary fields
02167   setup();
02168 }
02169 
02170 //-----------------------------------------------------------------------------
02171 // setup performs all the initializations necessary after the transform
02172 // directions have been specified.
02173 //-----------------------------------------------------------------------------
02174 
02175 template <unsigned Dim, class T>
02176 void
02177 FFT<SineTransform,Dim,T>::setup(void) {
02178 
02179   // Tau profiling
02180   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
02181   TAU_TYPE_STRING(tautype, "(void)");
02182   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02183 
02184   unsigned d, dim, activeDim;
02185   int icount;
02186   Domain_t ndip;
02187   unsigned nTransformDims = this->numTransformDims();  // total number of transforms
02188   const Domain_t& domain = this->getDomain();          // get real Field domain
02189 
02190   // Set up the arrays of temporary Fields and FieldLayouts:
02191   e_dim_tag serialParallel[Dim];  // Specifies SERIAL, PARALLEL dims in temp
02192   // make zeroth dimension always SERIAL
02193   serialParallel[0] = SERIAL;
02194   // all other dimensions parallel
02195   for (d=1; d<Dim; ++d)
02196     serialParallel[d] = PARALLEL;
02197 
02198   // do we have a real-to-complex transform to do or not?
02199   if (nTransformDims > numSineTransforms_m) {  // have RC transform
02200 
02201     PAssert(complexDomain_m);  // This pointer should be initialized!
02202     // find first non-sine transform dimension; this is rc transform
02203     bool match = false;
02204     d=0;
02205     while (d<Dim && !match) {
02206       if (this->transformDim(d) && !sineTransformDims_m[d]) {
02207         activeDim = this->activeDimension(d);
02208         match = true;
02209       }
02210       ++d;
02211     }
02212     PAssert(match);  // check that we found rc transform dimension
02213     // compare lengths of real and complex Field domains
02214     for (d=0; d<Dim; ++d) {
02215       if (d == activeDim) {
02216         // real array length n, complex array length n/2+1
02217         if ( (*complexDomain_m)[d].length() !=
02218              (domain[d].length()/2 + 1) ) match = false;
02219       }
02220       else {
02221         // real and complex arrays should be same length for all other dims
02222         if ( (*complexDomain_m)[d].length() !=
02223              domain[d].length() ) match = false;
02224       }
02225     }
02226     PInsist(match,
02227             "Domains provided for real and complex Fields are incompatible!");
02228 
02229     // set up the real Fields first
02230     // we will have one for each sine transform, plus one for the rc transform
02231     tempRLayouts_m = new Layout_t*[numSineTransforms_m+1];
02232     tempRFields_m = new RealField_t*[numSineTransforms_m+1];
02233     // loop over the sine transform dimensions
02234     icount=0;
02235     for (dim=0; dim<numSineTransforms_m; ++dim, ++icount) {
02236       // get next dimension to be sine transformed
02237       while (!sineTransformDims_m[icount]) ++icount;
02238       PAssert(icount<Dim);  // check that icount is valid dimension
02239       // make new domain with permuted Indexes, icount first
02240       ndip[0] = domain[icount];
02241       for (d=1; d<Dim; ++d) {
02242         int nextDim = icount + d;
02243         if (nextDim >= Dim) nextDim -= Dim;
02244         ndip[d] = domain[nextDim];
02245       }
02246       // generate temporary field layout
02247       tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02248       // create temporary real Field
02249       tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
02250       // If user requests no intermediate compression, uncompress right now:
02251       if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
02252     }
02253 
02254     // build final real Field for rc transform along activeDim
02255     ndip[0] = domain[activeDim];
02256     for (d=1; d<Dim; ++d) {
02257       int nextDim = activeDim + d;
02258       if (nextDim >= Dim) nextDim -= Dim;
02259       ndip[d] = domain[nextDim];
02260     }
02261     // generate temporary field layout
02262     tempRLayouts_m[numSineTransforms_m] = new Layout_t(ndip, serialParallel,
02263                                                        this->transVnodes());
02264     // create temporary real Field
02265     tempRFields_m[numSineTransforms_m] =
02266       new RealField_t(*tempRLayouts_m[numSineTransforms_m]);
02267     // If user requests no intermediate compression, uncompress right now:
02268     if (!this->compressTemps()) (*tempRFields_m[numSineTransforms_m]).Uncompress();
02269 
02270     // now create the temporary complex Fields
02271     int numComplex = nTransformDims - numSineTransforms_m;
02272     // allocate arrays of temp fields and layouts
02273     tempLayouts_m = new Layout_t*[numComplex];
02274     tempFields_m = new ComplexField_t*[numComplex];
02275     icount=0;  // reset counter
02276     for (dim=0; dim<numComplex; ++dim, ++icount) {
02277       // get next non-sine transform dimension
02278       while (!this->transformDim(icount) || sineTransformDims_m[icount]) ++icount;
02279       PAssert(icount<Dim);  // check that this is a valid dimension
02280       // make new domain with permuted Indexes, icount first
02281       ndip[0] = (*complexDomain_m)[icount];
02282       for (d=1; d<Dim; ++d) {
02283         int nextDim = icount + d;
02284         if (nextDim >= Dim) nextDim -= Dim;
02285         ndip[d] = (*complexDomain_m)[nextDim];
02286       }
02287       // generate temporary field layout
02288       tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02289       // create temporary complex Field
02290       tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
02291       // If user requests no intermediate compression, uncompress right now:
02292       if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
02293     }
02294 
02295   }
02296   else {  // sine transforms only
02297 
02298     // set up the real Fields
02299     // we will have one for each sine transform
02300     tempRLayouts_m = new Layout_t*[numSineTransforms_m];
02301     tempRFields_m = new RealField_t*[numSineTransforms_m];
02302     // loop over the sine transform dimensions
02303     for (dim=0; dim<numSineTransforms_m; ++dim) {
02304       // get next dimension to be sine transformed
02305       activeDim = this->activeDimension(dim);
02306       // make new domain with permuted Indexes, activeDim first
02307       ndip[0] = domain[activeDim];
02308       for (d=1; d<Dim; ++d) {
02309         int nextDim = activeDim + d;
02310         if (nextDim >= Dim) nextDim -= Dim;
02311         ndip[d] = domain[nextDim];
02312       }
02313       // generate temporary field layout
02314       tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
02315       // create temporary real Field
02316       tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
02317       // If user requests no intermediate compression, uncompress right now:
02318       if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
02319     }
02320 
02321   }
02322 
02323   return;
02324 }
02325 
02326 //-----------------------------------------------------------------------------
02327 // Destructor
02328 //-----------------------------------------------------------------------------
02329 
02330 template <unsigned Dim, class T>
02331 FFT<SineTransform,Dim,T>::~FFT(void) {
02332 
02333   // Tau profiling
02334   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
02335   TAU_TYPE_STRING(tautype, "(void)");
02336   TAU_PROFILE(tauname, tautype, TAU_FFT); 
02337 
02338   // delete temporary fields and layouts
02339   unsigned d;
02340   unsigned nTransformDims = this->numTransformDims();
02341   if (nTransformDims > numSineTransforms_m) {
02342     for (d=0; d<numSineTransforms_m+1; ++d) {
02343       delete tempRFields_m[d];
02344       delete tempRLayouts_m[d];
02345     }
02346     int numComplex = nTransformDims - numSineTransforms_m;
02347     for (d=0; d<numComplex; ++d) {
02348       delete tempFields_m[d];
02349       delete tempLayouts_m[d];
02350     }
02351     delete [] tempFields_m;
02352     delete [] tempLayouts_m;
02353   }
02354   else {
02355     for (d=0; d<numSineTransforms_m; ++d) {
02356       delete tempRFields_m[d];
02357       delete tempRLayouts_m[d];
02358     }
02359   }
02360   delete [] tempRFields_m;
02361   delete [] tempRLayouts_m;
02362 }
02363 
02364 //-----------------------------------------------------------------------------
02365 // Sine and RC FFT; separate input and output fields, direction is +1 or -1
02366 //-----------------------------------------------------------------------------
02367 
02368 template <unsigned Dim, class T>
02369 void
02370 FFT<SineTransform,Dim,T>::transform(
02371   int direction,
02372   FFT<SineTransform,Dim,T>::RealField_t& f,
02373   FFT<SineTransform,Dim,T>::ComplexField_t& g,
02374   const bool& constInput)
02375 {
02376   // Tau profiling
02377   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02378   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02379   TAU_PROFILE(tauname, tautype, TAU_FFT);
02380   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02381   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02382 
02383   // indicate we're doing another FFT
02384   //INCIPPLSTAT(incFFTs);
02385 
02386   // Check domain of incoming Fields
02387   const Layout_t& in_layout = f.getLayout();
02388   const Domain_t& in_dom = in_layout.getDomain();
02389   const Layout_t& out_layout = g.getLayout();
02390   const Domain_t& out_dom = out_layout.getDomain();
02391   PAssert( checkDomain(this->getDomain(),in_dom) &&
02392            checkDomain(*complexDomain_m,out_dom) );
02393 
02394   // Common loop iterate and other vars:
02395   unsigned d;
02396   int icount, activeDim;
02397   int idim;      // idim loops over the number of transform dims.
02398   unsigned nTransformDims = this->numTransformDims();
02399   // check that there is a real-to-complex transform to do
02400   PInsist(nTransformDims>numSineTransforms_m,
02401           "Wrong output Field type for real-to-real transform!!");
02402 
02403   // first do all the sine transforms
02404 
02405   // Field* management aid
02406   RealField_t* tempR = &f;
02407   // Local work array passed to FFT:
02408   T* localdataR;
02409       
02410   // Loop over the dimensions to be sine transformed:
02411   icount = 0;
02412   activeDim = 0;
02413   for (idim = 0; idim != numSineTransforms_m; ++idim, ++icount, ++activeDim) {
02414     TAU_PROFILE_START(timer_swap);
02415     // find next sine transform dim
02416     while (!sineTransformDims_m[icount]) {
02417       if (this->transformDim(icount)) ++activeDim;
02418       ++icount;
02419     }
02420     PAssert(activeDim<Dim);  // check that this is a valid dimension!
02421     // Now do the serial transforms along this dimension:
02422 
02423     bool skipTranspose = false;
02424     // if this is the first transform dimension, we might be able
02425     // to skip the transpose into the first temporary Field
02426     if (idim == 0 && !constInput) {
02427       // get domain for comparison
02428       const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
02429       // check that zeroth axis is the same and is serial
02430       // and that there are no guard cells
02431       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02432                         (in_dom[0].length() == first_dom[0].length()) &&
02433                         (in_layout.getDistribution(0) == SERIAL) &&
02434                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02435     }
02436 
02437     if (!skipTranspose) {
02438       // transpose and permute to Field with transform dim first
02439       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
02440         (*tempR)[tempR->getLayout().getDomain()];
02441 
02442       // Compress out previous iterate's storage:
02443       if (this->compressTemps() && tempR != &f) *tempR = 0;
02444       tempR = tempRFields_m[idim];  // Field* management aid
02445     }
02446     TAU_PROFILE_STOP(timer_swap);
02447       
02448     TAU_PROFILE_START(timer_fft);
02449     // Loop over all the Vnodes, working on the LField in each.
02450     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
02451     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
02452 
02453       // Get the LField
02454       RealLField_t* ldf = (*l_i).second.get();
02455       // make sure we are uncompressed
02456       ldf->Uncompress();
02457       // get the raw data pointer
02458       localdataR = ldf->getP();
02459 
02460       // Do 1D real-to-real FFT's on all the strips in the LField:
02461       int nstrips = 1, length = ldf->size(0);
02462       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02463       for (int istrip=0; istrip<nstrips; ++istrip) {
02464         // Do the 1D FFT:
02465         this->getEngine().callFFT(activeDim, direction, localdataR);
02466         // advance the data pointer
02467         localdataR += length;
02468       } // loop over 1D strips
02469     } // loop over all the LFields
02470     TAU_PROFILE_STOP(timer_fft);
02471   } // loop over all transformed dimensions 
02472 
02473   // now handle the RC transform separately
02474 
02475   // find first non-sine transform dimension
02476   icount = 0;
02477   activeDim = 0;
02478   while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02479     if (sineTransformDims_m[icount]) ++activeDim;
02480     ++icount;
02481   }
02482   PAssert(activeDim<Dim);  // check that this is a valid dimension!
02483 
02484   TAU_PROFILE_START(timer_swap);
02485   // transpose and permute to final real Field with transform dim first
02486   int last = numSineTransforms_m;
02487   (*tempRFields_m[last])[tempRLayouts_m[last]->getDomain()] =
02488     (*tempR)[tempR->getLayout().getDomain()];
02489 
02490   // Compress out previous iterate's storage:
02491   if (this->compressTemps() && tempR != &f) *tempR = 0;
02492   tempR = tempRFields_m[last];  // Field* management aid
02493   TAU_PROFILE_STOP(timer_swap);
02494 
02495   // Field* for temp Field management:
02496   ComplexField_t* temp = tempFields_m[0];
02497   // see if we can put final result directly into g
02498   int numComplex = nTransformDims-numSineTransforms_m;
02499   if (numComplex == 1) {  // only a single RC transform
02500     bool skipTemp = true;
02501     // more rigorous match required here; check that layouts are identical
02502     if ( !(out_layout == *tempLayouts_m[0]) ) skipTemp = false;
02503     for (d=0; d<Dim; ++d) {
02504       if ( out_layout.getDistribution(d) !=
02505            tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
02506     }
02507     if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
02508       skipTemp = false;
02509     // also make sure there are no guard cells
02510     if (!(g.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
02511     if (skipTemp) temp = &g;
02512   }
02513 
02514 
02515   TAU_PROFILE_START(timer_fft);
02516   // Loop over all the Vnodes, working on the LField in each.
02517   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
02518   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
02519   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
02520     // Get the LFields
02521     RealLField_t* rldf = (*rl_i).second.get();
02522     ComplexLField_t* cldf = (*cl_i).second.get();
02523     // make sure we are uncompressed
02524     rldf->Uncompress();
02525     cldf->Uncompress();
02526     // get the raw data pointers
02527     T* localreal = rldf->getP();
02528     Complex_t* localcomp = cldf->getP();
02529 
02530     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
02531     // number of strips should be the same for real and complex LFields!
02532     for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
02533     for (int istrip=0; istrip<nstrips; ++istrip) {
02534       // move the data into the complex strip, which is two reals longer
02535       for (int ilen=0; ilen<lengthreal; ilen+=2) 
02536         localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
02537       // Do the 1D real-to-complex FFT:
02538       // note that real-to-complex FFT direction is always +1
02539       this->getEngine().callFFT(activeDim, +1, localcomp);
02540       // advance the data pointers
02541       localreal += lengthreal;
02542       localcomp += lengthcomp;
02543     } // loop over 1D strips
02544   } // loop over all the LFields
02545   TAU_PROFILE_STOP(timer_fft);
02546 
02547   // now proceed with the other complex-to-complex transforms
02548 
02549   // Local work array passed to FFT:
02550   Complex_t* localdata;
02551       
02552   // Loop over the remaining dimensions to be transformed:
02553   ++icount;
02554   ++activeDim;
02555   for (idim = 1; idim != numComplex; ++idim, ++icount, ++activeDim) {
02556     TAU_PROFILE_START(timer_swap);
02557     // find the next non-sine transform dimension
02558     while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02559       if (sineTransformDims_m[icount]) ++activeDim;
02560       ++icount;
02561     }
02562     PAssert(activeDim<Dim);  // check that this is a valid dimension!
02563     // Now do the serial transforms along this dimension:
02564 
02565     bool skipTranspose = false;
02566     // if this is the last transform dimension, we might be able
02567     // to skip the last temporary and transpose right into g
02568     if (idim == numComplex-1) {
02569       // get the domain for comparison
02570       const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
02571       // check that zeroth axis is the same and is serial
02572       // and that there are no guard cells
02573       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02574                         (out_dom[0].length() == last_dom[0].length()) &&
02575                         (out_layout.getDistribution(0) == SERIAL) &&
02576                         (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02577     }
02578 
02579     if (!skipTranspose) {
02580       // transpose and permute to Field with transform dim first
02581       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
02582         (*temp)[temp->getLayout().getDomain()];
02583 
02584       // Compress out previous iterate's storage:
02585       if (this->compressTemps()) *temp = 0;
02586       temp = tempFields_m[idim];  // Field* management aid
02587     }
02588     else if (idim == numComplex-1) {
02589       // last transform and we can skip the last temporary field
02590       // so do the transpose here using g instead
02591 
02592       // transpose and permute to Field with transform dim first
02593       g[out_dom] = (*temp)[temp->getLayout().getDomain()];
02594 
02595       // Compress out previous iterate's storage:
02596       if (this->compressTemps()) *temp = 0;
02597       temp = &g;  // Field* management aid
02598     }
02599     TAU_PROFILE_STOP(timer_swap);
02600       
02601     TAU_PROFILE_START(timer_fft);
02602     // Loop over all the Vnodes, working on the LField in each.
02603     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
02604     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
02605 
02606       // Get the LField
02607       ComplexLField_t* ldf = (*l_i).second.get();
02608       // make sure we are uncompressed
02609       ldf->Uncompress();
02610       // get the raw data pointer
02611       localdata = ldf->getP();
02612 
02613       // Do 1D complex-to-complex FFT's on all the strips in the LField:
02614       int nstrips = 1, length = ldf->size(0);
02615       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02616       for (int istrip=0; istrip<nstrips; ++istrip) {
02617         // Do the 1D FFT:
02618         this->getEngine().callFFT(activeDim, direction, localdata);
02619         // advance the data pointer
02620         localdata += length;
02621       } // loop over 1D strips
02622     } // loop over all the LFields
02623     TAU_PROFILE_STOP(timer_fft);
02624   } // loop over all transformed dimensions 
02625 
02626   // skip final assignment and compress if we used g as final temporary
02627   if (temp != &g) {
02628     TAU_PROFILE_START(timer_swap);
02629     // Now assign into output Field, and compress last temp's storage:
02630     g[out_dom] = (*temp)[temp->getLayout().getDomain()];
02631     if (this->compressTemps()) *temp = 0;
02632     TAU_PROFILE_STOP(timer_swap);
02633   }
02634 
02635   // Normalize:
02636   if (direction == +1) g = g * this->getNormFact();
02637 
02638   return;
02639 }
02640 
02641 //-----------------------------------------------------------------------------
02642 // Sine and RC FFT; opposite direction, from complex to real
02643 //-----------------------------------------------------------------------------
02644 
02645 template <unsigned Dim, class T>
02646 void
02647 FFT<SineTransform,Dim,T>::transform(
02648   int direction,
02649   FFT<SineTransform,Dim,T>::ComplexField_t& f,
02650   FFT<SineTransform,Dim,T>::RealField_t& g,
02651   const bool& constInput)
02652 {
02653   // Tau profiling
02654   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02655   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02656   TAU_PROFILE(tauname, tautype, TAU_FFT);
02657   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02658   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02659 
02660   // indicate we're doing another FFT
02661   // INCIPPLSTAT(incFFTs);
02662 
02663   // Check domain of incoming Fields
02664   const Layout_t& in_layout = f.getLayout();
02665   const Domain_t& in_dom = in_layout.getDomain();
02666   const Layout_t& out_layout = g.getLayout();
02667   const Domain_t& out_dom = out_layout.getDomain();
02668   PAssert( checkDomain(*complexDomain_m,in_dom) &&
02669            checkDomain(this->getDomain(),out_dom) );
02670 
02671   // Common loop iterate and other vars:
02672   unsigned d;
02673   int icount, activeDim;
02674   int idim;      // idim loops over the number of transform dims.
02675   unsigned nTransformDims = this->numTransformDims();
02676 
02677   // proceed with the complex-to-complex transforms
02678 
02679   // Field* for temp Field management:
02680   ComplexField_t* temp = &f;
02681   // Local work array passed to FFT:
02682   Complex_t* localdata;
02683       
02684   // Loop over all dimensions to be non-sine transformed except last one:
02685   int numComplex = nTransformDims - numSineTransforms_m;
02686   icount = Dim-1;  // start with last dimension
02687   activeDim = nTransformDims-1;
02688   for (idim = numComplex-1; idim != 0; --idim, --icount, --activeDim) {
02689     TAU_PROFILE_START(timer_swap);
02690     // find next non-sine transform dim
02691     while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02692       if (sineTransformDims_m[icount]) --activeDim;
02693       --icount;
02694     }
02695     PAssert(activeDim>=0);  // check that this is a valid dimension!
02696     // Now do the serial transforms along this dimension:
02697 
02698     bool skipTranspose = false;
02699     // if this is the first transform dimension, we might be able
02700     // to skip the transpose into the first temporary Field
02701     if (idim == numComplex-1 && !constInput) {
02702       // get domain for comparison
02703       const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
02704       // check that zeroth axis is the same and is serial
02705       // and that there are no guard cells
02706       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02707                         (in_dom[0].length() == first_dom[0].length()) &&
02708                         (in_layout.getDistribution(0) == SERIAL) &&
02709                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02710     }
02711 
02712     if (!skipTranspose) {
02713       // transpose and permute to Field with transform dim first
02714       (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] = 
02715         (*temp)[temp->getLayout().getDomain()];
02716 
02717       // Compress out previous iterate's storage:
02718       if (this->compressTemps() && temp != &f) *temp = 0;
02719       temp = tempFields_m[idim];  // Field* management aid
02720     }
02721     TAU_PROFILE_STOP(timer_swap);
02722       
02723     TAU_PROFILE_START(timer_fft);
02724     // Loop over all the Vnodes, working on the LField in each.
02725     typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
02726     for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
02727 
02728       // Get the LField
02729       ComplexLField_t* ldf = (*l_i).second.get();
02730       // make sure we are uncompressed
02731       ldf->Uncompress();
02732       // get the raw data pointer
02733       localdata = ldf->getP();
02734 
02735       // Do 1D complex-to-complex FFT's on all the strips in the LField:
02736       int nstrips = 1, length = ldf->size(0);
02737       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02738       for (int istrip=0; istrip<nstrips; ++istrip) {
02739         // Do the 1D FFT:
02740         this->getEngine().callFFT(activeDim, direction, localdata);
02741         // advance the data pointer
02742         localdata += length;
02743       } // loop over 1D strips
02744     } // loop over all the LFields
02745     TAU_PROFILE_STOP(timer_fft);
02746   } // loop over all transformed dimensions 
02747 
02748   // handle the CR transform separately
02749 
02750   // find next non-sine transform dim
02751   while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
02752     if (sineTransformDims_m[icount]) --activeDim;
02753     --icount;
02754   }
02755   PAssert(activeDim>=0);  // check that this is a valid dimension!
02756 
02757   // Temp Field* management aid
02758   RealField_t* tempR = tempRFields_m[numSineTransforms_m];
02759 
02760   bool skipTemp = true;
02761   if (numComplex == 1 && !constInput) {
02762     // only one CR transform
02763     // see if we really need to transpose input data
02764     // more rigorous match required here; check that layouts are identical
02765     if ( !(in_layout == *tempLayouts_m[0]) ) skipTemp = false;
02766     for (d=0; d<Dim; ++d) {
02767       if ( in_layout.getDistribution(d) !=
02768           tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
02769     }
02770     if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
02771       skipTemp = false;
02772     // also make sure there are no guard cells
02773     if (!(f.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
02774   }
02775   else {  // cannot skip transpose
02776     skipTemp = false;
02777   }
02778 
02779   if (!skipTemp) {
02780     TAU_PROFILE_START(timer_swap);
02781     // transpose and permute to complex Field with transform dim first
02782     (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
02783       (*temp)[temp->getLayout().getDomain()];
02784     // compress previous iterates storage
02785     if (this->compressTemps() && temp != &f) *temp = 0;
02786     temp = tempFields_m[0];
02787     TAU_PROFILE_STOP(timer_swap);
02788   }
02789 
02790   TAU_PROFILE_START(timer_fft);
02791   // Loop over all the Vnodes, working on the LField in each.
02792   typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
02793   typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
02794   for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
02795     // Get the LFields
02796     RealLField_t* rldf = (*rl_i).second.get();
02797     ComplexLField_t* cldf = (*cl_i).second.get();
02798     // make sure we are uncompressed
02799     rldf->Uncompress();
02800     cldf->Uncompress();
02801     // get the raw data pointers
02802     T* localreal = rldf->getP();
02803     Complex_t* localcomp = cldf->getP();
02804 
02805     int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
02806     // number of strips should be the same for real and complex LFields!
02807     for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
02808     for (int istrip=0; istrip<nstrips; ++istrip) {
02809       // Do the 1D complex-to-real FFT:
02810       // note that complex-to-real FFT direction is always -1
02811       this->getEngine().callFFT(activeDim, -1, localcomp);
02812       // move the data into the real strip, which is two reals shorter
02813       for (int ilen=0; ilen<lengthreal; ilen+=2) {
02814         localreal[ilen] = real(localcomp[ilen/2]);
02815         localreal[ilen+1] = imag(localcomp[ilen/2]);
02816       }
02817       // advance the data pointers
02818       localreal += lengthreal;
02819       localcomp += lengthcomp;
02820     } // loop over 1D strips
02821   } // loop over all the LFields
02822   TAU_PROFILE_STOP(timer_fft);
02823 
02824   TAU_PROFILE_START(timer_swap);
02825   // compress previous iterates storage
02826   if (this->compressTemps() && temp != &f) *temp = 0;
02827   TAU_PROFILE_STOP(timer_swap);
02828 
02829   // now do the real-to-real FFTs
02830 
02831   // Local work array passed to FFT:
02832   T* localdataR;
02833       
02834   // Loop over the remaining dimensions to be transformed:
02835   icount = Dim-1;  // start with last dimension
02836   activeDim = nTransformDims - 1;
02837   for (idim = numSineTransforms_m-1; idim != -1;
02838        --idim, --icount, --activeDim) {
02839     TAU_PROFILE_START(timer_swap);
02840     // find next sine transform dim
02841     while (!sineTransformDims_m[icount]) {
02842       if (this->transformDim(icount)) --activeDim;
02843       --icount;
02844     }
02845     PAssert(activeDim>=0);  // check that this is a valid dimension!
02846     // Now do the serial transforms along this dimension:
02847 
02848     bool skipTranspose = false;
02849     // if this is the last transform dimension, we might be able
02850     // to skip the last temporary and transpose right into g
02851     if (idim == 0) {
02852       // get the domain for comparison
02853       const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
02854       // check that zeroth axis is the same and is serial
02855       // and that there are no guard cells
02856       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02857                         (out_dom[0].length() == last_dom[0].length()) &&
02858                         (out_layout.getDistribution(0) == SERIAL) &&
02859                         (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02860     }
02861 
02862     if (!skipTranspose) {
02863       // transpose and permute to Field with transform dim first
02864       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
02865         (*tempR)[tempR->getLayout().getDomain()];
02866 
02867       // Compress out previous iterate's storage:
02868       if (this->compressTemps()) *tempR = 0;
02869       tempR = tempRFields_m[idim];  // Field* management aid
02870     }
02871     else if (idim == 0) {
02872       // last transform and we can skip the last temporary field
02873       // so do the transpose here using g instead
02874 
02875       // transpose and permute to Field with transform dim first
02876       g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
02877 
02878       // Compress out previous iterate's storage:
02879       if (this->compressTemps()) *tempR = 0;
02880       tempR = &g;  // Field* management aid
02881     }
02882     TAU_PROFILE_STOP(timer_swap);
02883       
02884     TAU_PROFILE_START(timer_fft);
02885     // Loop over all the Vnodes, working on the LField in each.
02886     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
02887     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
02888 
02889       // Get the LField
02890       RealLField_t* ldf = (*l_i).second.get();
02891       // make sure we are uncompressed
02892       ldf->Uncompress();
02893       // get the raw data pointer
02894       localdataR = ldf->getP();
02895 
02896       // Do 1D complex-to-complex FFT's on all the strips in the LField:
02897       int nstrips = 1, length = ldf->size(0);
02898       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
02899       for (int istrip=0; istrip<nstrips; ++istrip) {
02900         // Do the 1D FFT:
02901         this->getEngine().callFFT(activeDim, direction, localdataR);
02902         // advance the data pointer
02903         localdataR += length;
02904       } // loop over 1D strips
02905     } // loop over all the LFields
02906     TAU_PROFILE_STOP(timer_fft);
02907   } // loop over all transformed dimensions 
02908 
02909   // skip final assignment and compress if we used g as final temporary
02910   if (tempR != &g) {
02911     TAU_PROFILE_START(timer_swap);
02912     // Now assign into output Field, and compress last temp's storage:
02913     g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
02914     if (this->compressTemps()) *tempR = 0;
02915     TAU_PROFILE_STOP(timer_swap);
02916   }
02917 
02918   // Normalize:
02919   if (direction == +1) g = g * this->getNormFact();
02920 
02921   return;
02922 }
02923 
02924 //-----------------------------------------------------------------------------
02925 // Sine FFT only; separate input and output fields, direction is +1 or -1
02926 //-----------------------------------------------------------------------------
02927 
02928 template <unsigned Dim, class T>
02929 void
02930 FFT<SineTransform,Dim,T>::transform(
02931   int direction,
02932   FFT<SineTransform,Dim,T>::RealField_t& f,
02933   FFT<SineTransform,Dim,T>::RealField_t& g,
02934   const bool& constInput)
02935 {
02936   // Tau profiling
02937   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
02938   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ")");
02939   TAU_PROFILE(tauname, tautype, TAU_FFT);
02940   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
02941   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
02942 
02943   // indicate we're doing another FFT
02944   //INCIPPLSTAT(incFFTs);
02945 
02946   // Check domain of incoming Fields
02947   const Layout_t& in_layout = f.getLayout();
02948   const Domain_t& in_dom = in_layout.getDomain();
02949   const Layout_t& out_layout = g.getLayout();
02950   const Domain_t& out_dom = out_layout.getDomain();
02951   PAssert( checkDomain(this->getDomain(),in_dom) &&
02952            checkDomain(this->getDomain(),out_dom) );
02953 
02954   // Common loop iterate and other vars:
02955   unsigned d;
02956   int idim;      // idim loops over the number of transform dims.
02957   int begdim, enddim;
02958   unsigned nTransformDims = this->numTransformDims();
02959   // check that there is no real-to-complex transform to do
02960   PInsist(nTransformDims==numSineTransforms_m,
02961           "Wrong output Field type for real-to-complex transform!!");
02962 
02963   // do all the sine transforms
02964 
02965   // Field* management aid
02966   RealField_t* tempR = &f;
02967   // Local work array passed to FFT:
02968   T* localdataR;
02969       
02970   // Loop over the dimensions to be sine transformed:
02971   begdim = (direction == +1) ? 0 : nTransformDims-1;
02972   enddim = (direction == +1) ? nTransformDims : -1;
02973   for (idim = begdim; idim != enddim; idim+=direction) {
02974     TAU_PROFILE_START(timer_swap);
02975     // Now do the serial transforms along this dimension:
02976 
02977     bool skipTranspose = false;
02978     // if this is the first transform dimension, we might be able
02979     // to skip the first temporary and just use f
02980     if (idim == begdim && !constInput) {
02981       // get the domain for comparison
02982       const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
02983       // check that zeroth axis is the same and is serial
02984       // and that there are no guard cells
02985       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
02986                         (in_dom[0].length() == first_dom[0].length()) &&
02987                         (in_layout.getDistribution(0) == SERIAL) &&
02988                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
02989     }
02990 
02991     // if this is the last transform dimension, we might be able
02992     // to skip the last temporary and transpose right into g
02993     if (idim == enddim-direction) {
02994       // get the domain for comparison
02995       const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
02996       // check that zeroth axis is the same and is serial
02997       // and that there are no guard cells
02998       skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
02999                         (out_dom[0].length() == last_dom[0].length()) &&
03000                         (out_layout.getDistribution(0) == SERIAL) &&
03001                         (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03002     }
03003 
03004     if (!skipTranspose) {
03005       // transpose and permute to Field with transform dim first
03006       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
03007         (*tempR)[tempR->getLayout().getDomain()];
03008 
03009       // Compress out previous iterate's storage:
03010       if (this->compressTemps() && tempR != &f) *tempR = 0;
03011       tempR = tempRFields_m[idim];  // Field* management aid
03012     }
03013     else if (idim == enddim-direction && tempR != &g) {
03014       // last transform and we can skip the last temporary field
03015       // so do the transpose here using g instead
03016 
03017       // transpose and permute to Field with transform dim first
03018       g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
03019 
03020       // Compress out previous iterate's storage:
03021       if (this->compressTemps() && tempR != &f) *tempR = 0;
03022       tempR = &g;  // Field* management aid
03023     }
03024     TAU_PROFILE_STOP(timer_swap);
03025       
03026     TAU_PROFILE_START(timer_fft);
03027     // Loop over all the Vnodes, working on the LField in each.
03028     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
03029     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
03030 
03031       // Get the LField
03032       RealLField_t* ldf = (*l_i).second.get();
03033       // make sure we are uncompressed
03034       ldf->Uncompress();
03035       // get the raw data pointer
03036       localdataR = ldf->getP();
03037 
03038       // Do 1D real-to-real FFT's on all the strips in the LField:
03039       int nstrips = 1, length = ldf->size(0);
03040       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
03041       for (int istrip=0; istrip<nstrips; ++istrip) {
03042         // Do the 1D FFT:
03043         this->getEngine().callFFT(idim, direction, localdataR);
03044         // advance the data pointer
03045         localdataR += length;
03046       } // loop over 1D strips
03047     } // loop over all the LFields
03048     TAU_PROFILE_STOP(timer_fft);
03049   } // loop over all transformed dimensions 
03050 
03051   // skip final assignment and compress if we used g as final temporary
03052   if (tempR != &g) {
03053     TAU_PROFILE_START(timer_swap);
03054     // Now assign into output Field, and compress last temp's storage:
03055     g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
03056     if (this->compressTemps() && tempR != &f) *tempR = 0;
03057     TAU_PROFILE_STOP(timer_swap);
03058   }
03059 
03060   // Normalize:
03061   if (direction == +1) g = g * this->getNormFact();
03062 
03063   return;
03064 }
03065 
03066 //-----------------------------------------------------------------------------
03067 // Sine FFT only; in-place transform, direction is +1 or -1
03068 //-----------------------------------------------------------------------------
03069 
03070 template <unsigned Dim, class T>
03071 void
03072 FFT<SineTransform,Dim,T>::transform(
03073   int direction,
03074   FFT<SineTransform,Dim,T>::RealField_t& f)
03075 {
03076   // Tau profiling
03077   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03078   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
03079   TAU_PROFILE(tauname, tautype, TAU_FFT);
03080   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03081   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03082 
03083   // indicate we're doing another FFT
03084   //INCIPPLSTAT(incFFTs);
03085 
03086   // Check domain of incoming Field
03087   const Layout_t& in_layout = f.getLayout();
03088   const Domain_t& in_dom = in_layout.getDomain();
03089   PAssert(checkDomain(this->getDomain(),in_dom));
03090 
03091   // Common loop iterate and other vars:
03092   unsigned d;
03093   int idim;      // idim loops over the number of transform dims.
03094   int begdim, enddim;
03095   unsigned nTransformDims = this->numTransformDims();
03096   // check that there is no real-to-complex transform to do
03097   PInsist(nTransformDims==numSineTransforms_m,
03098           "Cannot perform real-to-complex transform in-place!!");
03099 
03100   // do all the sine transforms
03101 
03102   // Field* management aid
03103   RealField_t* tempR = &f;
03104   // Local work array passed to FFT:
03105   T* localdataR;
03106       
03107   // Loop over the dimensions to be sine transformed:
03108   begdim = (direction == +1) ? 0 : nTransformDims-1;
03109   enddim = (direction == +1) ? nTransformDims : -1;
03110   for (idim = begdim; idim != enddim; idim+=direction) {
03111     TAU_PROFILE_START(timer_swap);
03112     // Now do the serial transforms along this dimension:
03113 
03114     bool skipTranspose = false;
03115     // if this is the first transform dimension, we might be able
03116     // to skip the transpose into the first temporary Field
03117     if (idim == begdim) {
03118       // get domain for comparison
03119       const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
03120       // check that zeroth axis is the same and is serial
03121       // and that there are no guard cells
03122       skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
03123                         (in_dom[0].length() == first_dom[0].length()) &&
03124                         (in_layout.getDistribution(0) == SERIAL) &&
03125                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03126     }
03127 
03128     // if this is the last transform dimension, we might be able
03129     // to skip the last temporary and transpose right into f
03130     if (idim == enddim-direction) {
03131       // get the domain for comparison
03132       const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
03133       // check that zeroth axis is the same and is serial
03134       // and that there are no guard cells
03135       skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
03136                         (in_dom[0].length() == last_dom[0].length()) &&
03137                         (in_layout.getDistribution(0) == SERIAL) &&
03138                         (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
03139     }
03140 
03141     if (!skipTranspose) {
03142       // transpose and permute to Field with transform dim first
03143       (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] = 
03144         (*tempR)[tempR->getLayout().getDomain()];
03145 
03146       // Compress out previous iterate's storage:
03147       if (this->compressTemps() && tempR != &f) *tempR = 0;
03148       tempR = tempRFields_m[idim];  // Field* management aid
03149     }
03150     else if (idim == enddim-direction && tempR != &f) {
03151       // last transform and we can skip the last temporary field
03152       // so do the transpose here using f instead
03153 
03154       // transpose and permute to Field with transform dim first
03155       f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
03156 
03157       // Compress out previous iterate's storage:
03158       if (this->compressTemps() && tempR != &f) *tempR = 0;
03159       tempR = &f;  // Field* management aid
03160     }
03161     TAU_PROFILE_STOP(timer_swap);
03162       
03163     TAU_PROFILE_START(timer_fft);
03164     // Loop over all the Vnodes, working on the LField in each.
03165     typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
03166     for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
03167 
03168       // Get the LField
03169       RealLField_t* ldf = (*l_i).second.get();
03170       // make sure we are uncompressed
03171       ldf->Uncompress();
03172       // get the raw data pointer
03173       localdataR = ldf->getP();
03174 
03175       // Do 1D real-to-real FFT's on all the strips in the LField:
03176       int nstrips = 1, length = ldf->size(0);
03177       for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
03178       for (int istrip=0; istrip<nstrips; ++istrip) {
03179         // Do the 1D FFT:
03180         this->getEngine().callFFT(idim, direction, localdataR);
03181         // advance the data pointer
03182         localdataR += length;
03183       } // loop over 1D strips
03184     } // loop over all the LFields
03185     TAU_PROFILE_STOP(timer_fft);
03186   } // loop over all transformed dimensions 
03187 
03188   // skip final assignment and compress if we used g as final temporary
03189   if (tempR != &f) {
03190     TAU_PROFILE_START(timer_swap);
03191     // Now assign into output Field, and compress last temp's storage:
03192     f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
03193     if (this->compressTemps()) *tempR = 0;
03194     TAU_PROFILE_STOP(timer_swap);
03195   }
03196 
03197   // Normalize:
03198   if (direction == +1) f = f * this->getNormFact();
03199 
03200   return;
03201 }
03202 
03203 
03204 //=============================================================================
03205 // 1D FFT SineTransform Constructors
03206 //=============================================================================
03207 
03208 //-----------------------------------------------------------------------------
03209 // Constructor for doing only sine transforms.
03210 // Create a new FFT object of type SineTransform, with given real field
03211 // domain. Also specify which dimensions to sine transform along.
03212 //-----------------------------------------------------------------------------
03213 
03214 template <class T>
03215 FFT<SineTransform,1U,T>::FFT(
03216   const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
03217   const bool sineTransformDims[1U], const bool& compressTemps)
03218   : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain,
03219                   sineTransformDims, compressTemps)
03220 {
03221   // Tau profiling
03222   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
03223   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool [], const bool&)");
03224   TAU_PROFILE(tauname, tautype, TAU_FFT);
03225 
03226   // get axis length
03227   int length;
03228   length = rdomain[0].length();
03229 
03230   // get transform type for FFT Engine, compute normalization
03231   int transformType;
03232   transformType = FFTBase<1U,T>::sineFFT;  // sine transform
03233   T& normFact = this->getNormFact();
03234   normFact = 1.0 / (2.0 * (length + 1));
03235 
03236   // set up FFT Engine
03237   this->getEngine().setup(1U, &transformType, &length);
03238   // set up the temporary fields
03239   setup();
03240 }
03241 
03242 //-----------------------------------------------------------------------------
03243 // Create a new FFT object of type SineTransform, with
03244 // given real domain. Default: sine transform along all dimensions.
03245 //-----------------------------------------------------------------------------
03246 
03247 template <class T>
03248 FFT<SineTransform,1U,T>::FFT(
03249   const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
03250   const bool& compressTemps) 
03251   : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain, compressTemps)
03252 {
03253   // Tau profiling
03254   TAU_TYPE_STRING(tauname, CT(*this) + "::FFT");
03255   TAU_TYPE_STRING(tautype, "(" + CT(rdomain) + ", const bool&)");
03256   TAU_PROFILE(tauname, tautype, TAU_FFT); 
03257 
03258   // get axis length
03259   int length;
03260   length = rdomain[0].length();
03261 
03262   // get transform type for FFT Engine, compute normalization
03263   int transformType;
03264   transformType = FFTBase<1U,T>::sineFFT;  // sine transform
03265   T& normFact = this->getNormFact();
03266   normFact = 1.0 / (2.0 * (length + 1));
03267 
03268   // set up FFT Engine
03269   this->getEngine().setup(1U, &transformType, &length);
03270   // set up the temporary fields
03271   setup();
03272 }
03273 
03274 //-----------------------------------------------------------------------------
03275 // setup performs all the initializations necessary after the transform
03276 // directions have been specified.
03277 //-----------------------------------------------------------------------------
03278 
03279 template <class T>
03280 void
03281 FFT<SineTransform,1U,T>::setup(void) {
03282 
03283   // Tau profiling
03284   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::setup");
03285   TAU_TYPE_STRING(tautype, "(void)");
03286   TAU_PROFILE(tauname, tautype, TAU_FFT); 
03287 
03288   const Domain_t& domain = this->getDomain();          // get real Field domain
03289 
03290   // generate temporary field layout
03291   tempRLayouts_m = new Layout_t(domain[0], PARALLEL, 1);
03292   // create temporary real Field
03293   tempRFields_m = new RealField_t(*tempRLayouts_m);
03294   // If user requests no intermediate compression, uncompress right now:
03295   if (!this->compressTemps()) tempRFields_m->Uncompress();
03296 
03297   return;
03298 }
03299 
03300 //-----------------------------------------------------------------------------
03301 // Destructor
03302 //-----------------------------------------------------------------------------
03303 
03304 template <class T>
03305 FFT<SineTransform,1U,T>::~FFT(void) {
03306 
03307   // Tau profiling
03308   TAU_TYPE_STRING(tauname, CT(*this) + "::~FFT");
03309   TAU_TYPE_STRING(tautype, "(void)");
03310   TAU_PROFILE(tauname, tautype, TAU_FFT); 
03311 
03312   // delete temporary field and layout
03313   delete tempRFields_m;
03314   delete tempRLayouts_m;
03315 }
03316 
03317 //-----------------------------------------------------------------------------
03318 // Sine FFT only; separate input and output fields, direction is +1 or -1
03319 //-----------------------------------------------------------------------------
03320 
03321 template <class T>
03322 void
03323 FFT<SineTransform,1U,T>::transform(
03324   int direction,
03325   FFT<SineTransform,1U,T>::RealField_t& f,
03326   FFT<SineTransform,1U,T>::RealField_t& g,
03327   const bool& constInput)
03328 {
03329   // Tau profiling
03330   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03331   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ", " + CT(g) + ", const bool&)");
03332   TAU_PROFILE(tauname, tautype, TAU_FFT);
03333   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03334   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03335 
03336   // indicate we're doing another FFT
03337   //INCIPPLSTAT(incFFTs);
03338 
03339   // Check domain of incoming Fields
03340   const Layout_t& in_layout = f.getLayout();
03341   const Domain_t& in_dom = in_layout.getDomain();
03342   const Layout_t& out_layout = g.getLayout();
03343   const Domain_t& out_dom = out_layout.getDomain();
03344   PAssert( checkDomain(this->getDomain(),in_dom) &&
03345            checkDomain(this->getDomain(),out_dom) );
03346 
03347   // Field* management aid
03348   RealField_t* tempR = &f;
03349   // Local work array passed to FFT:
03350   T* localdataR;
03351       
03352   TAU_PROFILE_START(timer_swap);
03353   // Now do the serial transform along this dimension:
03354 
03355   // get the domain for comparison
03356   const Domain_t& temp_dom = tempRLayouts_m->getDomain();
03357   bool skipTranspose = false;
03358   // we might be able
03359   // to skip the first temporary and just use f
03360   if (!constInput) {
03361     // check that zeroth axis is the same, has one vnode
03362     // and that there are no guard cells
03363     skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
03364                       (in_dom[0].length() == temp_dom[0].length()) &&
03365                       (in_layout.numVnodes() == 1) &&
03366                       (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03367   }
03368 
03369   bool skipFinal = false;
03370   // we might be able
03371   // to skip the last temporary and transpose right into g
03372 
03373   // check that zeroth axis is the same, has one vnode
03374   // and that there are no guard cells
03375   skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
03376                 (out_dom[0].length() == temp_dom[0].length()) &&
03377                 (out_layout.numVnodes() == 1) &&
03378                 (g.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03379 
03380   if (!skipTranspose) {
03381     // assign to Field with proper layout
03382     (*tempRFields_m) = (*tempR);
03383     tempR = tempRFields_m;  // Field* management aid
03384   }
03385   if (skipFinal) {
03386     // we can skip the last temporary field
03387     // so do the transpose here using g instead
03388 
03389     // assign to Field with proper layout
03390     g = (*tempR);
03391 
03392     // Compress out previous iterate's storage:
03393     if (this->compressTemps() && tempR != &f) *tempR = 0;
03394     tempR = &g;  // Field* management aid
03395   }
03396   TAU_PROFILE_STOP(timer_swap);
03397       
03398   TAU_PROFILE_START(timer_fft);
03399   // There should be just one LField.
03400   typename RealField_t::const_iterator_if l_i = tempR->begin_if();
03401   if (l_i != tempR->end_if()) {
03402 
03403     // Get the LField
03404     RealLField_t* ldf = (*l_i).second.get();
03405     // make sure we are uncompressed
03406     ldf->Uncompress();
03407     // get the raw data pointer
03408     localdataR = ldf->getP();
03409 
03410     // Do the 1D FFT:
03411     this->getEngine().callFFT(0, direction, localdataR);
03412   } 
03413   TAU_PROFILE_STOP(timer_fft);
03414 
03415   // skip final assignment and compress if we used g as final temporary
03416   if (tempR != &g) {
03417     TAU_PROFILE_START(timer_swap);
03418     // Now assign into output Field, and compress last temp's storage:
03419     g = (*tempR);
03420     if (this->compressTemps() && tempR != &f) *tempR = 0;
03421     TAU_PROFILE_STOP(timer_swap);
03422   }
03423 
03424   // Normalize:
03425   if (direction == +1) g = g * this->getNormFact();
03426 
03427   return;
03428 }
03429 
03430 //-----------------------------------------------------------------------------
03431 // Sine FFT only; in-place transform, direction is +1 or -1
03432 //-----------------------------------------------------------------------------
03433 
03434 template <class T>
03435 void
03436 FFT<SineTransform,1U,T>::transform(
03437   int direction,
03438   FFT<SineTransform,1U,T>::RealField_t& f)
03439 {
03440   // Tau profiling
03441   TAU_TYPE_STRING(tauname, "void " + CT(*this) + "::transform");
03442   TAU_TYPE_STRING(tautype, "(int, " + CT(f) + ")");
03443   TAU_PROFILE(tauname, tautype, TAU_FFT);
03444   TAU_PROFILE_TIMER(timer_swap, tauname, "-- data transpose", TAU_FFT);
03445   TAU_PROFILE_TIMER(timer_fft, tauname, "-- fft", TAU_FFT);
03446 
03447   // indicate we're doing another FFT
03448   //INCIPPLSTAT(incFFTs);
03449 
03450   // Check domain of incoming Field
03451   const Layout_t& in_layout = f.getLayout();
03452   const Domain_t& in_dom = in_layout.getDomain();
03453   PAssert(checkDomain(this->getDomain(),in_dom));
03454 
03455   // Field* management aid
03456   RealField_t* tempR = &f;
03457   // Local work array passed to FFT:
03458   T* localdataR;
03459       
03460   TAU_PROFILE_START(timer_swap);
03461   // Now do the serial transform along this dimension:
03462 
03463   // get domain for comparison
03464   const Domain_t& temp_dom = tempRLayouts_m->getDomain();
03465   bool skipTranspose = false;
03466   // we might be able
03467   // to skip the transpose into the first temporary Field
03468 
03469   // check that zeroth axis is the same and is serial
03470   // and that there are no guard cells
03471   skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
03472                     (in_dom[0].length() == temp_dom[0].length()) &&
03473                     (in_layout.numVnodes() == 1) &&
03474                     (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03475 
03476   bool skipFinal = false;
03477   // we might be able
03478   // to skip the last temporary and transpose right into f
03479 
03480   // check that zeroth axis is the same, has one vnode
03481   // and that there are no guard cells
03482   skipFinal = ( (in_dom[0].sameBase(temp_dom[0])) &&
03483                 (in_dom[0].length() == temp_dom[0].length()) &&
03484                 (in_layout.numVnodes() == 1) &&
03485                 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
03486 
03487   if (!skipTranspose) {
03488     // assign to Field with proper layout
03489     (*tempRFields_m) = (*tempR);
03490 
03491     tempR = tempRFields_m;  // Field* management aid
03492   }
03493   if (skipFinal) {
03494     // we can skip the last temporary field
03495     // so do the transpose here using f instead
03496 
03497     // assign to Field with proper layout
03498     f = (*tempR);
03499 
03500     // Compress out previous iterate's storage:
03501     if (this->compressTemps() && tempR != &f) *tempR = 0;
03502     tempR = &f;  // Field* management aid
03503   }
03504   TAU_PROFILE_STOP(timer_swap);
03505       
03506   TAU_PROFILE_START(timer_fft);
03507   // There should be just one LField.
03508   typename RealField_t::const_iterator_if l_i = tempR->begin_if();
03509   if (l_i != tempR->end_if()) {
03510 
03511     // Get the LField
03512     RealLField_t* ldf = (*l_i).second.get();
03513     // make sure we are uncompressed
03514     ldf->Uncompress();
03515     // get the raw data pointer
03516     localdataR = ldf->getP();
03517 
03518     // Do the 1D FFT:
03519     this->getEngine().callFFT(0, direction, localdataR);
03520   }
03521   TAU_PROFILE_STOP(timer_fft);
03522 
03523   // skip final assignment and compress if we used g as final temporary
03524   if (tempR != &f) {
03525     TAU_PROFILE_START(timer_swap);
03526     // Now assign into output Field, and compress last temp's storage:
03527     f = (*tempR);
03528     if (this->compressTemps()) *tempR = 0;
03529     TAU_PROFILE_STOP(timer_swap);
03530   }
03531 
03532   // Normalize:
03533   if (direction == +1) f = f * this->getNormFact();
03534 
03535   return;
03536 }
03537 
03538 
03539 
03540 /***************************************************************************
03541  * $RCSfile: FFT.cpp,v $   $Author: adelmann $
03542  * $Revision: 1.1.1.1 $   $Date: 2003/01/23 07:40:25 $
03543  * IPPL_VERSION_ID: $Id: FFT.cpp,v 1.1.1.1 2003/01/23 07:40:25 adelmann Exp $ 
03544  ***************************************************************************/