OPAL (Object Oriented Parallel Accelerator Library)  2.2.0
OPAL
FFT.hpp
Go to the documentation of this file.
1 //
2 // IPPL FFT
3 //
4 // Copyright (c) 2008-2018
5 // Paul Scherrer Institut, Villigen PSI, Switzerland
6 // All rights reserved.
7 //
8 // OPAL is licensed under GNU GPL version 3.
9 //
10 
15 #include "FFT/FFT.h"
17 #include "Field/BareField.h"
18 #include "Utility/IpplStats.h"
19 
20 #ifdef IPPL_PRINTDEBUG
21 #define FFTDBG(x) x
22 #else
23 #define FFTDBG(x)
24 #endif
25 
26 //=============================================================================
27 // FFT CCTransform Constructors
28 //=============================================================================
29 
35 template <size_t Dim, class T>
37  const typename FFT<CCTransform,Dim,T>::Domain_t& cdomain,
38  const bool transformTheseDims[Dim],
39  const bool& compressTemps)
40 : FFTBase<Dim,T>(FFT<CCTransform,Dim,T>::ccFFT, cdomain,
41  transformTheseDims, compressTemps)
42 {
43 
44  // construct array of axis lengths
45  size_t nTransformDims = this->numTransformDims();
46  int* lengths = new int[nTransformDims];
47  size_t d;
48  for (d=0; d<nTransformDims; ++d)
49  lengths[d] = cdomain[this->activeDimension(d)].length();
50 
51  // construct array of transform types for FFT Engine, compute normalization
52  int* transformTypes = new int[nTransformDims];
53  T& normFact = this->getNormFact();
54  normFact = 1.0;
55  for (d=0; d<nTransformDims; ++d) {
56  transformTypes[d] = FFTBase<Dim,T>::ccFFT; // all transforms are complex-to-complex
57  normFact /= lengths[d];
58  }
59 
60  // set up FFT Engine
61  this->getEngine().setup(nTransformDims, transformTypes, lengths);
62  delete [] transformTypes;
63  delete [] lengths;
64  // set up the temporary fields
65  setup();
66 }
67 
68 
73 template <size_t Dim, class T>
74 void
76 {
77  // Tau profiling
78 
79 
80  size_t d, activeDim;
81  size_t nTransformDims = this->numTransformDims();
82  // Set up the arrays of temporary Fields and FieldLayouts:
83  e_dim_tag serialParallel[Dim]; // Specifies SERIAL, PARALLEL dims in temp
84  // make zeroth dimension always SERIAL
85  serialParallel[0] = SERIAL;
86  // all other dimensions parallel
87  for (d=1; d<Dim; ++d)
88  serialParallel[d] = PARALLEL;
89 
90  tempLayouts_m = new Layout_t*[nTransformDims];
91  tempFields_m = new ComplexField_t*[nTransformDims];
92 
93  // loop over transform dimensions
94  for (size_t dim=0; dim<nTransformDims; ++dim) {
95  // get number of dimension to be transformed
96  activeDim = this->activeDimension(dim);
97  // Get input Field's domain
98  const Domain_t& ndic = this->getDomain();
99  // make new domain with permuted Indexes, activeDim first
100  Domain_t ndip;
101  ndip[0] = ndic[activeDim];
102  for (d=1; d<Dim; ++d) {
103  size_t nextDim = activeDim + d;
104  if (nextDim >= Dim) nextDim -= Dim;
105  ndip[d] = ndic[nextDim];
106  }
107  // generate temporary field layout
108  tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
109  // generate temporary Field
110  tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
111  // If user requests no intermediate compression, uncompress right now:
112  if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
113  }
114 
115  return;
116 }
117 
118 //-----------------------------------------------------------------------------
119 // Destructor
120 //-----------------------------------------------------------------------------
121 
122 template <size_t Dim, class T>
124 
125  // Tau profiling
126 
127  /*
128  #ifdef IPPL_OPENCL
129  base.ocl_cleanUp();
130  #endif
131  */
132 
133  // delete arrays of temporary fields and field layouts
134  size_t nTransformDims = this->numTransformDims();
135  for (size_t d=0; d<nTransformDims; ++d) {
136  delete tempFields_m[d];
137  delete tempLayouts_m[d];
138  }
139  delete [] tempFields_m;
140  delete [] tempLayouts_m;
141 }
142 
143 
144 //-----------------------------------------------------------------------------
145 // do the CC FFT; separate input and output fields
146 //-----------------------------------------------------------------------------
147 
148 template <size_t Dim, class T>
149 void
151  int direction,
154  const bool& constInput)
155 {
156  // indicate we're doing another FFT
157  //INCIPPLSTAT(incFFTs);
158 
159  // Check domain of incoming Fields
160  const Layout_t& in_layout = f.getLayout();
161  const Domain_t& in_dom = in_layout.getDomain();
162  const Layout_t& out_layout = g.getLayout();
163  const Domain_t& out_dom = out_layout.getDomain();
164  PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
165  this->checkDomain(this->getDomain(),out_dom), true);
166 
167  // Common loop iterate and other vars:
168  size_t d;
169  int idim; // idim loops over the number of transform dims.
170  int begdim, enddim; // beginning and end of transform dim loop
171  size_t nTransformDims = this->numTransformDims();
172  // Field* for temp Field management:
173  ComplexField_t* temp = &f;
174  // Local work array passed to FFT:
175  Complex_t* localdata;
176 
177  // Loop over the dimensions be transformed:
178  begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
179  enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
180  for (idim = begdim; idim != enddim; idim += direction) {
181 
182  // Now do the serial transforms along this dimension:
183 
184  bool skipTranspose = false;
185  // if this is the first transform dimension, we might be able
186  // to skip the transpose into the first temporary Field
187  if (idim == begdim && !constInput) {
188  // get domain for comparison
189  const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
190  // check that zeroth axis is the same and is serial
191  // and that there are no guard cells
192  skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
193  (in_dom[0].length() == first_dom[0].length()) &&
194  (in_layout.getDistribution(0) == SERIAL) &&
195  (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
196  }
197 
198  // if this is the last transform dimension, we might be able
199  // to skip the last temporary and transpose right into g
200  if (idim == enddim-direction) {
201  // get the domain for comparison
202  const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
203  // check that zeroth axis is the same and is serial
204  // and that there are no guard cells
205  skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
206  (out_dom[0].length() == last_dom[0].length()) &&
207  (out_layout.getDistribution(0) == SERIAL) &&
208  (g.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
209  }
210 
211  if (!skipTranspose) {
212  // transpose and permute to Field with transform dim first
213  (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
214  (*temp)[temp->getLayout().getDomain()];
215 
216  // Compress out previous iterate's storage:
217  if (this->compressTemps() && temp != &f) *temp = 0;
218  temp = tempFields_m[idim]; // Field* management aid
219  }
220  else if (idim == enddim-direction && temp != &g) {
221  // last transform and we can skip the last temporary field
222  // so do the transpose here using g instead
223 
224  // transpose and permute to Field with transform dim first
225  g[out_dom] = (*temp)[temp->getLayout().getDomain()];
226 
227  // Compress out previous iterate's storage:
228  if (this->compressTemps() && temp != &f) *temp = 0;
229  temp = &g; // Field* management aid
230  }
231 
232  // Loop over all the Vnodes, working on the LField in each.
233  typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
234  for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
235 
236  // Get the LField
237  ComplexLField_t* ldf = (*l_i).second.get();
238  // make sure we are uncompressed
239  ldf->Uncompress();
240  // get the raw data pointer
241  localdata = ldf->getP();
242 
243  // Do 1D complex-to-complex FFT's on all the strips in the LField:
244  int nstrips = 1, length = ldf->size(0);
245  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
246  for (int istrip=0; istrip<nstrips; ++istrip) {
247  // Do the 1D FFT:
248  this->getEngine().callFFT(idim, direction, localdata);
249  // advance the data pointer
250  localdata += length;
251  } // loop over 1D strips
252  } // loop over all the LFields
253 
254  } // loop over all transformed dimensions
255 
256  // skip final assignment and compress if we used g as final temporary
257  if (temp != &g) {
258 
259  // Now assign into output Field, and compress last temp's storage:
260  g[out_dom] = (*temp)[temp->getLayout().getDomain()];
261  if (this->compressTemps() && temp != &f) *temp = 0;
262 
263  }
264 
265  // Normalize:
266  if (direction == +1)
267  g *= Complex_t(this->getNormFact(), 0.0);
268 
269  return;
270 }
271 
272 template <size_t Dim, class T>
273 void
275  int direction,
277 {
278 
279  // indicate we're doing another FFT
280  // INCIPPLSTAT(incFFTs);
281 
282  // Check domain of incoming Field
283  const Layout_t& in_layout = f.getLayout();
284  const Domain_t& in_dom = in_layout.getDomain();
285  PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
286 
287  // Common loop iterate and other vars:
288  size_t d;
289  int idim; // idim loops over the number of transform dims.
290  int begdim, enddim; // beginning and end of transform dim loop
291  size_t nTransformDims = this->numTransformDims();
292  // Field* for temp Field management:
293  ComplexField_t* temp = &f;
294  // Local work array passed to FFT:
295  Complex_t* localdata;
296 
297  // Loop over the dimensions be transformed:
298  begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
299  enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
300  for (idim = begdim; idim != enddim; idim += direction) {
301 
302  // Now do the serial transforms along this dimension:
303 
304  bool skipTranspose = false;
305  // if this is the first transform dimension, we might be able
306  // to skip the transpose into the first temporary Field
307  if (idim == begdim) {
308  // get domain for comparison
309  const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
310  // check that zeroth axis is the same and is serial
311  // and that there are no guard cells
312  skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
313  (in_dom[0].length() == first_dom[0].length()) &&
314  (in_layout.getDistribution(0) == SERIAL) &&
315  (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
316  }
317 
318  // if this is the last transform dimension, we might be able
319  // to skip the last temporary and transpose right into f
320  if (idim == enddim-direction) {
321  // get domain for comparison
322  const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
323  // check that zeroth axis is the same and is serial
324  // and that there are no guard cells
325  skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
326  (in_dom[0].length() == last_dom[0].length()) &&
327  (in_layout.getDistribution(0) == SERIAL) &&
328  (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
329  }
330 
331  if (!skipTranspose) {
332  // transpose and permute to Field with transform dim first
333  (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
334  (*temp)[temp->getLayout().getDomain()];
335 
336  // Compress out previous iterate's storage:
337  if (this->compressTemps() && temp != &f) *temp = 0;
338  temp = tempFields_m[idim]; // Field* management aid
339  }
340  else if (idim == enddim-direction && temp != &f) {
341  // last transform and we can skip the last temporary field
342  // so do the transpose here using f instead
343 
344  // transpose and permute to Field with transform dim first
345  f[in_dom] = (*temp)[temp->getLayout().getDomain()];
346 
347  // Compress out previous iterate's storage:
348  if (this->compressTemps()) *temp = 0;
349  temp = &f; // Field* management aid
350  }
351 
352  // Loop over all the Vnodes, working on the LField in each.
353  typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
354  for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
355 
356  // Get the LField
357  ComplexLField_t* ldf = (*l_i).second.get();
358  // make sure we are uncompressed
359  ldf->Uncompress();
360  // get the raw data pointer
361  localdata = ldf->getP();
362 
363  // Do 1D complex-to-complex FFT's on all the strips in the LField:
364  int nstrips = 1, length = ldf->size(0);
365  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
366  for (int istrip=0; istrip<nstrips; ++istrip) {
367  // Do the 1D FFT:
368  this->getEngine().callFFT(idim, direction, localdata);
369  // advance the data pointer
370  localdata += length;
371  } // loop over 1D strips
372  } // loop over all the LFields
373 
374 
375  } // loop over all transformed dimensions
376 
377  // skip final assignment and compress if we used f as final temporary
378  if (temp != &f) {
379 
380  // Now assign back into original Field, and compress last temp's storage:
381  f[in_dom] = (*temp)[temp->getLayout().getDomain()];
382  if (this->compressTemps()) *temp = 0;
383 
384  }
385 
386  // Normalize:
387  if (direction == +1)
388  f *= Complex_t(this->getNormFact(), 0.0);
389 
390  return;
391 }
392 
393 //=============================================================================
394 // 1D FFT CCTransform Constructors
395 //=============================================================================
396 
397 //-----------------------------------------------------------------------------
398 // Create a new FFT object of type CCTransform, with a
399 // given domain. Also specify which dimensions to transform along.
400 //-----------------------------------------------------------------------------
401 
402 template <class T>
404  const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
405  const bool transformTheseDims[1U], const bool& compressTemps)
406 : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain,
407  transformTheseDims, compressTemps)
408 {
409 
410  // Tau profiling
411 
412 
413 
414 
415  size_t nTransformDims = 1U;
416  // get axis length
417  int length;
418  length = cdomain[0].length();
419 
420  // get transform type for FFT Engine, compute normalization
421  int transformType;
422  transformType = FFTBase<1U,T>::ccFFT; // all transforms are complex-to-complex
423  T& normFact = this->getNormFact();
424  normFact = 1.0 / length;
425 
426  // set up FFT Engine
427  this->getEngine().setup(nTransformDims, &transformType, &length);
428  // set up the temporary fields
429  setup();
430 }
431 
432 //-----------------------------------------------------------------------------
433 // Create a new FFT object of type CCTransform, with a
434 // given domain. Default case of transforming along all dimensions.
435 //-----------------------------------------------------------------------------
436 
437 template <class T>
439  const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
440  const bool& compressTemps)
441 : FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain, compressTemps)
442 {
443 
444  // Tau profiling
445 
446  // get axis length
447  int length;
448  length = cdomain[0].length();
449 
450  // get transform type for FFT Engine, compute normalization
451  int transformType;
452  transformType = FFTBase<1U,T>::ccFFT; // all transforms are complex-to-complex
453  T& normFact = this->getNormFact();
454  normFact = 1.0 / length;
455 
456  // set up FFT Engine
457  this->getEngine().setup(1U, &transformType, &length);
458  // set up the temporary fields
459  setup();
460 }
461 
462 //-----------------------------------------------------------------------------
463 // setup performs all the initializations necessary after the transform
464 // directions have been specified.
465 //-----------------------------------------------------------------------------
466 
467 template <class T>
468 void
470 {
471  // Tau profiling
472 
473  // Get input Field's domain
474  const Domain_t& ndic = this->getDomain();
475  // generate temporary field layout
476  tempLayouts_m = new Layout_t(ndic[0], PARALLEL, 1);
477  // generate temporary Field
478  tempFields_m = new ComplexField_t(*tempLayouts_m);
479  // If user requests no intermediate compression, uncompress right now:
480  if (!this->compressTemps()) tempFields_m->Uncompress();
481 
482  return;
483 }
484 
485 //-----------------------------------------------------------------------------
486 // Destructor
487 //-----------------------------------------------------------------------------
488 
489 template <class T>
491 
492  // Tau profiling
493 
494 
495 
496 
497  // delete temporary fields and field layouts
498  delete tempFields_m;
499  delete tempLayouts_m;
500 }
501 
502 
503 //-----------------------------------------------------------------------------
504 // do the CC FFT; separate input and output fields
505 //-----------------------------------------------------------------------------
506 
507 template <class T>
508 void
510  int direction,
513  const bool& constInput)
514 {
515 
516  // indicate we're doing another FFT
517  // INCIPPLSTAT(incFFTs);
518 
519  // Check domain of incoming Fields
520  const Layout_t& in_layout = f.getLayout();
521  const Domain_t& in_dom = in_layout.getDomain();
522  const Layout_t& out_layout = g.getLayout();
523  const Domain_t& out_dom = out_layout.getDomain();
524  PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
525  this->checkDomain(this->getDomain(),out_dom), true);
526 
527  // Field* for temp Field management:
528  ComplexField_t* temp = &f;
529  // Local work array passed to FFT:
530  Complex_t* localdata;
531 
532 
533  // Now do the serial transforms along this dimension:
534 
535  // get temp domain for comparison
536  const Domain_t& temp_dom = tempLayouts_m->getDomain();
537 
538  bool skipTranspose = false;
539  // if this is the first transform dimension, we might be able
540  // to skip the transpose into the first temporary Field
541  if (!constInput) {
542  // check that zeroth axis is the same, has one vnode,
543  // and that there are no guard cells
544  skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
545  (in_dom[0].length() == temp_dom[0].length()) &&
546  (in_layout.numVnodes() == 1) &&
547  (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
548  }
549 
550  bool skipFinal;
551  // we might be able
552  // to skip the last temporary and transpose right into g
553 
554  // check that zeroth axis is the same, has one vnode
555  // and that there are no guard cells
556  skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
557  (out_dom[0].length() == temp_dom[0].length()) &&
558  (out_layout.numVnodes() == 1) &&
559  (g.getGC() == FFT<CCTransform,1U,T>::nullGC) );
560 
561  if (!skipTranspose) {
562  // assign to Field with proper layout
563  (*tempFields_m) = (*temp);
564  temp = tempFields_m; // Field* management aid
565  }
566  if (skipFinal) {
567  // we can skip the last temporary field
568  // so do the transpose here using g instead
569 
570  // assign to Field with proper layout
571  g = (*temp);
572 
573  // Compress out previous iterate's storage:
574  if (this->compressTemps() && temp != &f) *temp = 0;
575  temp = &g; // Field* management aid
576  }
577 
578 
579 
580 
581  // should be only one LField!
582  typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
583  if (l_i != temp->end_if()) {
584  // Get the LField
585  ComplexLField_t* ldf = (*l_i).second.get();
586  // make sure we are uncompressed
587  ldf->Uncompress();
588  // get the raw data pointer
589  localdata = ldf->getP();
590 
591  // Do the 1D FFT:
592  this->getEngine().callFFT(0, direction, localdata);
593  }
594 
595 
596 
597  // skip final assignment and compress if we used g as final temporary
598  if (temp != &g) {
599 
600  // Now assign into output Field, and compress last temp's storage:
601  g = (*temp);
602  if (this->compressTemps() && temp != &f) *temp = 0;
603 
604  }
605 
606  // Normalize:
607  if (direction == +1)
608  g *= Complex_t(this->getNormFact(), 0.0);
609 
610  return;
611 }
612 
613 //-----------------------------------------------------------------------------
614 // "in-place" FFT; specify +1 or -1 to indicate forward or inverse transform.
615 //-----------------------------------------------------------------------------
616 
617 template <class T>
618 void
620  int direction,
622 {
623 
624  // indicate we're doing another FFT
625  // INCIPPLSTAT(incFFTs);
626 
627  // Check domain of incoming Field
628  const Layout_t& in_layout = f.getLayout();
629  const Domain_t& in_dom = in_layout.getDomain();
630  PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
631 
632  // Field* for temp Field management:
633  ComplexField_t* temp = &f;
634  // Local work array passed to FFT:
635  Complex_t* localdata;
636 
637 
638  // Now do the serial transforms along this dimension:
639 
640  // get domain for comparison
641  const Domain_t& temp_dom = tempLayouts_m->getDomain();
642 
643  bool skipTranspose;
644  // we might be able
645  // to skip the transpose into the first temporary Field
646 
647  // check that zeroth axis is the same, has one vnode,
648  // and that there are no guard cells
649  skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
650  (in_dom[0].length() == temp_dom[0].length()) &&
651  (in_layout.numVnodes() == 1) &&
652  (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
653 
654  if (!skipTranspose) {
655  // assign to Field with proper layout
656  (*tempFields_m) = (*temp);
657  temp = tempFields_m; // Field* management aid
658  }
659 
660 
661 
662 
663  // should be only one LField!
664  typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
665  if (l_i != temp->end_if()) {
666  // Get the LField
667  ComplexLField_t* ldf = (*l_i).second.get();
668  // make sure we are uncompressed
669  ldf->Uncompress();
670  // get the raw data pointer
671  localdata = ldf->getP();
672 
673  // Do the 1D FFT:
674  this->getEngine().callFFT(0, direction, localdata);
675  }
676 
677 
678 
679  // skip final assignment and compress if we used f as final temporary
680  if (temp != &f) {
681 
682  // Now assign back into original Field, and compress last temp's storage:
683  f = (*temp);
684  if (this->compressTemps()) *temp = 0;
685 
686  }
687 
688  // Normalize:
689  if (direction == +1)
690  f *= Complex_t(this->getNormFact(), 0.0);
691 
692  return;
693 }
694 
695 
696 
697 //=============================================================================
698 // FFT RCTransform Constructors
699 //=============================================================================
700 
701 //-----------------------------------------------------------------------------
702 // Create a new FFT object of type RCTransform, with a
703 // given domain. Also specify which dimensions to transform along.
704 // Note that RC transform of a real array of length n results in a
705 // complex array of length n/2+1.
706 //-----------------------------------------------------------------------------
707 
708 template <size_t Dim, class T>
710  const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
711  const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
712  const bool transformTheseDims[Dim], const bool& compressTemps)
713 : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain,
714  transformTheseDims, compressTemps),
715  complexDomain_m(cdomain), serialAxes_m(1)
716 {
717  // construct array of axis lengths
718  size_t nTransformDims = this->numTransformDims();
719  int* lengths = new int[nTransformDims];
720  size_t d;
721  for (d=0; d<nTransformDims; ++d)
722  lengths[d] = rdomain[this->activeDimension(d)].length();
723 
724  // construct array of transform types for FFT Engine, compute normalization
725  int* transformTypes = new int[nTransformDims];
726  T& normFact = this->getNormFact();
727  normFact = 1.0;
728  transformTypes[0] = FFTBase<Dim,T>::rcFFT; // first transform is real-to-complex
729  normFact /= lengths[0];
730  for (d=1; d<nTransformDims; ++d) {
731  transformTypes[d] = FFTBase<Dim,T>::ccFFT; // all other transforms are complex-to-complex
732  normFact /= lengths[d];
733  }
734 
735  // set up FFT Engine
736  this->getEngine().setup(nTransformDims, transformTypes, lengths);
737  delete [] transformTypes;
738  delete [] lengths;
739 
740  // set up the temporary fields
741  setup();
742 }
743 
744 //-----------------------------------------------------------------------------
745 // Create a new FFT object of type RCTransform, with
746 // given real and complex domains. Default: transform along all dimensions.
747 //-----------------------------------------------------------------------------
748 
749 template <size_t Dim, class T>
750 FFT<RCTransform,Dim,T>::FFT(
751  const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
752  const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
753  const bool& compressTemps,
754  int serialAxes)
755 : FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain, compressTemps),
756  complexDomain_m(cdomain), serialAxes_m(serialAxes)
757 {
758  // Tau profiling
759 
760  // construct array of axis lengths
761  int lengths[Dim];
762  size_t d;
763  for (d=0; d<Dim; ++d)
764  lengths[d] = rdomain[d].length();
765 
766  // construct array of transform types for FFT Engine, compute normalization
767  int transformTypes[Dim];
768  T& normFact = this->getNormFact();
769  normFact = 1.0;
770  transformTypes[0] = FFTBase<Dim,T>::rcFFT; // first transform is real-to-complex
771  normFact /= lengths[0];
772  for (d=1; d<Dim; ++d) {
773  transformTypes[d] = FFTBase<Dim,T>::ccFFT; // all other transforms are complex-to-complex
774  normFact /= lengths[d];
775  }
776 
777  // set up FFT Engine
778  this->getEngine().setup(Dim, transformTypes, lengths);
779 
780  // set up the temporary fields
781  setup();
782 }
783 
784 //-----------------------------------------------------------------------------
785 // setup performs all the initializations necessary after the transform
786 // directions have been specified.
787 //-----------------------------------------------------------------------------
788 
789 template <size_t Dim, class T>
790 void
792 
793  // Tau profiling
794 
795 
796 
797 
798  PAssert_GT(serialAxes_m, 0);
799  PAssert_LT((size_t) serialAxes_m, Dim);
800 
801  size_t d, d2, activeDim;
802  size_t nTransformDims = this->numTransformDims();
803 
804  // Set up the arrays of temporary Fields and FieldLayouts:
805 
806  // make first dimension(s) always SERIAL, all other dimensions parallel
807  // for the real FFT; make first serialAxes_m axes serial for others
808  e_dim_tag serialParallel[Dim];
809  e_dim_tag NserialParallel[Dim];
810  for (d=0; d < Dim; ++d) {
811  serialParallel[d] = (d == 0 ? SERIAL : PARALLEL);
812  NserialParallel[d] = (d < (size_t) serialAxes_m ? SERIAL : PARALLEL);
813  }
814 
815  // check that domain lengths agree between real and complex domains
816  const Domain_t& domain = this->getDomain();
817  activeDim = this->activeDimension(0);
818  bool match = true;
819  for (d=0; d<Dim; ++d) {
820  if (d == activeDim) {
821  // real array length n, complex array length n/2+1
822  if ( complexDomain_m[d].length() !=
823  (domain[d].length()/2 + 1) ) match = false;
824  }
825  else {
826  // real and complex arrays should be same length for all other dims
827  if (complexDomain_m[d].length() != domain[d].length()) match = false;
828  }
829  }
830  PInsist(match,
831  "Domains provided for real and complex Fields are incompatible!");
832 
833  // allocate arrays of temp fields and layouts for complex fields
834  tempLayouts_m = new Layout_t*[nTransformDims];
835  tempFields_m = new ComplexField_t*[nTransformDims];
836 
837  // set up the single temporary real field, with first dim serial, others par
838 
839  // make new domains with permuted Indexes, activeDim first
840  Domain_t ndip;
841  Domain_t ndipc;
842  ndip[0] = domain[activeDim];
843  ndipc[0] = complexDomain_m[activeDim];
844  for (d=1; d<Dim; ++d) {
845  size_t nextDim = activeDim + d;
846  if (nextDim >= Dim) nextDim -= Dim;
847  ndip[d] = domain[nextDim];
848  ndipc[d] = complexDomain_m[nextDim];
849  }
850 
851  // generate layout and object for temporary real field
852  tempRLayout_m = new Layout_t(ndip, serialParallel, this->transVnodes());
853  tempRField_m = new RealField_t(*tempRLayout_m);
854 
855  // generate layout and object for first temporary complex Field
856  tempLayouts_m[0] = new Layout_t(ndipc, serialParallel, this->transVnodes());
857  tempFields_m[0] = new ComplexField_t(*tempLayouts_m[0]);
858 
859  // determine the order in which dimensions will be transposed. Put
860  // the transposed dims first, and the others at the end.
861  int fftorder[Dim], tmporder[Dim];
862  int nofft = nTransformDims;
863  for (d=0; d < nTransformDims; ++d)
864  fftorder[d] = this->activeDimension(d);
865  for (d=0; d < Dim; ++d) {
866  // see if the dth dimension is one to transform
867  bool active = false;
868  for (d2=0; d2 < nTransformDims; ++d2) {
869  if (this->activeDimension(d2) == d) {
870  active = true;
871  break;
872  }
873  }
874 
875  if (!active)
876  // no it is not; put it at the bottom of list
877  fftorder[nofft++] = d;
878  }
879 
880  // But since the first FFT is done on a S,[P,P,...] field, permute
881  // the order of this to get the first activeDimension at the end.
882  nofft = fftorder[0];
883  for (d=0; d < (Dim - 1); ++d)
884  fftorder[d] = fftorder[d+1];
885  fftorder[Dim-1] = nofft;
886 
887  // now construct the remaining temporary complex fields
888 
889  // loop through and create actual permuted layouts, and also fields
890  size_t dim = 1; // already have one temp field
891  while (dim < nTransformDims) {
892 
893  int sp;
894  for (sp=0; sp < serialAxes_m && dim < nTransformDims; ++sp, ++dim) {
895 
896  // make new domain with permuted Indexes
897  for (d=0; d < Dim; ++d)
898  ndip[d] = complexDomain_m[fftorder[d]];
899 
900  // generate layout and object for temporary complex Field
901  tempLayouts_m[dim] = new Layout_t(ndip, NserialParallel, this->transVnodes());
902  tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
903 
904  // permute the fft order for the first 'serialAxes_m' axes
905  if (serialAxes_m > 1) {
906  tmporder[0] = fftorder[0];
907  for (d=0; d < (size_t) (serialAxes_m-1); ++d)
908  fftorder[d] = fftorder[d+1];
909  fftorder[serialAxes_m - 1] = tmporder[0];
910  }
911  }
912 
913  // now, permute ALL the axes by serialAxes_m steps, to get the next
914  // set of axes in the first n serial slots
915  for (d=0; d < Dim; ++d)
916  tmporder[d] = fftorder[d];
917  for (d=0; d < Dim; ++d)
918  fftorder[d] = tmporder[(d + serialAxes_m) % Dim];
919  }
920 }
921 
922 
923 //-----------------------------------------------------------------------------
924 // Destructor
925 //-----------------------------------------------------------------------------
926 
927 template <size_t Dim, class T>
929 
930  // Tau profiling
931 
932 
933  // delete temporary fields and layouts
934  size_t nTransformDims = this->numTransformDims();
935  for (size_t d=0; d<nTransformDims; ++d) {
936  delete tempFields_m[d];
937  delete tempLayouts_m[d];
938  }
939  delete [] tempFields_m;
940  delete [] tempLayouts_m;
941  delete tempRField_m;
942  delete tempRLayout_m;
943 
944 }
945 
946 //-----------------------------------------------------------------------------
947 // real-to-complex fft; direction is +1 or -1
948 //-----------------------------------------------------------------------------
949 
950 /*
951  gpu version of fft if dks enabled transfers realfield_t to gpu, allocates memory
952  on gpu for result field (complex), does the fft and returns
953 */
954 #ifdef IPPL_DKS
955 template <size_t Dim, class T>
956 void
958  int direction,
960  void* real_ptr,
961  void* comp_ptr,
962  DKSOPAL &dksbase,
963  int streamId,
964  const bool& constInput)
965 {
966  //check the domain of incoming field
967  const Layout_t& in_layout = f.getLayout();
968  const Domain_t& in_dom = in_layout.getDomain();
969 
970  PAssert_EQ( this->checkDomain(this->getDomain(), in_dom), true);
971 
972  size_t nTransformDims = this->numTransformDims();
973 
974  //*** just use f field as is and keep decomposition as defined in input file ***//
975  RealField_t* tempR = &f;
976 
977  typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
978 
979  // get the lfields
980  RealLField_t* rldf = (*rl_i).second.get();
981  // make sure we are uncompressed
982  rldf->Uncompress();
983  // get the raw data pointers
984  T* localreal = rldf->getP();
985 
988  int NR_l[Dim], NR_g[Dim], NC_g[Dim];
989  for (size_t d = 0; d < Dim; d++) {
990  NR_l[d] = (int)rldf->size(d);
991  NR_g[d] = (int)tempR->getDomain()[d].length();
992  NC_g[d] = NR_g[d];
993  }
994  NC_g[0] = (NC_g[0] / 2) + 1;
995 
996  //get global and local domain sizes
997  int sizereal = NR_l[0]*NR_l[1]*NR_l[2];
998  int totalreal = tempR->getDomain().size();
999  //int totalcomp = NC_g[0]*NC_g[1]*NC_g[2];
1000 
1001  //local vnodes get starting position for real field subdomains
1002  int *idx = new int[Ippl::getNodes()];
1003  int *idy = new int[Ippl::getNodes()];
1004  int *idz = new int[Ippl::getNodes()];
1005  for (typename Layout_t::const_iterator_iv i_s = tempR->getLayout().begin_iv(); i_s != tempR->getLayout().end_iv(); ++i_s) {
1006  Domain_t tmp = (*i_s).second->getDomain();
1007  int node = (*i_s).second->getNode();
1008  idx[node] = tmp[0].min();
1009  idy[node] = tmp[1].min();
1010  idz[node] = tmp[2].min();
1011  }
1012 
1013  //remote vnodes get starting position for real field subdomains
1014  for (typename Layout_t::iterator_dv remote = tempR->getLayout().begin_rdv(); remote != tempR->getLayout().end_rdv(); ++remote) {
1015  Domain_t tmp = (*remote).second->getDomain();
1016  int node = (*remote).second->getNode();
1017  idx[node] = tmp[0].min();
1018  idy[node] = tmp[1].min();
1019  idz[node] = tmp[2].min();
1020  }
1021 
1022  int id[3] = {idx[Ippl::myNode()], idy[Ippl::myNode()], idz[Ippl::myNode()]};
1023 
1024  if (Ippl::myNode() == 0) {
1025 
1026  //if only one node is working do dksbase write otherwise use cuda aware mpi
1027  if (Ippl::getNodes() > 1) {
1028 
1029  if (streamId == -1) {
1030  //gather data from different mpi processes directly into gpu buffer
1031  dksbase.gather3DData( real_ptr, localreal, sizereal, MPI_DOUBLE, NR_g, NR_l,
1032  idx, idy, idz,
1034  } else {
1035  //gather data using CUDA IPC for async data transfer
1036  dksbase.gather3DDataAsync<T>( real_ptr, localreal, NR_g, NR_l, id, streamId);
1037  //sync needed to wait for data transfer to finish
1038  dksbase.syncDevice();
1039  MPI_Barrier(Ippl::getComm());
1040  }
1041 
1042  } else {
1043  //write real data to device
1044  dksbase.writeDataAsync<T>(real_ptr, localreal, totalreal, streamId);
1045  //dksbase.writeData<T>(real_ptr, localreal, totalreal);
1046  }
1047 
1048  //call real to complex fft
1049  dksbase.callR2CFFT(real_ptr, comp_ptr, nTransformDims, (int*)NR_g, streamId);
1050 
1051  //normalize fft
1052  if (direction == +1)
1053  dksbase.callNormalizeFFT(comp_ptr, nTransformDims, (int*) NC_g, streamId);
1054 
1055  } else {
1056  if (streamId == -1) {
1057  //send data via gatherv to gpu controled by root process
1058  dksbase.gather3DData( NULL, localreal, sizereal, MPI_DOUBLE, NR_g, NR_l, idx, idy, idz,
1060  } else {
1061  //transfer data to device memory
1062  dksbase.gather3DDataAsync<T>( real_ptr, localreal, NR_g, NR_l, id, streamId);
1063  //sync needed to wait for data transfer to finish
1064  dksbase.syncDevice();
1065  MPI_Barrier(Ippl::getComm());
1066  }
1067 
1068  }
1069  /* end dks part */
1070 
1071  // finish timing the whole mess
1072 }
1073 #endif
1074 
1075 template <size_t Dim, class T>
1076 void
1078  int direction,
1081  const bool& constInput)
1082 {
1083  FFTDBG(Inform tmsg("FFT-RC-forward"));
1084 
1085  // time the whole mess
1086 
1087  // indicate we're doing another fft
1088  // incipplstat(incffts);
1089 
1090  // check domain of incoming fields
1091  const Layout_t& in_layout = f.getLayout();
1092  const Domain_t& in_dom = in_layout.getDomain();
1093  const Layout_t& out_layout = g.getLayout();
1094  const Domain_t& out_dom = out_layout.getDomain();
1095 
1096 
1097  PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
1098  this->checkDomain(complexDomain_m,out_dom), true);
1099 
1100  // common loop iterate and other vars:
1101  size_t d;
1102  size_t idim; // idim loops over the number of transform dims.
1103  size_t nTransformDims = this->numTransformDims();
1104 
1105  // handle first rc transform separately
1106  idim = 0;
1107 
1108  RealField_t* tempR = tempRField_m; // field* management aid
1109  if (!constInput) {
1110  // see if we can use input field f as a temporary
1111  bool skipTemp = true;
1112 
1113  // more rigorous match required here; check that layouts are identical
1114  if ( !(in_layout == *tempRLayout_m) ) {
1115  skipTemp = false;
1116  } else {
1117  // make sure distributions match
1118  for (d=0; d<Dim; ++d)
1119  if (in_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
1120  skipTemp = false;
1121 
1122  // make sure vnode counts match
1123  if (in_layout.numVnodes() != tempRLayout_m->numVnodes())
1124  skipTemp = false;
1125 
1126  // also make sure there are no guard cells
1127  if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
1128  skipTemp = false;
1129  }
1130 
1131  // if we can skip using this temporary, set the tempr pointer to the
1132  // original incoming field. otherwise, it will stay pointing at the
1133  // temporary real field, and we'll need to do a transpose of the data
1134  // from the original into the temporary.
1135  if (skipTemp)
1136  tempR = &f;
1137  }
1138 
1139  // if we're not using input as a temporary ...
1140  if (tempR != &f) {
1141 
1142 
1143  // transpose AND PERMUTE TO REAL FIELD WITH TRANSFORM DIM FIRST
1144  FFTDBG(tmsg << "doing transpose of real field into temporary ");
1145  FFTDBG(tmsg << "with layout = " << tempR->getLayout() << std::endl);
1146  (*tempR)[tempR->getDomain()] = f[in_dom];
1147 
1148  }
1149 
1150  // field* for temp field management:
1151  ComplexField_t* temp = tempFields_m[0];
1152 
1153  // see if we can put final result directly into g. this is useful if
1154  // we're doing just a 1d fft of one dimension of a multi-dimensional field.
1155  if (nTransformDims == 1) { // only a single rc transform
1156  bool skipTemp = true;
1157 
1158  // more rigorous match required here; check that layouts are identical
1159  if (!(out_layout == *tempLayouts_m[0])) {
1160  skipTemp = false;
1161  } else {
1162  for (d=0; d<Dim; ++d)
1163  if (out_layout.getDistribution(d) !=
1164  tempLayouts_m[0]->getDistribution(d))
1165  skipTemp = false;
1166 
1167  if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
1168  skipTemp = false;
1169 
1170  // also make sure there are no guard cells
1171  if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
1172  skipTemp = false;
1173 
1174  // if we can skip using the temporary, set the pointer to the output
1175  // field for the first fft to the second provided field (g)
1176  if (skipTemp)
1177  temp = &g;
1178  }
1179  }
1180 
1181  FFTDBG(tmsg << "doing real->complex fft of first dimension ..." << std::endl);
1182 
1183  // loop over all the vnodes, working on the lfield in each.
1184  typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
1185  typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
1186  for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
1187  // get the lfields
1188  RealLField_t* rldf = (*rl_i).second.get();
1189  ComplexLField_t* cldf = (*cl_i).second.get();
1190 
1191  // make sure we are uncompressed
1192  rldf->Uncompress();
1193  cldf->Uncompress();
1194 
1195  // get the raw data pointers
1196  T* localreal = rldf->getP();
1197  Complex_t* localcomp = cldf->getP();
1198 
1199  // number of strips should be the same for real and complex lfields!
1200  int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
1201  for (d=1; d<Dim; ++d)
1202  nstrips *= rldf->size(d);
1203 
1204 
1205  for (int istrip=0; istrip<nstrips; ++istrip) {
1206  // move the data into the complex strip, which is two reals longer
1207  for (int ilen=0; ilen<lengthreal; ilen+=2) {
1208  localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
1209  }
1210 
1211  // do the 1d real-to-complex fft:
1212  // note that real-to-complex fft direction is always +1
1213  this->getEngine().callFFT(idim, +1, localcomp);
1214 
1215  // advance the data pointers
1216  localreal += lengthreal;
1217  localcomp += lengthcomp;
1218  } // loop over 1d strips
1219 
1220  } // loop over all the lfields
1221 
1222  // compress temporary storage
1223  if (this->compressTemps() && tempR != &f)
1224  *tempR = 0;
1225 
1226  // now proceed with the other complex-to-complex transforms
1227 
1228  // local work array passed to fft:
1229  Complex_t* localdata;
1230 
1231  // loop over the remaining dimensions to be transformed:
1232  for (idim = 1; idim < nTransformDims; ++idim) {
1233 
1234  bool skipTranspose = false;
1235 
1236  // if this is the last transform dimension, we might be able
1237  // to skip the last temporary and transpose right into g
1238  if (idim == nTransformDims-1) {
1239  // get the domain for comparison
1240  const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
1241 
1242  // make sure there are no guard cells, and that the first
1243  // axis matches what we expect and is serial. only need to
1244  // check first axis since we're just fft'ing that one dimension.
1245  skipTranspose = (g.getGC() == FFT<RCTransform,Dim,T>::nullGC &&
1246  out_dom[0].sameBase(last_dom[0]) &&
1247  out_dom[0].length() == last_dom[0].length() &&
1248  out_layout.getDistribution(0) == SERIAL);
1249  }
1250 
1251  if (!skipTranspose) {
1252  // transpose and permute to field with transform dim first
1253  FFTDBG(tmsg << "doing complex->complex transpose into field ");
1254  FFTDBG(tmsg << "with layout = " << tempFields_m[idim]->getLayout());
1255  FFTDBG(tmsg << std::endl);
1256 
1257  (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
1258  (*temp)[temp->getLayout().getDomain()];
1259 
1260  // compress out previous iterate's storage:
1261  if (this->compressTemps())
1262  *temp = 0;
1263  temp = tempFields_m[idim]; // field* management aid
1264 
1265  } else if (idim == nTransformDims-1) {
1266  // last transform and we can skip the last temporary field
1267  // so do the transpose here using g instead
1268 
1269  // transpose and permute to field with transform dim first
1270  FFTDBG(tmsg << "doing final complex->complex transpose ");
1271  FFTDBG(tmsg << "into return ");
1272  FFTDBG(tmsg << "with layout = " << g.getLayout());
1273  FFTDBG(tmsg << std::endl);
1274 
1275  g[out_dom] = (*temp)[temp->getLayout().getDomain()];
1276 
1277  // compress out previous iterate's storage:
1278  if (this->compressTemps())
1279  *temp = 0;
1280  temp = &g; // field* management aid
1281 
1282  }
1283 
1284 
1285  FFTDBG(tmsg << "doing complex->complex fft of other dimension .." << std::endl);
1286 
1287  // loop over all the vnodes, working on the lfield in each.
1288  typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
1289  for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
1290  // get the lfield
1291  ComplexLField_t* ldf = (*l_i).second.get();
1292 
1293  // make sure we are uncompressed
1294  ldf->Uncompress();
1295 
1296  // get the raw data pointer
1297  localdata = ldf->getP();
1298 
1299  // do 1d complex-to-complex fft's on all the strips in the lfield:
1300  int nstrips = 1, length = ldf->size(0);
1301  for (d=1; d<Dim; ++d)
1302  nstrips *= ldf->size(d);
1303 
1304  for (int istrip=0; istrip<nstrips; ++istrip) {
1305  // do the 1D FFT:
1306  //this->getEngine().callFFT(idim, direction, localdata);
1307  this->getEngine().callFFT(idim, +1, localdata);
1308 
1309  // advance the data pointer
1310  localdata += length;
1311  } // loop over 1D strips
1312  } // loop over all the LFields
1313 
1314  } // loop over all transformed dimensions
1315 
1316 
1317  // skip final assignment and compress if we used g as final temporary
1318  if (temp != &g) {
1319 
1320 
1321  // Now assign into output Field, and compress last temp's storage:
1322  FFTDBG(tmsg << "Doing cleanup complex->complex transpose ");
1323  FFTDBG(tmsg << "into return ");
1324  FFTDBG(tmsg << "with layout = " << g.getLayout());
1325  FFTDBG(tmsg << std::endl);
1326 
1327  g[out_dom] = (*temp)[temp->getLayout().getDomain()];
1328 
1329  if (this->compressTemps()) *temp = 0;
1330 
1331  }
1332 
1333  // Normalize:
1334  if (direction == +1) g = g * this->getNormFact();
1335 
1336  // finish timing the whole mess
1337 
1338 }
1339 //#endif
1340 
1341 //-----------------------------------------------------------------------------
1342 // RC FFT; opposite direction, from complex to real
1343 //-----------------------------------------------------------------------------
1344 
1345 
1346 /*
1347  GPU version of CR inverse FFT uses complex field stored on GPU to perform inverse fft
1348  transfers back the real field
1349 */
1350 #ifdef IPPL_DKS
1351 template <size_t Dim, class T>
1352 void
1354  int direction,
1355  RealField_t& g,
1356  void* real_ptr,
1357  void* comp_ptr,
1358  DKSOPAL &dksbase,
1359  int streamId,
1360  const bool& constInput)
1361 {
1362 
1363  const Layout_t& out_layout = g.getLayout();
1364  const Domain_t& out_dom = out_layout.getDomain();
1365 
1366  //if (Ippl::myNode() == 0)
1367  // std::cout << "DEBUG INVERSE g: " << g.getLayout() << std::endl;
1368 
1369  PAssert_EQ( this->checkDomain(this->getDomain(),out_dom), true);
1370 
1371  size_t nTransformDims = this->numTransformDims();
1372 
1373  // see if we can put final result directly into g
1374  RealField_t* tempR;
1375 
1376  //***Use g as is and keep decomposition as defined in input file***/
1377  tempR = &g;
1378 
1379  typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
1380 
1381  // Get the LFields
1382  RealLField_t* rldf = (*rl_i).second.get();
1383  // make sure we are uncompressed
1384  rldf->Uncompress();
1385 
1386  // get the raw data pointers
1387  T* localreal = rldf->getP();
1388 
1389  //get sizes of global domains and local subdomains
1390  int NR_l[Dim], NR_g[Dim], NC_g[Dim];
1391  for (size_t d=0; d<Dim; d++) {
1392  NR_l[d] = (int)rldf->size(d);
1393  NR_g[d] = (int)tempR->getDomain()[d].length();
1394  NC_g[d] = NR_g[d];
1395  }
1396  NC_g[0] = (NC_g[0] / 2) + 1;
1397 
1398  //get sizes of global and local domains
1399  int totalreal = tempR->getDomain().size();
1400 
1401  //local vnodes get starting position for real field subdomains
1402  int *idx = new int[Ippl::getNodes()];
1403  int *idy = new int[Ippl::getNodes()];
1404  int *idz = new int[Ippl::getNodes()];
1405  for (typename Layout_t::const_iterator_iv i_s = tempR->getLayout().begin_iv(); i_s != tempR->getLayout().end_iv(); ++i_s) {
1406  Domain_t tmp = (*i_s).second->getDomain();
1407  int node = (*i_s).second->getNode();
1408  idx[node] = tmp[0].min();
1409  idy[node] = tmp[1].min();
1410  idz[node] = tmp[2].min();
1411  }
1412 
1413  //remote vnodes get starting position for real field subdomains
1414  for (typename Layout_t::iterator_dv remote = tempR->getLayout().begin_rdv(); remote != tempR->getLayout().end_rdv(); ++remote) {
1415  Domain_t tmp = (*remote).second->getDomain();
1416  int node = (*remote).second->getNode();
1417  idx[node] = tmp[0].min();
1418  idy[node] = tmp[1].min();
1419  idz[node] = tmp[2].min();
1420  }
1421 
1422  int id[3] = {idx[Ippl::myNode()], idy[Ippl::myNode()], idz[Ippl::myNode()]};
1423 
1424  /* DKS part */
1425  if (Ippl::myNode() == 0) {
1426 
1427  //call real to complex fft
1428  dksbase.callC2RFFT(real_ptr, comp_ptr, nTransformDims, (int*)NR_g, streamId);
1429 
1430  //normalize
1431  if (direction == +1)
1432  dksbase.callNormalizeC2RFFT(real_ptr, nTransformDims, (int*)NR_g, streamId);
1433 
1434  if (Ippl::getNodes() > 1) {
1435  dksbase.syncDevice();
1436  MPI_Barrier(Ippl::getComm());
1437  /*
1438  dksbase.scatter3DData(real_ptr, localreal, sizereal, MPI_DOUBLE, NR_g, NR_l,
1439  idx, idy, idz, Ippl::getNodes(), Ippl::myNode(),
1440  0, Ippl::getComm() );
1441  */
1442  dksbase.scatter3DDataAsync<T>(real_ptr, localreal, NR_g, NR_l, id);
1443  MPI_Barrier(Ippl::getComm());
1444  dksbase.syncDevice();
1445  MPI_Barrier(Ippl::getComm());
1446  } else {
1447  //read real data from device
1448  dksbase.readDataAsync<T>(real_ptr, localreal, totalreal, streamId);
1449  dksbase.syncDevice();
1450  //dksbase.readData<T>(real_ptr, localreal, totalreal);
1451  }
1452 
1453  } else {
1454 
1455  //receive data from GPU controled by root process
1456  MPI_Barrier(Ippl::getComm());
1457  /*
1458  dksbase.scatter3DData(NULL, localreal, sizereal, MPI_DOUBLE, NR_g, NR_l, idx, idy, idz,
1459  Ippl::getNodes(), Ippl::myNode(), 0, Ippl::getComm() );
1460  */
1461  dksbase.scatter3DDataAsync<T>(real_ptr, localreal, NR_g, NR_l, id);
1462  MPI_Barrier(Ippl::getComm());
1463  dksbase.syncDevice();
1464  MPI_Barrier(Ippl::getComm());
1465  }
1466  /* end dks part */
1467 
1468  // Now assign into output Field, and compress last temp's storage:
1469  if (tempR != &g) {
1470  g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
1471  if (this->compressTemps()) *tempR = 0;
1472  }
1473 
1474  // finish timing the whole mess
1475 }
1476 #endif
1477 
1478 
1479 template <size_t Dim, class T>
1480 void
1482  int direction,
1485  const bool& constInput)
1486 {
1487  FFTDBG(Inform tmsg("FFT-RC-reverse"));
1488 
1489  // indicate we're doing another FFT
1490  // INCIPPLSTAT(incFFTs);
1491 
1492  // Check domain of incoming Fields
1493  const Layout_t& in_layout = f.getLayout();
1494  const Domain_t& in_dom = in_layout.getDomain();
1495  const Layout_t& out_layout = g.getLayout();
1496  const Domain_t& out_dom = out_layout.getDomain();
1497  PAssert_EQ( this->checkDomain(complexDomain_m,in_dom) &&
1498  this->checkDomain(this->getDomain(),out_dom), true);
1499 
1500  // Common loop iterate and other vars:
1501  size_t d;
1502  size_t idim; // idim loops over the number of transform dims.
1503  size_t nTransformDims = this->numTransformDims();
1504 
1505  // proceed with the complex-to-complex transforms
1506 
1507  // Field* for temp Field management:
1508  ComplexField_t* temp = &f;
1509 
1510  // Local work array passed to FFT:
1511  Complex_t* localdata;
1512 
1513  // Loop over all dimensions to be transformed except last one:
1514  for (idim = nTransformDims-1; idim != 0; --idim) {
1515 
1516  // Now do the serial transforms along this dimension:
1517 
1518  bool skipTranspose = false;
1519  // if this is the first transform dimension, we might be able
1520  // to skip the transpose into the first temporary Field
1521  if (idim == nTransformDims-1 && !constInput) {
1522  // get domain for comparison
1523  const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
1524  // check that zeroth axis is the same and is serial
1525  // and that there are no guard cells
1526  skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
1527  (in_dom[0].length() == first_dom[0].length()) &&
1528  (in_layout.getDistribution(0) == SERIAL) &&
1529  (f.getGC() == FFT<RCTransform,Dim,T>::nullGC) );
1530  }
1531 
1532  if (!skipTranspose) {
1533  // transpose and permute to Field with transform dim first
1534  FFTDBG(tmsg << "Doing complex->complex transpose into field ");
1535  FFTDBG(tmsg << "with layout = "<<tempFields_m[idim]->getLayout()<<std::endl);
1536  (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
1537  (*temp)[temp->getLayout().getDomain()];
1538 
1539  // Compress out previous iterate's storage:
1540  if (this->compressTemps() && temp != &f)
1541  *temp = 0;
1542  temp = tempFields_m[idim]; // Field* management aid
1543  }
1544 
1545  FFTDBG(tmsg << "Doing complex->complex fft of other dimension .." << std::endl);
1546 
1547 
1548  // Loop over all the Vnodes, working on the LField in each.
1549  typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
1550  for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
1551 
1552  // Get the LField
1553  ComplexLField_t* ldf = (*l_i).second.get();
1554 
1555  // make sure we are uncompressed
1556  ldf->Uncompress();
1557 
1558  // get the raw data pointer
1559  localdata = ldf->getP();
1560 
1561  // Do 1D complex-to-complex FFT's on all the strips in the LField:
1562  int nstrips = 1, length = ldf->size(0);
1563  for (d=1; d<Dim; ++d)
1564  nstrips *= ldf->size(d);
1565 
1566  for (int istrip=0; istrip<nstrips; ++istrip) {
1567  // Do the 1D FFT:
1568  //this->getEngine().callFFT(idim, direction, localdata);
1569  this->getEngine().callFFT(idim, -1, localdata);
1570 
1571  // advance the data pointer
1572  localdata += length;
1573  } // loop over 1D strips
1574  } // loop over all the LFields
1575 
1576  } // loop over all transformed dimensions
1577 
1578  // handle last CR transform separately
1579  idim = 0;
1580 
1581  // see if we can put final result directly into g
1582  RealField_t* tempR;
1583  bool skipTemp = true;
1584 
1585  // more rigorous match required here; check that layouts are identical
1586  if (!(out_layout == *tempRLayout_m)) {
1587  skipTemp = false;
1588  } else {
1589  for (d=0; d<Dim; ++d)
1590  if (out_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
1591  skipTemp = false;
1592 
1593  if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
1594  skipTemp = false;
1595 
1596  // also make sure there are no guard cells
1597  if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
1598  skipTemp = false;
1599  }
1600 
1601  if (skipTemp)
1602  tempR = &g;
1603  else
1604  tempR = tempRField_m;
1605 
1606  skipTemp = true;
1607  if (nTransformDims == 1 && !constInput) {
1608  // only one CR transform
1609  // see if we really need to transpose input data
1610  // more rigorous match required here; check that layouts are identical
1611  if (!(in_layout == *tempLayouts_m[0])) {
1612  skipTemp = false;
1613  } else {
1614  for (d=0; d<Dim; ++d)
1615  if (in_layout.getDistribution(d) !=
1616  tempLayouts_m[0]->getDistribution(d))
1617  skipTemp = false;
1618 
1619  if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
1620  skipTemp = false;
1621 
1622  // also make sure there are no guard cells
1623  if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
1624  skipTemp = false;
1625  }
1626  } else { // cannot skip transpose
1627  skipTemp = false;
1628  }
1629 
1630 
1631  if (!skipTemp) {
1632  // transpose and permute to complex Field with transform dim first
1633  FFTDBG(tmsg << "Doing final complex->complex transpose into field ");
1634  FFTDBG(tmsg << "with layout = "<<tempFields_m[0]->getLayout()<<std::endl);
1635  (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
1636  (*temp)[temp->getLayout().getDomain()];
1637 
1638  // compress previous iterates storage
1639  if (this->compressTemps() && temp != &f)
1640  *temp = 0;
1641  temp = tempFields_m[0];
1642  }
1643 
1644 
1645  FFTDBG(tmsg << "Doing complex->real fft of final dimension ..." << std::endl);
1646 
1647  // Loop over all the Vnodes, working on the LField in each.
1648  typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
1649  typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
1650  for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
1651  // Get the LFields
1652  RealLField_t* rldf = (*rl_i).second.get();
1653  ComplexLField_t* cldf = (*cl_i).second.get();
1654 
1655  // make sure we are uncompressed
1656  rldf->Uncompress();
1657  cldf->Uncompress();
1658 
1659  // get the raw data pointers
1660  T* localreal = rldf->getP();
1661  Complex_t* localcomp = cldf->getP();
1662 
1663  // number of strips should be the same for real and complex LFields!
1664  int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
1665  for (d=1; d<Dim; ++d)
1666  nstrips *= rldf->size(d);
1667 
1668  for (int istrip=0; istrip<nstrips; ++istrip) {
1669  // Do the 1D complex-to-real FFT:
1670  // note that complex-to-real FFT direction is always -1
1671  this->getEngine().callFFT(idim, -1, localcomp);
1672 
1673  // move the data into the real strip, which is two reals shorter
1674  for (int ilen=0; ilen<lengthreal; ilen+=2) {
1675  localreal[ilen] = real(localcomp[ilen/2]);
1676  localreal[ilen+1] = imag(localcomp[ilen/2]);
1677  }
1678 
1679  // advance the data pointers
1680  localreal += lengthreal;
1681  localcomp += lengthcomp;
1682  } // loop over 1D strips
1683  } // loop over all the LFields
1684 
1685  // compress previous iterates storage
1686  if (this->compressTemps() && temp != &f)
1687  *temp = 0;
1688 
1689  // skip final assignment and compress if we used g as final temporary
1690  if (tempR != &g) {
1691 
1692 
1693  // Now assign into output Field, and compress last temp's storage:
1694  FFTDBG(tmsg << "Doing cleanup real->real transpose into return ");
1695  FFTDBG(tmsg << "with layout = " << g.getLayout() << std::endl);
1696  g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
1697 
1698  if (this->compressTemps())
1699  *tempR = 0;
1700 
1701  }
1702 
1703  // Normalize:
1704  if (direction == +1) g = g * this->getNormFact();
1705 
1706  // finish up timing the whole mess
1707 }
1708 
1709 //=============================================================================
1710 // 1D FFT RCTransform Constructors
1711 //=============================================================================
1712 
1713 //-----------------------------------------------------------------------------
1714 // Create a new FFT object of type RCTransform, with a
1715 // given domain. Also specify which dimensions to transform along.
1716 // Note that RC transform of a real array of length n results in a
1717 // complex array of length n/2+1.
1718 //-----------------------------------------------------------------------------
1719 
1720 template <class T>
1722  const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
1723  const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
1724  const bool transformTheseDims[1U], const bool& compressTemps)
1725 : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain,
1726  transformTheseDims, compressTemps),
1727  complexDomain_m(cdomain)
1728 {
1729  size_t nTransformDims = 1U;
1730  // get axis length
1731  int length;
1732  length = rdomain[0].length();
1733 
1734  // get transform type for FFT Engine, compute normalization
1735  int transformType;
1736  transformType = FFTBase<1U,T>::rcFFT; // first transform is real-to-complex
1737  T& normFact = this->getNormFact();
1738  normFact = 1.0 / length;
1739 
1740  // set up FFT Engine
1741  this->getEngine().setup(nTransformDims, &transformType, &length);
1742  // set up the temporary fields
1743  setup();
1744 }
1745 
1746 //-----------------------------------------------------------------------------
1747 // Create a new FFT object of type RCTransform, with
1748 // given real and complex domains. Default: transform along all dimensions.
1749 //-----------------------------------------------------------------------------
1750 
1751 template <class T>
1753  const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
1754  const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
1755  const bool& compressTemps)
1756 : FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain, compressTemps),
1757  complexDomain_m(cdomain)
1758 {
1759  // Tau profiling
1760 
1761 
1762 
1763 
1764  // get axis length
1765  int length;
1766  length = rdomain[0].length();
1767 
1768  // get transform type for FFT Engine, compute normalization
1769  int transformType;
1770  transformType = FFTBase<1U,T>::rcFFT; // first transform is real-to-complex
1771  T& normFact = this->getNormFact();
1772  normFact = 1.0 / length;
1773 
1774  // set up FFT Engine
1775  this->getEngine().setup(1U, &transformType, &length);
1776  // set up the temporary fields
1777  setup();
1778 }
1779 
1780 //-----------------------------------------------------------------------------
1781 // setup performs all the initializations necessary after the transform
1782 // directions have been specified.
1783 //-----------------------------------------------------------------------------
1784 
1785 template <class T>
1786 void
1788 
1789  // Tau profiling
1790 
1791 
1792 
1793 
1794  // check that domain lengths agree between real and complex domains
1795  const Domain_t& domain = this->getDomain();
1796  bool match = true;
1797  // real array length n, complex array length n/2+1
1798  if ( complexDomain_m[0].length() !=
1799  (domain[0].length()/2 + 1) ) match = false;
1800  PInsist(match,
1801  "Domains provided for real and complex Fields are incompatible!");
1802 
1803  // generate layout for temporary real Field
1804  tempRLayout_m = new Layout_t(domain[0], PARALLEL, 1);
1805  // create temporary real Field
1806  this->tempRField_m = new RealField_t(*tempRLayout_m);
1807  // If user requests no intermediate compression, uncompress right now:
1808  if (!this->compressTemps()) this->tempRField_m->Uncompress();
1809 
1810  // generate temporary field layout
1811  tempLayouts_m = new Layout_t(complexDomain_m[0], PARALLEL, 1);
1812  // create temporary complex Field
1813  tempFields_m = new ComplexField_t(*tempLayouts_m);
1814  // If user requests no intermediate compression, uncompress right now:
1815  if (!this->compressTemps()) this->tempFields_m->Uncompress();
1816 
1817  return;
1818 }
1819 
1820 //-----------------------------------------------------------------------------
1821 // Destructor
1822 //-----------------------------------------------------------------------------
1823 
1824 template <class T>
1826 
1827  // Tau profiling
1828 
1829 
1830 
1831 
1832  // delete temporary fields and layouts
1833  delete tempFields_m;
1834  delete tempLayouts_m;
1835  delete this->tempRField_m;
1836  delete tempRLayout_m;
1837 }
1838 
1839 //-----------------------------------------------------------------------------
1840 // real-to-complex FFT; direction is +1 or -1
1841 //-----------------------------------------------------------------------------
1842 
1843 template <class T>
1844 void
1846  int direction,
1849  const bool& constInput)
1850 {
1851 
1852  // indicate we're doing another FFT
1853  //INCIPPLSTAT(incFFTs);
1854 
1855  // Check domain of incoming Fields
1856  const Layout_t& in_layout = f.getLayout();
1857  const Domain_t& in_dom = in_layout.getDomain();
1858  const Layout_t& out_layout = g.getLayout();
1859  const Domain_t& out_dom = out_layout.getDomain();
1860  PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
1861  this->checkDomain(complexDomain_m,out_dom), true);
1862 
1863  // Common loop iterate and other vars:
1864  RealField_t* tempR = this->tempRField_m; // Field* management aid
1865  if (!constInput) {
1866  // see if we can use input field f as a temporary
1867  bool skipTemp = true;
1868  // more rigorous match required here; check that layouts are identical
1869  if ( !(in_layout == *tempRLayout_m) ) skipTemp = false;
1870  if ( in_layout.getDistribution(0) !=
1871  tempRLayout_m->getDistribution(0) ) skipTemp = false;
1872  if ( in_layout.numVnodes() != tempRLayout_m->numVnodes() )
1873  skipTemp = false;
1874  // also make sure there are no guard cells
1875  if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
1876  if (skipTemp) tempR = &f;
1877  }
1878 
1879  if (tempR != &f) { // not using input as a temporary
1880 
1881  // assign to real Field with proper layout
1882  (*tempR) = f;
1883 
1884  }
1885 
1886  // Field* for temp Field management:
1887  ComplexField_t* temp = tempFields_m;
1888  // see if we can put final result directly into g
1889  bool skipFinal = true;
1890  // more rigorous match required here; check that layouts are identical
1891  if ( !(out_layout == *tempLayouts_m) ) skipFinal = false;
1892  if ( out_layout.getDistribution(0) !=
1893  tempLayouts_m->getDistribution(0) ) skipFinal = false;
1894  if ( out_layout.numVnodes() != tempLayouts_m->numVnodes() )
1895  skipFinal = false;
1896  // also make sure there are no guard cells
1897  if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
1898  if (skipFinal) temp = &g;
1899 
1900 
1901  // There should be just one vnode!
1902  typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
1903  typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
1904  if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
1905  // Get the LFields
1906  RealLField_t* rldf = (*rl_i).second.get();
1907  ComplexLField_t* cldf = (*cl_i).second.get();
1908  // make sure we are uncompressed
1909  rldf->Uncompress();
1910  cldf->Uncompress();
1911  // get the raw data pointers
1912  T* localreal = rldf->getP();
1913  Complex_t* localcomp = cldf->getP();
1914 
1915  int lengthreal = rldf->size(0);
1916  // move the data into the complex strip, which is two reals longer
1917  for (int ilen=0; ilen<lengthreal; ilen+=2)
1918  localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
1919  // Do the 1D real-to-complex FFT:
1920  // note that real-to-complex FFT direction is always +1
1921  this->getEngine().callFFT(0, +1, localcomp);
1922  }
1923 
1924 
1925 
1926  // compress temporary storage
1927  if (this->compressTemps() && tempR != &f) *tempR = 0;
1928 
1929 
1930  // skip final assignment and compress if we used g as final temporary
1931  if (temp != &g) {
1932 
1933  // Now assign into output Field, and compress last temp's storage:
1934  g = (*temp);
1935  if (this->compressTemps()) *temp = 0;
1936 
1937  }
1938 
1939  // Normalize:
1940  if (direction == +1) g = g * this->getNormFact();
1941 
1942  return;
1943 }
1944 
1945 //-----------------------------------------------------------------------------
1946 // RC FFT; opposite direction, from complex to real
1947 //-----------------------------------------------------------------------------
1948 
1949 template <class T>
1950 void
1952  int direction,
1955  const bool& constInput)
1956 {
1957 
1958  // indicate we're doing another FFT
1959  //INCIPPLSTAT(incFFTs);
1960 
1961  // Check domain of incoming Fields
1962  const Layout_t& in_layout = f.getLayout();
1963  const Domain_t& in_dom = in_layout.getDomain();
1964  const Layout_t& out_layout = g.getLayout();
1965  const Domain_t& out_dom = out_layout.getDomain();
1966  PAssert_EQ( this->checkDomain(complexDomain_m,in_dom) &&
1967  this->checkDomain(this->getDomain(),out_dom), true);
1968 
1969  // Field* for temp Field management:
1970  ComplexField_t* temp = &f;
1971 
1972  // see if we can put final result directly into g
1973  RealField_t* tempR;
1974  bool skipFinal = true;
1975  // more rigorous match required here; check that layouts are identical
1976  if ( !(out_layout == *tempRLayout_m) ) skipFinal = false;
1977  if ( out_layout.getDistribution(0) !=
1978  tempRLayout_m->getDistribution(0) ) skipFinal = false;
1979  if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
1980  skipFinal = false;
1981  // also make sure there are no guard cells
1982  if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
1983  if (skipFinal)
1984  tempR = &g;
1985  else
1986  tempR = this->tempRField_m;
1987 
1988  bool skipTemp = true;
1989  if (!constInput) {
1990  // only one CR transform
1991  // see if we really need to transpose input data
1992  // more rigorous match required here; check that layouts are identical
1993  if ( !(in_layout == *tempLayouts_m) ) skipTemp = false;
1994  if ( in_layout.getDistribution(0) !=
1995  tempLayouts_m->getDistribution(0) ) skipTemp = false;
1996  if ( in_layout.numVnodes() != tempLayouts_m->numVnodes() )
1997  skipTemp = false;
1998  // also make sure there are no guard cells
1999  if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
2000  }
2001  else { // cannot skip transpose
2002  skipTemp = false;
2003  }
2004 
2005 
2006  if (!skipTemp) {
2007  // assign to complex Field with proper layout
2008  (*tempFields_m) = (*temp);
2009  // compress previous iterates storage
2010  if (this->compressTemps() && temp != &f) *temp = 0;
2011  temp = tempFields_m;
2012  }
2013 
2014 
2015 
2016  // There should be just one vnode!
2017  typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
2018  typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
2019  if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
2020  // Get the LFields
2021  RealLField_t* rldf = (*rl_i).second.get();
2022  ComplexLField_t* cldf = (*cl_i).second.get();
2023  // make sure we are uncompressed
2024  rldf->Uncompress();
2025  cldf->Uncompress();
2026  // get the raw data pointers
2027  T* localreal = rldf->getP();
2028  Complex_t* localcomp = cldf->getP();
2029 
2030  int lengthreal = rldf->size(0);
2031  // Do the 1D complex-to-real FFT:
2032  // note that complex-to-real FFT direction is always -1
2033  this->getEngine().callFFT(0, -1, localcomp);
2034  // move the data into the real strip, which is two reals shorter
2035  for (int ilen=0; ilen<lengthreal; ilen+=2) {
2036  localreal[ilen] = real(localcomp[ilen/2]);
2037  localreal[ilen+1] = imag(localcomp[ilen/2]);
2038  }
2039  }
2040 
2041 
2042 
2043  // compress previous iterates storage
2044  if (this->compressTemps() && temp != &f) *temp = 0;
2045 
2046 
2047  // skip final assignment and compress if we used g as final temporary
2048  if (tempR != &g) {
2049 
2050  // Now assign into output Field, and compress last temp's storage:
2051  g = (*tempR);
2052  if (this->compressTemps()) *tempR = 0;
2053 
2054  }
2055 
2056  // Normalize:
2057  if (direction == +1) g = g * this->getNormFact();
2058 
2059  return;
2060 }
2061 
2062 
2063 //=============================================================================
2064 // FFT SineTransform Constructors
2065 //=============================================================================
2066 
2067 //-----------------------------------------------------------------------------
2068 // Create a new FFT object of type SineTransform, with given real and
2069 // complex field domains. Also specify which dimensions to transform along,
2070 // and which of these are sine transforms.
2071 // Note that RC transform of a real array of length n results in a
2072 // complex array of length n/2+1.
2073 //-----------------------------------------------------------------------------
2074 
2075 template <size_t Dim, class T>
2077  const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
2078  const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
2079  const bool transformTheseDims[Dim], const bool sineTransformDims[Dim],
2080  const bool& compressTemps)
2081 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
2082  transformTheseDims, compressTemps),
2083  complexDomain_m(&cdomain)
2084 {
2085 
2086  size_t d;
2087  // store which dimensions get sine transforms and count how many
2088  numSineTransforms_m = 0;
2089  for (d=0; d<Dim; ++d) {
2090  sineTransformDims_m[d] = sineTransformDims[d];
2091  if (sineTransformDims[d]) {
2092  PAssert_EQ(transformTheseDims[d], true); // should be marked as a transform dim
2093  ++numSineTransforms_m;
2094  }
2095  }
2096 
2097  // construct array of axis lengths for all transform dims
2098  size_t nTransformDims = this->numTransformDims();
2099  int* lengths = new int[nTransformDims];
2100  for (d=0; d<nTransformDims; ++d)
2101  lengths[d] = rdomain[this->activeDimension(d)].length();
2102 
2103  // construct array of transform types for FFT Engine, compute normalization
2104  int* transformTypes = new int[nTransformDims];
2105  T& normFact = this->getNormFact();
2106  normFact = 1.0;
2107  bool foundRC = false;
2108  for (d=0; d<nTransformDims; ++d) {
2109  if (sineTransformDims_m[this->activeDimension(d)]) {
2110  transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
2111  normFact /= (2.0 * (lengths[d] + 1));
2112  }
2113  else if (!foundRC) {
2114  transformTypes[d] = FFTBase<Dim,T>::rcFFT; // real-to-complex FFT
2115  normFact /= lengths[d];
2116  foundRC = true;
2117  }
2118  else {
2119  transformTypes[d] = FFTBase<Dim,T>::ccFFT; // complex-to-complex FFT
2120  normFact /= lengths[d];
2121  }
2122  }
2123 
2124  // set up FFT Engine
2125  this->getEngine().setup(nTransformDims, transformTypes, lengths);
2126  delete [] transformTypes;
2127  delete [] lengths;
2128  // set up the temporary fields
2129  setup();
2130 }
2131 
2132 //-----------------------------------------------------------------------------
2133 // Create a new FFT object of type SineTransform, with
2134 // given real and complex domains. Default: transform along all dimensions.
2135 //-----------------------------------------------------------------------------
2136 
2137 template <size_t Dim, class T>
2138 FFT<SineTransform,Dim,T>::FFT(
2139  const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
2140  const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
2141  const bool sineTransformDims[Dim], const bool& compressTemps)
2142 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps),
2143  complexDomain_m(&cdomain)
2144 {
2145 
2146  size_t d;
2147  // store which dimensions get sine transforms and count how many
2148  numSineTransforms_m = 0;
2149  for (d=0; d<Dim; ++d) {
2150  sineTransformDims_m[d] = sineTransformDims[d];
2151  if (sineTransformDims[d]) ++numSineTransforms_m;
2152  }
2153 
2154  // construct array of axis lengths for all transform dims
2155  int lengths[Dim];
2156  for (d=0; d<Dim; ++d)
2157  lengths[d] = rdomain[d].length();
2158 
2159  // construct array of transform types for FFT Engine, compute normalization
2160  int transformTypes[Dim];
2161  T& normFact = this->getNormFact();
2162  normFact = 1.0;
2163  bool foundRC = false;
2164  for (d=0; d<Dim; ++d) {
2165  if (sineTransformDims_m[d]) {
2166  transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
2167  normFact /= (2.0 * (lengths[d] + 1));
2168  }
2169  else if (!foundRC) {
2170  transformTypes[d] = FFTBase<Dim,T>::rcFFT; // real-to-complex FFT
2171  normFact /= lengths[d];
2172  foundRC = true;
2173  }
2174  else {
2175  transformTypes[d] = FFTBase<Dim,T>::ccFFT; // complex-to-complex FFT
2176  normFact /= lengths[d];
2177  }
2178  }
2179 
2180  // set up FFT Engine
2181  this->getEngine().setup(Dim, transformTypes, lengths);
2182  // set up the temporary fields
2183  setup();
2184 }
2185 
2186 //-----------------------------------------------------------------------------
2187 // Constructor for doing only sine transforms.
2188 // Create a new FFT object of type SineTransform, with given real field
2189 // domain. Also specify which dimensions to sine transform along.
2190 //-----------------------------------------------------------------------------
2191 
2192 template <size_t Dim, class T>
2193 FFT<SineTransform,Dim,T>::FFT(
2194  const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
2195  const bool sineTransformDims[Dim], const bool& compressTemps)
2196 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
2197  sineTransformDims, compressTemps)
2198 {
2199  // Tau profiling
2200 
2201 
2202 
2203 
2204  // store which dimensions get sine transforms and how many
2205  numSineTransforms_m = this->numTransformDims();
2206  size_t d;
2207  for (d=0; d<Dim; ++d)
2208  sineTransformDims_m[d] = sineTransformDims[d];
2209 
2210  // construct array of axis lengths
2211  int* lengths = new int[numSineTransforms_m];
2212  for (d=0; d<numSineTransforms_m; ++d)
2213  lengths[d] = rdomain[this->activeDimension(d)].length();
2214 
2215  // construct array of transform types for FFT Engine, compute normalization
2216  int* transformTypes = new int[numSineTransforms_m];
2217  T& normFact = this->getNormFact();
2218  normFact = 1.0;
2219  for (d=0; d<numSineTransforms_m; ++d) {
2220  transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
2221  normFact /= (2.0 * (lengths[d] + 1));
2222  }
2223 
2224  // set up FFT Engine
2225  this->getEngine().setup(numSineTransforms_m, transformTypes, lengths);
2226  delete [] transformTypes;
2227  delete [] lengths;
2228  // set up the temporary fields
2229  setup();
2230 }
2231 
2232 //-----------------------------------------------------------------------------
2233 // Create a new FFT object of type SineTransform, with
2234 // given real domain. Default: sine transform along all dimensions.
2235 //-----------------------------------------------------------------------------
2236 
2237 template <size_t Dim, class T>
2238 FFT<SineTransform,Dim,T>::FFT(
2239  const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain, const bool& compressTemps)
2240 : FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps)
2241 {
2242  // Tau profiling
2243 
2244 
2245 
2246 
2247  size_t d;
2248  // store which dimensions get sine transforms and how many
2249  numSineTransforms_m = this->numTransformDims();
2250  for (d=0; d<Dim; ++d)
2251  sineTransformDims_m[d] = true;
2252 
2253  // construct array of axis lengths
2254  int lengths[Dim];
2255  for (d=0; d<Dim; ++d)
2256  lengths[d] = rdomain[d].length();
2257 
2258  // construct array of transform types for FFT Engine, compute normalization
2259  int transformTypes[Dim];
2260  T& normFact = this->getNormFact();
2261  normFact = 1.0;
2262  for (d=0; d<Dim; ++d) {
2263  transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
2264  normFact /= (2.0 * (lengths[d] + 1));
2265  }
2266 
2267  // set up FFT Engine
2268  this->getEngine().setup(Dim, transformTypes, lengths);
2269  // set up the temporary fields
2270  setup();
2271 }
2272 
2273 //-----------------------------------------------------------------------------
2274 // setup performs all the initializations necessary after the transform
2275 // directions have been specified.
2276 //-----------------------------------------------------------------------------
2277 
2278 template <size_t Dim, class T>
2279 void
2281 
2282  // Tau profiling
2283 
2284 
2285 
2286 
2287  size_t d, dim, activeDim = 0;
2288  size_t icount;
2289  Domain_t ndip;
2290  size_t nTransformDims = this->numTransformDims(); // total number of transforms
2291  const Domain_t& domain = this->getDomain(); // get real Field domain
2292 
2293  // Set up the arrays of temporary Fields and FieldLayouts:
2294  e_dim_tag serialParallel[Dim]; // Specifies SERIAL, PARALLEL dims in temp
2295  // make zeroth dimension always SERIAL
2296  serialParallel[0] = SERIAL;
2297  // all other dimensions parallel
2298  for (d=1; d<Dim; ++d)
2299  serialParallel[d] = PARALLEL;
2300 
2301  // do we have a real-to-complex transform to do or not?
2302  if (nTransformDims > numSineTransforms_m) { // have RC transform
2303 
2304  PAssert(complexDomain_m); // This pointer should be initialized!
2305  // find first non-sine transform dimension; this is rc transform
2306  bool match = false;
2307  d=0;
2308  while (d<Dim && !match) {
2309  if (this->transformDim(d) && !sineTransformDims_m[d]) {
2310  activeDim = this->activeDimension(d);
2311  match = true;
2312  }
2313  ++d;
2314  }
2315  PAssert_EQ(match, true); // check that we found rc transform dimension
2316  // compare lengths of real and complex Field domains
2317  for (d=0; d<Dim; ++d) {
2318  if (d == activeDim) {
2319  // real array length n, complex array length n/2+1
2320  if ( (*complexDomain_m)[d].length() !=
2321  (domain[d].length()/2 + 1) ) match = false;
2322  }
2323  else {
2324  // real and complex arrays should be same length for all other dims
2325  if ( (*complexDomain_m)[d].length() !=
2326  domain[d].length() ) match = false;
2327  }
2328  }
2329  PInsist(match,
2330  "Domains provided for real and complex Fields are incompatible!");
2331 
2332  // set up the real Fields first
2333  // we will have one for each sine transform, plus one for the rc transform
2334  tempRLayouts_m = new Layout_t*[numSineTransforms_m+1];
2335  tempRFields_m = new RealField_t*[numSineTransforms_m+1];
2336  // loop over the sine transform dimensions
2337  icount=0;
2338  for (dim=0; dim<numSineTransforms_m; ++dim, ++icount) {
2339  // get next dimension to be sine transformed
2340  while (!sineTransformDims_m[icount]) ++icount;
2341  PAssert_LT(icount, Dim); // check that icount is valid dimension
2342  // make new domain with permuted Indexes, icount first
2343  ndip[0] = domain[icount];
2344  for (d=1; d<Dim; ++d) {
2345  size_t nextDim = icount + d;
2346  if (nextDim >= Dim) nextDim -= Dim;
2347  ndip[d] = domain[nextDim];
2348  }
2349  // generate temporary field layout
2350  tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
2351  // create temporary real Field
2352  tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
2353  // If user requests no intermediate compression, uncompress right now:
2354  if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
2355  }
2356 
2357  // build final real Field for rc transform along activeDim
2358  ndip[0] = domain[activeDim];
2359  for (d=1; d<Dim; ++d) {
2360  size_t nextDim = activeDim + d;
2361  if (nextDim >= Dim) nextDim -= Dim;
2362  ndip[d] = domain[nextDim];
2363  }
2364  // generate temporary field layout
2365  tempRLayouts_m[numSineTransforms_m] = new Layout_t(ndip, serialParallel,
2366  this->transVnodes());
2367  // create temporary real Field
2368  tempRFields_m[numSineTransforms_m] =
2369  new RealField_t(*tempRLayouts_m[numSineTransforms_m]);
2370  // If user requests no intermediate compression, uncompress right now:
2371  if (!this->compressTemps()) (*tempRFields_m[numSineTransforms_m]).Uncompress();
2372 
2373  // now create the temporary complex Fields
2374  size_t numComplex = nTransformDims - numSineTransforms_m;
2375  // allocate arrays of temp fields and layouts
2376  tempLayouts_m = new Layout_t*[numComplex];
2377  tempFields_m = new ComplexField_t*[numComplex];
2378  icount=0; // reset counter
2379  for (dim=0; dim<numComplex; ++dim, ++icount) {
2380  // get next non-sine transform dimension
2381  while (!this->transformDim(icount) || sineTransformDims_m[icount]) ++icount;
2382  PAssert_LT(icount, Dim); // check that this is a valid dimension
2383  // make new domain with permuted Indexes, icount first
2384  ndip[0] = (*complexDomain_m)[icount];
2385  for (d=1; d<Dim; ++d) {
2386  size_t nextDim = icount + d;
2387  if (nextDim >= Dim) nextDim -= Dim;
2388  ndip[d] = (*complexDomain_m)[nextDim];
2389  }
2390  // generate temporary field layout
2391  tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
2392  // create temporary complex Field
2393  tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
2394  // If user requests no intermediate compression, uncompress right now:
2395  if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
2396  }
2397 
2398  }
2399  else { // sine transforms only
2400 
2401  // set up the real Fields
2402  // we will have one for each sine transform
2403  tempRLayouts_m = new Layout_t*[numSineTransforms_m];
2404  tempRFields_m = new RealField_t*[numSineTransforms_m];
2405  // loop over the sine transform dimensions
2406  for (dim=0; dim<numSineTransforms_m; ++dim) {
2407  // get next dimension to be sine transformed
2408  activeDim = this->activeDimension(dim);
2409  // make new domain with permuted Indexes, activeDim first
2410  ndip[0] = domain[activeDim];
2411  for (d=1; d<Dim; ++d) {
2412  size_t nextDim = activeDim + d;
2413  if (nextDim >= Dim) nextDim -= Dim;
2414  ndip[d] = domain[nextDim];
2415  }
2416  // generate temporary field layout
2417  tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
2418  // create temporary real Field
2419  tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
2420  // If user requests no intermediate compression, uncompress right now:
2421  if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
2422  }
2423 
2424  }
2425 
2426  return;
2427 }
2428 
2429 //-----------------------------------------------------------------------------
2430 // Destructor
2431 //-----------------------------------------------------------------------------
2432 
2433 template <size_t Dim, class T>
2435 
2436  // Tau profiling
2437 
2438 
2439 
2440 
2441  // delete temporary fields and layouts
2442  size_t d;
2443  size_t nTransformDims = this->numTransformDims();
2444  if (nTransformDims > numSineTransforms_m) {
2445  for (d=0; d<numSineTransforms_m+1; ++d) {
2446  delete tempRFields_m[d];
2447  delete tempRLayouts_m[d];
2448  }
2449  size_t numComplex = nTransformDims - numSineTransforms_m;
2450  for (d=0; d<numComplex; ++d) {
2451  delete tempFields_m[d];
2452  delete tempLayouts_m[d];
2453  }
2454  delete [] tempFields_m;
2455  delete [] tempLayouts_m;
2456  }
2457  else {
2458  for (d=0; d<numSineTransforms_m; ++d) {
2459  delete tempRFields_m[d];
2460  delete tempRLayouts_m[d];
2461  }
2462  }
2463  delete [] tempRFields_m;
2464  delete [] tempRLayouts_m;
2465 }
2466 
2467 //-----------------------------------------------------------------------------
2468 // Sine and RC FFT; separate input and output fields, direction is +1 or -1
2469 //-----------------------------------------------------------------------------
2470 
2471 template <size_t Dim, class T>
2472 void
2474  int direction,
2477  const bool& constInput)
2478 {
2479 
2480  // indicate we're doing another FFT
2481  //INCIPPLSTAT(incFFTs);
2482 
2483  // Check domain of incoming Fields
2484  const Layout_t& in_layout = f.getLayout();
2485  const Domain_t& in_dom = in_layout.getDomain();
2486  const Layout_t& out_layout = g.getLayout();
2487  const Domain_t& out_dom = out_layout.getDomain();
2488  PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
2489  this->checkDomain(*complexDomain_m,out_dom), true );
2490 
2491  // Common loop iterate and other vars:
2492  size_t d;
2493  int icount, activeDim;
2494  int idim; // idim loops over the number of transform dims.
2495  size_t nTransformDims = this->numTransformDims();
2496  // check that there is a real-to-complex transform to do
2497  PInsist(nTransformDims>numSineTransforms_m,
2498  "Wrong output Field type for real-to-real transform!!");
2499 
2500  // first do all the sine transforms
2501 
2502  // Field* management aid
2503  RealField_t* tempR = &f;
2504  // Local work array passed to FFT:
2505  T* localdataR;
2506 
2507  // Loop over the dimensions to be sine transformed:
2508  icount = 0;
2509  activeDim = 0;
2510  for (idim = 0; idim != numSineTransforms_m; ++idim, ++icount, ++activeDim) {
2511 
2512  // find next sine transform dim
2513  while (!sineTransformDims_m[icount]) {
2514  if (this->transformDim(icount)) ++activeDim;
2515  ++icount;
2516  }
2517  PAssert_LT(activeDim, Dim); // check that this is a valid dimension!
2518  // Now do the serial transforms along this dimension:
2519 
2520  bool skipTranspose = false;
2521  // if this is the first transform dimension, we might be able
2522  // to skip the transpose into the first temporary Field
2523  if (idim == 0 && !constInput) {
2524  // get domain for comparison
2525  const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
2526  // check that zeroth axis is the same and is serial
2527  // and that there are no guard cells
2528  skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
2529  (in_dom[0].length() == first_dom[0].length()) &&
2530  (in_layout.getDistribution(0) == SERIAL) &&
2531  (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2532  }
2533 
2534  if (!skipTranspose) {
2535  // transpose and permute to Field with transform dim first
2536  (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
2537  (*tempR)[tempR->getLayout().getDomain()];
2538 
2539  // Compress out previous iterate's storage:
2540  if (this->compressTemps() && tempR != &f) *tempR = 0;
2541  tempR = tempRFields_m[idim]; // Field* management aid
2542  }
2543 
2544 
2545 
2546  // Loop over all the Vnodes, working on the LField in each.
2547  typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
2548  for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
2549 
2550  // Get the LField
2551  RealLField_t* ldf = (*l_i).second.get();
2552  // make sure we are uncompressed
2553  ldf->Uncompress();
2554  // get the raw data pointer
2555  localdataR = ldf->getP();
2556 
2557  // Do 1D real-to-real FFT's on all the strips in the LField:
2558  int nstrips = 1, length = ldf->size(0);
2559  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2560  for (int istrip=0; istrip<nstrips; ++istrip) {
2561  // Do the 1D FFT:
2562  this->getEngine().callFFT(activeDim, direction, localdataR);
2563  // advance the data pointer
2564  localdataR += length;
2565  } // loop over 1D strips
2566  } // loop over all the LFields
2567 
2568  } // loop over all transformed dimensions
2569 
2570  // now handle the RC transform separately
2571 
2572  // find first non-sine transform dimension
2573  icount = 0;
2574  activeDim = 0;
2575  while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2576  if (sineTransformDims_m[icount]) ++activeDim;
2577  ++icount;
2578  }
2579  PAssert_LT(activeDim, Dim); // check that this is a valid dimension!
2580 
2581 
2582  // transpose and permute to final real Field with transform dim first
2583  int last = numSineTransforms_m;
2584  (*tempRFields_m[last])[tempRLayouts_m[last]->getDomain()] =
2585  (*tempR)[tempR->getLayout().getDomain()];
2586 
2587  // Compress out previous iterate's storage:
2588  if (this->compressTemps() && tempR != &f) *tempR = 0;
2589  tempR = tempRFields_m[last]; // Field* management aid
2590 
2591 
2592  // Field* for temp Field management:
2593  ComplexField_t* temp = tempFields_m[0];
2594  // see if we can put final result directly into g
2595  int numComplex = nTransformDims-numSineTransforms_m;
2596  if (numComplex == 1) { // only a single RC transform
2597  bool skipTemp = true;
2598  // more rigorous match required here; check that layouts are identical
2599  if ( !(out_layout == *tempLayouts_m[0]) ) skipTemp = false;
2600  for (d=0; d<Dim; ++d) {
2601  if ( out_layout.getDistribution(d) !=
2602  tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
2603  }
2604  if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
2605  skipTemp = false;
2606  // also make sure there are no guard cells
2607  if (!(g.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
2608  if (skipTemp) temp = &g;
2609  }
2610 
2611 
2612 
2613  // Loop over all the Vnodes, working on the LField in each.
2614  typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
2615  typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
2616  for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
2617  // Get the LFields
2618  RealLField_t* rldf = (*rl_i).second.get();
2619  ComplexLField_t* cldf = (*cl_i).second.get();
2620  // make sure we are uncompressed
2621  rldf->Uncompress();
2622  cldf->Uncompress();
2623  // get the raw data pointers
2624  T* localreal = rldf->getP();
2625  Complex_t* localcomp = cldf->getP();
2626 
2627  int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
2628  // number of strips should be the same for real and complex LFields!
2629  for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
2630  for (int istrip=0; istrip<nstrips; ++istrip) {
2631  // move the data into the complex strip, which is two reals longer
2632  for (int ilen=0; ilen<lengthreal; ilen+=2)
2633  localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
2634  // Do the 1D real-to-complex FFT:
2635  // note that real-to-complex FFT direction is always +1
2636  this->getEngine().callFFT(activeDim, +1, localcomp);
2637  // advance the data pointers
2638  localreal += lengthreal;
2639  localcomp += lengthcomp;
2640  } // loop over 1D strips
2641  } // loop over all the LFields
2642 
2643 
2644  // now proceed with the other complex-to-complex transforms
2645 
2646  // Local work array passed to FFT:
2647  Complex_t* localdata;
2648 
2649  // Loop over the remaining dimensions to be transformed:
2650  ++icount;
2651  ++activeDim;
2652  for (idim = 1; idim != numComplex; ++idim, ++icount, ++activeDim) {
2653 
2654  // find the next non-sine transform dimension
2655  while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2656  if (sineTransformDims_m[icount]) ++activeDim;
2657  ++icount;
2658  }
2659  PAssert_LT(activeDim, Dim); // check that this is a valid dimension!
2660  // Now do the serial transforms along this dimension:
2661 
2662  bool skipTranspose = false;
2663  // if this is the last transform dimension, we might be able
2664  // to skip the last temporary and transpose right into g
2665  if (idim == numComplex-1) {
2666  // get the domain for comparison
2667  const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
2668  // check that zeroth axis is the same and is serial
2669  // and that there are no guard cells
2670  skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
2671  (out_dom[0].length() == last_dom[0].length()) &&
2672  (out_layout.getDistribution(0) == SERIAL) &&
2673  (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2674  }
2675 
2676  if (!skipTranspose) {
2677  // transpose and permute to Field with transform dim first
2678  (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
2679  (*temp)[temp->getLayout().getDomain()];
2680 
2681  // Compress out previous iterate's storage:
2682  if (this->compressTemps()) *temp = 0;
2683  temp = tempFields_m[idim]; // Field* management aid
2684  }
2685  else if (idim == numComplex-1) {
2686  // last transform and we can skip the last temporary field
2687  // so do the transpose here using g instead
2688 
2689  // transpose and permute to Field with transform dim first
2690  g[out_dom] = (*temp)[temp->getLayout().getDomain()];
2691 
2692  // Compress out previous iterate's storage:
2693  if (this->compressTemps()) *temp = 0;
2694  temp = &g; // Field* management aid
2695  }
2696 
2697 
2698 
2699  // Loop over all the Vnodes, working on the LField in each.
2700  typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
2701  for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
2702 
2703  // Get the LField
2704  ComplexLField_t* ldf = (*l_i).second.get();
2705  // make sure we are uncompressed
2706  ldf->Uncompress();
2707  // get the raw data pointer
2708  localdata = ldf->getP();
2709 
2710  // Do 1D complex-to-complex FFT's on all the strips in the LField:
2711  int nstrips = 1, length = ldf->size(0);
2712  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2713  for (int istrip=0; istrip<nstrips; ++istrip) {
2714  // Do the 1D FFT:
2715  this->getEngine().callFFT(activeDim, direction, localdata);
2716  // advance the data pointer
2717  localdata += length;
2718  } // loop over 1D strips
2719  } // loop over all the LFields
2720 
2721  } // loop over all transformed dimensions
2722 
2723  // skip final assignment and compress if we used g as final temporary
2724  if (temp != &g) {
2725 
2726  // Now assign into output Field, and compress last temp's storage:
2727  g[out_dom] = (*temp)[temp->getLayout().getDomain()];
2728  if (this->compressTemps()) *temp = 0;
2729 
2730  }
2731 
2732  // Normalize:
2733  if (direction == +1) g = g * this->getNormFact();
2734 
2735  return;
2736 }
2737 
2738 //-----------------------------------------------------------------------------
2739 // Sine and RC FFT; opposite direction, from complex to real
2740 //-----------------------------------------------------------------------------
2741 
2742 template <size_t Dim, class T>
2743 void
2745  int direction,
2748  const bool& constInput)
2749 {
2750 
2751  // indicate we're doing another FFT
2752  // INCIPPLSTAT(incFFTs);
2753 
2754  // Check domain of incoming Fields
2755  const Layout_t& in_layout = f.getLayout();
2756  const Domain_t& in_dom = in_layout.getDomain();
2757  const Layout_t& out_layout = g.getLayout();
2758  const Domain_t& out_dom = out_layout.getDomain();
2759  PAssert_EQ( this->checkDomain(*complexDomain_m,in_dom) &&
2760  this->checkDomain(this->getDomain(),out_dom), true );
2761 
2762  // Common loop iterate and other vars:
2763  size_t d;
2764  int icount, activeDim;
2765  int idim; // idim loops over the number of transform dims.
2766  size_t nTransformDims = this->numTransformDims();
2767 
2768  // proceed with the complex-to-complex transforms
2769 
2770  // Field* for temp Field management:
2771  ComplexField_t* temp = &f;
2772  // Local work array passed to FFT:
2773  Complex_t* localdata;
2774 
2775  // Loop over all dimensions to be non-sine transformed except last one:
2776  int numComplex = nTransformDims - numSineTransforms_m;
2777  icount = Dim-1; // start with last dimension
2778  activeDim = nTransformDims-1;
2779  for (idim = numComplex-1; idim != 0; --idim, --icount, --activeDim) {
2780 
2781  // find next non-sine transform dim
2782  while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2783  if (sineTransformDims_m[icount]) --activeDim;
2784  --icount;
2785  }
2786  PAssert_GE(activeDim, 0); // check that this is a valid dimension!
2787  // Now do the serial transforms along this dimension:
2788 
2789  bool skipTranspose = false;
2790  // if this is the first transform dimension, we might be able
2791  // to skip the transpose into the first temporary Field
2792  if (idim == numComplex-1 && !constInput) {
2793  // get domain for comparison
2794  const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
2795  // check that zeroth axis is the same and is serial
2796  // and that there are no guard cells
2797  skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
2798  (in_dom[0].length() == first_dom[0].length()) &&
2799  (in_layout.getDistribution(0) == SERIAL) &&
2800  (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2801  }
2802 
2803  if (!skipTranspose) {
2804  // transpose and permute to Field with transform dim first
2805  (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
2806  (*temp)[temp->getLayout().getDomain()];
2807 
2808  // Compress out previous iterate's storage:
2809  if (this->compressTemps() && temp != &f) *temp = 0;
2810  temp = tempFields_m[idim]; // Field* management aid
2811  }
2812 
2813 
2814 
2815  // Loop over all the Vnodes, working on the LField in each.
2816  typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
2817  for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
2818 
2819  // Get the LField
2820  ComplexLField_t* ldf = (*l_i).second.get();
2821  // make sure we are uncompressed
2822  ldf->Uncompress();
2823  // get the raw data pointer
2824  localdata = ldf->getP();
2825 
2826  // Do 1D complex-to-complex FFT's on all the strips in the LField:
2827  int nstrips = 1, length = ldf->size(0);
2828  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2829  for (int istrip=0; istrip<nstrips; ++istrip) {
2830  // Do the 1D FFT:
2831  this->getEngine().callFFT(activeDim, direction, localdata);
2832  // advance the data pointer
2833  localdata += length;
2834  } // loop over 1D strips
2835  } // loop over all the LFields
2836 
2837  } // loop over all transformed dimensions
2838 
2839  // handle the CR transform separately
2840 
2841  // find next non-sine transform dim
2842  while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2843  if (sineTransformDims_m[icount]) --activeDim;
2844  --icount;
2845  }
2846  PAssert_GE(activeDim, 0); // check that this is a valid dimension!
2847 
2848  // Temp Field* management aid
2849  RealField_t* tempR = tempRFields_m[numSineTransforms_m];
2850 
2851  bool skipTemp = true;
2852  if (numComplex == 1 && !constInput) {
2853  // only one CR transform
2854  // see if we really need to transpose input data
2855  // more rigorous match required here; check that layouts are identical
2856  if ( !(in_layout == *tempLayouts_m[0]) ) skipTemp = false;
2857  for (d=0; d<Dim; ++d) {
2858  if ( in_layout.getDistribution(d) !=
2859  tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
2860  }
2861  if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
2862  skipTemp = false;
2863  // also make sure there are no guard cells
2864  if (!(f.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
2865  }
2866  else { // cannot skip transpose
2867  skipTemp = false;
2868  }
2869 
2870  if (!skipTemp) {
2871 
2872  // transpose and permute to complex Field with transform dim first
2873  (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
2874  (*temp)[temp->getLayout().getDomain()];
2875  // compress previous iterates storage
2876  if (this->compressTemps() && temp != &f) *temp = 0;
2877  temp = tempFields_m[0];
2878 
2879  }
2880 
2881 
2882  // Loop over all the Vnodes, working on the LField in each.
2883  typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
2884  typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
2885  for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
2886  // Get the LFields
2887  RealLField_t* rldf = (*rl_i).second.get();
2888  ComplexLField_t* cldf = (*cl_i).second.get();
2889  // make sure we are uncompressed
2890  rldf->Uncompress();
2891  cldf->Uncompress();
2892  // get the raw data pointers
2893  T* localreal = rldf->getP();
2894  Complex_t* localcomp = cldf->getP();
2895 
2896  int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
2897  // number of strips should be the same for real and complex LFields!
2898  for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
2899  for (int istrip=0; istrip<nstrips; ++istrip) {
2900  // Do the 1D complex-to-real FFT:
2901  // note that complex-to-real FFT direction is always -1
2902  this->getEngine().callFFT(activeDim, -1, localcomp);
2903  // move the data into the real strip, which is two reals shorter
2904  for (int ilen=0; ilen<lengthreal; ilen+=2) {
2905  localreal[ilen] = real(localcomp[ilen/2]);
2906  localreal[ilen+1] = imag(localcomp[ilen/2]);
2907  }
2908  // advance the data pointers
2909  localreal += lengthreal;
2910  localcomp += lengthcomp;
2911  } // loop over 1D strips
2912  } // loop over all the LFields
2913 
2914 
2915 
2916  // compress previous iterates storage
2917  if (this->compressTemps() && temp != &f) *temp = 0;
2918 
2919 
2920  // now do the real-to-real FFTs
2921 
2922  // Local work array passed to FFT:
2923  T* localdataR;
2924 
2925  // Loop over the remaining dimensions to be transformed:
2926  icount = Dim-1; // start with last dimension
2927  activeDim = nTransformDims - 1;
2928  for (idim = numSineTransforms_m-1; idim != -1;
2929  --idim, --icount, --activeDim) {
2930 
2931  // find next sine transform dim
2932  while (!sineTransformDims_m[icount]) {
2933  if (this->transformDim(icount)) --activeDim;
2934  --icount;
2935  }
2936  PAssert_GE(activeDim, 0); // check that this is a valid dimension!
2937  // Now do the serial transforms along this dimension:
2938 
2939  bool skipTranspose = false;
2940  // if this is the last transform dimension, we might be able
2941  // to skip the last temporary and transpose right into g
2942  if (idim == 0) {
2943  // get the domain for comparison
2944  const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
2945  // check that zeroth axis is the same and is serial
2946  // and that there are no guard cells
2947  skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
2948  (out_dom[0].length() == last_dom[0].length()) &&
2949  (out_layout.getDistribution(0) == SERIAL) &&
2950  (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2951  }
2952 
2953  if (!skipTranspose) {
2954  // transpose and permute to Field with transform dim first
2955  (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
2956  (*tempR)[tempR->getLayout().getDomain()];
2957 
2958  // Compress out previous iterate's storage:
2959  if (this->compressTemps()) *tempR = 0;
2960  tempR = tempRFields_m[idim]; // Field* management aid
2961  }
2962  else if (idim == 0) {
2963  // last transform and we can skip the last temporary field
2964  // so do the transpose here using g instead
2965 
2966  // transpose and permute to Field with transform dim first
2967  g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
2968 
2969  // Compress out previous iterate's storage:
2970  if (this->compressTemps()) *tempR = 0;
2971  tempR = &g; // Field* management aid
2972  }
2973 
2974 
2975 
2976  // Loop over all the Vnodes, working on the LField in each.
2977  typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
2978  for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
2979 
2980  // Get the LField
2981  RealLField_t* ldf = (*l_i).second.get();
2982  // make sure we are uncompressed
2983  ldf->Uncompress();
2984  // get the raw data pointer
2985  localdataR = ldf->getP();
2986 
2987  // Do 1D complex-to-complex FFT's on all the strips in the LField:
2988  int nstrips = 1, length = ldf->size(0);
2989  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2990  for (int istrip=0; istrip<nstrips; ++istrip) {
2991  // Do the 1D FFT:
2992  this->getEngine().callFFT(activeDim, direction, localdataR);
2993  // advance the data pointer
2994  localdataR += length;
2995  } // loop over 1D strips
2996  } // loop over all the LFields
2997 
2998  } // loop over all transformed dimensions
2999 
3000  // skip final assignment and compress if we used g as final temporary
3001  if (tempR != &g) {
3002 
3003  // Now assign into output Field, and compress last temp's storage:
3004  g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
3005  if (this->compressTemps()) *tempR = 0;
3006 
3007  }
3008 
3009  // Normalize:
3010  if (direction == +1) g = g * this->getNormFact();
3011 
3012  return;
3013 }
3014 
3015 //-----------------------------------------------------------------------------
3016 // Sine FFT only; separate input and output fields, direction is +1 or -1
3017 //-----------------------------------------------------------------------------
3018 
3019 template <size_t Dim, class T>
3020 void
3022  int direction,
3025  const bool& constInput)
3026 {
3027 
3028  // indicate we're doing another FFT
3029  //INCIPPLSTAT(incFFTs);
3030 
3031  // Check domain of incoming Fields
3032  const Layout_t& in_layout = f.getLayout();
3033  const Domain_t& in_dom = in_layout.getDomain();
3034  const Layout_t& out_layout = g.getLayout();
3035  const Domain_t& out_dom = out_layout.getDomain();
3036  PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
3037  this->checkDomain(this->getDomain(),out_dom), true );
3038 
3039  // Common loop iterate and other vars:
3040  size_t d;
3041  int idim; // idim loops over the number of transform dims.
3042  int begdim, enddim;
3043  size_t nTransformDims = this->numTransformDims();
3044  // check that there is no real-to-complex transform to do
3045  PInsist(nTransformDims==numSineTransforms_m,
3046  "Wrong output Field type for real-to-complex transform!!");
3047 
3048  // do all the sine transforms
3049 
3050  // Field* management aid
3051  RealField_t* tempR = &f;
3052  // Local work array passed to FFT:
3053  T* localdataR;
3054 
3055  // Loop over the dimensions to be sine transformed:
3056  begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
3057  enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
3058  for (idim = begdim; idim != enddim; idim+=direction) {
3059 
3060  // Now do the serial transforms along this dimension:
3061 
3062  bool skipTranspose = false;
3063  // if this is the first transform dimension, we might be able
3064  // to skip the first temporary and just use f
3065  if (idim == begdim && !constInput) {
3066  // get the domain for comparison
3067  const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
3068  // check that zeroth axis is the same and is serial
3069  // and that there are no guard cells
3070  skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
3071  (in_dom[0].length() == first_dom[0].length()) &&
3072  (in_layout.getDistribution(0) == SERIAL) &&
3073  (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
3074  }
3075 
3076  // if this is the last transform dimension, we might be able
3077  // to skip the last temporary and transpose right into g
3078  if (idim == enddim-direction) {
3079  // get the domain for comparison
3080  const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
3081  // check that zeroth axis is the same and is serial
3082  // and that there are no guard cells
3083  skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
3084  (out_dom[0].length() == last_dom[0].length()) &&
3085  (out_layout.getDistribution(0) == SERIAL) &&
3086  (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
3087  }
3088 
3089  if (!skipTranspose) {
3090  // transpose and permute to Field with transform dim first
3091  (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
3092  (*tempR)[tempR->getLayout().getDomain()];
3093 
3094  // Compress out previous iterate's storage:
3095  if (this->compressTemps() && tempR != &f) *tempR = 0;
3096  tempR = tempRFields_m[idim]; // Field* management aid
3097  }
3098  else if (idim == enddim-direction && tempR != &g) {
3099  // last transform and we can skip the last temporary field
3100  // so do the transpose here using g instead
3101 
3102  // transpose and permute to Field with transform dim first
3103  g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
3104 
3105  // Compress out previous iterate's storage:
3106  if (this->compressTemps() && tempR != &f) *tempR = 0;
3107  tempR = &g; // Field* management aid
3108  }
3109 
3110 
3111 
3112  // Loop over all the Vnodes, working on the LField in each.
3113  typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
3114  for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
3115 
3116  // Get the LField
3117  RealLField_t* ldf = (*l_i).second.get();
3118  // make sure we are uncompressed
3119  ldf->Uncompress();
3120  // get the raw data pointer
3121  localdataR = ldf->getP();
3122 
3123  // Do 1D real-to-real FFT's on all the strips in the LField:
3124  int nstrips = 1, length = ldf->size(0);
3125  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
3126  for (int istrip=0; istrip<nstrips; ++istrip) {
3127  // Do the 1D FFT:
3128  this->getEngine().callFFT(idim, direction, localdataR);
3129  // advance the data pointer
3130  localdataR += length;
3131  } // loop over 1D strips
3132  } // loop over all the LFields
3133 
3134  } // loop over all transformed dimensions
3135 
3136  // skip final assignment and compress if we used g as final temporary
3137  if (tempR != &g) {
3138 
3139  // Now assign into output Field, and compress last temp's storage:
3140  g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
3141  if (this->compressTemps() && tempR != &f) *tempR = 0;
3142 
3143  }
3144 
3145  // Normalize:
3146  if (direction == +1) g = g * this->getNormFact();
3147 
3148  return;
3149 }
3150 
3151 //-----------------------------------------------------------------------------
3152 // Sine FFT only; in-place transform, direction is +1 or -1
3153 //-----------------------------------------------------------------------------
3154 
3155 template <size_t Dim, class T>
3156 void
3158  int direction,
3160 {
3161  // indicate we're doing another FFT
3162  //INCIPPLSTAT(incFFTs);
3163 
3164  // Check domain of incoming Field
3165  const Layout_t& in_layout = f.getLayout();
3166  const Domain_t& in_dom = in_layout.getDomain();
3167  PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
3168 
3169  // Common loop iterate and other vars:
3170  size_t d;
3171  int idim; // idim loops over the number of transform dims.
3172  int begdim, enddim;
3173  size_t nTransformDims = this->numTransformDims();
3174  // check that there is no real-to-complex transform to do
3175  PInsist(nTransformDims==numSineTransforms_m,
3176  "Cannot perform real-to-complex transform in-place!!");
3177 
3178  // do all the sine transforms
3179 
3180  // Field* management aid
3181  RealField_t* tempR = &f;
3182  // Local work array passed to FFT:
3183  T* localdataR;
3184 
3185  // Loop over the dimensions to be sine transformed:
3186  begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
3187  enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
3188  for (idim = begdim; idim != enddim; idim+=direction) {
3189 
3190  // Now do the serial transforms along this dimension:
3191 
3192  bool skipTranspose = false;
3193  // if this is the first transform dimension, we might be able
3194  // to skip the transpose into the first temporary Field
3195  if (idim == begdim) {
3196  // get domain for comparison
3197  const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
3198  // check that zeroth axis is the same and is serial
3199  // and that there are no guard cells
3200  skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
3201  (in_dom[0].length() == first_dom[0].length()) &&
3202  (in_layout.getDistribution(0) == SERIAL) &&
3203  (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
3204  }
3205 
3206  // if this is the last transform dimension, we might be able
3207  // to skip the last temporary and transpose right into f
3208  if (idim == enddim-direction) {
3209  // get the domain for comparison
3210  const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
3211  // check that zeroth axis is the same and is serial
3212  // and that there are no guard cells
3213  skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
3214  (in_dom[0].length() == last_dom[0].length()) &&
3215  (in_layout.getDistribution(0) == SERIAL) &&
3216  (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
3217  }
3218 
3219  if (!skipTranspose) {
3220  // transpose and permute to Field with transform dim first
3221  (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
3222  (*tempR)[tempR->getLayout().getDomain()];
3223 
3224  // Compress out previous iterate's storage:
3225  if (this->compressTemps() && tempR != &f) *tempR = 0;
3226  tempR = tempRFields_m[idim]; // Field* management aid
3227  }
3228  else if (idim == enddim-direction && tempR != &f) {
3229  // last transform and we can skip the last temporary field
3230  // so do the transpose here using f instead
3231 
3232  // transpose and permute to Field with transform dim first
3233  f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
3234 
3235  // Compress out previous iterate's storage:
3236  if (this->compressTemps() && tempR != &f) *tempR = 0;
3237  tempR = &f; // Field* management aid
3238  }
3239 
3240 
3241 
3242  // Loop over all the Vnodes, working on the LField in each.
3243  typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
3244  for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
3245 
3246  // Get the LField
3247  RealLField_t* ldf = (*l_i).second.get();
3248  // make sure we are uncompressed
3249  ldf->Uncompress();
3250  // get the raw data pointer
3251  localdataR = ldf->getP();
3252 
3253  // Do 1D real-to-real FFT's on all the strips in the LField:
3254  int nstrips = 1, length = ldf->size(0);
3255  for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
3256  for (int istrip=0; istrip<nstrips; ++istrip) {
3257  // Do the 1D FFT:
3258  this->getEngine().callFFT(idim, direction, localdataR);
3259  // advance the data pointer
3260  localdataR += length;
3261  } // loop over 1D strips
3262  } // loop over all the LFields
3263 
3264  } // loop over all transformed dimensions
3265 
3266  // skip final assignment and compress if we used g as final temporary
3267  if (tempR != &f) {
3268 
3269  // Now assign into output Field, and compress last temp's storage:
3270  f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
3271  if (this->compressTemps()) *tempR = 0;
3272 
3273  }
3274 
3275  // Normalize:
3276  if (direction == +1) f = f * this->getNormFact();
3277 
3278  return;
3279 }
3280 
3281 
3282 //=============================================================================
3283 // 1D FFT SineTransform Constructors
3284 //=============================================================================
3285 
3286 //-----------------------------------------------------------------------------
3287 // Constructor for doing only sine transforms.
3288 // Create a new FFT object of type SineTransform, with given real field
3289 // domain. Also specify which dimensions to sine transform along.
3290 //-----------------------------------------------------------------------------
3291 
3292 template <class T>
3294  const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
3295  const bool sineTransformDims[1U], const bool& compressTemps)
3296 : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain,
3297  sineTransformDims, compressTemps)
3298 {
3299  // Tau profiling
3300 
3301 
3302 
3303 
3304  // get axis length
3305  int length;
3306  length = rdomain[0].length();
3307 
3308  // get transform type for FFT Engine, compute normalization
3309  int transformType;
3310  transformType = FFTBase<1U,T>::sineFFT; // sine transform
3311  T& normFact = this->getNormFact();
3312  normFact = 1.0 / (2.0 * (length + 1));
3313 
3314  // set up FFT Engine
3315  this->getEngine().setup(1U, &transformType, &length);
3316  // set up the temporary fields
3317  setup();
3318 }
3319 
3320 //-----------------------------------------------------------------------------
3321 // Create a new FFT object of type SineTransform, with
3322 // given real domain. Default: sine transform along all dimensions.
3323 //-----------------------------------------------------------------------------
3324 
3325 template <class T>
3327  const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
3328  const bool& compressTemps)
3329 : FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain, compressTemps)
3330 {
3331  // Tau profiling
3332 
3333 
3334 
3335 
3336  // get axis length
3337  int length;
3338  length = rdomain[0].length();
3339 
3340  // get transform type for FFT Engine, compute normalization
3341  int transformType;
3342  transformType = FFTBase<1U,T>::sineFFT; // sine transform
3343  T& normFact = this->getNormFact();
3344  normFact = 1.0 / (2.0 * (length + 1));
3345 
3346  // set up FFT Engine
3347  this->getEngine().setup(1U, &transformType, &length);
3348  // set up the temporary fields
3349  setup();
3350 }
3351 
3352 //-----------------------------------------------------------------------------
3353 // setup performs all the initializations necessary after the transform
3354 // directions have been specified.
3355 //-----------------------------------------------------------------------------
3356 
3357 template <class T>
3358 void
3360 
3361  // Tau profiling
3362 
3363 
3364 
3365 
3366  const Domain_t& domain = this->getDomain(); // get real Field domain
3367 
3368  // generate temporary field layout
3369  tempRLayouts_m = new Layout_t(domain[0], PARALLEL, 1);
3370  // create temporary real Field
3371  tempRFields_m = new RealField_t(*tempRLayouts_m);
3372  // If user requests no intermediate compression, uncompress right now:
3373  if (!this->compressTemps()) tempRFields_m->Uncompress();
3374 
3375  return;
3376 }
3377 
3378 //-----------------------------------------------------------------------------
3379 // Destructor
3380 //-----------------------------------------------------------------------------
3381 
3382 template <class T>
3384 
3385  // Tau profiling
3386 
3387 
3388 
3389 
3390  // delete temporary field and layout
3391  delete tempRFields_m;
3392  delete tempRLayouts_m;
3393 }
3394 
3395 //-----------------------------------------------------------------------------
3396 // Sine FFT only; separate input and output fields, direction is +1 or -1
3397 //-----------------------------------------------------------------------------
3398 
3399 template <class T>
3400 void
3402  int direction,
3405  const bool& constInput)
3406 {
3407  // indicate we're doing another FFT
3408  //INCIPPLSTAT(incFFTs);
3409 
3410  // Check domain of incoming Fields
3411  const Layout_t& in_layout = f.getLayout();
3412  const Domain_t& in_dom = in_layout.getDomain();
3413  const Layout_t& out_layout = g.getLayout();
3414  const Domain_t& out_dom = out_layout.getDomain();
3415  PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
3416  this->checkDomain(this->getDomain(),out_dom), true);
3417 
3418  // Field* management aid
3419  RealField_t* tempR = &f;
3420  // Local work array passed to FFT:
3421  T* localdataR;
3422 
3423 
3424  // Now do the serial transform along this dimension:
3425 
3426  // get the domain for comparison
3427  const Domain_t& temp_dom = tempRLayouts_m->getDomain();
3428  bool skipTranspose = false;
3429  // we might be able
3430  // to skip the first temporary and just use f
3431  if (!constInput) {
3432  // check that zeroth axis is the same, has one vnode
3433  // and that there are no guard cells
3434  skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
3435  (in_dom[0].length() == temp_dom[0].length()) &&
3436  (in_layout.numVnodes() == 1) &&
3437  (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3438  }
3439 
3440  bool skipFinal = false;
3441  // we might be able
3442  // to skip the last temporary and transpose right into g
3443 
3444  // check that zeroth axis is the same, has one vnode
3445  // and that there are no guard cells
3446  skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
3447  (out_dom[0].length() == temp_dom[0].length()) &&
3448  (out_layout.numVnodes() == 1) &&
3449  (g.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3450 
3451  if (!skipTranspose) {
3452  // assign to Field with proper layout
3453  (*tempRFields_m) = (*tempR);
3454  tempR = tempRFields_m; // Field* management aid
3455  }
3456  if (skipFinal) {
3457  // we can skip the last temporary field
3458  // so do the transpose here using g instead
3459 
3460  // assign to Field with proper layout
3461  g = (*tempR);
3462 
3463  // Compress out previous iterate's storage:
3464  if (this->compressTemps() && tempR != &f) *tempR = 0;
3465  tempR = &g; // Field* management aid
3466  }
3467 
3468 
3469 
3470  // There should be just one LField.
3471  typename RealField_t::const_iterator_if l_i = tempR->begin_if();
3472  if (l_i != tempR->end_if()) {
3473 
3474  // Get the LField
3475  RealLField_t* ldf = (*l_i).second.get();
3476  // make sure we are uncompressed
3477  ldf->Uncompress();
3478  // get the raw data pointer
3479  localdataR = ldf->getP();
3480 
3481  // Do the 1D FFT:
3482  this->getEngine().callFFT(0, direction, localdataR);
3483  }
3484 
3485 
3486  // skip final assignment and compress if we used g as final temporary
3487  if (tempR != &g) {
3488 
3489  // Now assign into output Field, and compress last temp's storage:
3490  g = (*tempR);
3491  if (this->compressTemps() && tempR != &f) *tempR = 0;
3492 
3493  }
3494 
3495  // Normalize:
3496  if (direction == +1) g = g * this->getNormFact();
3497 
3498  return;
3499 }
3500 
3501 //-----------------------------------------------------------------------------
3502 // Sine FFT only; in-place transform, direction is +1 or -1
3503 //-----------------------------------------------------------------------------
3504 
3505 template <class T>
3506 void
3508  int direction,
3510 {
3511  // indicate we're doing another FFT
3512  //INCIPPLSTAT(incFFTs);
3513 
3514  // Check domain of incoming Field
3515  const Layout_t& in_layout = f.getLayout();
3516  const Domain_t& in_dom = in_layout.getDomain();
3517  PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
3518 
3519  // Field* management aid
3520  RealField_t* tempR = &f;
3521  // Local work array passed to FFT:
3522  T* localdataR;
3523 
3524 
3525  // Now do the serial transform along this dimension:
3526 
3527  // get domain for comparison
3528  const Domain_t& temp_dom = tempRLayouts_m->getDomain();
3529  bool skipTranspose = false;
3530  // we might be able
3531  // to skip the transpose into the first temporary Field
3532 
3533  // check that zeroth axis is the same and is serial
3534  // and that there are no guard cells
3535  skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
3536  (in_dom[0].length() == temp_dom[0].length()) &&
3537  (in_layout.numVnodes() == 1) &&
3538  (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3539 
3540  bool skipFinal = false;
3541  // we might be able
3542  // to skip the last temporary and transpose right into f
3543 
3544  // check that zeroth axis is the same, has one vnode
3545  // and that there are no guard cells
3546  skipFinal = ( (in_dom[0].sameBase(temp_dom[0])) &&
3547  (in_dom[0].length() == temp_dom[0].length()) &&
3548  (in_layout.numVnodes() == 1) &&
3549  (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3550 
3551  if (!skipTranspose) {
3552  // assign to Field with proper layout
3553  (*tempRFields_m) = (*tempR);
3554 
3555  tempR = tempRFields_m; // Field* management aid
3556  }
3557  if (skipFinal) {
3558  // we can skip the last temporary field
3559  // so do the transpose here using f instead
3560 
3561  // assign to Field with proper layout
3562  f = (*tempR);
3563 
3564  // Compress out previous iterate's storage:
3565  if (this->compressTemps() && tempR != &f) *tempR = 0;
3566  tempR = &f; // Field* management aid
3567  }
3568 
3569 
3570 
3571  // There should be just one LField.
3572  typename RealField_t::const_iterator_if l_i = tempR->begin_if();
3573  if (l_i != tempR->end_if()) {
3574 
3575  // Get the LField
3576  RealLField_t* ldf = (*l_i).second.get();
3577  // make sure we are uncompressed
3578  ldf->Uncompress();
3579  // get the raw data pointer
3580  localdataR = ldf->getP();
3581 
3582  // Do the 1D FFT:
3583  this->getEngine().callFFT(0, direction, localdataR);
3584  }
3585 
3586 
3587  // skip final assignment and compress if we used g as final temporary
3588  if (tempR != &f) {
3589 
3590  // Now assign into output Field, and compress last temp's storage:
3591  f = (*tempR);
3592  if (this->compressTemps()) *tempR = 0;
3593 
3594  }
3595 
3596  // Normalize:
3597  if (direction == +1) f = f * this->getNormFact();
3598 
3599  return;
3600 }
3601 
3602 // vi: set et ts=4 sw=4 sts=4:
3603 // Local Variables:
3604 // mode:c
3605 // c-basic-offset: 4
3606 // indent-tabs-mode:nil
3607 // End:
static int getNodes()
Definition: IpplInfo.cpp:773
void setup(unsigned numTransformDims, const int *transformTypes, const int *axisLengths)
Definition: fftpack_FFT.h:153
#define FFTDBG(x)
Definition: FFT.hpp:23
RegionLayout< T, Dim, Mesh > & getLayout()
Definition: rbendmap.h:8
unsigned activeDimension(unsigned d) const
get dimension number from list of transformed dimensions
Definition: FFTBase.h:252
const Domain_t & getDomain(void) const
get our domain
Definition: FFTBase.h:160
bool transformDim(unsigned d) const
query whether this dimension is to be transformed
Definition: FFTBase.h:238
NDIndex< Dim > Domain_t
Definition: FFTBase.h:60
static int myNode()
Definition: IpplInfo.cpp:794
FFTBase< 1U, T >::Domain_t Domain_t
Definition: FFT.h:913
int transVnodes() const
Definition: FFTBase.h:125
FLieGenerator< T, N > real(const FLieGenerator< std::complex< T >, N > &)
Take real part of a complex generator.
#define PAssert_LT(a, b)
Definition: PAssert.h:121
FFTBase< 1U, T >::Domain_t Domain_t
Definition: FFT.h:327
#define PAssert_GE(a, b)
Definition: PAssert.h:124
unsigned numTransformDims(void) const
query number of transform dimensions
Definition: FFTBase.h:148
#define PAssert_EQ(a, b)
Definition: PAssert.h:119
FFTBase< 1U, T >::Domain_t Domain_t
Definition: FFT.h:589
InternalFFT_t & getEngine(void)
access the internal FFT Engine
Definition: FFTBase.h:154
Definition: FFT.h:52
static MPI_Comm getComm()
Definition: IpplInfo.h:178
Precision_t & getNormFact(void)
get the FFT normalization factor
Definition: FFTBase.h:157
Definition: FFT.h:30
#define PAssert_GT(a, b)
Definition: PAssert.h:123
void callFFT(unsigned transformDim, int direction, Complex_t *data)
#define PAssert(c)
Definition: PAssert.h:117
bool compressTemps(void) const
do we compress temps?
Definition: FFTBase.h:166
e_dim_tag
Definition: FieldLayout.h:55
#define PInsist(c, m)
Definition: PAssert.h:135
const unsigned Dim
bool checkDomain(const Domain_t &dom1, const Domain_t &dom2) const
compare indexes of two domains
Definition: FFTBase.h:269
ParticleSpatialLayout< double, 3, Mesh_t > Layout_t
Definition: PBunchDefs.h:43
FLieGenerator< T, N > imag(const FLieGenerator< std::complex< T >, N > &)
Take imaginary part of a complex generator.
Definition: Inform.h:41
Inform & endl(Inform &inf)
Definition: Inform.cpp:42