OPAL (Object Oriented Parallel Accelerator Library) 2022.1
OPAL
FFT.hpp
Go to the documentation of this file.
1//
2// IPPL FFT
3//
4// Copyright (c) 2008-2018
5// Paul Scherrer Institut, Villigen PSI, Switzerland
6// All rights reserved.
7//
8// OPAL is licensed under GNU GPL version 3.
9//
10
15#include "FFT/FFT.h"
17#include "Field/BareField.h"
18//#include "Utility/IpplStats.h"
19
20//=============================================================================
21// FFT CCTransform Constructors
22//=============================================================================
23
29template <size_t Dim, class T>
31 const typename FFT<CCTransform,Dim,T>::Domain_t& cdomain,
32 const bool transformTheseDims[Dim],
33 const bool& compressTemps)
34: FFTBase<Dim,T>(FFT<CCTransform,Dim,T>::ccFFT, cdomain,
35 transformTheseDims, compressTemps)
36{
37
38 // construct array of axis lengths
39 size_t nTransformDims = this->numTransformDims();
40 int* lengths = new int[nTransformDims];
41 size_t d;
42 for (d=0; d<nTransformDims; ++d)
43 lengths[d] = cdomain[this->activeDimension(d)].length();
44
45 // construct array of transform types for FFT Engine, compute normalization
46 int* transformTypes = new int[nTransformDims];
47 T& normFact = this->getNormFact();
48 normFact = 1.0;
49 for (d=0; d<nTransformDims; ++d) {
50 transformTypes[d] = FFTBase<Dim,T>::ccFFT; // all transforms are complex-to-complex
51 normFact /= lengths[d];
52 }
53
54 // set up FFT Engine
55 this->getEngine().setup(nTransformDims, transformTypes, lengths);
56 delete [] transformTypes;
57 delete [] lengths;
58 // set up the temporary fields
59 setup();
60}
61
62
67template <size_t Dim, class T>
68void
70{
71 // Tau profiling
72
73
74 size_t d, activeDim;
75 size_t nTransformDims = this->numTransformDims();
76 // Set up the arrays of temporary Fields and FieldLayouts:
77 e_dim_tag serialParallel[Dim]; // Specifies SERIAL, PARALLEL dims in temp
78 // make zeroth dimension always SERIAL
79 serialParallel[0] = SERIAL;
80 // all other dimensions parallel
81 for (d=1; d<Dim; ++d)
82 serialParallel[d] = PARALLEL;
83
84 tempLayouts_m = new Layout_t*[nTransformDims];
85 tempFields_m = new ComplexField_t*[nTransformDims];
86
87 // loop over transform dimensions
88 for (size_t dim=0; dim<nTransformDims; ++dim) {
89 // get number of dimension to be transformed
90 activeDim = this->activeDimension(dim);
91 // Get input Field's domain
92 const Domain_t& ndic = this->getDomain();
93 // make new domain with permuted Indexes, activeDim first
94 Domain_t ndip;
95 ndip[0] = ndic[activeDim];
96 for (d=1; d<Dim; ++d) {
97 size_t nextDim = activeDim + d;
98 if (nextDim >= Dim) nextDim -= Dim;
99 ndip[d] = ndic[nextDim];
100 }
101 // generate temporary field layout
102 tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
103 // generate temporary Field
104 tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
105 // If user requests no intermediate compression, uncompress right now:
106 if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
107 }
108
109 return;
110}
111
112//-----------------------------------------------------------------------------
113// Destructor
114//-----------------------------------------------------------------------------
115
116template <size_t Dim, class T>
118
119 // Tau profiling
120
121 /*
122 #ifdef IPPL_OPENCL
123 base.ocl_cleanUp();
124 #endif
125 */
126
127 // delete arrays of temporary fields and field layouts
128 size_t nTransformDims = this->numTransformDims();
129 for (size_t d=0; d<nTransformDims; ++d) {
130 delete tempFields_m[d];
131 delete tempLayouts_m[d];
132 }
133 delete [] tempFields_m;
134 delete [] tempLayouts_m;
135}
136
137
138//-----------------------------------------------------------------------------
139// do the CC FFT; separate input and output fields
140//-----------------------------------------------------------------------------
141
142template <size_t Dim, class T>
143void
145 int direction,
148 const bool& constInput)
149{
150 // indicate we're doing another FFT
151 //INCIPPLSTAT(incFFTs);
152
153 // Check domain of incoming Fields
154 const Layout_t& in_layout = f.getLayout();
155 const Domain_t& in_dom = in_layout.getDomain();
156 const Layout_t& out_layout = g.getLayout();
157 const Domain_t& out_dom = out_layout.getDomain();
158 PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
159 this->checkDomain(this->getDomain(),out_dom), true);
160
161 // Common loop iterate and other vars:
162 size_t d;
163 int idim; // idim loops over the number of transform dims.
164 int begdim, enddim; // beginning and end of transform dim loop
165 size_t nTransformDims = this->numTransformDims();
166 // Field* for temp Field management:
167 ComplexField_t* temp = &f;
168 // Local work array passed to FFT:
169 Complex_t* localdata;
170
171 // Loop over the dimensions be transformed:
172 begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
173 enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
174 for (idim = begdim; idim != enddim; idim += direction) {
175
176 // Now do the serial transforms along this dimension:
177
178 bool skipTranspose = false;
179 // if this is the first transform dimension, we might be able
180 // to skip the transpose into the first temporary Field
181 if (idim == begdim && !constInput) {
182 // get domain for comparison
183 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
184 // check that zeroth axis is the same and is serial
185 // and that there are no guard cells
186 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
187 (in_dom[0].length() == first_dom[0].length()) &&
188 (in_layout.getDistribution(0) == SERIAL) &&
189 (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
190 }
191
192 // if this is the last transform dimension, we might be able
193 // to skip the last temporary and transpose right into g
194 if (idim == enddim-direction) {
195 // get the domain for comparison
196 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
197 // check that zeroth axis is the same and is serial
198 // and that there are no guard cells
199 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
200 (out_dom[0].length() == last_dom[0].length()) &&
201 (out_layout.getDistribution(0) == SERIAL) &&
202 (g.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
203 }
204
205 if (!skipTranspose) {
206 // transpose and permute to Field with transform dim first
207 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
208 (*temp)[temp->getLayout().getDomain()];
209
210 // Compress out previous iterate's storage:
211 if (this->compressTemps() && temp != &f) *temp = 0;
212 temp = tempFields_m[idim]; // Field* management aid
213 }
214 else if (idim == enddim-direction && temp != &g) {
215 // last transform and we can skip the last temporary field
216 // so do the transpose here using g instead
217
218 // transpose and permute to Field with transform dim first
219 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
220
221 // Compress out previous iterate's storage:
222 if (this->compressTemps() && temp != &f) *temp = 0;
223 temp = &g; // Field* management aid
224 }
225
226 // Loop over all the Vnodes, working on the LField in each.
227 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
228 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
229
230 // Get the LField
231 ComplexLField_t* ldf = (*l_i).second.get();
232 // make sure we are uncompressed
233 ldf->Uncompress();
234 // get the raw data pointer
235 localdata = ldf->getP();
236
237 // Do 1D complex-to-complex FFT's on all the strips in the LField:
238 int nstrips = 1, length = ldf->size(0);
239 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
240 for (int istrip=0; istrip<nstrips; ++istrip) {
241 // Do the 1D FFT:
242 this->getEngine().callFFT(idim, direction, localdata);
243 // advance the data pointer
244 localdata += length;
245 } // loop over 1D strips
246 } // loop over all the LFields
247
248 } // loop over all transformed dimensions
249
250 // skip final assignment and compress if we used g as final temporary
251 if (temp != &g) {
252
253 // Now assign into output Field, and compress last temp's storage:
254 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
255 if (this->compressTemps() && temp != &f) *temp = 0;
256
257 }
258
259 // Normalize:
260 if (direction == +1)
261 g *= Complex_t(this->getNormFact(), 0.0);
262
263 return;
264}
265
266template <size_t Dim, class T>
267void
269 int direction,
271{
272
273 // indicate we're doing another FFT
274 // INCIPPLSTAT(incFFTs);
275
276 // Check domain of incoming Field
277 const Layout_t& in_layout = f.getLayout();
278 const Domain_t& in_dom = in_layout.getDomain();
279 PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
280
281 // Common loop iterate and other vars:
282 size_t d;
283 int idim; // idim loops over the number of transform dims.
284 int begdim, enddim; // beginning and end of transform dim loop
285 size_t nTransformDims = this->numTransformDims();
286 // Field* for temp Field management:
287 ComplexField_t* temp = &f;
288 // Local work array passed to FFT:
289 Complex_t* localdata;
290
291 // Loop over the dimensions be transformed:
292 begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
293 enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
294 for (idim = begdim; idim != enddim; idim += direction) {
295
296 // Now do the serial transforms along this dimension:
297
298 bool skipTranspose = false;
299 // if this is the first transform dimension, we might be able
300 // to skip the transpose into the first temporary Field
301 if (idim == begdim) {
302 // get domain for comparison
303 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
304 // check that zeroth axis is the same and is serial
305 // and that there are no guard cells
306 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
307 (in_dom[0].length() == first_dom[0].length()) &&
308 (in_layout.getDistribution(0) == SERIAL) &&
309 (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
310 }
311
312 // if this is the last transform dimension, we might be able
313 // to skip the last temporary and transpose right into f
314 if (idim == enddim-direction) {
315 // get domain for comparison
316 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
317 // check that zeroth axis is the same and is serial
318 // and that there are no guard cells
319 skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
320 (in_dom[0].length() == last_dom[0].length()) &&
321 (in_layout.getDistribution(0) == SERIAL) &&
322 (f.getGC() == FFT<CCTransform,Dim,T>::nullGC) );
323 }
324
325 if (!skipTranspose) {
326 // transpose and permute to Field with transform dim first
327 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
328 (*temp)[temp->getLayout().getDomain()];
329
330 // Compress out previous iterate's storage:
331 if (this->compressTemps() && temp != &f) *temp = 0;
332 temp = tempFields_m[idim]; // Field* management aid
333 }
334 else if (idim == enddim-direction && temp != &f) {
335 // last transform and we can skip the last temporary field
336 // so do the transpose here using f instead
337
338 // transpose and permute to Field with transform dim first
339 f[in_dom] = (*temp)[temp->getLayout().getDomain()];
340
341 // Compress out previous iterate's storage:
342 if (this->compressTemps()) *temp = 0;
343 temp = &f; // Field* management aid
344 }
345
346 // Loop over all the Vnodes, working on the LField in each.
347 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
348 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
349
350 // Get the LField
351 ComplexLField_t* ldf = (*l_i).second.get();
352 // make sure we are uncompressed
353 ldf->Uncompress();
354 // get the raw data pointer
355 localdata = ldf->getP();
356
357 // Do 1D complex-to-complex FFT's on all the strips in the LField:
358 int nstrips = 1, length = ldf->size(0);
359 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
360 for (int istrip=0; istrip<nstrips; ++istrip) {
361 // Do the 1D FFT:
362 this->getEngine().callFFT(idim, direction, localdata);
363 // advance the data pointer
364 localdata += length;
365 } // loop over 1D strips
366 } // loop over all the LFields
367
368
369 } // loop over all transformed dimensions
370
371 // skip final assignment and compress if we used f as final temporary
372 if (temp != &f) {
373
374 // Now assign back into original Field, and compress last temp's storage:
375 f[in_dom] = (*temp)[temp->getLayout().getDomain()];
376 if (this->compressTemps()) *temp = 0;
377
378 }
379
380 // Normalize:
381 if (direction == +1)
382 f *= Complex_t(this->getNormFact(), 0.0);
383
384 return;
385}
386
387//=============================================================================
388// 1D FFT CCTransform Constructors
389//=============================================================================
390
391//-----------------------------------------------------------------------------
392// Create a new FFT object of type CCTransform, with a
393// given domain. Also specify which dimensions to transform along.
394//-----------------------------------------------------------------------------
395
396template <class T>
398 const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
399 const bool transformTheseDims[1U], const bool& compressTemps)
400: FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain,
401 transformTheseDims, compressTemps)
402{
403
404 // Tau profiling
405
406
407
408
409 size_t nTransformDims = 1U;
410 // get axis length
411 int length;
412 length = cdomain[0].length();
413
414 // get transform type for FFT Engine, compute normalization
415 int transformType;
416 transformType = FFTBase<1U,T>::ccFFT; // all transforms are complex-to-complex
417 T& normFact = this->getNormFact();
418 normFact = 1.0 / length;
419
420 // set up FFT Engine
421 this->getEngine().setup(nTransformDims, &transformType, &length);
422 // set up the temporary fields
423 setup();
424}
425
426//-----------------------------------------------------------------------------
427// Create a new FFT object of type CCTransform, with a
428// given domain. Default case of transforming along all dimensions.
429//-----------------------------------------------------------------------------
430
431template <class T>
433 const typename FFT<CCTransform,1U,T>::Domain_t& cdomain,
434 const bool& compressTemps)
435: FFTBase<1U,T>(FFT<CCTransform,1U,T>::ccFFT, cdomain, compressTemps)
436{
437
438 // Tau profiling
439
440 // get axis length
441 int length;
442 length = cdomain[0].length();
443
444 // get transform type for FFT Engine, compute normalization
445 int transformType;
446 transformType = FFTBase<1U,T>::ccFFT; // all transforms are complex-to-complex
447 T& normFact = this->getNormFact();
448 normFact = 1.0 / length;
449
450 // set up FFT Engine
451 this->getEngine().setup(1U, &transformType, &length);
452 // set up the temporary fields
453 setup();
454}
455
456//-----------------------------------------------------------------------------
457// setup performs all the initializations necessary after the transform
458// directions have been specified.
459//-----------------------------------------------------------------------------
460
461template <class T>
462void
464{
465 // Tau profiling
466
467 // Get input Field's domain
468 const Domain_t& ndic = this->getDomain();
469 // generate temporary field layout
470 tempLayouts_m = new Layout_t(ndic[0], PARALLEL, 1);
471 // generate temporary Field
472 tempFields_m = new ComplexField_t(*tempLayouts_m);
473 // If user requests no intermediate compression, uncompress right now:
474 if (!this->compressTemps()) tempFields_m->Uncompress();
475
476 return;
477}
478
479//-----------------------------------------------------------------------------
480// Destructor
481//-----------------------------------------------------------------------------
482
483template <class T>
485
486 // Tau profiling
487
488
489
490
491 // delete temporary fields and field layouts
492 delete tempFields_m;
493 delete tempLayouts_m;
494}
495
496
497//-----------------------------------------------------------------------------
498// do the CC FFT; separate input and output fields
499//-----------------------------------------------------------------------------
500
501template <class T>
502void
504 int direction,
507 const bool& constInput)
508{
509
510 // indicate we're doing another FFT
511 // INCIPPLSTAT(incFFTs);
512
513 // Check domain of incoming Fields
514 const Layout_t& in_layout = f.getLayout();
515 const Domain_t& in_dom = in_layout.getDomain();
516 const Layout_t& out_layout = g.getLayout();
517 const Domain_t& out_dom = out_layout.getDomain();
518 PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
519 this->checkDomain(this->getDomain(),out_dom), true);
520
521 // Field* for temp Field management:
522 ComplexField_t* temp = &f;
523 // Local work array passed to FFT:
524 Complex_t* localdata;
525
526
527 // Now do the serial transforms along this dimension:
528
529 // get temp domain for comparison
530 const Domain_t& temp_dom = tempLayouts_m->getDomain();
531
532 bool skipTranspose = false;
533 // if this is the first transform dimension, we might be able
534 // to skip the transpose into the first temporary Field
535 if (!constInput) {
536 // check that zeroth axis is the same, has one vnode,
537 // and that there are no guard cells
538 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
539 (in_dom[0].length() == temp_dom[0].length()) &&
540 (in_layout.numVnodes() == 1) &&
541 (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
542 }
543
544 bool skipFinal;
545 // we might be able
546 // to skip the last temporary and transpose right into g
547
548 // check that zeroth axis is the same, has one vnode
549 // and that there are no guard cells
550 skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
551 (out_dom[0].length() == temp_dom[0].length()) &&
552 (out_layout.numVnodes() == 1) &&
553 (g.getGC() == FFT<CCTransform,1U,T>::nullGC) );
554
555 if (!skipTranspose) {
556 // assign to Field with proper layout
557 (*tempFields_m) = (*temp);
558 temp = tempFields_m; // Field* management aid
559 }
560 if (skipFinal) {
561 // we can skip the last temporary field
562 // so do the transpose here using g instead
563
564 // assign to Field with proper layout
565 g = (*temp);
566
567 // Compress out previous iterate's storage:
568 if (this->compressTemps() && temp != &f) *temp = 0;
569 temp = &g; // Field* management aid
570 }
571
572
573
574
575 // should be only one LField!
576 typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
577 if (l_i != temp->end_if()) {
578 // Get the LField
579 ComplexLField_t* ldf = (*l_i).second.get();
580 // make sure we are uncompressed
581 ldf->Uncompress();
582 // get the raw data pointer
583 localdata = ldf->getP();
584
585 // Do the 1D FFT:
586 this->getEngine().callFFT(0, direction, localdata);
587 }
588
589
590
591 // skip final assignment and compress if we used g as final temporary
592 if (temp != &g) {
593
594 // Now assign into output Field, and compress last temp's storage:
595 g = (*temp);
596 if (this->compressTemps() && temp != &f) *temp = 0;
597
598 }
599
600 // Normalize:
601 if (direction == +1)
602 g *= Complex_t(this->getNormFact(), 0.0);
603
604 return;
605}
606
607//-----------------------------------------------------------------------------
608// "in-place" FFT; specify +1 or -1 to indicate forward or inverse transform.
609//-----------------------------------------------------------------------------
610
611template <class T>
612void
614 int direction,
616{
617
618 // indicate we're doing another FFT
619 // INCIPPLSTAT(incFFTs);
620
621 // Check domain of incoming Field
622 const Layout_t& in_layout = f.getLayout();
623 const Domain_t& in_dom = in_layout.getDomain();
624 PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
625
626 // Field* for temp Field management:
627 ComplexField_t* temp = &f;
628 // Local work array passed to FFT:
629 Complex_t* localdata;
630
631
632 // Now do the serial transforms along this dimension:
633
634 // get domain for comparison
635 const Domain_t& temp_dom = tempLayouts_m->getDomain();
636
637 bool skipTranspose;
638 // we might be able
639 // to skip the transpose into the first temporary Field
640
641 // check that zeroth axis is the same, has one vnode,
642 // and that there are no guard cells
643 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
644 (in_dom[0].length() == temp_dom[0].length()) &&
645 (in_layout.numVnodes() == 1) &&
646 (f.getGC() == FFT<CCTransform,1U,T>::nullGC) );
647
648 if (!skipTranspose) {
649 // assign to Field with proper layout
650 (*tempFields_m) = (*temp);
651 temp = tempFields_m; // Field* management aid
652 }
653
654
655
656
657 // should be only one LField!
658 typename ComplexField_t::const_iterator_if l_i = temp->begin_if();
659 if (l_i != temp->end_if()) {
660 // Get the LField
661 ComplexLField_t* ldf = (*l_i).second.get();
662 // make sure we are uncompressed
663 ldf->Uncompress();
664 // get the raw data pointer
665 localdata = ldf->getP();
666
667 // Do the 1D FFT:
668 this->getEngine().callFFT(0, direction, localdata);
669 }
670
671
672
673 // skip final assignment and compress if we used f as final temporary
674 if (temp != &f) {
675
676 // Now assign back into original Field, and compress last temp's storage:
677 f = (*temp);
678 if (this->compressTemps()) *temp = 0;
679
680 }
681
682 // Normalize:
683 if (direction == +1)
684 f *= Complex_t(this->getNormFact(), 0.0);
685
686 return;
687}
688
689
690
691//=============================================================================
692// FFT RCTransform Constructors
693//=============================================================================
694
695//-----------------------------------------------------------------------------
696// Create a new FFT object of type RCTransform, with a
697// given domain. Also specify which dimensions to transform along.
698// Note that RC transform of a real array of length n results in a
699// complex array of length n/2+1.
700//-----------------------------------------------------------------------------
701
702template <size_t Dim, class T>
704 const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
705 const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
706 const bool transformTheseDims[Dim], const bool& compressTemps)
707: FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain,
708 transformTheseDims, compressTemps),
709 complexDomain_m(cdomain), serialAxes_m(1)
710{
711 // construct array of axis lengths
712 size_t nTransformDims = this->numTransformDims();
713 int* lengths = new int[nTransformDims];
714 size_t d;
715 for (d=0; d<nTransformDims; ++d)
716 lengths[d] = rdomain[this->activeDimension(d)].length();
717
718 // construct array of transform types for FFT Engine, compute normalization
719 int* transformTypes = new int[nTransformDims];
720 T& normFact = this->getNormFact();
721 normFact = 1.0;
722 transformTypes[0] = FFTBase<Dim,T>::rcFFT; // first transform is real-to-complex
723 normFact /= lengths[0];
724 for (d=1; d<nTransformDims; ++d) {
725 transformTypes[d] = FFTBase<Dim,T>::ccFFT; // all other transforms are complex-to-complex
726 normFact /= lengths[d];
727 }
728
729 // set up FFT Engine
730 this->getEngine().setup(nTransformDims, transformTypes, lengths);
731 delete [] transformTypes;
732 delete [] lengths;
733
734 // set up the temporary fields
735 setup();
736}
737
738//-----------------------------------------------------------------------------
739// Create a new FFT object of type RCTransform, with
740// given real and complex domains. Default: transform along all dimensions.
741//-----------------------------------------------------------------------------
742
743template <size_t Dim, class T>
745 const typename FFT<RCTransform,Dim,T>::Domain_t& rdomain,
746 const typename FFT<RCTransform,Dim,T>::Domain_t& cdomain,
747 const bool& compressTemps,
748 int serialAxes)
749: FFTBase<Dim,T>(FFT<RCTransform,Dim,T>::rcFFT, rdomain, compressTemps),
750 complexDomain_m(cdomain), serialAxes_m(serialAxes)
751{
752 // Tau profiling
753
754 // construct array of axis lengths
755 int lengths[Dim];
756 size_t d;
757 for (d=0; d<Dim; ++d)
758 lengths[d] = rdomain[d].length();
759
760 // construct array of transform types for FFT Engine, compute normalization
761 int transformTypes[Dim];
762 T& normFact = this->getNormFact();
763 normFact = 1.0;
764 transformTypes[0] = FFTBase<Dim,T>::rcFFT; // first transform is real-to-complex
765 normFact /= lengths[0];
766 for (d=1; d<Dim; ++d) {
767 transformTypes[d] = FFTBase<Dim,T>::ccFFT; // all other transforms are complex-to-complex
768 normFact /= lengths[d];
769 }
770
771 // set up FFT Engine
772 this->getEngine().setup(Dim, transformTypes, lengths);
773
774 // set up the temporary fields
775 setup();
776}
777
778//-----------------------------------------------------------------------------
779// setup performs all the initializations necessary after the transform
780// directions have been specified.
781//-----------------------------------------------------------------------------
782
783template <size_t Dim, class T>
784void
786
787 // Tau profiling
788
789
790
791
792 PAssert_GT(serialAxes_m, 0);
793 PAssert_LT((size_t) serialAxes_m, Dim);
794
795 size_t d, d2, activeDim;
796 size_t nTransformDims = this->numTransformDims();
797
798 // Set up the arrays of temporary Fields and FieldLayouts:
799
800 // make first dimension(s) always SERIAL, all other dimensions parallel
801 // for the real FFT; make first serialAxes_m axes serial for others
802 e_dim_tag serialParallel[Dim];
803 e_dim_tag NserialParallel[Dim];
804 for (d=0; d < Dim; ++d) {
805 serialParallel[d] = (d == 0 ? SERIAL : PARALLEL);
806 NserialParallel[d] = (d < (size_t) serialAxes_m ? SERIAL : PARALLEL);
807 }
808
809 // check that domain lengths agree between real and complex domains
810 const Domain_t& domain = this->getDomain();
811 activeDim = this->activeDimension(0);
812 bool match = true;
813 for (d=0; d<Dim; ++d) {
814 if (d == activeDim) {
815 // real array length n, complex array length n/2+1
816 if ( complexDomain_m[d].length() !=
817 (domain[d].length()/2 + 1) ) match = false;
818 }
819 else {
820 // real and complex arrays should be same length for all other dims
821 if (complexDomain_m[d].length() != domain[d].length()) match = false;
822 }
823 }
824 PInsist(match,
825 "Domains provided for real and complex Fields are incompatible!");
826
827 // allocate arrays of temp fields and layouts for complex fields
828 tempLayouts_m = new Layout_t*[nTransformDims];
829 tempFields_m = new ComplexField_t*[nTransformDims];
830
831 // set up the single temporary real field, with first dim serial, others par
832
833 // make new domains with permuted Indexes, activeDim first
834 Domain_t ndip;
835 Domain_t ndipc;
836 ndip[0] = domain[activeDim];
837 ndipc[0] = complexDomain_m[activeDim];
838 for (d=1; d<Dim; ++d) {
839 size_t nextDim = activeDim + d;
840 if (nextDim >= Dim) nextDim -= Dim;
841 ndip[d] = domain[nextDim];
842 ndipc[d] = complexDomain_m[nextDim];
843 }
844
845 // generate layout and object for temporary real field
846 tempRLayout_m = new Layout_t(ndip, serialParallel, this->transVnodes());
847 tempRField_m = new RealField_t(*tempRLayout_m);
848
849 // generate layout and object for first temporary complex Field
850 tempLayouts_m[0] = new Layout_t(ndipc, serialParallel, this->transVnodes());
851 tempFields_m[0] = new ComplexField_t(*tempLayouts_m[0]);
852
853 // determine the order in which dimensions will be transposed. Put
854 // the transposed dims first, and the others at the end.
855 int fftorder[Dim], tmporder[Dim];
856 int nofft = nTransformDims;
857 for (d=0; d < nTransformDims; ++d)
858 fftorder[d] = this->activeDimension(d);
859 for (d=0; d < Dim; ++d) {
860 // see if the dth dimension is one to transform
861 bool active = false;
862 for (d2=0; d2 < nTransformDims; ++d2) {
863 if (this->activeDimension(d2) == d) {
864 active = true;
865 break;
866 }
867 }
868
869 if (!active)
870 // no it is not; put it at the bottom of list
871 fftorder[nofft++] = d;
872 }
873
874 // But since the first FFT is done on a S,[P,P,...] field, permute
875 // the order of this to get the first activeDimension at the end.
876 nofft = fftorder[0];
877 for (d=0; d < (Dim - 1); ++d)
878 fftorder[d] = fftorder[d+1];
879 fftorder[Dim-1] = nofft;
880
881 // now construct the remaining temporary complex fields
882
883 // loop through and create actual permuted layouts, and also fields
884 size_t dim = 1; // already have one temp field
885 while (dim < nTransformDims) {
886
887 int sp;
888 for (sp=0; sp < serialAxes_m && dim < nTransformDims; ++sp, ++dim) {
889
890 // make new domain with permuted Indexes
891 for (d=0; d < Dim; ++d)
892 ndip[d] = complexDomain_m[fftorder[d]];
893
894 // generate layout and object for temporary complex Field
895 tempLayouts_m[dim] = new Layout_t(ndip, NserialParallel, this->transVnodes());
896 tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
897
898 // permute the fft order for the first 'serialAxes_m' axes
899 if (serialAxes_m > 1) {
900 tmporder[0] = fftorder[0];
901 for (d=0; d < (size_t) (serialAxes_m-1); ++d)
902 fftorder[d] = fftorder[d+1];
903 fftorder[serialAxes_m - 1] = tmporder[0];
904 }
905 }
906
907 // now, permute ALL the axes by serialAxes_m steps, to get the next
908 // set of axes in the first n serial slots
909 for (d=0; d < Dim; ++d)
910 tmporder[d] = fftorder[d];
911 for (d=0; d < Dim; ++d)
912 fftorder[d] = tmporder[(d + serialAxes_m) % Dim];
913 }
914}
915
916
917//-----------------------------------------------------------------------------
918// Destructor
919//-----------------------------------------------------------------------------
920
921template <size_t Dim, class T>
923
924 // Tau profiling
925
926
927 // delete temporary fields and layouts
928 size_t nTransformDims = this->numTransformDims();
929 for (size_t d=0; d<nTransformDims; ++d) {
930 delete tempFields_m[d];
931 delete tempLayouts_m[d];
932 }
933 delete [] tempFields_m;
934 delete [] tempLayouts_m;
935 delete tempRField_m;
936 delete tempRLayout_m;
937
938}
939
940template <size_t Dim, class T>
941void
943 int direction,
946 const bool& constInput)
947{
948 // indicate we're doing another fft
949 // incipplstat(incffts);
950
951 // check domain of incoming fields
952 const Layout_t& in_layout = f.getLayout();
953 const Domain_t& in_dom = in_layout.getDomain();
954 const Layout_t& out_layout = g.getLayout();
955 const Domain_t& out_dom = out_layout.getDomain();
956
957
958 PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
959 this->checkDomain(complexDomain_m,out_dom), true);
960
961 // common loop iterate and other vars:
962 size_t d;
963 size_t idim; // idim loops over the number of transform dims.
964 size_t nTransformDims = this->numTransformDims();
965
966 // handle first rc transform separately
967 idim = 0;
968
969 RealField_t* tempR = tempRField_m; // field* management aid
970 if (!constInput) {
971 // see if we can use input field f as a temporary
972 bool skipTemp = true;
973
974 // more rigorous match required here; check that layouts are identical
975 if ( !(in_layout == *tempRLayout_m) ) {
976 skipTemp = false;
977 } else {
978 // make sure distributions match
979 for (d=0; d<Dim; ++d)
980 if (in_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
981 skipTemp = false;
982
983 // make sure vnode counts match
984 if (in_layout.numVnodes() != tempRLayout_m->numVnodes())
985 skipTemp = false;
986
987 // also make sure there are no guard cells
988 if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
989 skipTemp = false;
990 }
991
992 // if we can skip using this temporary, set the tempr pointer to the
993 // original incoming field. otherwise, it will stay pointing at the
994 // temporary real field, and we'll need to do a transpose of the data
995 // from the original into the temporary.
996 if (skipTemp)
997 tempR = &f;
998 }
999
1000 // if we're not using input as a temporary ...
1001 if (tempR != &f) {
1002
1003
1004 // transpose AND PERMUTE TO REAL FIELD WITH TRANSFORM DIM FIRST
1005 (*tempR)[tempR->getDomain()] = f[in_dom];
1006
1007 }
1008
1009 // field* for temp field management:
1010 ComplexField_t* temp = tempFields_m[0];
1011
1012 // see if we can put final result directly into g. this is useful if
1013 // we're doing just a 1d fft of one dimension of a multi-dimensional field.
1014 if (nTransformDims == 1) { // only a single rc transform
1015 bool skipTemp = true;
1016
1017 // more rigorous match required here; check that layouts are identical
1018 if (!(out_layout == *tempLayouts_m[0])) {
1019 skipTemp = false;
1020 } else {
1021 for (d=0; d<Dim; ++d)
1022 if (out_layout.getDistribution(d) !=
1023 tempLayouts_m[0]->getDistribution(d))
1024 skipTemp = false;
1025
1026 if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
1027 skipTemp = false;
1028
1029 // also make sure there are no guard cells
1030 if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
1031 skipTemp = false;
1032
1033 // if we can skip using the temporary, set the pointer to the output
1034 // field for the first fft to the second provided field (g)
1035 if (skipTemp)
1036 temp = &g;
1037 }
1038 }
1039
1040 // loop over all the vnodes, working on the lfield in each.
1041 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
1042 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
1043 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
1044 // get the lfields
1045 RealLField_t* rldf = (*rl_i).second.get();
1046 ComplexLField_t* cldf = (*cl_i).second.get();
1047
1048 // make sure we are uncompressed
1049 rldf->Uncompress();
1050 cldf->Uncompress();
1051
1052 // get the raw data pointers
1053 T* localreal = rldf->getP();
1054 Complex_t* localcomp = cldf->getP();
1055
1056 // number of strips should be the same for real and complex lfields!
1057 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
1058 for (d=1; d<Dim; ++d)
1059 nstrips *= rldf->size(d);
1060
1061
1062 for (int istrip=0; istrip<nstrips; ++istrip) {
1063 // move the data into the complex strip, which is two reals longer
1064 for (int ilen=0; ilen<lengthreal; ilen+=2) {
1065 localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
1066 }
1067
1068 // do the 1d real-to-complex fft:
1069 // note that real-to-complex fft direction is always +1
1070 this->getEngine().callFFT(idim, +1, localcomp);
1071
1072 // advance the data pointers
1073 localreal += lengthreal;
1074 localcomp += lengthcomp;
1075 } // loop over 1d strips
1076
1077 } // loop over all the lfields
1078
1079 // compress temporary storage
1080 if (this->compressTemps() && tempR != &f)
1081 *tempR = 0;
1082
1083 // now proceed with the other complex-to-complex transforms
1084
1085 // local work array passed to fft:
1086 Complex_t* localdata;
1087
1088 // loop over the remaining dimensions to be transformed:
1089 for (idim = 1; idim < nTransformDims; ++idim) {
1090
1091 bool skipTranspose = false;
1092
1093 // if this is the last transform dimension, we might be able
1094 // to skip the last temporary and transpose right into g
1095 if (idim == nTransformDims-1) {
1096 // get the domain for comparison
1097 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
1098
1099 // make sure there are no guard cells, and that the first
1100 // axis matches what we expect and is serial. only need to
1101 // check first axis since we're just fft'ing that one dimension.
1102 skipTranspose = (g.getGC() == FFT<RCTransform,Dim,T>::nullGC &&
1103 out_dom[0].sameBase(last_dom[0]) &&
1104 out_dom[0].length() == last_dom[0].length() &&
1105 out_layout.getDistribution(0) == SERIAL);
1106 }
1107
1108 if (!skipTranspose) {
1109 // transpose and permute to field with transform dim first
1110 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
1111 (*temp)[temp->getLayout().getDomain()];
1112
1113 // compress out previous iterate's storage:
1114 if (this->compressTemps())
1115 *temp = 0;
1116 temp = tempFields_m[idim]; // field* management aid
1117
1118 } else if (idim == nTransformDims-1) {
1119 // last transform and we can skip the last temporary field
1120 // so do the transpose here using g instead
1121
1122 // transpose and permute to field with transform dim first
1123
1124 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
1125
1126 // compress out previous iterate's storage:
1127 if (this->compressTemps())
1128 *temp = 0;
1129 temp = &g; // field* management aid
1130
1131 }
1132
1133 // loop over all the vnodes, working on the lfield in each.
1134 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
1135 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
1136 // get the lfield
1137 ComplexLField_t* ldf = (*l_i).second.get();
1138
1139 // make sure we are uncompressed
1140 ldf->Uncompress();
1141
1142 // get the raw data pointer
1143 localdata = ldf->getP();
1144
1145 // do 1d complex-to-complex fft's on all the strips in the lfield:
1146 int nstrips = 1, length = ldf->size(0);
1147 for (d=1; d<Dim; ++d)
1148 nstrips *= ldf->size(d);
1149
1150 for (int istrip=0; istrip<nstrips; ++istrip) {
1151 // do the 1D FFT:
1152 //this->getEngine().callFFT(idim, direction, localdata);
1153 this->getEngine().callFFT(idim, +1, localdata);
1154
1155 // advance the data pointer
1156 localdata += length;
1157 } // loop over 1D strips
1158 } // loop over all the LFields
1159
1160 } // loop over all transformed dimensions
1161
1162
1163 // skip final assignment and compress if we used g as final temporary
1164 if (temp != &g) {
1165
1166
1167 // Now assign into output Field, and compress last temp's storage:
1168 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
1169
1170 if (this->compressTemps()) *temp = 0;
1171
1172 }
1173
1174 // Normalize:
1175 if (direction == +1) g = g * this->getNormFact();
1176
1177 // finish timing the whole mess
1178
1179}
1180//#endif
1181
1182//-----------------------------------------------------------------------------
1183// RC FFT; opposite direction, from complex to real
1184//-----------------------------------------------------------------------------
1185
1186template <size_t Dim, class T>
1187void
1189 int direction,
1192 const bool& constInput)
1193{
1194 // indicate we're doing another FFT
1195 // INCIPPLSTAT(incFFTs);
1196
1197 // Check domain of incoming Fields
1198 const Layout_t& in_layout = f.getLayout();
1199 const Domain_t& in_dom = in_layout.getDomain();
1200 const Layout_t& out_layout = g.getLayout();
1201 const Domain_t& out_dom = out_layout.getDomain();
1202 PAssert_EQ( this->checkDomain(complexDomain_m,in_dom) &&
1203 this->checkDomain(this->getDomain(),out_dom), true);
1204
1205 // Common loop iterate and other vars:
1206 size_t d;
1207 size_t idim; // idim loops over the number of transform dims.
1208 size_t nTransformDims = this->numTransformDims();
1209
1210 // proceed with the complex-to-complex transforms
1211
1212 // Field* for temp Field management:
1213 ComplexField_t* temp = &f;
1214
1215 // Local work array passed to FFT:
1216 Complex_t* localdata;
1217
1218 // Loop over all dimensions to be transformed except last one:
1219 for (idim = nTransformDims-1; idim != 0; --idim) {
1220
1221 // Now do the serial transforms along this dimension:
1222
1223 bool skipTranspose = false;
1224 // if this is the first transform dimension, we might be able
1225 // to skip the transpose into the first temporary Field
1226 if (idim == nTransformDims-1 && !constInput) {
1227 // get domain for comparison
1228 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
1229 // check that zeroth axis is the same and is serial
1230 // and that there are no guard cells
1231 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
1232 (in_dom[0].length() == first_dom[0].length()) &&
1233 (in_layout.getDistribution(0) == SERIAL) &&
1234 (f.getGC() == FFT<RCTransform,Dim,T>::nullGC) );
1235 }
1236
1237 if (!skipTranspose) {
1238 // transpose and permute to Field with transform dim first
1239 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
1240 (*temp)[temp->getLayout().getDomain()];
1241
1242 // Compress out previous iterate's storage:
1243 if (this->compressTemps() && temp != &f)
1244 *temp = 0;
1245 temp = tempFields_m[idim]; // Field* management aid
1246 }
1247
1248 // Loop over all the Vnodes, working on the LField in each.
1249 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
1250 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
1251
1252 // Get the LField
1253 ComplexLField_t* ldf = (*l_i).second.get();
1254
1255 // make sure we are uncompressed
1256 ldf->Uncompress();
1257
1258 // get the raw data pointer
1259 localdata = ldf->getP();
1260
1261 // Do 1D complex-to-complex FFT's on all the strips in the LField:
1262 int nstrips = 1, length = ldf->size(0);
1263 for (d=1; d<Dim; ++d)
1264 nstrips *= ldf->size(d);
1265
1266 for (int istrip=0; istrip<nstrips; ++istrip) {
1267 // Do the 1D FFT:
1268 //this->getEngine().callFFT(idim, direction, localdata);
1269 this->getEngine().callFFT(idim, -1, localdata);
1270
1271 // advance the data pointer
1272 localdata += length;
1273 } // loop over 1D strips
1274 } // loop over all the LFields
1275
1276 } // loop over all transformed dimensions
1277
1278 // handle last CR transform separately
1279 idim = 0;
1280
1281 // see if we can put final result directly into g
1282 RealField_t* tempR;
1283 bool skipTemp = true;
1284
1285 // more rigorous match required here; check that layouts are identical
1286 if (!(out_layout == *tempRLayout_m)) {
1287 skipTemp = false;
1288 } else {
1289 for (d=0; d<Dim; ++d)
1290 if (out_layout.getDistribution(d) != tempRLayout_m->getDistribution(d))
1291 skipTemp = false;
1292
1293 if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
1294 skipTemp = false;
1295
1296 // also make sure there are no guard cells
1297 if (!(g.getGC() == FFT<RCTransform,Dim,T>::nullGC))
1298 skipTemp = false;
1299 }
1300
1301 if (skipTemp)
1302 tempR = &g;
1303 else
1304 tempR = tempRField_m;
1305
1306 skipTemp = true;
1307 if (nTransformDims == 1 && !constInput) {
1308 // only one CR transform
1309 // see if we really need to transpose input data
1310 // more rigorous match required here; check that layouts are identical
1311 if (!(in_layout == *tempLayouts_m[0])) {
1312 skipTemp = false;
1313 } else {
1314 for (d=0; d<Dim; ++d)
1315 if (in_layout.getDistribution(d) !=
1316 tempLayouts_m[0]->getDistribution(d))
1317 skipTemp = false;
1318
1319 if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
1320 skipTemp = false;
1321
1322 // also make sure there are no guard cells
1323 if (!(f.getGC() == FFT<RCTransform,Dim,T>::nullGC))
1324 skipTemp = false;
1325 }
1326 } else { // cannot skip transpose
1327 skipTemp = false;
1328 }
1329
1330
1331 if (!skipTemp) {
1332 // transpose and permute to complex Field with transform dim first
1333 (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
1334 (*temp)[temp->getLayout().getDomain()];
1335
1336 // compress previous iterates storage
1337 if (this->compressTemps() && temp != &f)
1338 *temp = 0;
1339 temp = tempFields_m[0];
1340 }
1341
1342 // Loop over all the Vnodes, working on the LField in each.
1343 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
1344 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
1345 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
1346 // Get the LFields
1347 RealLField_t* rldf = (*rl_i).second.get();
1348 ComplexLField_t* cldf = (*cl_i).second.get();
1349
1350 // make sure we are uncompressed
1351 rldf->Uncompress();
1352 cldf->Uncompress();
1353
1354 // get the raw data pointers
1355 T* localreal = rldf->getP();
1356 Complex_t* localcomp = cldf->getP();
1357
1358 // number of strips should be the same for real and complex LFields!
1359 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
1360 for (d=1; d<Dim; ++d)
1361 nstrips *= rldf->size(d);
1362
1363 for (int istrip=0; istrip<nstrips; ++istrip) {
1364 // Do the 1D complex-to-real FFT:
1365 // note that complex-to-real FFT direction is always -1
1366 this->getEngine().callFFT(idim, -1, localcomp);
1367
1368 // move the data into the real strip, which is two reals shorter
1369 for (int ilen=0; ilen<lengthreal; ilen+=2) {
1370 localreal[ilen] = real(localcomp[ilen/2]);
1371 localreal[ilen+1] = imag(localcomp[ilen/2]);
1372 }
1373
1374 // advance the data pointers
1375 localreal += lengthreal;
1376 localcomp += lengthcomp;
1377 } // loop over 1D strips
1378 } // loop over all the LFields
1379
1380 // compress previous iterates storage
1381 if (this->compressTemps() && temp != &f)
1382 *temp = 0;
1383
1384 // skip final assignment and compress if we used g as final temporary
1385 if (tempR != &g) {
1386
1387
1388 // Now assign into output Field, and compress last temp's storage:
1389 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
1390
1391 if (this->compressTemps())
1392 *tempR = 0;
1393
1394 }
1395
1396 // Normalize:
1397 if (direction == +1) g = g * this->getNormFact();
1398
1399 // finish up timing the whole mess
1400}
1401
1402//=============================================================================
1403// 1D FFT RCTransform Constructors
1404//=============================================================================
1405
1406//-----------------------------------------------------------------------------
1407// Create a new FFT object of type RCTransform, with a
1408// given domain. Also specify which dimensions to transform along.
1409// Note that RC transform of a real array of length n results in a
1410// complex array of length n/2+1.
1411//-----------------------------------------------------------------------------
1412
1413template <class T>
1415 const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
1416 const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
1417 const bool transformTheseDims[1U], const bool& compressTemps)
1418: FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain,
1419 transformTheseDims, compressTemps),
1420 complexDomain_m(cdomain)
1421{
1422 size_t nTransformDims = 1U;
1423 // get axis length
1424 int length;
1425 length = rdomain[0].length();
1426
1427 // get transform type for FFT Engine, compute normalization
1428 int transformType;
1429 transformType = FFTBase<1U,T>::rcFFT; // first transform is real-to-complex
1430 T& normFact = this->getNormFact();
1431 normFact = 1.0 / length;
1432
1433 // set up FFT Engine
1434 this->getEngine().setup(nTransformDims, &transformType, &length);
1435 // set up the temporary fields
1436 setup();
1437}
1438
1439//-----------------------------------------------------------------------------
1440// Create a new FFT object of type RCTransform, with
1441// given real and complex domains. Default: transform along all dimensions.
1442//-----------------------------------------------------------------------------
1443
1444template <class T>
1446 const typename FFT<RCTransform,1U,T>::Domain_t& rdomain,
1447 const typename FFT<RCTransform,1U,T>::Domain_t& cdomain,
1448 const bool& compressTemps)
1449: FFTBase<1U,T>(FFT<RCTransform,1U,T>::rcFFT, rdomain, compressTemps),
1450 complexDomain_m(cdomain)
1451{
1452 // Tau profiling
1453
1454
1455
1456
1457 // get axis length
1458 int length;
1459 length = rdomain[0].length();
1460
1461 // get transform type for FFT Engine, compute normalization
1462 int transformType;
1463 transformType = FFTBase<1U,T>::rcFFT; // first transform is real-to-complex
1464 T& normFact = this->getNormFact();
1465 normFact = 1.0 / length;
1466
1467 // set up FFT Engine
1468 this->getEngine().setup(1U, &transformType, &length);
1469 // set up the temporary fields
1470 setup();
1471}
1472
1473//-----------------------------------------------------------------------------
1474// setup performs all the initializations necessary after the transform
1475// directions have been specified.
1476//-----------------------------------------------------------------------------
1477
1478template <class T>
1479void
1481
1482 // Tau profiling
1483
1484
1485
1486
1487 // check that domain lengths agree between real and complex domains
1488 const Domain_t& domain = this->getDomain();
1489 bool match = true;
1490 // real array length n, complex array length n/2+1
1491 if ( complexDomain_m[0].length() !=
1492 (domain[0].length()/2 + 1) ) match = false;
1493 PInsist(match,
1494 "Domains provided for real and complex Fields are incompatible!");
1495
1496 // generate layout for temporary real Field
1497 tempRLayout_m = new Layout_t(domain[0], PARALLEL, 1);
1498 // create temporary real Field
1499 this->tempRField_m = new RealField_t(*tempRLayout_m);
1500 // If user requests no intermediate compression, uncompress right now:
1501 if (!this->compressTemps()) this->tempRField_m->Uncompress();
1502
1503 // generate temporary field layout
1504 tempLayouts_m = new Layout_t(complexDomain_m[0], PARALLEL, 1);
1505 // create temporary complex Field
1506 tempFields_m = new ComplexField_t(*tempLayouts_m);
1507 // If user requests no intermediate compression, uncompress right now:
1508 if (!this->compressTemps()) this->tempFields_m->Uncompress();
1509
1510 return;
1511}
1512
1513//-----------------------------------------------------------------------------
1514// Destructor
1515//-----------------------------------------------------------------------------
1516
1517template <class T>
1519
1520 // Tau profiling
1521
1522
1523
1524
1525 // delete temporary fields and layouts
1526 delete tempFields_m;
1527 delete tempLayouts_m;
1528 delete this->tempRField_m;
1529 delete tempRLayout_m;
1530}
1531
1532//-----------------------------------------------------------------------------
1533// real-to-complex FFT; direction is +1 or -1
1534//-----------------------------------------------------------------------------
1535
1536template <class T>
1537void
1539 int direction,
1542 const bool& constInput)
1543{
1544
1545 // indicate we're doing another FFT
1546 //INCIPPLSTAT(incFFTs);
1547
1548 // Check domain of incoming Fields
1549 const Layout_t& in_layout = f.getLayout();
1550 const Domain_t& in_dom = in_layout.getDomain();
1551 const Layout_t& out_layout = g.getLayout();
1552 const Domain_t& out_dom = out_layout.getDomain();
1553 PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
1554 this->checkDomain(complexDomain_m,out_dom), true);
1555
1556 // Common loop iterate and other vars:
1557 RealField_t* tempR = this->tempRField_m; // Field* management aid
1558 if (!constInput) {
1559 // see if we can use input field f as a temporary
1560 bool skipTemp = true;
1561 // more rigorous match required here; check that layouts are identical
1562 if ( !(in_layout == *tempRLayout_m) ) skipTemp = false;
1563 if ( in_layout.getDistribution(0) !=
1564 tempRLayout_m->getDistribution(0) ) skipTemp = false;
1565 if ( in_layout.numVnodes() != tempRLayout_m->numVnodes() )
1566 skipTemp = false;
1567 // also make sure there are no guard cells
1568 if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
1569 if (skipTemp) tempR = &f;
1570 }
1571
1572 if (tempR != &f) { // not using input as a temporary
1573
1574 // assign to real Field with proper layout
1575 (*tempR) = f;
1576
1577 }
1578
1579 // Field* for temp Field management:
1580 ComplexField_t* temp = tempFields_m;
1581 // see if we can put final result directly into g
1582 bool skipFinal = true;
1583 // more rigorous match required here; check that layouts are identical
1584 if ( !(out_layout == *tempLayouts_m) ) skipFinal = false;
1585 if ( out_layout.getDistribution(0) !=
1586 tempLayouts_m->getDistribution(0) ) skipFinal = false;
1587 if ( out_layout.numVnodes() != tempLayouts_m->numVnodes() )
1588 skipFinal = false;
1589 // also make sure there are no guard cells
1590 if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
1591 if (skipFinal) temp = &g;
1592
1593
1594 // There should be just one vnode!
1595 typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
1596 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
1597 if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
1598 // Get the LFields
1599 RealLField_t* rldf = (*rl_i).second.get();
1600 ComplexLField_t* cldf = (*cl_i).second.get();
1601 // make sure we are uncompressed
1602 rldf->Uncompress();
1603 cldf->Uncompress();
1604 // get the raw data pointers
1605 T* localreal = rldf->getP();
1606 Complex_t* localcomp = cldf->getP();
1607
1608 int lengthreal = rldf->size(0);
1609 // move the data into the complex strip, which is two reals longer
1610 for (int ilen=0; ilen<lengthreal; ilen+=2)
1611 localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
1612 // Do the 1D real-to-complex FFT:
1613 // note that real-to-complex FFT direction is always +1
1614 this->getEngine().callFFT(0, +1, localcomp);
1615 }
1616
1617
1618
1619 // compress temporary storage
1620 if (this->compressTemps() && tempR != &f) *tempR = 0;
1621
1622
1623 // skip final assignment and compress if we used g as final temporary
1624 if (temp != &g) {
1625
1626 // Now assign into output Field, and compress last temp's storage:
1627 g = (*temp);
1628 if (this->compressTemps()) *temp = 0;
1629
1630 }
1631
1632 // Normalize:
1633 if (direction == +1) g = g * this->getNormFact();
1634
1635 return;
1636}
1637
1638//-----------------------------------------------------------------------------
1639// RC FFT; opposite direction, from complex to real
1640//-----------------------------------------------------------------------------
1641
1642template <class T>
1643void
1645 int direction,
1648 const bool& constInput)
1649{
1650
1651 // indicate we're doing another FFT
1652 //INCIPPLSTAT(incFFTs);
1653
1654 // Check domain of incoming Fields
1655 const Layout_t& in_layout = f.getLayout();
1656 const Domain_t& in_dom = in_layout.getDomain();
1657 const Layout_t& out_layout = g.getLayout();
1658 const Domain_t& out_dom = out_layout.getDomain();
1659 PAssert_EQ( this->checkDomain(complexDomain_m,in_dom) &&
1660 this->checkDomain(this->getDomain(),out_dom), true);
1661
1662 // Field* for temp Field management:
1663 ComplexField_t* temp = &f;
1664
1665 // see if we can put final result directly into g
1666 RealField_t* tempR;
1667 bool skipFinal = true;
1668 // more rigorous match required here; check that layouts are identical
1669 if ( !(out_layout == *tempRLayout_m) ) skipFinal = false;
1670 if ( out_layout.getDistribution(0) !=
1671 tempRLayout_m->getDistribution(0) ) skipFinal = false;
1672 if ( out_layout.numVnodes() != tempRLayout_m->numVnodes() )
1673 skipFinal = false;
1674 // also make sure there are no guard cells
1675 if (!(g.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipFinal = false;
1676 if (skipFinal)
1677 tempR = &g;
1678 else
1679 tempR = this->tempRField_m;
1680
1681 bool skipTemp = true;
1682 if (!constInput) {
1683 // only one CR transform
1684 // see if we really need to transpose input data
1685 // more rigorous match required here; check that layouts are identical
1686 if ( !(in_layout == *tempLayouts_m) ) skipTemp = false;
1687 if ( in_layout.getDistribution(0) !=
1688 tempLayouts_m->getDistribution(0) ) skipTemp = false;
1689 if ( in_layout.numVnodes() != tempLayouts_m->numVnodes() )
1690 skipTemp = false;
1691 // also make sure there are no guard cells
1692 if (!(f.getGC() == FFT<RCTransform,1U,T>::nullGC)) skipTemp = false;
1693 }
1694 else { // cannot skip transpose
1695 skipTemp = false;
1696 }
1697
1698
1699 if (!skipTemp) {
1700 // assign to complex Field with proper layout
1701 (*tempFields_m) = (*temp);
1702 // compress previous iterates storage
1703 if (this->compressTemps() && temp != &f) *temp = 0;
1704 temp = tempFields_m;
1705 }
1706
1707
1708
1709 // There should be just one vnode!
1710 typename RealField_t::const_iterator_if rl_i = tempR->begin_if();
1711 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
1712 if (rl_i != tempR->end_if() && cl_i != temp->end_if()) {
1713 // Get the LFields
1714 RealLField_t* rldf = (*rl_i).second.get();
1715 ComplexLField_t* cldf = (*cl_i).second.get();
1716 // make sure we are uncompressed
1717 rldf->Uncompress();
1718 cldf->Uncompress();
1719 // get the raw data pointers
1720 T* localreal = rldf->getP();
1721 Complex_t* localcomp = cldf->getP();
1722
1723 int lengthreal = rldf->size(0);
1724 // Do the 1D complex-to-real FFT:
1725 // note that complex-to-real FFT direction is always -1
1726 this->getEngine().callFFT(0, -1, localcomp);
1727 // move the data into the real strip, which is two reals shorter
1728 for (int ilen=0; ilen<lengthreal; ilen+=2) {
1729 localreal[ilen] = real(localcomp[ilen/2]);
1730 localreal[ilen+1] = imag(localcomp[ilen/2]);
1731 }
1732 }
1733
1734
1735
1736 // compress previous iterates storage
1737 if (this->compressTemps() && temp != &f) *temp = 0;
1738
1739
1740 // skip final assignment and compress if we used g as final temporary
1741 if (tempR != &g) {
1742
1743 // Now assign into output Field, and compress last temp's storage:
1744 g = (*tempR);
1745 if (this->compressTemps()) *tempR = 0;
1746
1747 }
1748
1749 // Normalize:
1750 if (direction == +1) g = g * this->getNormFact();
1751
1752 return;
1753}
1754
1755
1756//=============================================================================
1757// FFT SineTransform Constructors
1758//=============================================================================
1759
1760//-----------------------------------------------------------------------------
1761// Create a new FFT object of type SineTransform, with given real and
1762// complex field domains. Also specify which dimensions to transform along,
1763// and which of these are sine transforms.
1764// Note that RC transform of a real array of length n results in a
1765// complex array of length n/2+1.
1766//-----------------------------------------------------------------------------
1767
1768template <size_t Dim, class T>
1770 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
1771 const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
1772 const bool transformTheseDims[Dim], const bool sineTransformDims[Dim],
1773 const bool& compressTemps)
1774: FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
1775 transformTheseDims, compressTemps),
1776 complexDomain_m(&cdomain)
1777{
1778
1779 size_t d;
1780 // store which dimensions get sine transforms and count how many
1781 numSineTransforms_m = 0;
1782 for (d=0; d<Dim; ++d) {
1783 sineTransformDims_m[d] = sineTransformDims[d];
1784 if (sineTransformDims[d]) {
1785 PAssert_EQ(transformTheseDims[d], true); // should be marked as a transform dim
1786 ++numSineTransforms_m;
1787 }
1788 }
1789
1790 // construct array of axis lengths for all transform dims
1791 size_t nTransformDims = this->numTransformDims();
1792 int* lengths = new int[nTransformDims];
1793 for (d=0; d<nTransformDims; ++d)
1794 lengths[d] = rdomain[this->activeDimension(d)].length();
1795
1796 // construct array of transform types for FFT Engine, compute normalization
1797 int* transformTypes = new int[nTransformDims];
1798 T& normFact = this->getNormFact();
1799 normFact = 1.0;
1800 bool foundRC = false;
1801 for (d=0; d<nTransformDims; ++d) {
1802 if (sineTransformDims_m[this->activeDimension(d)]) {
1803 transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
1804 normFact /= (2.0 * (lengths[d] + 1));
1805 }
1806 else if (!foundRC) {
1807 transformTypes[d] = FFTBase<Dim,T>::rcFFT; // real-to-complex FFT
1808 normFact /= lengths[d];
1809 foundRC = true;
1810 }
1811 else {
1812 transformTypes[d] = FFTBase<Dim,T>::ccFFT; // complex-to-complex FFT
1813 normFact /= lengths[d];
1814 }
1815 }
1816
1817 // set up FFT Engine
1818 this->getEngine().setup(nTransformDims, transformTypes, lengths);
1819 delete [] transformTypes;
1820 delete [] lengths;
1821 // set up the temporary fields
1822 setup();
1823}
1824
1825//-----------------------------------------------------------------------------
1826// Create a new FFT object of type SineTransform, with
1827// given real and complex domains. Default: transform along all dimensions.
1828//-----------------------------------------------------------------------------
1829
1830template <size_t Dim, class T>
1832 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
1833 const typename FFT<SineTransform,Dim,T>::Domain_t& cdomain,
1834 const bool sineTransformDims[Dim], const bool& compressTemps)
1835: FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps),
1836 complexDomain_m(&cdomain)
1837{
1838
1839 size_t d;
1840 // store which dimensions get sine transforms and count how many
1841 numSineTransforms_m = 0;
1842 for (d=0; d<Dim; ++d) {
1843 sineTransformDims_m[d] = sineTransformDims[d];
1844 if (sineTransformDims[d]) ++numSineTransforms_m;
1845 }
1846
1847 // construct array of axis lengths for all transform dims
1848 int lengths[Dim];
1849 for (d=0; d<Dim; ++d)
1850 lengths[d] = rdomain[d].length();
1851
1852 // construct array of transform types for FFT Engine, compute normalization
1853 int transformTypes[Dim];
1854 T& normFact = this->getNormFact();
1855 normFact = 1.0;
1856 bool foundRC = false;
1857 for (d=0; d<Dim; ++d) {
1858 if (sineTransformDims_m[d]) {
1859 transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
1860 normFact /= (2.0 * (lengths[d] + 1));
1861 }
1862 else if (!foundRC) {
1863 transformTypes[d] = FFTBase<Dim,T>::rcFFT; // real-to-complex FFT
1864 normFact /= lengths[d];
1865 foundRC = true;
1866 }
1867 else {
1868 transformTypes[d] = FFTBase<Dim,T>::ccFFT; // complex-to-complex FFT
1869 normFact /= lengths[d];
1870 }
1871 }
1872
1873 // set up FFT Engine
1874 this->getEngine().setup(Dim, transformTypes, lengths);
1875 // set up the temporary fields
1876 setup();
1877}
1878
1879//-----------------------------------------------------------------------------
1880// Constructor for doing only sine transforms.
1881// Create a new FFT object of type SineTransform, with given real field
1882// domain. Also specify which dimensions to sine transform along.
1883//-----------------------------------------------------------------------------
1884
1885template <size_t Dim, class T>
1887 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain,
1888 const bool sineTransformDims[Dim], const bool& compressTemps)
1889: FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain,
1890 sineTransformDims, compressTemps)
1891{
1892 // Tau profiling
1893
1894
1895
1896
1897 // store which dimensions get sine transforms and how many
1898 numSineTransforms_m = this->numTransformDims();
1899 size_t d;
1900 for (d=0; d<Dim; ++d)
1901 sineTransformDims_m[d] = sineTransformDims[d];
1902
1903 // construct array of axis lengths
1904 int* lengths = new int[numSineTransforms_m];
1905 for (d=0; d<numSineTransforms_m; ++d)
1906 lengths[d] = rdomain[this->activeDimension(d)].length();
1907
1908 // construct array of transform types for FFT Engine, compute normalization
1909 int* transformTypes = new int[numSineTransforms_m];
1910 T& normFact = this->getNormFact();
1911 normFact = 1.0;
1912 for (d=0; d<numSineTransforms_m; ++d) {
1913 transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
1914 normFact /= (2.0 * (lengths[d] + 1));
1915 }
1916
1917 // set up FFT Engine
1918 this->getEngine().setup(numSineTransforms_m, transformTypes, lengths);
1919 delete [] transformTypes;
1920 delete [] lengths;
1921 // set up the temporary fields
1922 setup();
1923}
1924
1925//-----------------------------------------------------------------------------
1926// Create a new FFT object of type SineTransform, with
1927// given real domain. Default: sine transform along all dimensions.
1928//-----------------------------------------------------------------------------
1929
1930template <size_t Dim, class T>
1932 const typename FFT<SineTransform,Dim,T>::Domain_t& rdomain, const bool& compressTemps)
1933: FFTBase<Dim,T>(FFT<SineTransform,Dim,T>::sineFFT, rdomain, compressTemps)
1934{
1935 // Tau profiling
1936
1937
1938
1939
1940 size_t d;
1941 // store which dimensions get sine transforms and how many
1942 numSineTransforms_m = this->numTransformDims();
1943 for (d=0; d<Dim; ++d)
1944 sineTransformDims_m[d] = true;
1945
1946 // construct array of axis lengths
1947 int lengths[Dim];
1948 for (d=0; d<Dim; ++d)
1949 lengths[d] = rdomain[d].length();
1950
1951 // construct array of transform types for FFT Engine, compute normalization
1952 int transformTypes[Dim];
1953 T& normFact = this->getNormFact();
1954 normFact = 1.0;
1955 for (d=0; d<Dim; ++d) {
1956 transformTypes[d] = FFTBase<Dim,T>::sineFFT; // sine transform
1957 normFact /= (2.0 * (lengths[d] + 1));
1958 }
1959
1960 // set up FFT Engine
1961 this->getEngine().setup(Dim, transformTypes, lengths);
1962 // set up the temporary fields
1963 setup();
1964}
1965
1966//-----------------------------------------------------------------------------
1967// setup performs all the initializations necessary after the transform
1968// directions have been specified.
1969//-----------------------------------------------------------------------------
1970
1971template <size_t Dim, class T>
1972void
1974
1975 // Tau profiling
1976
1977
1978
1979
1980 size_t d, dim, activeDim = 0;
1981 size_t icount;
1982 Domain_t ndip;
1983 size_t nTransformDims = this->numTransformDims(); // total number of transforms
1984 const Domain_t& domain = this->getDomain(); // get real Field domain
1985
1986 // Set up the arrays of temporary Fields and FieldLayouts:
1987 e_dim_tag serialParallel[Dim]; // Specifies SERIAL, PARALLEL dims in temp
1988 // make zeroth dimension always SERIAL
1989 serialParallel[0] = SERIAL;
1990 // all other dimensions parallel
1991 for (d=1; d<Dim; ++d)
1992 serialParallel[d] = PARALLEL;
1993
1994 // do we have a real-to-complex transform to do or not?
1995 if (nTransformDims > numSineTransforms_m) { // have RC transform
1996
1997 PAssert(complexDomain_m); // This pointer should be initialized!
1998 // find first non-sine transform dimension; this is rc transform
1999 bool match = false;
2000 d=0;
2001 while (d<Dim && !match) {
2002 if (this->transformDim(d) && !sineTransformDims_m[d]) {
2003 activeDim = this->activeDimension(d);
2004 match = true;
2005 }
2006 ++d;
2007 }
2008 PAssert_EQ(match, true); // check that we found rc transform dimension
2009 // compare lengths of real and complex Field domains
2010 for (d=0; d<Dim; ++d) {
2011 if (d == activeDim) {
2012 // real array length n, complex array length n/2+1
2013 if ( (*complexDomain_m)[d].length() !=
2014 (domain[d].length()/2 + 1) ) match = false;
2015 }
2016 else {
2017 // real and complex arrays should be same length for all other dims
2018 if ( (*complexDomain_m)[d].length() !=
2019 domain[d].length() ) match = false;
2020 }
2021 }
2022 PInsist(match,
2023 "Domains provided for real and complex Fields are incompatible!");
2024
2025 // set up the real Fields first
2026 // we will have one for each sine transform, plus one for the rc transform
2027 tempRLayouts_m = new Layout_t*[numSineTransforms_m+1];
2028 tempRFields_m = new RealField_t*[numSineTransforms_m+1];
2029 // loop over the sine transform dimensions
2030 icount=0;
2031 for (dim=0; dim<numSineTransforms_m; ++dim, ++icount) {
2032 // get next dimension to be sine transformed
2033 while (!sineTransformDims_m[icount]) ++icount;
2034 PAssert_LT(icount, Dim); // check that icount is valid dimension
2035 // make new domain with permuted Indexes, icount first
2036 ndip[0] = domain[icount];
2037 for (d=1; d<Dim; ++d) {
2038 size_t nextDim = icount + d;
2039 if (nextDim >= Dim) nextDim -= Dim;
2040 ndip[d] = domain[nextDim];
2041 }
2042 // generate temporary field layout
2043 tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
2044 // create temporary real Field
2045 tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
2046 // If user requests no intermediate compression, uncompress right now:
2047 if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
2048 }
2049
2050 // build final real Field for rc transform along activeDim
2051 ndip[0] = domain[activeDim];
2052 for (d=1; d<Dim; ++d) {
2053 size_t nextDim = activeDim + d;
2054 if (nextDim >= Dim) nextDim -= Dim;
2055 ndip[d] = domain[nextDim];
2056 }
2057 // generate temporary field layout
2058 tempRLayouts_m[numSineTransforms_m] = new Layout_t(ndip, serialParallel,
2059 this->transVnodes());
2060 // create temporary real Field
2061 tempRFields_m[numSineTransforms_m] =
2062 new RealField_t(*tempRLayouts_m[numSineTransforms_m]);
2063 // If user requests no intermediate compression, uncompress right now:
2064 if (!this->compressTemps()) (*tempRFields_m[numSineTransforms_m]).Uncompress();
2065
2066 // now create the temporary complex Fields
2067 size_t numComplex = nTransformDims - numSineTransforms_m;
2068 // allocate arrays of temp fields and layouts
2069 tempLayouts_m = new Layout_t*[numComplex];
2070 tempFields_m = new ComplexField_t*[numComplex];
2071 icount=0; // reset counter
2072 for (dim=0; dim<numComplex; ++dim, ++icount) {
2073 // get next non-sine transform dimension
2074 while (!this->transformDim(icount) || sineTransformDims_m[icount]) ++icount;
2075 PAssert_LT(icount, Dim); // check that this is a valid dimension
2076 // make new domain with permuted Indexes, icount first
2077 ndip[0] = (*complexDomain_m)[icount];
2078 for (d=1; d<Dim; ++d) {
2079 size_t nextDim = icount + d;
2080 if (nextDim >= Dim) nextDim -= Dim;
2081 ndip[d] = (*complexDomain_m)[nextDim];
2082 }
2083 // generate temporary field layout
2084 tempLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
2085 // create temporary complex Field
2086 tempFields_m[dim] = new ComplexField_t(*tempLayouts_m[dim]);
2087 // If user requests no intermediate compression, uncompress right now:
2088 if (!this->compressTemps()) (*tempFields_m[dim]).Uncompress();
2089 }
2090
2091 }
2092 else { // sine transforms only
2093
2094 // set up the real Fields
2095 // we will have one for each sine transform
2096 tempRLayouts_m = new Layout_t*[numSineTransforms_m];
2097 tempRFields_m = new RealField_t*[numSineTransforms_m];
2098 // loop over the sine transform dimensions
2099 for (dim=0; dim<numSineTransforms_m; ++dim) {
2100 // get next dimension to be sine transformed
2101 activeDim = this->activeDimension(dim);
2102 // make new domain with permuted Indexes, activeDim first
2103 ndip[0] = domain[activeDim];
2104 for (d=1; d<Dim; ++d) {
2105 size_t nextDim = activeDim + d;
2106 if (nextDim >= Dim) nextDim -= Dim;
2107 ndip[d] = domain[nextDim];
2108 }
2109 // generate temporary field layout
2110 tempRLayouts_m[dim] = new Layout_t(ndip, serialParallel, this->transVnodes());
2111 // create temporary real Field
2112 tempRFields_m[dim] = new RealField_t(*tempRLayouts_m[dim]);
2113 // If user requests no intermediate compression, uncompress right now:
2114 if (!this->compressTemps()) (*tempRFields_m[dim]).Uncompress();
2115 }
2116
2117 }
2118
2119 return;
2120}
2121
2122//-----------------------------------------------------------------------------
2123// Destructor
2124//-----------------------------------------------------------------------------
2125
2126template <size_t Dim, class T>
2128
2129 // Tau profiling
2130
2131
2132
2133
2134 // delete temporary fields and layouts
2135 size_t d;
2136 size_t nTransformDims = this->numTransformDims();
2137 if (nTransformDims > numSineTransforms_m) {
2138 for (d=0; d<numSineTransforms_m+1; ++d) {
2139 delete tempRFields_m[d];
2140 delete tempRLayouts_m[d];
2141 }
2142 size_t numComplex = nTransformDims - numSineTransforms_m;
2143 for (d=0; d<numComplex; ++d) {
2144 delete tempFields_m[d];
2145 delete tempLayouts_m[d];
2146 }
2147 delete [] tempFields_m;
2148 delete [] tempLayouts_m;
2149 }
2150 else {
2151 for (d=0; d<numSineTransforms_m; ++d) {
2152 delete tempRFields_m[d];
2153 delete tempRLayouts_m[d];
2154 }
2155 }
2156 delete [] tempRFields_m;
2157 delete [] tempRLayouts_m;
2158}
2159
2160//-----------------------------------------------------------------------------
2161// Sine and RC FFT; separate input and output fields, direction is +1 or -1
2162//-----------------------------------------------------------------------------
2163
2164template <size_t Dim, class T>
2165void
2167 int direction,
2170 const bool& constInput)
2171{
2172
2173 // indicate we're doing another FFT
2174 //INCIPPLSTAT(incFFTs);
2175
2176 // Check domain of incoming Fields
2177 const Layout_t& in_layout = f.getLayout();
2178 const Domain_t& in_dom = in_layout.getDomain();
2179 const Layout_t& out_layout = g.getLayout();
2180 const Domain_t& out_dom = out_layout.getDomain();
2181 PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
2182 this->checkDomain(*complexDomain_m,out_dom), true );
2183
2184 // Common loop iterate and other vars:
2185 size_t d;
2186 int icount, activeDim;
2187 int idim; // idim loops over the number of transform dims.
2188 size_t nTransformDims = this->numTransformDims();
2189 // check that there is a real-to-complex transform to do
2190 PInsist(nTransformDims>numSineTransforms_m,
2191 "Wrong output Field type for real-to-real transform!!");
2192
2193 // first do all the sine transforms
2194
2195 // Field* management aid
2196 RealField_t* tempR = &f;
2197 // Local work array passed to FFT:
2198 T* localdataR;
2199
2200 // Loop over the dimensions to be sine transformed:
2201 icount = 0;
2202 activeDim = 0;
2203 for (idim = 0; idim != numSineTransforms_m; ++idim, ++icount, ++activeDim) {
2204
2205 // find next sine transform dim
2206 while (!sineTransformDims_m[icount]) {
2207 if (this->transformDim(icount)) ++activeDim;
2208 ++icount;
2209 }
2210 PAssert_LT(activeDim, Dim); // check that this is a valid dimension!
2211 // Now do the serial transforms along this dimension:
2212
2213 bool skipTranspose = false;
2214 // if this is the first transform dimension, we might be able
2215 // to skip the transpose into the first temporary Field
2216 if (idim == 0 && !constInput) {
2217 // get domain for comparison
2218 const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
2219 // check that zeroth axis is the same and is serial
2220 // and that there are no guard cells
2221 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
2222 (in_dom[0].length() == first_dom[0].length()) &&
2223 (in_layout.getDistribution(0) == SERIAL) &&
2224 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2225 }
2226
2227 if (!skipTranspose) {
2228 // transpose and permute to Field with transform dim first
2229 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
2230 (*tempR)[tempR->getLayout().getDomain()];
2231
2232 // Compress out previous iterate's storage:
2233 if (this->compressTemps() && tempR != &f) *tempR = 0;
2234 tempR = tempRFields_m[idim]; // Field* management aid
2235 }
2236
2237
2238
2239 // Loop over all the Vnodes, working on the LField in each.
2240 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
2241 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
2242
2243 // Get the LField
2244 RealLField_t* ldf = (*l_i).second.get();
2245 // make sure we are uncompressed
2246 ldf->Uncompress();
2247 // get the raw data pointer
2248 localdataR = ldf->getP();
2249
2250 // Do 1D real-to-real FFT's on all the strips in the LField:
2251 int nstrips = 1, length = ldf->size(0);
2252 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2253 for (int istrip=0; istrip<nstrips; ++istrip) {
2254 // Do the 1D FFT:
2255 this->getEngine().callFFT(activeDim, direction, localdataR);
2256 // advance the data pointer
2257 localdataR += length;
2258 } // loop over 1D strips
2259 } // loop over all the LFields
2260
2261 } // loop over all transformed dimensions
2262
2263 // now handle the RC transform separately
2264
2265 // find first non-sine transform dimension
2266 icount = 0;
2267 activeDim = 0;
2268 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2269 if (sineTransformDims_m[icount]) ++activeDim;
2270 ++icount;
2271 }
2272 PAssert_LT(activeDim, Dim); // check that this is a valid dimension!
2273
2274
2275 // transpose and permute to final real Field with transform dim first
2276 int last = numSineTransforms_m;
2277 (*tempRFields_m[last])[tempRLayouts_m[last]->getDomain()] =
2278 (*tempR)[tempR->getLayout().getDomain()];
2279
2280 // Compress out previous iterate's storage:
2281 if (this->compressTemps() && tempR != &f) *tempR = 0;
2282 tempR = tempRFields_m[last]; // Field* management aid
2283
2284
2285 // Field* for temp Field management:
2286 ComplexField_t* temp = tempFields_m[0];
2287 // see if we can put final result directly into g
2288 int numComplex = nTransformDims-numSineTransforms_m;
2289 if (numComplex == 1) { // only a single RC transform
2290 bool skipTemp = true;
2291 // more rigorous match required here; check that layouts are identical
2292 if ( !(out_layout == *tempLayouts_m[0]) ) skipTemp = false;
2293 for (d=0; d<Dim; ++d) {
2294 if ( out_layout.getDistribution(d) !=
2295 tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
2296 }
2297 if ( out_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
2298 skipTemp = false;
2299 // also make sure there are no guard cells
2300 if (!(g.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
2301 if (skipTemp) temp = &g;
2302 }
2303
2304
2305
2306 // Loop over all the Vnodes, working on the LField in each.
2307 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
2308 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
2309 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
2310 // Get the LFields
2311 RealLField_t* rldf = (*rl_i).second.get();
2312 ComplexLField_t* cldf = (*cl_i).second.get();
2313 // make sure we are uncompressed
2314 rldf->Uncompress();
2315 cldf->Uncompress();
2316 // get the raw data pointers
2317 T* localreal = rldf->getP();
2318 Complex_t* localcomp = cldf->getP();
2319
2320 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
2321 // number of strips should be the same for real and complex LFields!
2322 for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
2323 for (int istrip=0; istrip<nstrips; ++istrip) {
2324 // move the data into the complex strip, which is two reals longer
2325 for (int ilen=0; ilen<lengthreal; ilen+=2)
2326 localcomp[ilen/2] = Complex_t(localreal[ilen],localreal[ilen+1]);
2327 // Do the 1D real-to-complex FFT:
2328 // note that real-to-complex FFT direction is always +1
2329 this->getEngine().callFFT(activeDim, +1, localcomp);
2330 // advance the data pointers
2331 localreal += lengthreal;
2332 localcomp += lengthcomp;
2333 } // loop over 1D strips
2334 } // loop over all the LFields
2335
2336
2337 // now proceed with the other complex-to-complex transforms
2338
2339 // Local work array passed to FFT:
2340 Complex_t* localdata;
2341
2342 // Loop over the remaining dimensions to be transformed:
2343 ++icount;
2344 ++activeDim;
2345 for (idim = 1; idim != numComplex; ++idim, ++icount, ++activeDim) {
2346
2347 // find the next non-sine transform dimension
2348 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2349 if (sineTransformDims_m[icount]) ++activeDim;
2350 ++icount;
2351 }
2352 PAssert_LT(activeDim, Dim); // check that this is a valid dimension!
2353 // Now do the serial transforms along this dimension:
2354
2355 bool skipTranspose = false;
2356 // if this is the last transform dimension, we might be able
2357 // to skip the last temporary and transpose right into g
2358 if (idim == numComplex-1) {
2359 // get the domain for comparison
2360 const Domain_t& last_dom = tempLayouts_m[idim]->getDomain();
2361 // check that zeroth axis is the same and is serial
2362 // and that there are no guard cells
2363 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
2364 (out_dom[0].length() == last_dom[0].length()) &&
2365 (out_layout.getDistribution(0) == SERIAL) &&
2366 (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2367 }
2368
2369 if (!skipTranspose) {
2370 // transpose and permute to Field with transform dim first
2371 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
2372 (*temp)[temp->getLayout().getDomain()];
2373
2374 // Compress out previous iterate's storage:
2375 if (this->compressTemps()) *temp = 0;
2376 temp = tempFields_m[idim]; // Field* management aid
2377 }
2378 else if (idim == numComplex-1) {
2379 // last transform and we can skip the last temporary field
2380 // so do the transpose here using g instead
2381
2382 // transpose and permute to Field with transform dim first
2383 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
2384
2385 // Compress out previous iterate's storage:
2386 if (this->compressTemps()) *temp = 0;
2387 temp = &g; // Field* management aid
2388 }
2389
2390
2391
2392 // Loop over all the Vnodes, working on the LField in each.
2393 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
2394 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
2395
2396 // Get the LField
2397 ComplexLField_t* ldf = (*l_i).second.get();
2398 // make sure we are uncompressed
2399 ldf->Uncompress();
2400 // get the raw data pointer
2401 localdata = ldf->getP();
2402
2403 // Do 1D complex-to-complex FFT's on all the strips in the LField:
2404 int nstrips = 1, length = ldf->size(0);
2405 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2406 for (int istrip=0; istrip<nstrips; ++istrip) {
2407 // Do the 1D FFT:
2408 this->getEngine().callFFT(activeDim, direction, localdata);
2409 // advance the data pointer
2410 localdata += length;
2411 } // loop over 1D strips
2412 } // loop over all the LFields
2413
2414 } // loop over all transformed dimensions
2415
2416 // skip final assignment and compress if we used g as final temporary
2417 if (temp != &g) {
2418
2419 // Now assign into output Field, and compress last temp's storage:
2420 g[out_dom] = (*temp)[temp->getLayout().getDomain()];
2421 if (this->compressTemps()) *temp = 0;
2422
2423 }
2424
2425 // Normalize:
2426 if (direction == +1) g = g * this->getNormFact();
2427
2428 return;
2429}
2430
2431//-----------------------------------------------------------------------------
2432// Sine and RC FFT; opposite direction, from complex to real
2433//-----------------------------------------------------------------------------
2434
2435template <size_t Dim, class T>
2436void
2438 int direction,
2441 const bool& constInput)
2442{
2443
2444 // indicate we're doing another FFT
2445 // INCIPPLSTAT(incFFTs);
2446
2447 // Check domain of incoming Fields
2448 const Layout_t& in_layout = f.getLayout();
2449 const Domain_t& in_dom = in_layout.getDomain();
2450 const Layout_t& out_layout = g.getLayout();
2451 const Domain_t& out_dom = out_layout.getDomain();
2452 PAssert_EQ( this->checkDomain(*complexDomain_m,in_dom) &&
2453 this->checkDomain(this->getDomain(),out_dom), true );
2454
2455 // Common loop iterate and other vars:
2456 size_t d;
2457 int icount, activeDim;
2458 int idim; // idim loops over the number of transform dims.
2459 size_t nTransformDims = this->numTransformDims();
2460
2461 // proceed with the complex-to-complex transforms
2462
2463 // Field* for temp Field management:
2464 ComplexField_t* temp = &f;
2465 // Local work array passed to FFT:
2466 Complex_t* localdata;
2467
2468 // Loop over all dimensions to be non-sine transformed except last one:
2469 int numComplex = nTransformDims - numSineTransforms_m;
2470 icount = Dim-1; // start with last dimension
2471 activeDim = nTransformDims-1;
2472 for (idim = numComplex-1; idim != 0; --idim, --icount, --activeDim) {
2473
2474 // find next non-sine transform dim
2475 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2476 if (sineTransformDims_m[icount]) --activeDim;
2477 --icount;
2478 }
2479 PAssert_GE(activeDim, 0); // check that this is a valid dimension!
2480 // Now do the serial transforms along this dimension:
2481
2482 bool skipTranspose = false;
2483 // if this is the first transform dimension, we might be able
2484 // to skip the transpose into the first temporary Field
2485 if (idim == numComplex-1 && !constInput) {
2486 // get domain for comparison
2487 const Domain_t& first_dom = tempLayouts_m[idim]->getDomain();
2488 // check that zeroth axis is the same and is serial
2489 // and that there are no guard cells
2490 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
2491 (in_dom[0].length() == first_dom[0].length()) &&
2492 (in_layout.getDistribution(0) == SERIAL) &&
2493 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2494 }
2495
2496 if (!skipTranspose) {
2497 // transpose and permute to Field with transform dim first
2498 (*tempFields_m[idim])[tempLayouts_m[idim]->getDomain()] =
2499 (*temp)[temp->getLayout().getDomain()];
2500
2501 // Compress out previous iterate's storage:
2502 if (this->compressTemps() && temp != &f) *temp = 0;
2503 temp = tempFields_m[idim]; // Field* management aid
2504 }
2505
2506
2507
2508 // Loop over all the Vnodes, working on the LField in each.
2509 typename ComplexField_t::const_iterator_if l_i, l_end = temp->end_if();
2510 for (l_i = temp->begin_if(); l_i != l_end; ++l_i) {
2511
2512 // Get the LField
2513 ComplexLField_t* ldf = (*l_i).second.get();
2514 // make sure we are uncompressed
2515 ldf->Uncompress();
2516 // get the raw data pointer
2517 localdata = ldf->getP();
2518
2519 // Do 1D complex-to-complex FFT's on all the strips in the LField:
2520 int nstrips = 1, length = ldf->size(0);
2521 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2522 for (int istrip=0; istrip<nstrips; ++istrip) {
2523 // Do the 1D FFT:
2524 this->getEngine().callFFT(activeDim, direction, localdata);
2525 // advance the data pointer
2526 localdata += length;
2527 } // loop over 1D strips
2528 } // loop over all the LFields
2529
2530 } // loop over all transformed dimensions
2531
2532 // handle the CR transform separately
2533
2534 // find next non-sine transform dim
2535 while (!this->transformDim(icount) || sineTransformDims_m[icount]) {
2536 if (sineTransformDims_m[icount]) --activeDim;
2537 --icount;
2538 }
2539 PAssert_GE(activeDim, 0); // check that this is a valid dimension!
2540
2541 // Temp Field* management aid
2542 RealField_t* tempR = tempRFields_m[numSineTransforms_m];
2543
2544 bool skipTemp = true;
2545 if (numComplex == 1 && !constInput) {
2546 // only one CR transform
2547 // see if we really need to transpose input data
2548 // more rigorous match required here; check that layouts are identical
2549 if ( !(in_layout == *tempLayouts_m[0]) ) skipTemp = false;
2550 for (d=0; d<Dim; ++d) {
2551 if ( in_layout.getDistribution(d) !=
2552 tempLayouts_m[0]->getDistribution(d) ) skipTemp = false;
2553 }
2554 if ( in_layout.numVnodes() != tempLayouts_m[0]->numVnodes() )
2555 skipTemp = false;
2556 // also make sure there are no guard cells
2557 if (!(f.getGC() == FFT<SineTransform,Dim,T>::nullGC)) skipTemp = false;
2558 }
2559 else { // cannot skip transpose
2560 skipTemp = false;
2561 }
2562
2563 if (!skipTemp) {
2564
2565 // transpose and permute to complex Field with transform dim first
2566 (*tempFields_m[0])[tempLayouts_m[0]->getDomain()] =
2567 (*temp)[temp->getLayout().getDomain()];
2568 // compress previous iterates storage
2569 if (this->compressTemps() && temp != &f) *temp = 0;
2570 temp = tempFields_m[0];
2571
2572 }
2573
2574
2575 // Loop over all the Vnodes, working on the LField in each.
2576 typename RealField_t::const_iterator_if rl_i, rl_end = tempR->end_if();
2577 typename ComplexField_t::const_iterator_if cl_i = temp->begin_if();
2578 for (rl_i = tempR->begin_if(); rl_i != rl_end; ++rl_i, ++cl_i) {
2579 // Get the LFields
2580 RealLField_t* rldf = (*rl_i).second.get();
2581 ComplexLField_t* cldf = (*cl_i).second.get();
2582 // make sure we are uncompressed
2583 rldf->Uncompress();
2584 cldf->Uncompress();
2585 // get the raw data pointers
2586 T* localreal = rldf->getP();
2587 Complex_t* localcomp = cldf->getP();
2588
2589 int nstrips = 1, lengthreal = rldf->size(0), lengthcomp = cldf->size(0);
2590 // number of strips should be the same for real and complex LFields!
2591 for (d=1; d<Dim; ++d) nstrips *= rldf->size(d);
2592 for (int istrip=0; istrip<nstrips; ++istrip) {
2593 // Do the 1D complex-to-real FFT:
2594 // note that complex-to-real FFT direction is always -1
2595 this->getEngine().callFFT(activeDim, -1, localcomp);
2596 // move the data into the real strip, which is two reals shorter
2597 for (int ilen=0; ilen<lengthreal; ilen+=2) {
2598 localreal[ilen] = real(localcomp[ilen/2]);
2599 localreal[ilen+1] = imag(localcomp[ilen/2]);
2600 }
2601 // advance the data pointers
2602 localreal += lengthreal;
2603 localcomp += lengthcomp;
2604 } // loop over 1D strips
2605 } // loop over all the LFields
2606
2607
2608
2609 // compress previous iterates storage
2610 if (this->compressTemps() && temp != &f) *temp = 0;
2611
2612
2613 // now do the real-to-real FFTs
2614
2615 // Local work array passed to FFT:
2616 T* localdataR;
2617
2618 // Loop over the remaining dimensions to be transformed:
2619 icount = Dim-1; // start with last dimension
2620 activeDim = nTransformDims - 1;
2621 for (idim = numSineTransforms_m-1; idim != -1;
2622 --idim, --icount, --activeDim) {
2623
2624 // find next sine transform dim
2625 while (!sineTransformDims_m[icount]) {
2626 if (this->transformDim(icount)) --activeDim;
2627 --icount;
2628 }
2629 PAssert_GE(activeDim, 0); // check that this is a valid dimension!
2630 // Now do the serial transforms along this dimension:
2631
2632 bool skipTranspose = false;
2633 // if this is the last transform dimension, we might be able
2634 // to skip the last temporary and transpose right into g
2635 if (idim == 0) {
2636 // get the domain for comparison
2637 const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
2638 // check that zeroth axis is the same and is serial
2639 // and that there are no guard cells
2640 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
2641 (out_dom[0].length() == last_dom[0].length()) &&
2642 (out_layout.getDistribution(0) == SERIAL) &&
2643 (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2644 }
2645
2646 if (!skipTranspose) {
2647 // transpose and permute to Field with transform dim first
2648 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
2649 (*tempR)[tempR->getLayout().getDomain()];
2650
2651 // Compress out previous iterate's storage:
2652 if (this->compressTemps()) *tempR = 0;
2653 tempR = tempRFields_m[idim]; // Field* management aid
2654 }
2655 else if (idim == 0) {
2656 // last transform and we can skip the last temporary field
2657 // so do the transpose here using g instead
2658
2659 // transpose and permute to Field with transform dim first
2660 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
2661
2662 // Compress out previous iterate's storage:
2663 if (this->compressTemps()) *tempR = 0;
2664 tempR = &g; // Field* management aid
2665 }
2666
2667
2668
2669 // Loop over all the Vnodes, working on the LField in each.
2670 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
2671 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
2672
2673 // Get the LField
2674 RealLField_t* ldf = (*l_i).second.get();
2675 // make sure we are uncompressed
2676 ldf->Uncompress();
2677 // get the raw data pointer
2678 localdataR = ldf->getP();
2679
2680 // Do 1D complex-to-complex FFT's on all the strips in the LField:
2681 int nstrips = 1, length = ldf->size(0);
2682 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2683 for (int istrip=0; istrip<nstrips; ++istrip) {
2684 // Do the 1D FFT:
2685 this->getEngine().callFFT(activeDim, direction, localdataR);
2686 // advance the data pointer
2687 localdataR += length;
2688 } // loop over 1D strips
2689 } // loop over all the LFields
2690
2691 } // loop over all transformed dimensions
2692
2693 // skip final assignment and compress if we used g as final temporary
2694 if (tempR != &g) {
2695
2696 // Now assign into output Field, and compress last temp's storage:
2697 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
2698 if (this->compressTemps()) *tempR = 0;
2699
2700 }
2701
2702 // Normalize:
2703 if (direction == +1) g = g * this->getNormFact();
2704
2705 return;
2706}
2707
2708//-----------------------------------------------------------------------------
2709// Sine FFT only; separate input and output fields, direction is +1 or -1
2710//-----------------------------------------------------------------------------
2711
2712template <size_t Dim, class T>
2713void
2715 int direction,
2718 const bool& constInput)
2719{
2720
2721 // indicate we're doing another FFT
2722 //INCIPPLSTAT(incFFTs);
2723
2724 // Check domain of incoming Fields
2725 const Layout_t& in_layout = f.getLayout();
2726 const Domain_t& in_dom = in_layout.getDomain();
2727 const Layout_t& out_layout = g.getLayout();
2728 const Domain_t& out_dom = out_layout.getDomain();
2729 PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
2730 this->checkDomain(this->getDomain(),out_dom), true );
2731
2732 // Common loop iterate and other vars:
2733 size_t d;
2734 int idim; // idim loops over the number of transform dims.
2735 int begdim, enddim;
2736 size_t nTransformDims = this->numTransformDims();
2737 // check that there is no real-to-complex transform to do
2738 PInsist(nTransformDims==numSineTransforms_m,
2739 "Wrong output Field type for real-to-complex transform!!");
2740
2741 // do all the sine transforms
2742
2743 // Field* management aid
2744 RealField_t* tempR = &f;
2745 // Local work array passed to FFT:
2746 T* localdataR;
2747
2748 // Loop over the dimensions to be sine transformed:
2749 begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
2750 enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
2751 for (idim = begdim; idim != enddim; idim+=direction) {
2752
2753 // Now do the serial transforms along this dimension:
2754
2755 bool skipTranspose = false;
2756 // if this is the first transform dimension, we might be able
2757 // to skip the first temporary and just use f
2758 if (idim == begdim && !constInput) {
2759 // get the domain for comparison
2760 const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
2761 // check that zeroth axis is the same and is serial
2762 // and that there are no guard cells
2763 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
2764 (in_dom[0].length() == first_dom[0].length()) &&
2765 (in_layout.getDistribution(0) == SERIAL) &&
2766 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2767 }
2768
2769 // if this is the last transform dimension, we might be able
2770 // to skip the last temporary and transpose right into g
2771 if (idim == enddim-direction) {
2772 // get the domain for comparison
2773 const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
2774 // check that zeroth axis is the same and is serial
2775 // and that there are no guard cells
2776 skipTranspose = ( (out_dom[0].sameBase(last_dom[0])) &&
2777 (out_dom[0].length() == last_dom[0].length()) &&
2778 (out_layout.getDistribution(0) == SERIAL) &&
2779 (g.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2780 }
2781
2782 if (!skipTranspose) {
2783 // transpose and permute to Field with transform dim first
2784 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
2785 (*tempR)[tempR->getLayout().getDomain()];
2786
2787 // Compress out previous iterate's storage:
2788 if (this->compressTemps() && tempR != &f) *tempR = 0;
2789 tempR = tempRFields_m[idim]; // Field* management aid
2790 }
2791 else if (idim == enddim-direction && tempR != &g) {
2792 // last transform and we can skip the last temporary field
2793 // so do the transpose here using g instead
2794
2795 // transpose and permute to Field with transform dim first
2796 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
2797
2798 // Compress out previous iterate's storage:
2799 if (this->compressTemps() && tempR != &f) *tempR = 0;
2800 tempR = &g; // Field* management aid
2801 }
2802
2803
2804
2805 // Loop over all the Vnodes, working on the LField in each.
2806 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
2807 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
2808
2809 // Get the LField
2810 RealLField_t* ldf = (*l_i).second.get();
2811 // make sure we are uncompressed
2812 ldf->Uncompress();
2813 // get the raw data pointer
2814 localdataR = ldf->getP();
2815
2816 // Do 1D real-to-real FFT's on all the strips in the LField:
2817 int nstrips = 1, length = ldf->size(0);
2818 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2819 for (int istrip=0; istrip<nstrips; ++istrip) {
2820 // Do the 1D FFT:
2821 this->getEngine().callFFT(idim, direction, localdataR);
2822 // advance the data pointer
2823 localdataR += length;
2824 } // loop over 1D strips
2825 } // loop over all the LFields
2826
2827 } // loop over all transformed dimensions
2828
2829 // skip final assignment and compress if we used g as final temporary
2830 if (tempR != &g) {
2831
2832 // Now assign into output Field, and compress last temp's storage:
2833 g[out_dom] = (*tempR)[tempR->getLayout().getDomain()];
2834 if (this->compressTemps() && tempR != &f) *tempR = 0;
2835
2836 }
2837
2838 // Normalize:
2839 if (direction == +1) g = g * this->getNormFact();
2840
2841 return;
2842}
2843
2844//-----------------------------------------------------------------------------
2845// Sine FFT only; in-place transform, direction is +1 or -1
2846//-----------------------------------------------------------------------------
2847
2848template <size_t Dim, class T>
2849void
2851 int direction,
2853{
2854 // indicate we're doing another FFT
2855 //INCIPPLSTAT(incFFTs);
2856
2857 // Check domain of incoming Field
2858 const Layout_t& in_layout = f.getLayout();
2859 const Domain_t& in_dom = in_layout.getDomain();
2860 PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
2861
2862 // Common loop iterate and other vars:
2863 size_t d;
2864 int idim; // idim loops over the number of transform dims.
2865 int begdim, enddim;
2866 size_t nTransformDims = this->numTransformDims();
2867 // check that there is no real-to-complex transform to do
2868 PInsist(nTransformDims==numSineTransforms_m,
2869 "Cannot perform real-to-complex transform in-place!!");
2870
2871 // do all the sine transforms
2872
2873 // Field* management aid
2874 RealField_t* tempR = &f;
2875 // Local work array passed to FFT:
2876 T* localdataR;
2877
2878 // Loop over the dimensions to be sine transformed:
2879 begdim = (direction == +1) ? 0 : static_cast<int>(nTransformDims-1);
2880 enddim = (direction == +1) ? static_cast<int>(nTransformDims) : -1;
2881 for (idim = begdim; idim != enddim; idim+=direction) {
2882
2883 // Now do the serial transforms along this dimension:
2884
2885 bool skipTranspose = false;
2886 // if this is the first transform dimension, we might be able
2887 // to skip the transpose into the first temporary Field
2888 if (idim == begdim) {
2889 // get domain for comparison
2890 const Domain_t& first_dom = tempRLayouts_m[idim]->getDomain();
2891 // check that zeroth axis is the same and is serial
2892 // and that there are no guard cells
2893 skipTranspose = ( (in_dom[0].sameBase(first_dom[0])) &&
2894 (in_dom[0].length() == first_dom[0].length()) &&
2895 (in_layout.getDistribution(0) == SERIAL) &&
2896 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2897 }
2898
2899 // if this is the last transform dimension, we might be able
2900 // to skip the last temporary and transpose right into f
2901 if (idim == enddim-direction) {
2902 // get the domain for comparison
2903 const Domain_t& last_dom = tempRLayouts_m[idim]->getDomain();
2904 // check that zeroth axis is the same and is serial
2905 // and that there are no guard cells
2906 skipTranspose = ( (in_dom[0].sameBase(last_dom[0])) &&
2907 (in_dom[0].length() == last_dom[0].length()) &&
2908 (in_layout.getDistribution(0) == SERIAL) &&
2909 (f.getGC() == FFT<SineTransform,Dim,T>::nullGC) );
2910 }
2911
2912 if (!skipTranspose) {
2913 // transpose and permute to Field with transform dim first
2914 (*tempRFields_m[idim])[tempRLayouts_m[idim]->getDomain()] =
2915 (*tempR)[tempR->getLayout().getDomain()];
2916
2917 // Compress out previous iterate's storage:
2918 if (this->compressTemps() && tempR != &f) *tempR = 0;
2919 tempR = tempRFields_m[idim]; // Field* management aid
2920 }
2921 else if (idim == enddim-direction && tempR != &f) {
2922 // last transform and we can skip the last temporary field
2923 // so do the transpose here using f instead
2924
2925 // transpose and permute to Field with transform dim first
2926 f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
2927
2928 // Compress out previous iterate's storage:
2929 if (this->compressTemps() && tempR != &f) *tempR = 0;
2930 tempR = &f; // Field* management aid
2931 }
2932
2933
2934
2935 // Loop over all the Vnodes, working on the LField in each.
2936 typename RealField_t::const_iterator_if l_i, l_end = tempR->end_if();
2937 for (l_i = tempR->begin_if(); l_i != l_end; ++l_i) {
2938
2939 // Get the LField
2940 RealLField_t* ldf = (*l_i).second.get();
2941 // make sure we are uncompressed
2942 ldf->Uncompress();
2943 // get the raw data pointer
2944 localdataR = ldf->getP();
2945
2946 // Do 1D real-to-real FFT's on all the strips in the LField:
2947 int nstrips = 1, length = ldf->size(0);
2948 for (d=1; d<Dim; ++d) nstrips *= ldf->size(d);
2949 for (int istrip=0; istrip<nstrips; ++istrip) {
2950 // Do the 1D FFT:
2951 this->getEngine().callFFT(idim, direction, localdataR);
2952 // advance the data pointer
2953 localdataR += length;
2954 } // loop over 1D strips
2955 } // loop over all the LFields
2956
2957 } // loop over all transformed dimensions
2958
2959 // skip final assignment and compress if we used g as final temporary
2960 if (tempR != &f) {
2961
2962 // Now assign into output Field, and compress last temp's storage:
2963 f[in_dom] = (*tempR)[tempR->getLayout().getDomain()];
2964 if (this->compressTemps()) *tempR = 0;
2965
2966 }
2967
2968 // Normalize:
2969 if (direction == +1) f = f * this->getNormFact();
2970
2971 return;
2972}
2973
2974
2975//=============================================================================
2976// 1D FFT SineTransform Constructors
2977//=============================================================================
2978
2979//-----------------------------------------------------------------------------
2980// Constructor for doing only sine transforms.
2981// Create a new FFT object of type SineTransform, with given real field
2982// domain. Also specify which dimensions to sine transform along.
2983//-----------------------------------------------------------------------------
2984
2985template <class T>
2987 const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
2988 const bool sineTransformDims[1U], const bool& compressTemps)
2989: FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain,
2990 sineTransformDims, compressTemps)
2991{
2992 // Tau profiling
2993
2994
2995
2996
2997 // get axis length
2998 int length;
2999 length = rdomain[0].length();
3000
3001 // get transform type for FFT Engine, compute normalization
3002 int transformType;
3003 transformType = FFTBase<1U,T>::sineFFT; // sine transform
3004 T& normFact = this->getNormFact();
3005 normFact = 1.0 / (2.0 * (length + 1));
3006
3007 // set up FFT Engine
3008 this->getEngine().setup(1U, &transformType, &length);
3009 // set up the temporary fields
3010 setup();
3011}
3012
3013//-----------------------------------------------------------------------------
3014// Create a new FFT object of type SineTransform, with
3015// given real domain. Default: sine transform along all dimensions.
3016//-----------------------------------------------------------------------------
3017
3018template <class T>
3020 const typename FFT<SineTransform,1U,T>::Domain_t& rdomain,
3021 const bool& compressTemps)
3022: FFTBase<1U,T>(FFT<SineTransform,1U,T>::sineFFT, rdomain, compressTemps)
3023{
3024 // Tau profiling
3025
3026
3027
3028
3029 // get axis length
3030 int length;
3031 length = rdomain[0].length();
3032
3033 // get transform type for FFT Engine, compute normalization
3034 int transformType;
3035 transformType = FFTBase<1U,T>::sineFFT; // sine transform
3036 T& normFact = this->getNormFact();
3037 normFact = 1.0 / (2.0 * (length + 1));
3038
3039 // set up FFT Engine
3040 this->getEngine().setup(1U, &transformType, &length);
3041 // set up the temporary fields
3042 setup();
3043}
3044
3045//-----------------------------------------------------------------------------
3046// setup performs all the initializations necessary after the transform
3047// directions have been specified.
3048//-----------------------------------------------------------------------------
3049
3050template <class T>
3051void
3053
3054 // Tau profiling
3055
3056
3057
3058
3059 const Domain_t& domain = this->getDomain(); // get real Field domain
3060
3061 // generate temporary field layout
3062 tempRLayouts_m = new Layout_t(domain[0], PARALLEL, 1);
3063 // create temporary real Field
3064 tempRFields_m = new RealField_t(*tempRLayouts_m);
3065 // If user requests no intermediate compression, uncompress right now:
3066 if (!this->compressTemps()) tempRFields_m->Uncompress();
3067
3068 return;
3069}
3070
3071//-----------------------------------------------------------------------------
3072// Destructor
3073//-----------------------------------------------------------------------------
3074
3075template <class T>
3077
3078 // Tau profiling
3079
3080
3081
3082
3083 // delete temporary field and layout
3084 delete tempRFields_m;
3085 delete tempRLayouts_m;
3086}
3087
3088//-----------------------------------------------------------------------------
3089// Sine FFT only; separate input and output fields, direction is +1 or -1
3090//-----------------------------------------------------------------------------
3091
3092template <class T>
3093void
3095 int direction,
3098 const bool& constInput)
3099{
3100 // indicate we're doing another FFT
3101 //INCIPPLSTAT(incFFTs);
3102
3103 // Check domain of incoming Fields
3104 const Layout_t& in_layout = f.getLayout();
3105 const Domain_t& in_dom = in_layout.getDomain();
3106 const Layout_t& out_layout = g.getLayout();
3107 const Domain_t& out_dom = out_layout.getDomain();
3108 PAssert_EQ( this->checkDomain(this->getDomain(),in_dom) &&
3109 this->checkDomain(this->getDomain(),out_dom), true);
3110
3111 // Field* management aid
3112 RealField_t* tempR = &f;
3113 // Local work array passed to FFT:
3114 T* localdataR;
3115
3116
3117 // Now do the serial transform along this dimension:
3118
3119 // get the domain for comparison
3120 const Domain_t& temp_dom = tempRLayouts_m->getDomain();
3121 bool skipTranspose = false;
3122 // we might be able
3123 // to skip the first temporary and just use f
3124 if (!constInput) {
3125 // check that zeroth axis is the same, has one vnode
3126 // and that there are no guard cells
3127 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
3128 (in_dom[0].length() == temp_dom[0].length()) &&
3129 (in_layout.numVnodes() == 1) &&
3130 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3131 }
3132
3133 bool skipFinal = false;
3134 // we might be able
3135 // to skip the last temporary and transpose right into g
3136
3137 // check that zeroth axis is the same, has one vnode
3138 // and that there are no guard cells
3139 skipFinal = ( (out_dom[0].sameBase(temp_dom[0])) &&
3140 (out_dom[0].length() == temp_dom[0].length()) &&
3141 (out_layout.numVnodes() == 1) &&
3142 (g.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3143
3144 if (!skipTranspose) {
3145 // assign to Field with proper layout
3146 (*tempRFields_m) = (*tempR);
3147 tempR = tempRFields_m; // Field* management aid
3148 }
3149 if (skipFinal) {
3150 // we can skip the last temporary field
3151 // so do the transpose here using g instead
3152
3153 // assign to Field with proper layout
3154 g = (*tempR);
3155
3156 // Compress out previous iterate's storage:
3157 if (this->compressTemps() && tempR != &f) *tempR = 0;
3158 tempR = &g; // Field* management aid
3159 }
3160
3161
3162
3163 // There should be just one LField.
3164 typename RealField_t::const_iterator_if l_i = tempR->begin_if();
3165 if (l_i != tempR->end_if()) {
3166
3167 // Get the LField
3168 RealLField_t* ldf = (*l_i).second.get();
3169 // make sure we are uncompressed
3170 ldf->Uncompress();
3171 // get the raw data pointer
3172 localdataR = ldf->getP();
3173
3174 // Do the 1D FFT:
3175 this->getEngine().callFFT(0, direction, localdataR);
3176 }
3177
3178
3179 // skip final assignment and compress if we used g as final temporary
3180 if (tempR != &g) {
3181
3182 // Now assign into output Field, and compress last temp's storage:
3183 g = (*tempR);
3184 if (this->compressTemps() && tempR != &f) *tempR = 0;
3185
3186 }
3187
3188 // Normalize:
3189 if (direction == +1) g = g * this->getNormFact();
3190
3191 return;
3192}
3193
3194//-----------------------------------------------------------------------------
3195// Sine FFT only; in-place transform, direction is +1 or -1
3196//-----------------------------------------------------------------------------
3197
3198template <class T>
3199void
3201 int direction,
3203{
3204 // indicate we're doing another FFT
3205 //INCIPPLSTAT(incFFTs);
3206
3207 // Check domain of incoming Field
3208 const Layout_t& in_layout = f.getLayout();
3209 const Domain_t& in_dom = in_layout.getDomain();
3210 PAssert_EQ(this->checkDomain(this->getDomain(),in_dom), true);
3211
3212 // Field* management aid
3213 RealField_t* tempR = &f;
3214 // Local work array passed to FFT:
3215 T* localdataR;
3216
3217
3218 // Now do the serial transform along this dimension:
3219
3220 // get domain for comparison
3221 const Domain_t& temp_dom = tempRLayouts_m->getDomain();
3222 bool skipTranspose = false;
3223 // we might be able
3224 // to skip the transpose into the first temporary Field
3225
3226 // check that zeroth axis is the same and is serial
3227 // and that there are no guard cells
3228 skipTranspose = ( (in_dom[0].sameBase(temp_dom[0])) &&
3229 (in_dom[0].length() == temp_dom[0].length()) &&
3230 (in_layout.numVnodes() == 1) &&
3231 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3232
3233 bool skipFinal = false;
3234 // we might be able
3235 // to skip the last temporary and transpose right into f
3236
3237 // check that zeroth axis is the same, has one vnode
3238 // and that there are no guard cells
3239 skipFinal = ( (in_dom[0].sameBase(temp_dom[0])) &&
3240 (in_dom[0].length() == temp_dom[0].length()) &&
3241 (in_layout.numVnodes() == 1) &&
3242 (f.getGC() == FFT<SineTransform,1U,T>::nullGC) );
3243
3244 if (!skipTranspose) {
3245 // assign to Field with proper layout
3246 (*tempRFields_m) = (*tempR);
3247
3248 tempR = tempRFields_m; // Field* management aid
3249 }
3250 if (skipFinal) {
3251 // we can skip the last temporary field
3252 // so do the transpose here using f instead
3253
3254 // assign to Field with proper layout
3255 f = (*tempR);
3256
3257 // Compress out previous iterate's storage:
3258 if (this->compressTemps() && tempR != &f) *tempR = 0;
3259 tempR = &f; // Field* management aid
3260 }
3261
3262
3263
3264 // There should be just one LField.
3265 typename RealField_t::const_iterator_if l_i = tempR->begin_if();
3266 if (l_i != tempR->end_if()) {
3267
3268 // Get the LField
3269 RealLField_t* ldf = (*l_i).second.get();
3270 // make sure we are uncompressed
3271 ldf->Uncompress();
3272 // get the raw data pointer
3273 localdataR = ldf->getP();
3274
3275 // Do the 1D FFT:
3276 this->getEngine().callFFT(0, direction, localdataR);
3277 }
3278
3279
3280 // skip final assignment and compress if we used g as final temporary
3281 if (tempR != &f) {
3282
3283 // Now assign into output Field, and compress last temp's storage:
3284 f = (*tempR);
3285 if (this->compressTemps()) *tempR = 0;
3286
3287 }
3288
3289 // Normalize:
3290 if (direction == +1) f = f * this->getNormFact();
3291
3292 return;
3293}
FLieGenerator< T, N > real(const FLieGenerator< std::complex< T >, N > &)
Take real part of a complex generator.
FLieGenerator< T, N > imag(const FLieGenerator< std::complex< T >, N > &)
Take imaginary part of a complex generator.
ParticleSpatialLayout< double, 3, Mesh_t > Layout_t
Definition: PBunchDefs.h:24
const unsigned Dim
e_dim_tag
Definition: FieldLayout.h:55
@ PARALLEL
Definition: FieldLayout.h:55
@ SERIAL
Definition: FieldLayout.h:55
#define PAssert_LT(a, b)
Definition: PAssert.h:106
#define PInsist(c, m)
Definition: PAssert.h:120
#define PAssert_EQ(a, b)
Definition: PAssert.h:104
#define PAssert(c)
Definition: PAssert.h:102
#define PAssert_GE(a, b)
Definition: PAssert.h:109
#define PAssert_GT(a, b)
Definition: PAssert.h:108
Definition: FFT.h:48
FFTBase< 1U, T >::Domain_t Domain_t
Definition: FFT.h:294
FFTBase< 1U, T >::Domain_t Domain_t
Definition: FFT.h:548
FFTBase< 1U, T >::Domain_t Domain_t
Definition: FFT.h:872
int transVnodes() const
Definition: FFTBase.h:125
Precision_t & getNormFact(void)
get the FFT normalization factor
Definition: FFTBase.h:157
const Domain_t & getDomain(void) const
get our domain
Definition: FFTBase.h:160
unsigned numTransformDims(void) const
query number of transform dimensions
Definition: FFTBase.h:148
NDIndex< Dim > Domain_t
Definition: FFTBase.h:60
bool transformDim(unsigned d) const
query whether this dimension is to be transformed
Definition: FFTBase.h:238
bool compressTemps(void) const
do we compress temps?
Definition: FFTBase.h:166
unsigned activeDimension(unsigned d) const
get dimension number from list of transformed dimensions
Definition: FFTBase.h:252
InternalFFT_t & getEngine(void)
access the internal FFT Engine
Definition: FFTBase.h:154
bool checkDomain(const Domain_t &dom1, const Domain_t &dom2) const
compare indexes of two domains
Definition: FFTBase.h:269
void callFFT(unsigned transformDim, int direction, Complex_t *data)
void setup(unsigned numTransformDims, const int *transformTypes, const int *axisLengths)
Definition: fftpack_FFT.h:153