OPAL (Object Oriented Parallel Accelerator Library)  2.2.0
OPAL
FFTPoissonSolver.cpp
Go to the documentation of this file.
1 // -*- C++ -*-
2 /***************************************************************************
3  *
4  *
5  * FFTPoissonSolver.cc
6  *
7  *
8  *
9  *
10  *
11  *
12  *
13  ***************************************************************************/
14 
16 #include "Algorithms/PartBunch.h"
17 #include "Physics/Physics.h"
18 #include "Utility/IpplTimings.h"
19 #include "BasicActions/Option.h"
20 #include "Utilities/Options.h"
21 #include "Utilities/Util.h"
22 #include <fstream>
23 
24 extern Inform *gmsg;
26 // a little helper class to specialize the action of the Green's function
27 // calculation. This should be specialized for each dimension
28 // to the proper action for computing the Green's function. The first
29 // template parameter is the full type of the Field to compute, and the second
30 // is the dimension of the data, which should be specialized.
31 
32 template<unsigned int Dim>
33 struct SpecializedGreensFunction { };
34 
35 template<>
36 struct SpecializedGreensFunction<3> {
37  template<class T, class FT, class FT2>
38  static void calculate(Vektor<T, 3> &hrsq, FT &grn, FT2 *grnI) {
39  grn = grnI[0] * hrsq[0] + grnI[1] * hrsq[1] + grnI[2] * hrsq[2];
40  grn = 1.0 / sqrt(grn);
41  grn[0][0][0] = grn[0][0][1];
42  }
43 };
44 
46 
47 // constructor
48 
49 
50 FFTPoissonSolver::FFTPoissonSolver(Mesh_t *mesh, FieldLayout_t *fl, std::string greensFunction, std::string bcz):
51  mesh_m(mesh),
52  layout_m(fl),
53  mesh2_m(nullptr),
54  layout2_m(nullptr),
55  mesh3_m(nullptr),
56  layout3_m(nullptr),
57  mesh4_m(nullptr),
58  layout4_m(nullptr)
59 {
60  bcz_m = (bcz == std::string("PERIODIC")); // for DC beams, the z direction has periodic boundary conditions
61  integratedGreens_m = (greensFunction == std::string("INTEGRATED"));
62 
64 
65  GreensFunctionTimer_m = IpplTimings::getTimer("SF: GreensFTotal");
66  ComputePotential_m = IpplTimings::getTimer("ComputePotential");
67 }
68 
69 
70 FFTPoissonSolver::FFTPoissonSolver(PartBunch &beam, std::string greensFunction):
71  mesh_m(&beam.getMesh()),
72  layout_m(&beam.getFieldLayout()),
73  mesh2_m(nullptr),
74  layout2_m(nullptr),
75  mesh3_m(nullptr),
76  layout3_m(nullptr),
77  mesh4_m(nullptr),
78  layout4_m(nullptr)
79 {
80  integratedGreens_m = (greensFunction == std::string("INTEGRATED"));
81 
83 
84  GreensFunctionTimer_m = IpplTimings::getTimer("SF: GreensFTotal");
85  ComputePotential_m = IpplTimings::getTimer("ComputePotential");
86 }
87 
89 // destructor
91 #ifdef OPAL_DKS
92  //free all the allocated memory
94  if (Ippl::myNode() == 0) {
95  //get number of elements
96  int sizegreen = tmpgreen_m.getLayout().getDomain().size();
97  int sizerho2_m = rho2_m.getLayout().getDomain().size();
98  int sizecomp = grntr_m.getLayout().getDomain().size();
99 
100  //free memory
101  dksbase.freeMemory<double>(tmpgreen_ptr, sizegreen);
102  dksbase.freeMemory<double>(rho2_m_ptr, sizerho2_m);
103  dksbase.freeMemory< std::complex<double> >(grntr_m_ptr, sizecomp);
104 
105  //wait for other processes to close handle to rho2real_m_ptr before freeing memory
106  MPI_Barrier(Ippl::getComm());
107  dksbase.freeMemory<double>(rho2real_m_ptr, sizerho2_m);
108  dksbase.freeMemory< std::complex<double> >(rho2tr_m_ptr, sizecomp);
109  } else {
110  dksbase.closeHandle(rho2real_m_ptr);
111  MPI_Barrier(Ippl::getComm());
112  }
113  }
114 #endif
115 }
116 
118 
120 
121  // For efficiency in the FFT's, we can use a parallel decomposition
122  // which can be serial in the first dimension.
123  e_dim_tag decomp[3];
124  e_dim_tag decomp2[3];
125  for(int d = 0; d < 3; ++ d) {
126  decomp[d] = layout_m->getRequestedDistribution(d);
127  decomp2[d] = layout_m->getRequestedDistribution(d);
128  }
129 
130  if (bcz_m) {
131  // The FFT's require double-sized field sizes in order to
132  // simulate an isolated system. The FFT of the charge density field, rho,
133  // would otherwise mimic periodic boundary conditions, i.e. as if there were
134  // several beams set a periodic distance apart. The double-sized fields in x and
135  // alleviate this problem, in z we have periodic BC's
136  for (int i = 0; i < 2; ++ i) {
137  hr_m[i] = mesh_m->get_meshSpacing(i);
138  nr_m[i] = domain_m[i].length();
139  domain2_m[i] = Index(2 * nr_m[i] + 1);
140  }
141 
142  hr_m[2] = mesh_m->get_meshSpacing(2);
143  nr_m[2] = domain_m[2].length();
144  domain2_m[2] = Index(nr_m[2] + 1);
145 
146  for (int i = 0; i < 2 * 3; ++ i) {
149  }
150  // z-direction
154  vbc_m[5] = new ZeroFace<Vector_t, 3, Mesh_t, Center_t>(5);
155  }
156  else {
157  // The FFT's require double-sized field sizes in order to
158  // simulate an isolated system. The FFT of the charge density field, rho,
159  // would otherwise mimic periodic boundary conditions, i.e. as if there were
160  // several beams set a periodic distance apart. The double-sized fields
161  // alleviate this problem.
162  for (int i = 0; i < 3; ++ i) {
163  hr_m[i] = mesh_m->get_meshSpacing(i);
164  nr_m[i] = domain_m[i].length();
165  domain2_m[i] = Index(2 * nr_m[i] + 1);
166  }
167 
168  for (int i = 0; i < 2 * 3; ++ i) {
171  }
172  }
173 
174  // create double sized mesh and layout objects for the use in the FFT's
175  mesh2_m = std::unique_ptr<Mesh_t>(new Mesh_t(domain2_m));
176  layout2_m = std::unique_ptr<FieldLayout_t>(new FieldLayout_t(*mesh2_m, decomp));
177 
178 #ifdef OPAL_DKS
179  rho2_m.initialize(*mesh2_m, *layout2_m, false);
180 #else
181  rho2_m.initialize(*mesh2_m, *layout2_m);
182 #endif
183 
184 
185  NDIndex<3> tmpdomain;
186  // Create the domain for the transformed (complex) fields. Do this by
187  // taking the domain from the doubled mesh, permuting it to the right, and
188  // setting the 2nd dimension to have n/2 + 1 elements.
189  if (bcz_m)
190  domain3_m[0] = Index(nr_m[2] + 1);
191  else
192  domain3_m[0] = Index(2 * nr_m[2] + 1);
193  domain3_m[1] = Index(nr_m[0] + 2);
194  domain3_m[2] = Index(2 * nr_m[1] + 1);
195 
196  // create mesh and layout for the new real-to-complex FFT's, for the
197  // complex transformed fields
198  mesh3_m = std::unique_ptr<Mesh_t>(new Mesh_t(domain3_m));
199  layout3_m = std::unique_ptr<FieldLayout_t>(new FieldLayout_t(*mesh3_m, decomp2));
200 
201  rho2tr_m.initialize(*mesh3_m, *layout3_m);
202  imgrho2tr_m.initialize(*mesh3_m, *layout3_m);
203  grntr_m.initialize(*mesh3_m, *layout3_m);
204 
205  // helper field for sin
206  greentr_m.initialize(*mesh3_m, *layout3_m);
207 
208  for (int i = 0; i < 3; ++ i) {
209  domain4_m[i] = Index(nr_m[i] + 2);
210  }
211  mesh4_m = std::unique_ptr<Mesh_t>(new Mesh_t(domain4_m));
212  layout4_m = std::unique_ptr<FieldLayout_t>(new FieldLayout_t(*mesh4_m, decomp));
213 
214  tmpgreen_m.initialize(*mesh4_m, *layout4_m);
215 
216  // create a domain used to indicate to the FFT's how to construct it's
217  // temporary fields. This is the same as the complex field's domain,
218  // but permuted back to the left.
219  tmpdomain = layout3_m->getDomain();
220  for (int i = 0; i < 3; ++ i)
221  domainFFTConstruct_m[i] = tmpdomain[(i+1) % 3];
222 
223  // create the FFT object
224  fft_m = std::unique_ptr<FFT_t>(new FFT_t(layout2_m->getDomain(), domainFFTConstruct_m));
225 
226  // these are fields that are used for calculating the Green's function.
227  // they eliminate some calculation at each time-step.
228  for (int i = 0; i < 3; ++ i) {
229  grnIField_m[i].initialize(*mesh2_m, *layout2_m);
230  grnIField_m[i][domain2_m] = where(lt(domain2_m[i], nr_m[i]),
231  domain2_m[i] * domain2_m[i],
232  (2 * nr_m[i] - domain2_m[i]) *
233  (2 * nr_m[i] - domain2_m[i]));
234  }
235 
236 #ifdef OPAL_DKS
237 
238  if (IpplInfo::DKSEnabled) {
239  int dkserr;
240 
241  dksbase.setAPI("Cuda", 4);
242  dksbase.setDevice("-gpu", 4);
243  dksbase.initDevice();
244 
245  if (Ippl::myNode() == 0) {
246 
247  //create stream for greens function
248  dksbase.createStream(streamGreens);
249  dksbase.createStream(streamFFT);
250 
251  //create fft plans for multiple reuse
252  int dimsize[3] = {2*nr_m[0], 2*nr_m[1], 2*nr_m[2]};
253 
254  dksbase.setupFFT(3, dimsize);
255 
256  //allocate memory
257  int sizegreen = tmpgreen_m.getLayout().getDomain().size();
258  int sizerho2_m = rho2_m.getLayout().getDomain().size();
259  int sizecomp = grntr_m.getLayout().getDomain().size();
260 
261  tmpgreen_ptr = dksbase.allocateMemory<double>(sizegreen, dkserr);
262  rho2_m_ptr = dksbase.allocateMemory<double>(sizerho2_m, dkserr);
263  rho2real_m_ptr = dksbase.allocateMemory<double>(sizerho2_m, dkserr);
264 
265  grntr_m_ptr = dksbase.allocateMemory< std::complex<double> >(sizecomp, dkserr);
266  rho2tr_m_ptr = dksbase.allocateMemory< std::complex<double> > (sizecomp, dkserr);
267 
268  //send rho2real_m_ptr to other mpi processes
269  //send streamFFT to other processes
270  for (int p = 1; p < Ippl::getNodes(); p++) {
271  dksbase.sendPointer( rho2real_m_ptr, p, Ippl::getComm() );
272  }
273  } else {
274  //create stream for FFT data transfer
275  dksbase.createStream(streamFFT);
276  //receive pointer
277  rho2real_m_ptr = dksbase.receivePointer(0, Ippl::getComm(), dkserr);
278  }
279  }
280 
281 #endif
282 }
283 
285 // given a charge-density field rho and a set of mesh spacings hr,
286 // compute the electric potential from the image charge by solving
287 // the Poisson's equation
288 
289 void FFTPoissonSolver::computePotential(Field_t &rho, Vector_t hr, double zshift) {
290 
291  // use grid of complex doubled in both dimensions
292  // and store rho in lower left quadrant of doubled grid
293  rho2_m = 0.0;
294 
295  rho2_m[domain_m] = rho[domain_m];
296 
297  // needed in greens function
298  hr_m = hr;
299  // FFT double-sized charge density
300  // we do a backward transformation so that we dont have to account for the normalization factor
301  // that is used in the forward transformation of the IPPL FFT
302  fft_m->transform(-1, rho2_m, rho2tr_m);
303 
304  // must be called if the mesh size has changed
305  // have to check if we can do G with h = (1,1,1)
306  // and rescale later
307 
308  // Do image charge.
309  // The minus sign is due to image charge.
310  // Convolute transformed charge density with shifted green's function.
312  shiftedIntGreensFunction(zshift);
314 
315  // Multiply transformed charge density and
316  // transformed Green's function. Don't divide
317  // by (2*nx_m)*(2*ny_m), as Ryne does; this
318  // normalization is done in POOMA's fft routine.
320 
321  // Inverse FFT to find image charge potential, rho2_m equals the electrostatic potential.
322  fft_m->transform(+1, imgrho2tr_m, rho2_m);
323 
324  // Re-use rho to store image potential. Flip z coordinate since this is a mirror image.
325  Index I = nr_m[0];
326  Index J = nr_m[1];
327  Index K = nr_m[2];
328  rho[I][J][K] = rho2_m[I][J][nr_m[2] - K - 1];
329 
330 }
331 
332 
334 // given a charge-density field rho and a set of mesh spacings hr,
335 // compute the electric field and put in eg by solving the Poisson's equation
336 
338 
340 
341  // use grid of complex doubled in both dimensions
342  // and store rho in lower left quadrant of doubled grid
343  rho2_m = 0.0;
344 
345  rho2_m[domain_m] = rho[domain_m];
346 
347  // needed in greens function
348  hr_m = hr;
349 
350  if (!IpplInfo::DKSEnabled) {
351  // FFT double-sized charge density
352  // we do a backward transformation so that we dont have to account for the normalization factor
353  // that is used in the forward transformation of the IPPL FFT
354  fft_m->transform(-1, rho2_m, rho2tr_m);
355 
356  // must be called if the mesh size has changed
357  // have to check if we can do G with h = (1,1,1)
358  // and rescale later
362  else
363  greensFunction();
365  // multiply transformed charge density
366  // and transformed Green function
367  // Don't divide by (2*nx_m)*(2*ny_m), as Ryne does;
368  // this normalization is done in POOMA's fft routine.
369  rho2tr_m *= grntr_m;
370 
371  // inverse FFT, rho2_m equals to the electrostatic potential
372  fft_m->transform(+1, rho2tr_m, rho2_m);
373  // end convolution
374  } else {
375  computePotentialDKS(rho);
376  }
377 
378  // back to physical grid
379  // reuse the charge density field to store the electrostatic potential
380  rho[domain_m] = rho2_m[domain_m];
382 }
383 
385 #ifdef OPAL_DKS
386  dksbase.syncDevice();
387  MPI_Barrier(Ippl::getComm());
388 
389  if (Ippl::myNode() == 0) {
393  //transform the greens function
394  int dimsize[3] = {2*nr_m[0], 2*nr_m[1], 2*nr_m[2]};
395  dksbase.callR2CFFT(rho2_m_ptr, grntr_m_ptr, 3, dimsize, streamGreens);
396  }
397  MPI_Barrier(Ippl::getComm());
398 
399  //transform rho2_m keep pointer to GPU memory where results are stored in rho2tr_m_ptr
400  fft_m->transformDKSRC(-1, rho2_m, rho2real_m_ptr, rho2tr_m_ptr, dksbase, streamFFT, false);
401 
402  if (Ippl::myNode() == 0) {
403  //transform the greens function
404  //int dimsize[3] = {2*nr_m[0], 2*nr_m[1], 2*nr_m[2]};
405  //dksbase.callR2CFFT(rho2_m_ptr, grntr_m_ptr, 3, dimsize, streamGreens);
406 
407  //multiply fields and free unneeded memory
408  int sizecomp = grntr_m.getLayout().getDomain().size();
409  dksbase.syncDevice();
410  dksbase.callMultiplyComplexFields(rho2tr_m_ptr, grntr_m_ptr, sizecomp);
411  }
412 
413  MPI_Barrier(Ippl::getComm());
414 
415  //inverse FFT and transfer result back to rho2_m
416  fft_m->transformDKSCR(+1, rho2_m, rho2real_m_ptr, rho2tr_m_ptr, dksbase);
417 
418  MPI_Barrier(Ippl::getComm());
419 #else
420  throw OpalException("FFTPoissonSolver::computePotentialDKS",
421  "DKS not enabled during compilation");
422 #endif
423 }
425 // calculate the FFT of the Green's function for the given field
427 
428  //hr_m[0]=hr_m[1]=hr_m[2]=1;
429 
430  Vector_t hrsq(hr_m * hr_m);
432  // Green's function calculation complete at this point.
433  // The next step is to FFT it.
434  // FFT of Green's function
435 
436  // we do a backward transformation so that we dont have to account for the normalization factor
437  // that is used in the forward transformation of the IPPL FFT
438  fft_m->transform(-1, rho2_m, grntr_m);
439 }
440 
460 
461 
462 
468  NDIndex<3> idx = layout4_m->getLocalNDIndex();
469  double cellVolume = hr_m[0] * hr_m[1] * hr_m[2];
470  tmpgreen_m = 0.0;
471 
472  for(int k = idx[2].first(); k <= idx[2].last() + 1; k++) {
473  for(int j = idx[1].first(); j <= idx[1].last() + 1; j++) {
474  for(int i = idx[0].first(); i <= idx[0].last() + 1; i++) {
475 
476  Vector_t vv = Vector_t(0.0);
477  vv(0) = i * hr_m[0] - hr_m[0] / 2;
478  vv(1) = j * hr_m[1] - hr_m[1] / 2;
479  vv(2) = k * hr_m[2] - hr_m[2] / 2;
480 
481  double r = sqrt(vv(0) * vv(0) + vv(1) * vv(1) + vv(2) * vv(2));
482  double tmpgrn = -vv(2) * vv(2) * atan(vv(0) * vv(1) / (vv(2) * r)) / 2;
483  tmpgrn += -vv(1) * vv(1) * atan(vv(0) * vv(2) / (vv(1) * r)) / 2;
484  tmpgrn += -vv(0) * vv(0) * atan(vv(1) * vv(2) / (vv(0) * r)) / 2;
485  tmpgrn += vv(1) * vv(2) * log(vv(0) + r);
486  tmpgrn += vv(0) * vv(2) * log(vv(1) + r);
487  tmpgrn += vv(0) * vv(1) * log(vv(2) + r);
488 
489  tmpgreen_m[i][j][k] = tmpgrn / cellVolume;
490 
491  }
492  }
493  }
494 
495 
496  //assign seems to have problems when we need values that are on another CPU, i.e. [I+1]
497  /*assign(rho2_m[I][J][K] ,
498  tmpgreen_m[I+1][J+1][K+1] - tmpgreen_m[I][J+1][K+1] -
499  tmpgreen_m[I+1][J][K+1] + tmpgreen_m[I][J][K+1] -
500  tmpgreen_m[I+1][J+1][K] + tmpgreen_m[I][J+1][K] +
501  tmpgreen_m[I+1][J][K] - tmpgreen_m[I][J][K]);*/
502 
503  Index I = nr_m[0] + 1;
504  Index J = nr_m[1] + 1;
505  Index K = nr_m[2] + 1;
506 
507  // the actual integration
508  rho2_m = 0.0;
509  rho2_m[I][J][K] = tmpgreen_m[I+1][J+1][K+1];
510  rho2_m[I][J][K] += tmpgreen_m[I+1][J][K];
511  rho2_m[I][J][K] += tmpgreen_m[I][J+1][K];
512  rho2_m[I][J][K] += tmpgreen_m[I][J][K+1];
513  rho2_m[I][J][K] -= tmpgreen_m[I+1][J+1][K];
514  rho2_m[I][J][K] -= tmpgreen_m[I+1][J][K+1];
515  rho2_m[I][J][K] -= tmpgreen_m[I][J+1][K+1];
516  rho2_m[I][J][K] -= tmpgreen_m[I][J][K];
517 
518  mirrorRhoField();
519 
520  fft_m->transform(-1, rho2_m, grntr_m);
521 
522 }
523 
525 
526 #ifdef OPAL_DKS
527 
530  NDIndex<3> idx = layout4_m->getDomain();
531  dksbase.callGreensIntegral(tmpgreen_ptr, idx[0].length(), idx[1].length(), idx[2].length(),
532  nr_m[0]+1, nr_m[1]+1, hr_m[0], hr_m[1], hr_m[2], streamGreens);
533 
534  Index I = nr_m[0] + 1;
535  Index J = nr_m[1] + 1;
536  Index K = nr_m[2] + 1;
537 
538  dksbase.callGreensIntegration(rho2_m_ptr, tmpgreen_ptr, nr_m[0]+1, nr_m[1]+1, nr_m[2]+1,
539  streamGreens);
540 
541  dksbase.callMirrorRhoField(rho2_m_ptr, nr_m[0], nr_m[1], nr_m[2], streamGreens);
542 #endif
543 
544 }
545 
547 
548  tmpgreen_m = 0.0;
549  Field_t grn2(*mesh4_m, *layout4_m);
550  grn2 = 0.0;
551  NDIndex<3> idx = layout4_m->getLocalNDIndex();
552  double cellVolume = hr_m[0] * hr_m[1] * hr_m[2];
553 
554  for(int k = idx[2].first(); k <= idx[2].last(); k++) {
555  for(int j = idx[1].first(); j <= idx[1].last(); j++) {
556  for(int i = idx[0].first(); i <= idx[0].last(); i++) {
557 
558  Vector_t vv = Vector_t(0.0);
559  vv(0) = i * hr_m[0] - hr_m[0] / 2;
560  vv(1) = j * hr_m[1] - hr_m[1] / 2;
561  vv(2) = k * hr_m[2] - hr_m[2] / 2 + zshift;
562 
563  double r = sqrt(vv(0) * vv(0) + vv(1) * vv(1) + vv(2) * vv(2));
564  double tmpgrn = -vv(2) * vv(2) * atan(vv(0) * vv(1) / (vv(2) * r)) / 2;
565  tmpgrn += -vv(1) * vv(1) * atan(vv(0) * vv(2) / (vv(1) * r)) / 2;
566  tmpgrn += -vv(0) * vv(0) * atan(vv(1) * vv(2) / (vv(0) * r)) / 2;
567  tmpgrn += vv(1) * vv(2) * log(vv(0) + r);
568  tmpgrn += vv(0) * vv(2) * log(vv(1) + r);
569  tmpgrn += vv(0) * vv(1) * log(vv(2) + r);
570 
571  tmpgreen_m[i][j][k] = tmpgrn / cellVolume;
572 
573  }
574  }
575  }
576 
577  for(int k = idx[2].first(); k <= idx[2].last(); k++) {
578  for(int j = idx[1].first(); j <= idx[1].last(); j++) {
579  for(int i = idx[0].first(); i <= idx[0].last(); i++) {
580 
581  Vector_t vv = Vector_t(0.0);
582  vv(0) = i * hr_m[0] - hr_m[0] / 2;
583  vv(1) = j * hr_m[1] - hr_m[1] / 2;
584  vv(2) = k * hr_m[2] - hr_m[2] / 2 + zshift - nr_m[2] * hr_m[2];
585 
586  double r = sqrt(vv(0) * vv(0) + vv(1) * vv(1) + vv(2) * vv(2));
587  double tmpgrn = -vv(2) * vv(2) * atan(vv(0) * vv(1) / (vv(2) * r)) / 2;
588  tmpgrn += -vv(1) * vv(1) * atan(vv(0) * vv(2) / (vv(1) * r)) / 2;
589  tmpgrn += -vv(0) * vv(0) * atan(vv(1) * vv(2) / (vv(0) * r)) / 2;
590  tmpgrn += vv(1) * vv(2) * log(vv(0) + r);
591  tmpgrn += vv(0) * vv(2) * log(vv(1) + r);
592  tmpgrn += vv(0) * vv(1) * log(vv(2) + r);
593 
594  grn2[i][j][k] = tmpgrn / cellVolume;
595 
596  }
597  }
598  }
611  Index I = nr_m[0] + 1;
612  Index J = nr_m[1] + 1;
613  Index K = nr_m[2] + 1;
614 
615  // the actual integration
616  rho2_m = 0.0;
617  rho2_m[I][J][K] = tmpgreen_m[I+1][J+1][K+1];
618  rho2_m[I][J][K] += tmpgreen_m[I+1][J][K];
619  rho2_m[I][J][K] += tmpgreen_m[I][J+1][K];
620  rho2_m[I][J][K] += tmpgreen_m[I][J][K+1];
621  rho2_m[I][J][K] -= tmpgreen_m[I+1][J+1][K];
622  rho2_m[I][J][K] -= tmpgreen_m[I+1][J][K+1];
623  rho2_m[I][J][K] -= tmpgreen_m[I][J+1][K+1];
624  rho2_m[I][J][K] -= tmpgreen_m[I][J][K];
625 
626  tmpgreen_m = 0.0;
627  tmpgreen_m[I][J][K] = grn2[I+1][J+1][K+1];
628  tmpgreen_m[I][J][K] += grn2[I+1][J][K];
629  tmpgreen_m[I][J][K] += grn2[I][J+1][K];
630  tmpgreen_m[I][J][K] += grn2[I][J][K+1];
631  tmpgreen_m[I][J][K] -= grn2[I+1][J+1][K];
632  tmpgreen_m[I][J][K] -= grn2[I+1][J][K+1];
633  tmpgreen_m[I][J][K] -= grn2[I][J+1][K+1];
634  tmpgreen_m[I][J][K] -= grn2[I][J][K];
635 
637 
638  fft_m->transform(-1, rho2_m, grntr_m);
639 }
640 
642 
643  Index aI(0, 2 * nr_m[0]);
644  Index aJ(0, 2 * nr_m[1]);
645 
646  Index J(0, nr_m[1]);
647  Index K(0, nr_m[2]);
648 
649  Index IE(nr_m[0] + 1, 2 * nr_m[0]);
650  Index JE(nr_m[1] + 1, 2 * nr_m[1]);
651  Index KE(nr_m[2] + 1, 2 * nr_m[2]);
652 
653  Index mirroredIE = 2 * nr_m[0] - IE;
654  Index mirroredJE = 2 * nr_m[1] - JE;
655  Index mirroredKE = 2 * nr_m[2] - KE;
656 
657  rho2_m[0][0][0] = rho2_m[0][0][1];
658 
659  rho2_m[IE][J ][K ] = rho2_m[mirroredIE][J ][K ];
660  rho2_m[aI][JE][K ] = rho2_m[aI ][mirroredJE][K ];
661  if (!bcz_m)
662  rho2_m[aI][aJ][KE] = rho2_m[aI ][aJ ][mirroredKE];
663 
664 }
665 
667 
668  Index aI(0, 2 * nr_m[0]);
669  Index aK(0, 2 * nr_m[2]);
670 
671  Index I(0, nr_m[0]);
672  Index J(0, nr_m[1]);
673  Index K(0, nr_m[2]);
674 
675  Index IE(nr_m[0] + 1, 2 * nr_m[0]);
676  Index JE(nr_m[1] + 1, 2 * nr_m[1]);
677  Index KE(nr_m[2] + 1, 2 * nr_m[2]);
678 
679  Index mirroredIE = 2*nr_m[0] - IE;
680  Index mirroredJE = 2*nr_m[1] - JE;
681  Index shiftedKE = KE - nr_m[2];
682 
683  if (!bcz_m) {
684  rho2_m[I ][J ][KE] = ggrn2[I ][J ][shiftedKE];
685  rho2_m[IE][J ][aK] = rho2_m[mirroredIE][J ][aK ];
686  rho2_m[aI][JE][aK] = rho2_m[aI ][mirroredJE][aK ];
687  } else {
688  rho2_m[IE][J ][K] = rho2_m[mirroredIE][J ][K ];
689  rho2_m[aI][JE][K] = rho2_m[aI ][mirroredJE][K ];
690  }
691 }
692 
694  os << "* ************* F F T P o i s s o n S o l v e r ************************************ " << endl;
695  os << "* h " << hr_m << '\n';
696  os << "* ********************************************************************************** " << endl;
697  return os;
698 }
static int getNodes()
Definition: IpplInfo.cpp:773
std::unique_ptr< Mesh_t > mesh2_m
Layout_t & getLayout() const
Definition: BareField.h:130
Definition: TSVMeta.h:24
void shiftedIntGreensFunction(double zshift)
compute the shifted integrated Green function as described in Three-dimensional quasistatic model for...
The base class for all OPAL exceptions.
Definition: OpalException.h:28
FFT< RCTransform, 3, double > FFT_t
Definition: PBunchDefs.h:54
NDIndex< 3 > domain2_m
std::unique_ptr< FieldLayout_t > layout4_m
CenteredFieldLayout< 3, Mesh_t, Center_t > FieldLayout_t
Definition: PBunchDefs.h:48
std::unique_ptr< Mesh_t > mesh3_m
Inform * gmsg
Definition: Main.cpp:21
static bool DKSEnabled
Definition: IpplInfo.h:285
static int myNode()
Definition: IpplInfo.cpp:794
Inform & print(Inform &os) const
BConds< Vector_t, 3, Mesh_t, Center_t > vbc_m
IpplTimings::TimerRef GreensFunctionTimer_m
static void calculate(Vektor< T, 3 > &hrsq, FT &grn, FT2 *grnI)
IpplTimings::TimerRef ComputePotential_m
NDIndex< 3 > domainFFTConstruct_m
Vektor< int, 3 > nr_m
Tps< T > log(const Tps< T > &x)
Natural logarithm.
Definition: TpsMath.h:182
void integratedGreensFunction()
compute the integrated Green function as described in Three-dimensional quasistatic model for high br...
Definition: Index.h:236
e_dim_tag getRequestedDistribution(unsigned int d) const
Definition: FieldLayout.h:405
std::unique_ptr< Mesh_t > mesh4_m
void integratedGreensFunctionDKS()
Uses DKS to offload the computation of Greens function on the GPU.
FFTPoissonSolver(PartBunch &bunch, std::string greensFuntion)
PETE_TUTree< FnArcTan, typename T::PETE_Expr_t > atan(const PETE_Expr< T > &l)
Definition: PETE.h:810
static void startTimer(TimerRef t)
Definition: IpplTimings.h:187
PETE_TBTree< OpLT, Index::PETE_Expr_t, PETE_Scalar< double > > lt(const Index &idx, double x)
Definition: IndexInlines.h:352
PETE_TTTree< OpWhere, typename Cond_t::PETE_Expr_t, typename True_t::PETE_Expr_t, PETE_Scalar< Vektor< T, Dim > > > where(const PETE_Expr< Cond_t > &c, const PETE_Expr< True_t > &t, const Vektor< T, Dim > &f)
BConds< double, 3, Mesh_t, Center_t > bc_m
for defining the boundary conditions
Vektor< double, 3 > Vector_t
Definition: Vektor.h:6
static MPI_Comm getComm()
Definition: IpplInfo.h:178
Tps< T > sqrt(const Tps< T > &x)
Square root.
Definition: TpsMath.h:91
IField_t grnIField_m[3]
NDIndex< 3 > domain3_m
MFLOAT get_meshSpacing(unsigned d) const
std::unique_ptr< FFT_t > fft_m
Particle Bunch.
Definition: PartBunch.h:30
void initialize(Layout_t &)
e_dim_tag
Definition: FieldLayout.h:55
static TimerRef getTimer(const char *nm)
Definition: IpplTimings.h:182
FieldLayout_t * layout_m
NDIndex< 3 > domain4_m
static void stopTimer(TimerRef t)
Definition: IpplTimings.h:192
#define K
Definition: integrate.cpp:118
void computePotentialDKS(Field_t &rho)
void computePotential(Field_t &rho, Vector_t hr, double zshift)
Definition: Inform.h:41
UniformCartesian< 3, double > Mesh_t
Definition: PBunchDefs.h:41
const NDIndex< Dim > & getDomain() const
Definition: FieldLayout.h:325
std::unique_ptr< FieldLayout_t > layout3_m
std::unique_ptr< FieldLayout_t > layout2_m
NDIndex< 3 > domain_m
Inform & endl(Inform &inf)
Definition: Inform.cpp:42