OPAL (Object Oriented Parallel Accelerator Library)  2.2.0
OPAL
fftpack_inc.c
Go to the documentation of this file.
1 //
2 // IPPL FFT
3 //
4 // Copyright (c) 2008-2018
5 // Paul Scherrer Institut, Villigen PSI, Switzerland
6 // All rights reserved.
7 //
8 // OPAL is licensed under GNU GPL version 3.
9 //
10 
11 /*
12  * This file is part of libfftpack.
13  *
14  * libfftpack is free software; you can redistribute it and/or modify
15  * it under the terms of the GNU General Public License as published by
16  * the Free Software Foundation; either version 2 of the License, or
17  * (at your option) any later version.
18  *
19  * libfftpack is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU General Public License for more details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with libfftpack; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 
29 /*
30  * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
31  * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
32  * (DLR).
33  */
34 
35 /*
36  fftpack.c : A set of FFT routines in C.
37  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
38  (Version 4, 1985).
39 
40  C port by Martin Reinecke (2010)
41 */
42 
43 #ifdef BACKWARD
44 #define PSIGN +
45 #define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
46 /* a = b*c */
47 #define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; }
48 #else
49 #define PSIGN -
50 #define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; }
51 /* a = conj(b)*c */
52 #define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; }
53 #endif
54 
55 static void X(2) (size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
56  const cmplx *wa)
57 {
58  const size_t cdim=2;
59  size_t k,i;
60  cmplx t;
61  if (ido==1)
62  for (k=0;k<l1;++k)
63  PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k))
64  else
65  for (k=0;k<l1;++k)
66  for (i=0;i<ido;++i)
67  {
68  PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k))
69  MULPMSIGNC (CH(i,k,1),WA(0,i),t)
70  }
71 }
72 
73 static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
74  const cmplx *wa)
75 {
76  const size_t cdim=3;
77  static const double taur=-0.5, taui= PSIGN 0.86602540378443864676;
78  size_t i, k;
79  cmplx c2, c3, d2, d3, t2;
80 
81  if (ido==1)
82  for (k=0; k<l1; ++k)
83  {
84  PMC (t2,c3,CC(0,1,k),CC(0,2,k))
85  ADDC (CH(0,k,0),t2,CC(0,0,k))
86  SCALEC(t2,taur)
87  ADDC(c2,CC(0,0,k),t2)
88  SCALEC(c3,taui)
89  CONJFLIPC(c3)
90  PMC(CH(0,k,1),CH(0,k,2),c2,c3)
91  }
92  else
93  for (k=0; k<l1; ++k)
94  for (i=0; i<ido; ++i)
95  {
96  PMC (t2,c3,CC(i,1,k),CC(i,2,k))
97  ADDC (CH(i,k,0),t2,CC(i,0,k))
98  SCALEC(t2,taur)
99  ADDC(c2,CC(i,0,k),t2)
100  SCALEC(c3,taui)
101  CONJFLIPC(c3)
102  PMC(d2,d3,c2,c3)
103  MULPMSIGNC(CH(i,k,1),WA(0,i),d2)
104  MULPMSIGNC(CH(i,k,2),WA(1,i),d3)
105  }
106 }
107 
108 static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
109  const cmplx *wa)
110 {
111  const size_t cdim=4;
112  size_t i, k;
113  cmplx c2, c3, c4, t1, t2, t3, t4;
114 
115  if (ido==1)
116  for (k=0; k<l1; ++k)
117  {
118  PMC(t2,t1,CC(0,0,k),CC(0,2,k))
119  PMC(t3,t4,CC(0,1,k),CC(0,3,k))
120  CONJFLIPC(t4)
121  PMC(CH(0,k,0),CH(0,k,2),t2,t3)
122  PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4)
123  }
124  else
125  for (k=0; k<l1; ++k)
126  for (i=0; i<ido; ++i)
127  {
128  PMC(t2,t1,CC(i,0,k),CC(i,2,k))
129  PMC(t3,t4,CC(i,1,k),CC(i,3,k))
130  CONJFLIPC(t4)
131  PMC(CH(i,k,0),c3,t2,t3)
132  PMSIGNC (c2,c4,t1,t4)
133  MULPMSIGNC (CH(i,k,1),WA(0,i),c2)
134  MULPMSIGNC (CH(i,k,2),WA(1,i),c3)
135  MULPMSIGNC (CH(i,k,3),WA(2,i),c4)
136  }
137 }
138 
139 static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
140  const cmplx *wa)
141 {
142  const size_t cdim=5;
143  static const double tr11= 0.3090169943749474241,
144  ti11= PSIGN 0.95105651629515357212,
145  tr12=-0.8090169943749474241,
146  ti12= PSIGN 0.58778525229247312917;
147  size_t i, k;
148  cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5;
149 
150  if (ido==1)
151  for (k=0; k<l1; ++k)
152  {
153  PMC (t2,t5,CC(0,1,k),CC(0,4,k))
154  PMC (t3,t4,CC(0,2,k),CC(0,3,k))
155  CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r;
156  CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i;
157  c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r;
158  c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i;
159  c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r;
160  c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i;
161  c5.r=ti11*t5.r+ti12*t4.r;
162  c5.i=ti11*t5.i+ti12*t4.i;
163  c4.r=ti12*t5.r-ti11*t4.r;
164  c4.i=ti12*t5.i-ti11*t4.i;
165  CONJFLIPC(c5)
166  PMC(CH(0,k,1),CH(0,k,4),c2,c5)
167  CONJFLIPC(c4)
168  PMC(CH(0,k,2),CH(0,k,3),c3,c4)
169  }
170  else
171  for (k=0; k<l1; ++k)
172  for (i=0; i<ido; ++i)
173  {
174  PMC (t2,t5,CC(i,1,k),CC(i,4,k))
175  PMC (t3,t4,CC(i,2,k),CC(i,3,k))
176  CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r;
177  CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i;
178  c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r;
179  c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i;
180  c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r;
181  c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i;
182  c5.r=ti11*t5.r+ti12*t4.r;
183  c5.i=ti11*t5.i+ti12*t4.i;
184  c4.r=ti12*t5.r-ti11*t4.r;
185  c4.i=ti12*t5.i-ti11*t4.i;
186  CONJFLIPC(c5)
187  PMC(d2,d5,c2,c5)
188  CONJFLIPC(c4)
189  PMC(d3,d4,c3,c4)
190  MULPMSIGNC (CH(i,k,1),WA(0,i),d2)
191  MULPMSIGNC (CH(i,k,2),WA(1,i),d3)
192  MULPMSIGNC (CH(i,k,3),WA(2,i),d4)
193  MULPMSIGNC (CH(i,k,4),WA(3,i),d5)
194  }
195 }
196 
197 static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
198  const cmplx *wa)
199 {
200  const size_t cdim=6;
201  static const double taui= PSIGN 0.86602540378443864676;
202  cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5;
203  size_t i, k;
204 
205  if (ido==1)
206  for (k=0; k<l1; ++k)
207  {
208  PMC(ta1,ta3,CC(0,2,k),CC(0,4,k))
209  ta2.r = CC(0,0,k).r - .5*ta1.r;
210  ta2.i = CC(0,0,k).i - .5*ta1.i;
211  SCALEC(ta3,taui)
212  ADDC(a0,CC(0,0,k),ta1)
213  CONJFLIPC(ta3)
214  PMC(a1,a2,ta2,ta3)
215  PMC(tb1,tb3,CC(0,5,k),CC(0,1,k))
216  tb2.r = CC(0,3,k).r - .5*tb1.r;
217  tb2.i = CC(0,3,k).i - .5*tb1.i;
218  SCALEC(tb3,taui)
219  ADDC(b0,CC(0,3,k),tb1)
220  CONJFLIPC(tb3)
221  PMC(b1,b2,tb2,tb3)
222  PMC(CH(0,k,0),CH(0,k,3),a0,b0)
223  PMC(CH(0,k,4),CH(0,k,1),a1,b1)
224  PMC(CH(0,k,2),CH(0,k,5),a2,b2)
225  }
226  else
227  for (k=0; k<l1; ++k)
228  for (i=0; i<ido; ++i)
229  {
230  PMC(ta1,ta3,CC(i,2,k),CC(i,4,k))
231  ta2.r = CC(i,0,k).r - .5*ta1.r;
232  ta2.i = CC(i,0,k).i - .5*ta1.i;
233  SCALEC(ta3,taui)
234  ADDC(a0,CC(i,0,k),ta1)
235  CONJFLIPC(ta3)
236  PMC(a1,a2,ta2,ta3)
237  PMC(tb1,tb3,CC(i,5,k),CC(i,1,k))
238  tb2.r = CC(i,3,k).r - .5*tb1.r;
239  tb2.i = CC(i,3,k).i - .5*tb1.i;
240  SCALEC(tb3,taui)
241  ADDC(b0,CC(i,3,k),tb1)
242  CONJFLIPC(tb3)
243  PMC(b1,b2,tb2,tb3)
244  PMC(CH(i,k,0),d3,a0,b0)
245  PMC(d4,d1,a1,b1)
246  PMC(d2,d5,a2,b2)
247  MULPMSIGNC (CH(i,k,1),WA(0,i),d1)
248  MULPMSIGNC (CH(i,k,2),WA(1,i),d2)
249  MULPMSIGNC (CH(i,k,3),WA(2,i),d3)
250  MULPMSIGNC (CH(i,k,4),WA(3,i),d4)
251  MULPMSIGNC (CH(i,k,5),WA(4,i),d5)
252  }
253 }
254 
255 static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch,
256  const cmplx *wa)
257 {
258  const size_t cdim=ip;
259  cmplx *tarr=RALLOC(cmplx,2*ip);
260  cmplx *ccl=tarr, *wal=tarr+ip;
261  size_t i,j,k,l,jc,lc;
262  size_t ipph = (ip+1)/2;
263 
264  for (i=1; i<ip; ++i)
265  wal[i]=wa[ido*(i-1)];
266  for (k=0; k<l1; ++k)
267  for (i=0; i<ido; ++i)
268  {
269  cmplx s=CC(i,0,k);
270  ccl[0] = CC(i,0,k);
271  for(j=1,jc=ip-1; j<ipph; ++j,--jc)
272  {
273  PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k))
274  ADDC (s,s,ccl[j])
275  }
276  CH(i,k,0) = s;
277  for (j=1, jc=ip-1; j<=ipph; ++j,--jc)
278  {
279  cmplx abr=ccl[0], abi={0.,0.};
280  size_t iang=0;
281  for (l=1,lc=ip-1; l<ipph; ++l,--lc)
282  {
283  iang+=j;
284  if (iang>ip) iang-=ip;
285  abr.r += ccl[l ].r*wal[iang].r;
286  abr.i += ccl[l ].i*wal[iang].r;
287  abi.r += ccl[lc].r*wal[iang].i;
288  abi.i += ccl[lc].i*wal[iang].i;
289  }
290 #ifndef BACKWARD
291  { abi.i=-abi.i; abi.r=-abi.r; }
292 #endif
293  CONJFLIPC(abi)
294  PMC(CH(i,k,j),CH(i,k,jc),abr,abi)
295  }
296  }
297 
298  DEALLOC(tarr);
299 
300  if (ido==1) return;
301 
302  for (j=1; j<ip; ++j)
303  for (k=0; k<l1; ++k)
304  {
305  size_t idij=(j-1)*ido+1;
306  for(i=1; i<ido; ++i, ++idij)
307  {
308  cmplx t=CH(i,k,j);
309  MULPMSIGNC (CH(i,k,j),wa[idij],t)
310  }
311  }
312 }
313 
314 #undef PSIGN
315 #undef PMSIGNC
316 #undef MULPMSIGNC
317 
318 // vi: set et ts=4 sw=4 sts=4:
319 // Local Variables:
320 // mode:c
321 // c-basic-offset: 4
322 // indent-tabs-mode:nil
323 // End:
#define MULPMSIGNC(a, b, c)
Definition: fftpack_inc.c:52
#define WA(x, i)
Definition: fftpack.cpp:93
double i
Definition: fftpack.cpp:88
#define ADDC(a, b, c)
Definition: fftpack.cpp:98
#define PMC(a, b, c, d)
Definition: fftpack.cpp:97
Definition: rbendmap.h:8
#define CONJFLIPC(a)
Definition: fftpack.cpp:100
#define DEALLOC(ptr)
Definition: fftpack.cpp:74
double r
Definition: fftpack.cpp:88
#define RALLOC(type, num)
Definition: fftpack.cpp:71
constexpr double a0
Bohr radius in m.
Definition: Physics.h:73
#define PMSIGNC(a, b, c, d)
Definition: fftpack_inc.c:50
#define CH(a, b, c)
Definition: fftpack.cpp:388
#define PSIGN
Definition: fftpack_inc.c:49
#define CC(a, b, c)
Definition: fftpack.cpp:389
#define SCALEC(a, b)
Definition: fftpack.cpp:99