OPAL (Object Oriented Parallel Accelerator Library) 2022.1
OPAL
fftpack_inc.c
Go to the documentation of this file.
1//
2// IPPL FFT
3//
4// Copyright (c) 2008-2018
5// Paul Scherrer Institut, Villigen PSI, Switzerland
6// All rights reserved.
7//
8// OPAL is licensed under GNU GPL version 3.
9//
10
11/*
12 * This file is part of libfftpack.
13 *
14 * libfftpack is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * libfftpack is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with libfftpack; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 */
28
29/*
30 * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
31 * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
32 * (DLR).
33 */
34
35/*
36 fftpack.c : A set of FFT routines in C.
37 Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
38 (Version 4, 1985).
39
40 C port by Martin Reinecke (2010)
41*/
42
43#ifdef BACKWARD
44#define PSIGN +
45#define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
46/* a = b*c */
47#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; }
48#else
49#define PSIGN -
50#define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; }
51/* a = conj(b)*c */
52#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; }
53#endif
54
55static void X(2) (size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
56 const cmplx *wa)
57{
58 const size_t cdim=2;
59 size_t k,i;
60 cmplx t;
61 if (ido==1)
62 for (k=0;k<l1;++k)
63 PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k))
64 else
65 for (k=0;k<l1;++k)
66 for (i=0;i<ido;++i)
67 {
68 PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k))
69 MULPMSIGNC (CH(i,k,1),WA(0,i),t)
70 }
71}
72
73static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
74 const cmplx *wa)
75{
76 const size_t cdim=3;
77 static const double taur=-0.5, taui= PSIGN 0.86602540378443864676;
78 size_t i, k;
79 cmplx c2, c3, d2, d3, t2;
80
81 if (ido==1)
82 for (k=0; k<l1; ++k)
83 {
84 PMC (t2,c3,CC(0,1,k),CC(0,2,k))
85 ADDC (CH(0,k,0),t2,CC(0,0,k))
86 SCALEC(t2,taur)
87 ADDC(c2,CC(0,0,k),t2)
88 SCALEC(c3,taui)
89 CONJFLIPC(c3)
90 PMC(CH(0,k,1),CH(0,k,2),c2,c3)
91 }
92 else
93 for (k=0; k<l1; ++k)
94 for (i=0; i<ido; ++i)
95 {
96 PMC (t2,c3,CC(i,1,k),CC(i,2,k))
97 ADDC (CH(i,k,0),t2,CC(i,0,k))
98 SCALEC(t2,taur)
99 ADDC(c2,CC(i,0,k),t2)
100 SCALEC(c3,taui)
101 CONJFLIPC(c3)
102 PMC(d2,d3,c2,c3)
103 MULPMSIGNC(CH(i,k,1),WA(0,i),d2)
104 MULPMSIGNC(CH(i,k,2),WA(1,i),d3)
105 }
106}
107
108static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
109 const cmplx *wa)
110{
111 const size_t cdim=4;
112 size_t i, k;
113 cmplx c2, c3, c4, t1, t2, t3, t4;
114
115 if (ido==1)
116 for (k=0; k<l1; ++k)
117 {
118 PMC(t2,t1,CC(0,0,k),CC(0,2,k))
119 PMC(t3,t4,CC(0,1,k),CC(0,3,k))
120 CONJFLIPC(t4)
121 PMC(CH(0,k,0),CH(0,k,2),t2,t3)
122 PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4)
123 }
124 else
125 for (k=0; k<l1; ++k)
126 for (i=0; i<ido; ++i)
127 {
128 PMC(t2,t1,CC(i,0,k),CC(i,2,k))
129 PMC(t3,t4,CC(i,1,k),CC(i,3,k))
130 CONJFLIPC(t4)
131 PMC(CH(i,k,0),c3,t2,t3)
132 PMSIGNC (c2,c4,t1,t4)
133 MULPMSIGNC (CH(i,k,1),WA(0,i),c2)
134 MULPMSIGNC (CH(i,k,2),WA(1,i),c3)
135 MULPMSIGNC (CH(i,k,3),WA(2,i),c4)
136 }
137}
138
139static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
140 const cmplx *wa)
141{
142 const size_t cdim=5;
143 static const double tr11= 0.3090169943749474241,
144 ti11= PSIGN 0.95105651629515357212,
145 tr12=-0.8090169943749474241,
146 ti12= PSIGN 0.58778525229247312917;
147 size_t i, k;
148 cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5;
149
150 if (ido==1)
151 for (k=0; k<l1; ++k)
152 {
153 PMC (t2,t5,CC(0,1,k),CC(0,4,k))
154 PMC (t3,t4,CC(0,2,k),CC(0,3,k))
155 CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r;
156 CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i;
157 c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r;
158 c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i;
159 c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r;
160 c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i;
161 c5.r=ti11*t5.r+ti12*t4.r;
162 c5.i=ti11*t5.i+ti12*t4.i;
163 c4.r=ti12*t5.r-ti11*t4.r;
164 c4.i=ti12*t5.i-ti11*t4.i;
165 CONJFLIPC(c5)
166 PMC(CH(0,k,1),CH(0,k,4),c2,c5)
167 CONJFLIPC(c4)
168 PMC(CH(0,k,2),CH(0,k,3),c3,c4)
169 }
170 else
171 for (k=0; k<l1; ++k)
172 for (i=0; i<ido; ++i)
173 {
174 PMC (t2,t5,CC(i,1,k),CC(i,4,k))
175 PMC (t3,t4,CC(i,2,k),CC(i,3,k))
176 CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r;
177 CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i;
178 c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r;
179 c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i;
180 c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r;
181 c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i;
182 c5.r=ti11*t5.r+ti12*t4.r;
183 c5.i=ti11*t5.i+ti12*t4.i;
184 c4.r=ti12*t5.r-ti11*t4.r;
185 c4.i=ti12*t5.i-ti11*t4.i;
186 CONJFLIPC(c5)
187 PMC(d2,d5,c2,c5)
188 CONJFLIPC(c4)
189 PMC(d3,d4,c3,c4)
190 MULPMSIGNC (CH(i,k,1),WA(0,i),d2)
191 MULPMSIGNC (CH(i,k,2),WA(1,i),d3)
192 MULPMSIGNC (CH(i,k,3),WA(2,i),d4)
193 MULPMSIGNC (CH(i,k,4),WA(3,i),d5)
194 }
195}
196
197static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch,
198 const cmplx *wa)
199{
200 const size_t cdim=6;
201 static const double taui= PSIGN 0.86602540378443864676;
202 cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5;
203 size_t i, k;
204
205 if (ido==1)
206 for (k=0; k<l1; ++k)
207 {
208 PMC(ta1,ta3,CC(0,2,k),CC(0,4,k))
209 ta2.r = CC(0,0,k).r - .5*ta1.r;
210 ta2.i = CC(0,0,k).i - .5*ta1.i;
211 SCALEC(ta3,taui)
212 ADDC(a0,CC(0,0,k),ta1)
213 CONJFLIPC(ta3)
214 PMC(a1,a2,ta2,ta3)
215 PMC(tb1,tb3,CC(0,5,k),CC(0,1,k))
216 tb2.r = CC(0,3,k).r - .5*tb1.r;
217 tb2.i = CC(0,3,k).i - .5*tb1.i;
218 SCALEC(tb3,taui)
219 ADDC(b0,CC(0,3,k),tb1)
220 CONJFLIPC(tb3)
221 PMC(b1,b2,tb2,tb3)
222 PMC(CH(0,k,0),CH(0,k,3),a0,b0)
223 PMC(CH(0,k,4),CH(0,k,1),a1,b1)
224 PMC(CH(0,k,2),CH(0,k,5),a2,b2)
225 }
226 else
227 for (k=0; k<l1; ++k)
228 for (i=0; i<ido; ++i)
229 {
230 PMC(ta1,ta3,CC(i,2,k),CC(i,4,k))
231 ta2.r = CC(i,0,k).r - .5*ta1.r;
232 ta2.i = CC(i,0,k).i - .5*ta1.i;
233 SCALEC(ta3,taui)
234 ADDC(a0,CC(i,0,k),ta1)
235 CONJFLIPC(ta3)
236 PMC(a1,a2,ta2,ta3)
237 PMC(tb1,tb3,CC(i,5,k),CC(i,1,k))
238 tb2.r = CC(i,3,k).r - .5*tb1.r;
239 tb2.i = CC(i,3,k).i - .5*tb1.i;
240 SCALEC(tb3,taui)
241 ADDC(b0,CC(i,3,k),tb1)
242 CONJFLIPC(tb3)
243 PMC(b1,b2,tb2,tb3)
244 PMC(CH(i,k,0),d3,a0,b0)
245 PMC(d4,d1,a1,b1)
246 PMC(d2,d5,a2,b2)
247 MULPMSIGNC (CH(i,k,1),WA(0,i),d1)
248 MULPMSIGNC (CH(i,k,2),WA(1,i),d2)
249 MULPMSIGNC (CH(i,k,3),WA(2,i),d3)
250 MULPMSIGNC (CH(i,k,4),WA(3,i),d4)
251 MULPMSIGNC (CH(i,k,5),WA(4,i),d5)
252 }
253}
254
255static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch,
256 const cmplx *wa)
257{
258 const size_t cdim=ip;
259 cmplx *tarr=RALLOC(cmplx,2*ip);
260 cmplx *ccl=tarr, *wal=tarr+ip;
261 size_t i,j,k,l,jc,lc;
262 size_t ipph = (ip+1)/2;
263
264 for (i=1; i<ip; ++i)
265 wal[i]=wa[ido*(i-1)];
266 for (k=0; k<l1; ++k)
267 for (i=0; i<ido; ++i)
268 {
269 cmplx s=CC(i,0,k);
270 ccl[0] = CC(i,0,k);
271 for(j=1,jc=ip-1; j<ipph; ++j,--jc)
272 {
273 PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k))
274 ADDC (s,s,ccl[j])
275 }
276 CH(i,k,0) = s;
277 for (j=1, jc=ip-1; j<=ipph; ++j,--jc)
278 {
279 cmplx abr=ccl[0], abi={0.,0.};
280 size_t iang=0;
281 for (l=1,lc=ip-1; l<ipph; ++l,--lc)
282 {
283 iang+=j;
284 if (iang>ip) iang-=ip;
285 abr.r += ccl[l ].r*wal[iang].r;
286 abr.i += ccl[l ].i*wal[iang].r;
287 abi.r += ccl[lc].r*wal[iang].i;
288 abi.i += ccl[lc].i*wal[iang].i;
289 }
290#ifndef BACKWARD
291 { abi.i=-abi.i; abi.r=-abi.r; }
292#endif
293 CONJFLIPC(abi)
294 PMC(CH(i,k,j),CH(i,k,jc),abr,abi)
295 }
296 }
297
298 DEALLOC(tarr);
299
300 if (ido==1) return;
301
302 for (j=1; j<ip; ++j)
303 for (k=0; k<l1; ++k)
304 {
305 size_t idij=(j-1)*ido+1;
306 for(i=1; i<ido; ++i, ++idij)
307 {
308 cmplx t=CH(i,k,j);
309 MULPMSIGNC (CH(i,k,j),wa[idij],t)
310 }
311 }
312}
313
314#undef PSIGN
315#undef PMSIGNC
316#undef MULPMSIGNC
#define PSIGN
Definition: fftpack_inc.c:49
#define MULPMSIGNC(a, b, c)
Definition: fftpack_inc.c:52
#define PMSIGNC(a, b, c, d)
Definition: fftpack_inc.c:50
#define WA(x, i)
Definition: fftpack.cpp:93
#define PMC(a, b, c, d)
Definition: fftpack.cpp:97
#define RALLOC(type, num)
Definition: fftpack.cpp:71
#define CH(a, b, c)
Definition: fftpack.cpp:388
#define DEALLOC(ptr)
Definition: fftpack.cpp:74
#define ADDC(a, b, c)
Definition: fftpack.cpp:98
#define CONJFLIPC(a)
Definition: fftpack.cpp:100
#define CC(a, b, c)
Definition: fftpack.cpp:389
#define X(arg)
Definition: fftpack.cpp:112
#define SCALEC(a, b)
Definition: fftpack.cpp:99
constexpr double a0
Bohr radius in m.
Definition: Physics.h:66
double i
Definition: fftpack.cpp:88
double r
Definition: fftpack.cpp:88