OPAL (Object Oriented Parallel Accelerator Library) 2022.1
OPAL
fftpack.cpp
Go to the documentation of this file.
1//
2// IPPL FFT
3//
4// Copyright (c) 2008-2018
5// Paul Scherrer Institut, Villigen PSI, Switzerland
6// All rights reserved.
7//
8// OPAL is licensed under GNU GPL version 3.
9//
10
11/*
12 * This file is part of libfftpack.
13 *
14 * libfftpack is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * libfftpack is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with libfftpack; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27 */
28
29/*
30 * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
31 * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
32 * (DLR).
33 */
34
35/*
36 fftpack.c : A set of FFT routines in C.
37 Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
38 (Version 4, 1985).
39
40 C port by Martin Reinecke (2010)
41*/
42
43#include <stdio.h>
44#include <math.h>
45#include <stdlib.h>
46#include <string.h>
47
48#ifdef __cplusplus
49extern "C" {
50#endif
51
52/******************************************************************************
53 some helper functions and macros formerly in c_utils.c and c_utils.h
54*/
55 void *util_malloc_ (size_t sz) {
56 void *res;
57 if (sz==0) return NULL;
58 res = malloc(sz);
59 if (!(res)) {
60 fprintf (stderr, "%s, %i (%s):\n%s\n",
61 __FILE__, __LINE__, __func__,
62 "malloc() failed");
63 exit (1);
64 }
65 return res;
66 }
67
68/* #define ALLOC(ptr,type,num) \ */
69/* do { (ptr)=(type *)util_malloc_((num)*sizeof(type)); } while (0) */
70
71#define RALLOC(type,num) \
72 ((type *)util_malloc_((num)*sizeof(type)))
73
74#define DEALLOC(ptr) \
75 { if ((ptr) != 0) free (ptr); (ptr) = NULL; }
76
77 void util_free_ (void *ptr) {
78 if ((ptr) != NULL) {
79 free(ptr);
80 }
81 }
82
83#define SWAP(a,b,type) \
84 do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0)
85/******************************************************************************/
86
87 typedef struct {
88 double r,i;
89 } cmplx;
90
91#include "fftpack.h"
92
93#define WA(x,i) wa[(i)+(x)*ido]
94#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
95#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
96#define PM(a,b,c,d) { a=c+d; b=c-d; }
97#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
98#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
99#define SCALEC(a,b) { a.r*=b; a.i*=b; }
100#define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
101/* (a+ib) = conj(c+id) * (e+if) */
102#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
103
104#define CONCAT(a,b) a ## b
105
106#define X(arg) CONCAT(passb,arg)
107#define BACKWARD
108#include "FFT/fftpack_inc.c"
109#undef BACKWARD
110#undef X
111
112#define X(arg) CONCAT(passf,arg)
113#include "fftpack_inc.c"
114#undef X
115
116#undef CC
117#undef CH
118#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
119#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
120
121 static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
122 const double *wa)
123 {
124 const size_t cdim=2;
125 size_t i, k, ic;
126 double ti2, tr2;
127
128 for (k=0; k<l1; k++)
129 PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
130 if ((ido&1)==0)
131 for (k=0; k<l1; k++)
132 {
133 CH( 0,1,k) = -CC(ido-1,k,1);
134 CH(ido-1,0,k) = CC(ido-1,k,0);
135 }
136 if (ido<=2) return;
137 for (k=0; k<l1; k++)
138 for (i=2; i<ido; i+=2)
139 {
140 ic=ido-i;
141 MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
142 PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
143 PM (CH(i ,0,k),CH(ic ,1,k),ti2,CC(i ,k,0))
144 }
145 }
146
147 static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
148 const double *wa)
149 {
150 const size_t cdim=3;
151 static const double taur=-0.5, taui=0.86602540378443864676;
152 size_t i, k, ic;
153 double ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
154
155 for (k=0; k<l1; k++)
156 {
157 cr2=CC(0,k,1)+CC(0,k,2);
158 CH(0,0,k) = CC(0,k,0)+cr2;
159 CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
160 CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
161 }
162 if (ido==1) return;
163 for (k=0; k<l1; k++)
164 for (i=2; i<ido; i+=2)
165 {
166 ic=ido-i;
167 MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
168 MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
169 cr2=dr2+dr3;
170 ci2=di2+di3;
171 CH(i-1,0,k) = CC(i-1,k,0)+cr2;
172 CH(i ,0,k) = CC(i ,k,0)+ci2;
173 tr2 = CC(i-1,k,0)+taur*cr2;
174 ti2 = CC(i ,k,0)+taur*ci2;
175 tr3 = taui*(di2-di3);
176 ti3 = taui*(dr3-dr2);
177 PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3)
178 PM(CH(i ,2,k),CH(ic ,1,k),ti3,ti2)
179 }
180 }
181
182 static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
183 const double *wa)
184 {
185 const size_t cdim=4;
186 static const double hsqt2=0.70710678118654752440;
187 size_t i, k, ic;
188 double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
189
190 for (k=0; k<l1; k++)
191 {
192 PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
193 PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
194 PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
195 }
196 if ((ido&1)==0)
197 for (k=0; k<l1; k++)
198 {
199 ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
200 tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
201 PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
202 PM (CH( 0,3,k),CH( 0,1,k),ti1,CC(ido-1,k,2))
203 }
204 if (ido<=2) return;
205 for (k=0; k<l1; k++)
206 for (i=2; i<ido; i+=2)
207 {
208 ic=ido-i;
209 MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
210 MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
211 MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
212 PM(tr1,tr4,cr4,cr2)
213 PM(ti1,ti4,ci2,ci4)
214 PM(tr2,tr3,CC(i-1,k,0),cr3)
215 PM(ti2,ti3,CC(i ,k,0),ci3)
216 PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
217 PM(CH(i ,0,k),CH(ic ,3,k),ti1,ti2)
218 PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
219 PM(CH(i ,2,k),CH(ic ,1,k),tr4,ti3)
220 }
221 }
222
223 static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
224 const double *wa)
225 {
226 const size_t cdim=5;
227 static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
228 tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
229 size_t i, k, ic;
230 double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
231 dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
232
233 for (k=0; k<l1; k++)
234 {
235 PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
236 PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
237 CH(0,0,k)=CC(0,k,0)+cr2+cr3;
238 CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
239 CH(0,2,k)=ti11*ci5+ti12*ci4;
240 CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
241 CH(0,4,k)=ti12*ci5-ti11*ci4;
242 }
243 if (ido==1) return;
244 for (k=0; k<l1;++k)
245 for (i=2; i<ido; i+=2)
246 {
247 ic=ido-i;
248 MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
249 MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
250 MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
251 MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
252 PM(cr2,ci5,dr5,dr2)
253 PM(ci2,cr5,di2,di5)
254 PM(cr3,ci4,dr4,dr3)
255 PM(ci3,cr4,di3,di4)
256 CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
257 CH(i ,0,k)=CC(i ,k,0)+ci2+ci3;
258 tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
259 ti2=CC(i ,k,0)+tr11*ci2+tr12*ci3;
260 tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
261 ti3=CC(i ,k,0)+tr12*ci2+tr11*ci3;
262 MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
263 MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
264 PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
265 PM(CH(i ,2,k),CH(ic ,1,k),ti5,ti2)
266 PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
267 PM(CH(i ,4,k),CH(ic ,3,k),ti4,ti3)
268 }
269 }
270
271#undef CH
272#undef CC
273#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
274#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
275#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
276#define C2(a,b) cc[(a)+idl1*(b)]
277#define CH2(a,b) ch[(a)+idl1*(b)]
278 static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
279 double *cc, double *ch, const double *wa)
280 {
281 const size_t cdim=ip;
282 static const double twopi=6.28318530717958647692;
283 size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
284 double ai1, ai2, ar1, ar2, arg;
285 double *csarr;
286 size_t aidx;
287
288 ipph=(ip+1)/ 2;
289 if(ido!=1)
290 {
291 memcpy(ch,cc,idl1*sizeof(double));
292
293 for(j=1; j<ip; j++)
294 for(k=0; k<l1; k++)
295 {
296 CH(0,k,j)=C1(0,k,j);
297 idij=(j-1)*ido+1;
298 for(i=2; i<ido; i+=2,idij+=2)
299 MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
300 }
301
302 for(j=1,jc=ip-1; j<ipph; j++,jc--)
303 for(k=0; k<l1; k++)
304 for(i=2; i<ido; i+=2)
305 {
306 PM(C1(i-1,k,j),C1(i ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
307 PM(C1(i ,k,j),C1(i-1,k,jc),CH(i ,k,j ),CH(i ,k,jc))
308 }
309 }
310 else
311 memcpy(cc,ch,idl1*sizeof(double));
312
313 for(j=1,jc=ip-1; j<ipph; j++,jc--)
314 for(k=0; k<l1; k++)
315 PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
316
317 csarr=RALLOC(double,2*ip);
318 arg=twopi / ip;
319 csarr[0]=1.;
320 csarr[1]=0.;
321 csarr[2]=csarr[2*ip-2]=cos(arg);
322 csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
323 for (i=2; i<=ip/2; ++i)
324 {
325 csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
326 csarr[2*i+1]=sin(i*arg);
327 csarr[2*ip-2*i+1]=-csarr[2*i+1];
328 }
329 for(l=1,lc=ip-1; l<ipph; l++,lc--)
330 {
331 ar1=csarr[2*l];
332 ai1=csarr[2*l+1];
333 for(ik=0; ik<idl1; ik++)
334 {
335 CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
336 CH2(ik,lc)=ai1*C2(ik,ip-1);
337 }
338 aidx=2*l;
339 for(j=2,jc=ip-2; j<ipph; j++,jc--)
340 {
341 aidx+=2*l;
342 if (aidx>=2*ip) aidx-=2*ip;
343 ar2=csarr[aidx];
344 ai2=csarr[aidx+1];
345 for(ik=0; ik<idl1; ik++)
346 {
347 CH2(ik,l )+=ar2*C2(ik,j );
348 CH2(ik,lc)+=ai2*C2(ik,jc);
349 }
350 }
351 }
352 DEALLOC(csarr);
353
354 for(j=1; j<ipph; j++)
355 for(ik=0; ik<idl1; ik++)
356 CH2(ik,0)+=C2(ik,j);
357
358 for(k=0; k<l1; k++)
359 memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
360 for(j=1; j<ipph; j++)
361 {
362 jc=ip-j;
363 j2=2*j;
364 for(k=0; k<l1; k++)
365 {
366 CC(ido-1,j2-1,k) = CH(0,k,j );
367 CC(0 ,j2 ,k) = CH(0,k,jc);
368 }
369 }
370 if(ido==1) return;
371
372 for(j=1; j<ipph; j++)
373 {
374 jc=ip-j;
375 j2=2*j;
376 for(k=0; k<l1; k++)
377 for(i=2; i<ido; i+=2)
378 {
379 ic=ido-i;
380 PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
381 PM (CC(i ,j2,k),CC(ic ,j2-1,k),CH(i ,k,jc),CH(i ,k,j ))
382 }
383 }
384 }
385
386#undef CC
387#undef CH
388#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
389#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
390
391 static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
392 const double *wa)
393 {
394 const size_t cdim=2;
395 size_t i, k, ic;
396 double ti2, tr2;
397
398 for (k=0; k<l1; k++)
399 PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
400 if ((ido&1)==0)
401 for (k=0; k<l1; k++)
402 {
403 CH(ido-1,k,0) = 2*CC(ido-1,0,k);
404 CH(ido-1,k,1) = -2*CC(0 ,1,k);
405 }
406 if (ido<=2) return;
407 for (k=0; k<l1;++k)
408 for (i=2; i<ido; i+=2)
409 {
410 ic=ido-i;
411 PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
412 PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k))
413 MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
414 }
415 }
416
417 static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
418 const double *wa)
419 {
420 const size_t cdim=3;
421 static const double taur=-0.5, taui=0.86602540378443864676;
422 size_t i, k, ic;
423 double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
424
425 for (k=0; k<l1; k++)
426 {
427 tr2=2*CC(ido-1,1,k);
428 cr2=CC(0,0,k)+taur*tr2;
429 CH(0,k,0)=CC(0,0,k)+tr2;
430 ci3=2*taui*CC(0,2,k);
431 PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
432 }
433 if (ido==1) return;
434 for (k=0; k<l1; k++)
435 for (i=2; i<ido; i+=2)
436 {
437 ic=ido-i;
438 tr2=CC(i-1,2,k)+CC(ic-1,1,k);
439 ti2=CC(i ,2,k)-CC(ic ,1,k);
440 cr2=CC(i-1,0,k)+taur*tr2;
441 ci2=CC(i ,0,k)+taur*ti2;
442 CH(i-1,k,0)=CC(i-1,0,k)+tr2;
443 CH(i ,k,0)=CC(i ,0,k)+ti2;
444 cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
445 ci3=taui*(CC(i ,2,k)+CC(ic ,1,k));
446 PM(dr3,dr2,cr2,ci3)
447 PM(di2,di3,ci2,cr3)
448 MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
449 MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
450 }
451 }
452
453 static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
454 const double *wa)
455 {
456 const size_t cdim=4;
457 static const double sqrt2=1.41421356237309504880;
458 size_t i, k, ic;
459 double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
460
461 for (k=0; k<l1; k++)
462 {
463 PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
464 tr3=2*CC(ido-1,1,k);
465 tr4=2*CC(0,2,k);
466 PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
467 PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
468 }
469 if ((ido&1)==0)
470 for (k=0; k<l1; k++)
471 {
472 PM (ti1,ti2,CC(0 ,3,k),CC(0 ,1,k))
473 PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
474 CH(ido-1,k,0)=tr2+tr2;
475 CH(ido-1,k,1)=sqrt2*(tr1-ti1);
476 CH(ido-1,k,2)=ti2+ti2;
477 CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
478 }
479 if (ido<=2) return;
480 for (k=0; k<l1;++k)
481 for (i=2; i<ido; i+=2)
482 {
483 ic=ido-i;
484 PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
485 PM (ti1,ti2,CC(i ,0,k),CC(ic ,3,k))
486 PM (tr4,ti3,CC(i ,2,k),CC(ic ,1,k))
487 PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
488 PM (CH(i-1,k,0),cr3,tr2,tr3)
489 PM (CH(i ,k,0),ci3,ti2,ti3)
490 PM (cr4,cr2,tr1,tr4)
491 PM (ci2,ci4,ti1,ti4)
492 MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
493 MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
494 MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
495 }
496 }
497
498 static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
499 const double *wa)
500 {
501 const size_t cdim=5;
502 static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
503 tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
504 size_t i, k, ic;
505 double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
506 ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
507
508 for (k=0; k<l1; k++)
509 {
510 ti5=2*CC(0,2,k);
511 ti4=2*CC(0,4,k);
512 tr2=2*CC(ido-1,1,k);
513 tr3=2*CC(ido-1,3,k);
514 CH(0,k,0)=CC(0,0,k)+tr2+tr3;
515 cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
516 cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
517 MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
518 PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
519 PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
520 }
521 if (ido==1) return;
522 for (k=0; k<l1;++k)
523 for (i=2; i<ido; i+=2)
524 {
525 ic=ido-i;
526 PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
527 PM(ti5,ti2,CC(i ,2,k),CC(ic ,1,k))
528 PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
529 PM(ti4,ti3,CC(i ,4,k),CC(ic ,3,k))
530 CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
531 CH(i ,k,0)=CC(i ,0,k)+ti2+ti3;
532 cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
533 ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3;
534 cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
535 ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3;
536 MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
537 MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
538 PM(dr4,dr3,cr3,ci4)
539 PM(di3,di4,ci3,cr4)
540 PM(dr5,dr2,cr2,ci5)
541 PM(di2,di5,ci2,cr5)
542 MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
543 MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
544 MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
545 MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
546 }
547 }
548
549 static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
550 double *cc, double *ch, const double *wa)
551 {
552 const size_t cdim=ip;
553 static const double twopi=6.28318530717958647692;
554 size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
555 double ai1, ai2, ar1, ar2, arg;
556 double *csarr;
557 size_t aidx;
558
559 ipph=(ip+1)/ 2;
560 for(k=0; k<l1; k++)
561 memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
562 for(j=1; j<ipph; j++)
563 {
564 jc=ip-j;
565 j2=2*j;
566 for(k=0; k<l1; k++)
567 {
568 CH(0,k,j )=2*CC(ido-1,j2-1,k);
569 CH(0,k,jc)=2*CC(0 ,j2 ,k);
570 }
571 }
572
573 if(ido!=1)
574 for(j=1,jc=ip-1; j<ipph; j++,jc--)
575 for(k=0; k<l1; k++)
576 for(i=2; i<ido; i+=2)
577 {
578 ic=ido-i;
579 PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
580 PM (CH(i ,k,jc),CH(i ,k,j ),CC(i ,2*j,k),CC(ic ,2*j-1,k))
581 }
582
583 csarr=RALLOC(double,2*ip);
584 arg=twopi/ip;
585 csarr[0]=1.;
586 csarr[1]=0.;
587 csarr[2]=csarr[2*ip-2]=cos(arg);
588 csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
589 for (i=2; i<=ip/2; ++i)
590 {
591 csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
592 csarr[2*i+1]=sin(i*arg);
593 csarr[2*ip-2*i+1]=-csarr[2*i+1];
594 }
595 for(l=1; l<ipph; l++)
596 {
597 lc=ip-l;
598 ar1=csarr[2*l];
599 ai1=csarr[2*l+1];
600 for(ik=0; ik<idl1; ik++)
601 {
602 C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
603 C2(ik,lc)=ai1*CH2(ik,ip-1);
604 }
605 aidx=2*l;
606 for(j=2; j<ipph; j++)
607 {
608 jc=ip-j;
609 aidx+=2*l;
610 if (aidx>=2*ip) aidx-=2*ip;
611 ar2=csarr[aidx];
612 ai2=csarr[aidx+1];
613 for(ik=0; ik<idl1; ik++)
614 {
615 C2(ik,l )+=ar2*CH2(ik,j );
616 C2(ik,lc)+=ai2*CH2(ik,jc);
617 }
618 }
619 }
620 DEALLOC(csarr);
621
622 for(j=1; j<ipph; j++)
623 for(ik=0; ik<idl1; ik++)
624 CH2(ik,0)+=CH2(ik,j);
625
626 for(j=1,jc=ip-1; j<ipph; j++,jc--)
627 for(k=0; k<l1; k++)
628 PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
629
630 if(ido==1)
631 return;
632 for(j=1,jc=ip-1; j<ipph; j++,jc--)
633 for(k=0; k<l1; k++)
634 for(i=2; i<ido; i+=2)
635 {
636 PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i ,k,jc))
637 PM (CH(i ,k,j ),CH(i ,k,jc),C1(i ,k,j),C1(i-1,k,jc))
638 }
639 memcpy(cc,ch,idl1*sizeof(double));
640
641 for(j=1; j<ip; j++)
642 for(k=0; k<l1; k++)
643 {
644 C1(0,k,j)=CH(0,k,j);
645 idij=(j-1)*ido+1;
646 for(i=2; i<ido; i+=2,idij+=2)
647 MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
648 }
649 }
650
651#undef CC
652#undef CH
653#undef PM
654#undef MULPM
655
656
657/*----------------------------------------------------------------------
658 cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
659 ----------------------------------------------------------------------*/
660
661 static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
662 const size_t ifac[], int isign)
663 {
664 size_t k1, l1=1, nf=ifac[1], iw=0;
665 cmplx *p1=c, *p2=ch;
666
667 for(k1=0; k1<nf; k1++)
668 {
669 size_t ip=ifac[k1+2];
670 size_t l2=ip*l1;
671 size_t ido = n/l2;
672 if(ip==4)
673 (isign>0) ? passb4(ido, l1, p1, p2, wa+iw)
674 : passf4(ido, l1, p1, p2, wa+iw);
675 else if(ip==2)
676 (isign>0) ? passb2(ido, l1, p1, p2, wa+iw)
677 : passf2(ido, l1, p1, p2, wa+iw);
678 else if(ip==3)
679 (isign>0) ? passb3(ido, l1, p1, p2, wa+iw)
680 : passf3(ido, l1, p1, p2, wa+iw);
681 else if(ip==5)
682 (isign>0) ? passb5(ido, l1, p1, p2, wa+iw)
683 : passf5(ido, l1, p1, p2, wa+iw);
684 else if(ip==6)
685 (isign>0) ? passb6(ido, l1, p1, p2, wa+iw)
686 : passf6(ido, l1, p1, p2, wa+iw);
687 else
688 (isign>0) ? passbg(ido, ip, l1, p1, p2, wa+iw)
689 : passfg(ido, ip, l1, p1, p2, wa+iw);
690 SWAP(p1,p2,cmplx *);
691 l1=l2;
692 iw+=(ip-1)*ido;
693 }
694 if (p1!=c)
695 memcpy (c,p1,n*sizeof(cmplx));
696 }
697
698 void cfftf(size_t n, double c[], double wsave[])
699 {
700 if (n!=1)
701 cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
702 (size_t*)(wsave+4*n),-1);
703 }
704
705 void cfftb(size_t n, double c[], double wsave[])
706 {
707 if (n!=1)
708 cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
709 (size_t*)(wsave+4*n),+1);
710 }
711
712 static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
713 {
714 size_t nl=n, nf=0, ntry=0, j=0, i;
715
716 startloop:
717 j++;
718 ntry = (j<=npf) ? pf[j-1] : ntry+2;
719 do
720 {
721 size_t nq=nl / ntry;
722 size_t nr=nl-ntry*nq;
723 if (nr!=0)
724 goto startloop;
725 nf++;
726 ifac[nf+1]=ntry;
727 nl=nq;
728 if ((ntry==2) && (nf!=1))
729 {
730 for (i=nf+1; i>2; --i)
731 ifac[i]=ifac[i-1];
732 ifac[2]=2;
733 }
734 }
735 while(nl!=1);
736 ifac[0]=n;
737 ifac[1]=nf;
738 }
739
740 static void cffti1(size_t n, double wa[], size_t ifac[])
741 {
742 static const size_t ntryh[5]={4,6,3,2,5};
743 static const double twopi=6.28318530717958647692;
744 size_t j, k, fi;
745
746 double argh=twopi/n;
747 size_t i=0, l1=1;
748 factorize (n,ntryh,5,ifac);
749 for(k=1; k<=ifac[1]; k++)
750 {
751 size_t ip=ifac[k+1];
752 size_t ido=n/(l1*ip);
753 for(j=1; j<ip; j++)
754 {
755 size_t is = i;
756 double argld=j*l1*argh;
757 wa[i ]=1;
758 wa[i+1]=0;
759 for(fi=1; fi<=ido; fi++)
760 {
761 double arg=fi*argld;
762 i+=2;
763 wa[i ]=cos(arg);
764 wa[i+1]=sin(arg);
765 }
766 if(ip>6)
767 {
768 wa[is ]=wa[i ];
769 wa[is+1]=wa[i+1];
770 }
771 }
772 l1*=ip;
773 }
774 }
775
776 void cffti(size_t n, double wsave[])
777 { if (n!=1) cffti1(n, wsave+2*n,(size_t*)(wsave+4*n)); }
778
779
780/*----------------------------------------------------------------------
781 rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
782 ----------------------------------------------------------------------*/
783
784 static void rfftf1(
785 size_t n,
786 double c[],
787 double ch[],
788 const double wa[],
789 const size_t ifac[]
790 ) {
791 size_t k1, l1=n, nf=ifac[1], iw=n-1;
792 double *p1=ch, *p2=c;
793
794 for(k1=1; k1<=nf;++k1)
795 {
796 size_t ip=ifac[nf-k1+2];
797 size_t ido=n / l1;
798 l1 /= ip;
799 iw-=(ip-1)*ido;
800 SWAP (p1,p2,double *);
801 if(ip==4)
802 radf4(ido, l1, p1, p2, wa+iw);
803 else if(ip==2)
804 radf2(ido, l1, p1, p2, wa+iw);
805 else if(ip==3)
806 radf3(ido, l1, p1, p2, wa+iw);
807 else if(ip==5)
808 radf5(ido, l1, p1, p2, wa+iw);
809 else
810 {
811 if (ido==1)
812 SWAP (p1,p2,double *);
813 radfg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
814 SWAP (p1,p2,double *);
815 }
816 }
817 if (p1==c)
818 memcpy (c,ch,n*sizeof(double));
819 }
820
821 static void rfftb1(size_t n, double c[], double ch[], const double wa[],
822 const size_t ifac[])
823 {
824 size_t k1, l1=1, nf=ifac[1], iw=0;
825 double *p1=c, *p2=ch;
826
827 for(k1=1; k1<=nf; k1++)
828 {
829 size_t ip = ifac[k1+1],
830 ido= n/(ip*l1);
831 if(ip==4)
832 radb4(ido, l1, p1, p2, wa+iw);
833 else if(ip==2)
834 radb2(ido, l1, p1, p2, wa+iw);
835 else if(ip==3)
836 radb3(ido, l1, p1, p2, wa+iw);
837 else if(ip==5)
838 radb5(ido, l1, p1, p2, wa+iw);
839 else
840 {
841 radbg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
842 if (ido!=1)
843 SWAP (p1,p2,double *);
844 }
845 SWAP (p1,p2,double *);
846 l1*=ip;
847 iw+=(ip-1)*ido;
848 }
849 if (p1!=c)
850 memcpy (c,ch,n*sizeof(double));
851 }
852
853 void rfftf(size_t n, double r[], double wsave[])
854 { if(n!=1) rfftf1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
855
856 void rfftb(size_t n, double r[], double wsave[])
857 { if(n!=1) rfftb1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
858
859 static void rffti1(size_t n, double wa[], size_t ifac[])
860 {
861 static const size_t ntryh[4]={4,2,3,5};
862 static const double twopi=6.28318530717958647692;
863 size_t i, j, k, fi;
864
865 double argh=twopi/n;
866 size_t is=0, l1=1;
867 factorize (n,ntryh,4,ifac);
868 for (k=1; k<ifac[1]; k++)
869 {
870 size_t ip=ifac[k+1],
871 ido=n/(l1*ip);
872 for (j=1; j<ip; ++j)
873 {
874 double argld=j*l1*argh;
875 for(i=is,fi=1; i<=ido+is-3; i+=2,++fi)
876 {
877 double arg=fi*argld;
878 wa[i ]=cos(arg);
879 wa[i+1]=sin(arg);
880 }
881 is+=ido;
882 }
883 l1*=ip;
884 }
885 }
886
887 void rffti(size_t n, double wsave[])
888 {
889 if (n!=1) {
890 rffti1(n, wsave+n,(size_t*)(wsave+2*n));
891 }
892 }
893
894 const double pi = 3.14159265358979;
895 void sinti (size_t n, double wsave[])
896 {
897 if (n <= 1) return;
898 size_t ns2 = n / 2;
899 double dt = pi / double (n+1);
900 for (size_t k = 0; k < ns2; k++) {
901 wsave[k] = 2.0 * sin ((k+1) * dt);
902 }
903 rffti (n+1, wsave+ns2);
904 }
905
906 /*
907 with n = 20
908 +0 -> was wsave[0..9]
909 +10 -> xh wsave[10..30]
910 +31 -> x wsave[31..51]
911 +52 -> xxifac wsave[52..]
912 */
913
914 void sint1 (size_t n, double war[], double was[], double xh[], double x[], double xxifac[])
915 {
916 double sqrt3 = 1.73205080756888;
917
918 for (size_t i = 0; i < n; i++) {
919 xh[i] = war[i];
920 war[i] = x[i];
921 }
922
923 if (n < 2) {
924 xh[0] *= 2;
925 } else if (n == 2) {
926 double xhold = sqrt3*(xh[0]+xh[1]);
927 xh[1] = sqrt3*(xh[0]-xh[1]);
928 xh[0] = xhold;
929 } else {
930 size_t np1 = n + 1;
931 size_t ns2 = n / 2;
932 x[0] = 0.0;
933 for (size_t k = 0; k < ns2; k++) {
934 size_t kc = np1 - k;
935 double t1 = xh[k] - xh[kc];
936 double t2 = was[k] * (xh[k] + xh[kc]);
937 x[k+1] = t1 + t2;
938 x[kc+1] = t2 - t1;
939 }
940 size_t modn = n % 2;
941 if (modn != 0) {
942 x[ns2+2] = 4.0 * xh[ns2+1];
943 }
944 rfftf1 (np1, x, xh, war, (size_t*)xxifac);
945 xh[0] *= 0.5;
946 for (size_t i = 2; i < n; i += 2) {
947 xh[i-1] = -x[i];
948 xh[i] = xh[i-2] + x[i-1];
949 }
950 if (modn == 0) {
951 xh[n-1] = -x[n];
952 }
953 }
954 for (size_t i = 0; i < n; i++) {
955 x[i] = war[i];
956 war[i] = xh[i];
957 }
958 }
959
960 void sint (size_t n, double x[], double wsave[])
961 {
962 sint1 (
963 n,
964 x,
965 wsave,
966 wsave + n/2,
967 wsave + n/2 + n + 1,
968 wsave + n/2 + 2*n + 2
969 );
970 }
971
972#ifdef __cplusplus
973}
974#endif
const int nr
Definition: ClassicRandom.h:24
Tps< T > cos(const Tps< T > &x)
Cosine.
Definition: TpsMath.h:129
Tps< T > sin(const Tps< T > &x)
Sine.
Definition: TpsMath.h:111
#define PM(a, b, c, d)
Definition: fftpack.cpp:96
#define WA(x, i)
Definition: fftpack.cpp:93
#define C1(a, b, c)
Definition: fftpack.cpp:275
void sinti(size_t n, double wsave[])
Definition: fftpack.cpp:895
const double pi
Definition: fftpack.cpp:894
#define RALLOC(type, num)
Definition: fftpack.cpp:71
#define SWAP(a, b, type)
Definition: fftpack.cpp:83
#define CH(a, b, c)
Definition: fftpack.cpp:388
#define DEALLOC(ptr)
Definition: fftpack.cpp:74
void cffti(size_t n, double wsave[])
Definition: fftpack.cpp:776
void cfftf(size_t n, double c[], double wsave[])
Definition: fftpack.cpp:698
void sint(size_t n, double x[], double wsave[])
Definition: fftpack.cpp:960
void sint1(size_t n, double war[], double was[], double xh[], double x[], double xxifac[])
Definition: fftpack.cpp:914
#define CH2(a, b)
Definition: fftpack.cpp:277
void util_free_(void *ptr)
Definition: fftpack.cpp:77
void rffti(size_t n, double wsave[])
Definition: fftpack.cpp:887
void * util_malloc_(size_t sz)
Definition: fftpack.cpp:55
#define C2(a, b)
Definition: fftpack.cpp:276
void rfftf(size_t n, double r[], double wsave[])
Definition: fftpack.cpp:853
void cfftb(size_t n, double c[], double wsave[])
Definition: fftpack.cpp:705
#define MULPM(a, b, c, d, e, f)
Definition: fftpack.cpp:102
void rfftb(size_t n, double r[], double wsave[])
Definition: fftpack.cpp:856
#define CC(a, b, c)
Definition: fftpack.cpp:389
arg(a))
constexpr double c
The velocity of light in m/s.
Definition: Physics.h:45
double i
Definition: fftpack.cpp:88