OPAL (Object Oriented Parallel Accelerator Library)  2.2.0
OPAL
fftpack.cpp
Go to the documentation of this file.
1 //
2 // IPPL FFT
3 //
4 // Copyright (c) 2008-2018
5 // Paul Scherrer Institut, Villigen PSI, Switzerland
6 // All rights reserved.
7 //
8 // OPAL is licensed under GNU GPL version 3.
9 //
10 
11 /*
12  * This file is part of libfftpack.
13  *
14  * libfftpack is free software; you can redistribute it and/or modify
15  * it under the terms of the GNU General Public License as published by
16  * the Free Software Foundation; either version 2 of the License, or
17  * (at your option) any later version.
18  *
19  * libfftpack is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU General Public License for more details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with libfftpack; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 
29 /*
30  * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
31  * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
32  * (DLR).
33  */
34 
35 /*
36  fftpack.c : A set of FFT routines in C.
37  Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
38  (Version 4, 1985).
39 
40  C port by Martin Reinecke (2010)
41 */
42 
43 #include <stdio.h>
44 #include <math.h>
45 #include <stdlib.h>
46 #include <string.h>
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 /******************************************************************************
53  some helper functions and macros formerly in c_utils.c and c_utils.h
54 */
55  void *util_malloc_ (size_t sz) {
56  void *res;
57  if (sz==0) return NULL;
58  res = malloc(sz);
59  if (!(res)) {
60  fprintf (stderr, "%s, %i (%s):\n%s\n",
61  __FILE__, __LINE__, __func__,
62  "malloc() failed");
63  exit (1);
64  }
65  return res;
66  }
67 
68 /* #define ALLOC(ptr,type,num) \ */
69 /* do { (ptr)=(type *)util_malloc_((num)*sizeof(type)); } while (0) */
70 
71 #define RALLOC(type,num) \
72  ((type *)util_malloc_((num)*sizeof(type)))
73 
74 #define DEALLOC(ptr) \
75  { if ((ptr) != 0) free (ptr); (ptr) = NULL; }
76 
77  void util_free_ (void *ptr) {
78  if ((ptr) != NULL) {
79  free(ptr);
80  }
81  }
82 
83 #define SWAP(a,b,type) \
84  do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0)
85 /******************************************************************************/
86 
87  typedef struct {
88  double r,i;
89  } cmplx;
90 
91 #include "fftpack.h"
92 
93 #define WA(x,i) wa[(i)+(x)*ido]
94 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
95 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
96 #define PM(a,b,c,d) { a=c+d; b=c-d; }
97 #define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
98 #define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
99 #define SCALEC(a,b) { a.r*=b; a.i*=b; }
100 #define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
101 /* (a+ib) = conj(c+id) * (e+if) */
102 #define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
103 
104 #define CONCAT(a,b) a ## b
105 
106 #define X(arg) CONCAT(passb,arg)
107 #define BACKWARD
108 #include "FFT/fftpack_inc.c"
109 #undef BACKWARD
110 #undef X
111 
112 #define X(arg) CONCAT(passf,arg)
113 #include "fftpack_inc.c"
114 #undef X
115 
116 #undef CC
117 #undef CH
118 #define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
119 #define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
120 
121  static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
122  const double *wa)
123  {
124  const size_t cdim=2;
125  size_t i, k, ic;
126  double ti2, tr2;
127 
128  for (k=0; k<l1; k++)
129  PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
130  if ((ido&1)==0)
131  for (k=0; k<l1; k++)
132  {
133  CH( 0,1,k) = -CC(ido-1,k,1);
134  CH(ido-1,0,k) = CC(ido-1,k,0);
135  }
136  if (ido<=2) return;
137  for (k=0; k<l1; k++)
138  for (i=2; i<ido; i+=2)
139  {
140  ic=ido-i;
141  MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
142  PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
143  PM (CH(i ,0,k),CH(ic ,1,k),ti2,CC(i ,k,0))
144  }
145  }
146 
147  static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
148  const double *wa)
149  {
150  const size_t cdim=3;
151  static const double taur=-0.5, taui=0.86602540378443864676;
152  size_t i, k, ic;
153  double ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
154 
155  for (k=0; k<l1; k++)
156  {
157  cr2=CC(0,k,1)+CC(0,k,2);
158  CH(0,0,k) = CC(0,k,0)+cr2;
159  CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
160  CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
161  }
162  if (ido==1) return;
163  for (k=0; k<l1; k++)
164  for (i=2; i<ido; i+=2)
165  {
166  ic=ido-i;
167  MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
168  MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
169  cr2=dr2+dr3;
170  ci2=di2+di3;
171  CH(i-1,0,k) = CC(i-1,k,0)+cr2;
172  CH(i ,0,k) = CC(i ,k,0)+ci2;
173  tr2 = CC(i-1,k,0)+taur*cr2;
174  ti2 = CC(i ,k,0)+taur*ci2;
175  tr3 = taui*(di2-di3);
176  ti3 = taui*(dr3-dr2);
177  PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3)
178  PM(CH(i ,2,k),CH(ic ,1,k),ti3,ti2)
179  }
180  }
181 
182  static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
183  const double *wa)
184  {
185  const size_t cdim=4;
186  static const double hsqt2=0.70710678118654752440;
187  size_t i, k, ic;
188  double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
189 
190  for (k=0; k<l1; k++)
191  {
192  PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
193  PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
194  PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
195  }
196  if ((ido&1)==0)
197  for (k=0; k<l1; k++)
198  {
199  ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
200  tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
201  PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
202  PM (CH( 0,3,k),CH( 0,1,k),ti1,CC(ido-1,k,2))
203  }
204  if (ido<=2) return;
205  for (k=0; k<l1; k++)
206  for (i=2; i<ido; i+=2)
207  {
208  ic=ido-i;
209  MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
210  MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
211  MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
212  PM(tr1,tr4,cr4,cr2)
213  PM(ti1,ti4,ci2,ci4)
214  PM(tr2,tr3,CC(i-1,k,0),cr3)
215  PM(ti2,ti3,CC(i ,k,0),ci3)
216  PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
217  PM(CH(i ,0,k),CH(ic ,3,k),ti1,ti2)
218  PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
219  PM(CH(i ,2,k),CH(ic ,1,k),tr4,ti3)
220  }
221  }
222 
223  static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
224  const double *wa)
225  {
226  const size_t cdim=5;
227  static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
228  tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
229  size_t i, k, ic;
230  double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
231  dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
232 
233  for (k=0; k<l1; k++)
234  {
235  PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
236  PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
237  CH(0,0,k)=CC(0,k,0)+cr2+cr3;
238  CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
239  CH(0,2,k)=ti11*ci5+ti12*ci4;
240  CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
241  CH(0,4,k)=ti12*ci5-ti11*ci4;
242  }
243  if (ido==1) return;
244  for (k=0; k<l1;++k)
245  for (i=2; i<ido; i+=2)
246  {
247  ic=ido-i;
248  MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
249  MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
250  MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
251  MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
252  PM(cr2,ci5,dr5,dr2)
253  PM(ci2,cr5,di2,di5)
254  PM(cr3,ci4,dr4,dr3)
255  PM(ci3,cr4,di3,di4)
256  CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
257  CH(i ,0,k)=CC(i ,k,0)+ci2+ci3;
258  tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
259  ti2=CC(i ,k,0)+tr11*ci2+tr12*ci3;
260  tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
261  ti3=CC(i ,k,0)+tr12*ci2+tr11*ci3;
262  MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
263  MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
264  PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
265  PM(CH(i ,2,k),CH(ic ,1,k),ti5,ti2)
266  PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
267  PM(CH(i ,4,k),CH(ic ,3,k),ti4,ti3)
268  }
269  }
270 
271 #undef CH
272 #undef CC
273 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
274 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
275 #define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
276 #define C2(a,b) cc[(a)+idl1*(b)]
277 #define CH2(a,b) ch[(a)+idl1*(b)]
278  static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
279  double *cc, double *ch, const double *wa)
280  {
281  const size_t cdim=ip;
282  static const double twopi=6.28318530717958647692;
283  size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
284  double ai1, ai2, ar1, ar2, arg;
285  double *csarr;
286  size_t aidx;
287 
288  ipph=(ip+1)/ 2;
289  if(ido!=1)
290  {
291  memcpy(ch,cc,idl1*sizeof(double));
292 
293  for(j=1; j<ip; j++)
294  for(k=0; k<l1; k++)
295  {
296  CH(0,k,j)=C1(0,k,j);
297  idij=(j-1)*ido+1;
298  for(i=2; i<ido; i+=2,idij+=2)
299  MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
300  }
301 
302  for(j=1,jc=ip-1; j<ipph; j++,jc--)
303  for(k=0; k<l1; k++)
304  for(i=2; i<ido; i+=2)
305  {
306  PM(C1(i-1,k,j),C1(i ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
307  PM(C1(i ,k,j),C1(i-1,k,jc),CH(i ,k,j ),CH(i ,k,jc))
308  }
309  }
310  else
311  memcpy(cc,ch,idl1*sizeof(double));
312 
313  for(j=1,jc=ip-1; j<ipph; j++,jc--)
314  for(k=0; k<l1; k++)
315  PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
316 
317  csarr=RALLOC(double,2*ip);
318  arg=twopi / ip;
319  csarr[0]=1.;
320  csarr[1]=0.;
321  csarr[2]=csarr[2*ip-2]=cos(arg);
322  csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
323  for (i=2; i<=ip/2; ++i)
324  {
325  csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
326  csarr[2*i+1]=sin(i*arg);
327  csarr[2*ip-2*i+1]=-csarr[2*i+1];
328  }
329  for(l=1,lc=ip-1; l<ipph; l++,lc--)
330  {
331  ar1=csarr[2*l];
332  ai1=csarr[2*l+1];
333  for(ik=0; ik<idl1; ik++)
334  {
335  CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
336  CH2(ik,lc)=ai1*C2(ik,ip-1);
337  }
338  aidx=2*l;
339  for(j=2,jc=ip-2; j<ipph; j++,jc--)
340  {
341  aidx+=2*l;
342  if (aidx>=2*ip) aidx-=2*ip;
343  ar2=csarr[aidx];
344  ai2=csarr[aidx+1];
345  for(ik=0; ik<idl1; ik++)
346  {
347  CH2(ik,l )+=ar2*C2(ik,j );
348  CH2(ik,lc)+=ai2*C2(ik,jc);
349  }
350  }
351  }
352  DEALLOC(csarr);
353 
354  for(j=1; j<ipph; j++)
355  for(ik=0; ik<idl1; ik++)
356  CH2(ik,0)+=C2(ik,j);
357 
358  for(k=0; k<l1; k++)
359  memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
360  for(j=1; j<ipph; j++)
361  {
362  jc=ip-j;
363  j2=2*j;
364  for(k=0; k<l1; k++)
365  {
366  CC(ido-1,j2-1,k) = CH(0,k,j );
367  CC(0 ,j2 ,k) = CH(0,k,jc);
368  }
369  }
370  if(ido==1) return;
371 
372  for(j=1; j<ipph; j++)
373  {
374  jc=ip-j;
375  j2=2*j;
376  for(k=0; k<l1; k++)
377  for(i=2; i<ido; i+=2)
378  {
379  ic=ido-i;
380  PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
381  PM (CC(i ,j2,k),CC(ic ,j2-1,k),CH(i ,k,jc),CH(i ,k,j ))
382  }
383  }
384  }
385 
386 #undef CC
387 #undef CH
388 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
389 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
390 
391  static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
392  const double *wa)
393  {
394  const size_t cdim=2;
395  size_t i, k, ic;
396  double ti2, tr2;
397 
398  for (k=0; k<l1; k++)
399  PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
400  if ((ido&1)==0)
401  for (k=0; k<l1; k++)
402  {
403  CH(ido-1,k,0) = 2*CC(ido-1,0,k);
404  CH(ido-1,k,1) = -2*CC(0 ,1,k);
405  }
406  if (ido<=2) return;
407  for (k=0; k<l1;++k)
408  for (i=2; i<ido; i+=2)
409  {
410  ic=ido-i;
411  PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
412  PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k))
413  MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
414  }
415  }
416 
417  static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
418  const double *wa)
419  {
420  const size_t cdim=3;
421  static const double taur=-0.5, taui=0.86602540378443864676;
422  size_t i, k, ic;
423  double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
424 
425  for (k=0; k<l1; k++)
426  {
427  tr2=2*CC(ido-1,1,k);
428  cr2=CC(0,0,k)+taur*tr2;
429  CH(0,k,0)=CC(0,0,k)+tr2;
430  ci3=2*taui*CC(0,2,k);
431  PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
432  }
433  if (ido==1) return;
434  for (k=0; k<l1; k++)
435  for (i=2; i<ido; i+=2)
436  {
437  ic=ido-i;
438  tr2=CC(i-1,2,k)+CC(ic-1,1,k);
439  ti2=CC(i ,2,k)-CC(ic ,1,k);
440  cr2=CC(i-1,0,k)+taur*tr2;
441  ci2=CC(i ,0,k)+taur*ti2;
442  CH(i-1,k,0)=CC(i-1,0,k)+tr2;
443  CH(i ,k,0)=CC(i ,0,k)+ti2;
444  cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
445  ci3=taui*(CC(i ,2,k)+CC(ic ,1,k));
446  PM(dr3,dr2,cr2,ci3)
447  PM(di2,di3,ci2,cr3)
448  MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
449  MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
450  }
451  }
452 
453  static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
454  const double *wa)
455  {
456  const size_t cdim=4;
457  static const double sqrt2=1.41421356237309504880;
458  size_t i, k, ic;
459  double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
460 
461  for (k=0; k<l1; k++)
462  {
463  PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
464  tr3=2*CC(ido-1,1,k);
465  tr4=2*CC(0,2,k);
466  PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
467  PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
468  }
469  if ((ido&1)==0)
470  for (k=0; k<l1; k++)
471  {
472  PM (ti1,ti2,CC(0 ,3,k),CC(0 ,1,k))
473  PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
474  CH(ido-1,k,0)=tr2+tr2;
475  CH(ido-1,k,1)=sqrt2*(tr1-ti1);
476  CH(ido-1,k,2)=ti2+ti2;
477  CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
478  }
479  if (ido<=2) return;
480  for (k=0; k<l1;++k)
481  for (i=2; i<ido; i+=2)
482  {
483  ic=ido-i;
484  PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
485  PM (ti1,ti2,CC(i ,0,k),CC(ic ,3,k))
486  PM (tr4,ti3,CC(i ,2,k),CC(ic ,1,k))
487  PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
488  PM (CH(i-1,k,0),cr3,tr2,tr3)
489  PM (CH(i ,k,0),ci3,ti2,ti3)
490  PM (cr4,cr2,tr1,tr4)
491  PM (ci2,ci4,ti1,ti4)
492  MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
493  MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
494  MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
495  }
496  }
497 
498  static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
499  const double *wa)
500  {
501  const size_t cdim=5;
502  static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
503  tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
504  size_t i, k, ic;
505  double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
506  ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
507 
508  for (k=0; k<l1; k++)
509  {
510  ti5=2*CC(0,2,k);
511  ti4=2*CC(0,4,k);
512  tr2=2*CC(ido-1,1,k);
513  tr3=2*CC(ido-1,3,k);
514  CH(0,k,0)=CC(0,0,k)+tr2+tr3;
515  cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
516  cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
517  MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
518  PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
519  PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
520  }
521  if (ido==1) return;
522  for (k=0; k<l1;++k)
523  for (i=2; i<ido; i+=2)
524  {
525  ic=ido-i;
526  PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
527  PM(ti5,ti2,CC(i ,2,k),CC(ic ,1,k))
528  PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
529  PM(ti4,ti3,CC(i ,4,k),CC(ic ,3,k))
530  CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
531  CH(i ,k,0)=CC(i ,0,k)+ti2+ti3;
532  cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
533  ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3;
534  cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
535  ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3;
536  MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
537  MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
538  PM(dr4,dr3,cr3,ci4)
539  PM(di3,di4,ci3,cr4)
540  PM(dr5,dr2,cr2,ci5)
541  PM(di2,di5,ci2,cr5)
542  MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
543  MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
544  MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
545  MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
546  }
547  }
548 
549  static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
550  double *cc, double *ch, const double *wa)
551  {
552  const size_t cdim=ip;
553  static const double twopi=6.28318530717958647692;
554  size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
555  double ai1, ai2, ar1, ar2, arg;
556  double *csarr;
557  size_t aidx;
558 
559  ipph=(ip+1)/ 2;
560  for(k=0; k<l1; k++)
561  memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
562  for(j=1; j<ipph; j++)
563  {
564  jc=ip-j;
565  j2=2*j;
566  for(k=0; k<l1; k++)
567  {
568  CH(0,k,j )=2*CC(ido-1,j2-1,k);
569  CH(0,k,jc)=2*CC(0 ,j2 ,k);
570  }
571  }
572 
573  if(ido!=1)
574  for(j=1,jc=ip-1; j<ipph; j++,jc--)
575  for(k=0; k<l1; k++)
576  for(i=2; i<ido; i+=2)
577  {
578  ic=ido-i;
579  PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
580  PM (CH(i ,k,jc),CH(i ,k,j ),CC(i ,2*j,k),CC(ic ,2*j-1,k))
581  }
582 
583  csarr=RALLOC(double,2*ip);
584  arg=twopi/ip;
585  csarr[0]=1.;
586  csarr[1]=0.;
587  csarr[2]=csarr[2*ip-2]=cos(arg);
588  csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
589  for (i=2; i<=ip/2; ++i)
590  {
591  csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
592  csarr[2*i+1]=sin(i*arg);
593  csarr[2*ip-2*i+1]=-csarr[2*i+1];
594  }
595  for(l=1; l<ipph; l++)
596  {
597  lc=ip-l;
598  ar1=csarr[2*l];
599  ai1=csarr[2*l+1];
600  for(ik=0; ik<idl1; ik++)
601  {
602  C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
603  C2(ik,lc)=ai1*CH2(ik,ip-1);
604  }
605  aidx=2*l;
606  for(j=2; j<ipph; j++)
607  {
608  jc=ip-j;
609  aidx+=2*l;
610  if (aidx>=2*ip) aidx-=2*ip;
611  ar2=csarr[aidx];
612  ai2=csarr[aidx+1];
613  for(ik=0; ik<idl1; ik++)
614  {
615  C2(ik,l )+=ar2*CH2(ik,j );
616  C2(ik,lc)+=ai2*CH2(ik,jc);
617  }
618  }
619  }
620  DEALLOC(csarr);
621 
622  for(j=1; j<ipph; j++)
623  for(ik=0; ik<idl1; ik++)
624  CH2(ik,0)+=CH2(ik,j);
625 
626  for(j=1,jc=ip-1; j<ipph; j++,jc--)
627  for(k=0; k<l1; k++)
628  PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
629 
630  if(ido==1)
631  return;
632  for(j=1,jc=ip-1; j<ipph; j++,jc--)
633  for(k=0; k<l1; k++)
634  for(i=2; i<ido; i+=2)
635  {
636  PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i ,k,jc))
637  PM (CH(i ,k,j ),CH(i ,k,jc),C1(i ,k,j),C1(i-1,k,jc))
638  }
639  memcpy(cc,ch,idl1*sizeof(double));
640 
641  for(j=1; j<ip; j++)
642  for(k=0; k<l1; k++)
643  {
644  C1(0,k,j)=CH(0,k,j);
645  idij=(j-1)*ido+1;
646  for(i=2; i<ido; i+=2,idij+=2)
647  MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
648  }
649  }
650 
651 #undef CC
652 #undef CH
653 #undef PM
654 #undef MULPM
655 
656 
657 /*----------------------------------------------------------------------
658  cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
659  ----------------------------------------------------------------------*/
660 
661  static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
662  const size_t ifac[], int isign)
663  {
664  size_t k1, l1=1, nf=ifac[1], iw=0;
665  cmplx *p1=c, *p2=ch;
666 
667  for(k1=0; k1<nf; k1++)
668  {
669  size_t ip=ifac[k1+2];
670  size_t l2=ip*l1;
671  size_t ido = n/l2;
672  if(ip==4)
673  (isign>0) ? passb4(ido, l1, p1, p2, wa+iw)
674  : passf4(ido, l1, p1, p2, wa+iw);
675  else if(ip==2)
676  (isign>0) ? passb2(ido, l1, p1, p2, wa+iw)
677  : passf2(ido, l1, p1, p2, wa+iw);
678  else if(ip==3)
679  (isign>0) ? passb3(ido, l1, p1, p2, wa+iw)
680  : passf3(ido, l1, p1, p2, wa+iw);
681  else if(ip==5)
682  (isign>0) ? passb5(ido, l1, p1, p2, wa+iw)
683  : passf5(ido, l1, p1, p2, wa+iw);
684  else if(ip==6)
685  (isign>0) ? passb6(ido, l1, p1, p2, wa+iw)
686  : passf6(ido, l1, p1, p2, wa+iw);
687  else
688  (isign>0) ? passbg(ido, ip, l1, p1, p2, wa+iw)
689  : passfg(ido, ip, l1, p1, p2, wa+iw);
690  SWAP(p1,p2,cmplx *);
691  l1=l2;
692  iw+=(ip-1)*ido;
693  }
694  if (p1!=c)
695  memcpy (c,p1,n*sizeof(cmplx));
696  }
697 
698  void cfftf(size_t n, double c[], double wsave[])
699  {
700  if (n!=1)
701  cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
702  (size_t*)(wsave+4*n),-1);
703  }
704 
705  void cfftb(size_t n, double c[], double wsave[])
706  {
707  if (n!=1)
708  cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
709  (size_t*)(wsave+4*n),+1);
710  }
711 
712  static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
713  {
714  size_t nl=n, nf=0, ntry=0, j=0, i;
715 
716  startloop:
717  j++;
718  ntry = (j<=npf) ? pf[j-1] : ntry+2;
719  do
720  {
721  size_t nq=nl / ntry;
722  size_t nr=nl-ntry*nq;
723  if (nr!=0)
724  goto startloop;
725  nf++;
726  ifac[nf+1]=ntry;
727  nl=nq;
728  if ((ntry==2) && (nf!=1))
729  {
730  for (i=nf+1; i>2; --i)
731  ifac[i]=ifac[i-1];
732  ifac[2]=2;
733  }
734  }
735  while(nl!=1);
736  ifac[0]=n;
737  ifac[1]=nf;
738  }
739 
740  static void cffti1(size_t n, double wa[], size_t ifac[])
741  {
742  static const size_t ntryh[5]={4,6,3,2,5};
743  static const double twopi=6.28318530717958647692;
744  size_t j, k, fi;
745 
746  double argh=twopi/n;
747  size_t i=0, l1=1;
748  factorize (n,ntryh,5,ifac);
749  for(k=1; k<=ifac[1]; k++)
750  {
751  size_t ip=ifac[k+1];
752  size_t ido=n/(l1*ip);
753  for(j=1; j<ip; j++)
754  {
755  size_t is = i;
756  double argld=j*l1*argh;
757  wa[i ]=1;
758  wa[i+1]=0;
759  for(fi=1; fi<=ido; fi++)
760  {
761  double arg=fi*argld;
762  i+=2;
763  wa[i ]=cos(arg);
764  wa[i+1]=sin(arg);
765  }
766  if(ip>6)
767  {
768  wa[is ]=wa[i ];
769  wa[is+1]=wa[i+1];
770  }
771  }
772  l1*=ip;
773  }
774  }
775 
776  void cffti(size_t n, double wsave[])
777  { if (n!=1) cffti1(n, wsave+2*n,(size_t*)(wsave+4*n)); }
778 
779 
780 /*----------------------------------------------------------------------
781  rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
782  ----------------------------------------------------------------------*/
783 
784  static void rfftf1(
785  size_t n,
786  double c[],
787  double ch[],
788  const double wa[],
789  const size_t ifac[]
790  ) {
791  size_t k1, l1=n, nf=ifac[1], iw=n-1;
792  double *p1=ch, *p2=c;
793 
794  for(k1=1; k1<=nf;++k1)
795  {
796  size_t ip=ifac[nf-k1+2];
797  size_t ido=n / l1;
798  l1 /= ip;
799  iw-=(ip-1)*ido;
800  SWAP (p1,p2,double *);
801  if(ip==4)
802  radf4(ido, l1, p1, p2, wa+iw);
803  else if(ip==2)
804  radf2(ido, l1, p1, p2, wa+iw);
805  else if(ip==3)
806  radf3(ido, l1, p1, p2, wa+iw);
807  else if(ip==5)
808  radf5(ido, l1, p1, p2, wa+iw);
809  else
810  {
811  if (ido==1)
812  SWAP (p1,p2,double *);
813  radfg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
814  SWAP (p1,p2,double *);
815  }
816  }
817  if (p1==c)
818  memcpy (c,ch,n*sizeof(double));
819  }
820 
821  static void rfftb1(size_t n, double c[], double ch[], const double wa[],
822  const size_t ifac[])
823  {
824  size_t k1, l1=1, nf=ifac[1], iw=0;
825  double *p1=c, *p2=ch;
826 
827  for(k1=1; k1<=nf; k1++)
828  {
829  size_t ip = ifac[k1+1],
830  ido= n/(ip*l1);
831  if(ip==4)
832  radb4(ido, l1, p1, p2, wa+iw);
833  else if(ip==2)
834  radb2(ido, l1, p1, p2, wa+iw);
835  else if(ip==3)
836  radb3(ido, l1, p1, p2, wa+iw);
837  else if(ip==5)
838  radb5(ido, l1, p1, p2, wa+iw);
839  else
840  {
841  radbg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
842  if (ido!=1)
843  SWAP (p1,p2,double *);
844  }
845  SWAP (p1,p2,double *);
846  l1*=ip;
847  iw+=(ip-1)*ido;
848  }
849  if (p1!=c)
850  memcpy (c,ch,n*sizeof(double));
851  }
852 
853  void rfftf(size_t n, double r[], double wsave[])
854  { if(n!=1) rfftf1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
855 
856  void rfftb(size_t n, double r[], double wsave[])
857  { if(n!=1) rfftb1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
858 
859  static void rffti1(size_t n, double wa[], size_t ifac[])
860  {
861  static const size_t ntryh[4]={4,2,3,5};
862  static const double twopi=6.28318530717958647692;
863  size_t i, j, k, fi;
864 
865  double argh=twopi/n;
866  size_t is=0, l1=1;
867  factorize (n,ntryh,4,ifac);
868  for (k=1; k<ifac[1]; k++)
869  {
870  size_t ip=ifac[k+1],
871  ido=n/(l1*ip);
872  for (j=1; j<ip; ++j)
873  {
874  double argld=j*l1*argh;
875  for(i=is,fi=1; i<=ido+is-3; i+=2,++fi)
876  {
877  double arg=fi*argld;
878  wa[i ]=cos(arg);
879  wa[i+1]=sin(arg);
880  }
881  is+=ido;
882  }
883  l1*=ip;
884  }
885  }
886 
887  void rffti(size_t n, double wsave[])
888  {
889  if (n!=1) {
890  rffti1(n, wsave+n,(size_t*)(wsave+2*n));
891  }
892  }
893 
894  const double pi = 3.14159265358979;
895  void sinti (size_t n, double wsave[])
896  {
897  if (n <= 1) return;
898  size_t ns2 = n / 2;
899  double dt = pi / double (n+1);
900  for (size_t k = 0; k < ns2; k++) {
901  wsave[k] = 2.0 * sin ((k+1) * dt);
902  }
903  rffti (n+1, wsave+ns2);
904  }
905 
906  /*
907  with n = 20
908  +0 -> was wsave[0..9]
909  +10 -> xh wsave[10..30]
910  +31 -> x wsave[31..51]
911  +52 -> xxifac wsave[52..]
912  */
913 
914  void sint1 (size_t n, double war[], double was[], double xh[], double x[], double xxifac[])
915  {
916  double sqrt3 = 1.73205080756888;
917 
918  for (size_t i = 0; i < n; i++) {
919  xh[i] = war[i];
920  war[i] = x[i];
921  }
922 
923  if (n < 2) {
924  xh[0] *= 2;
925  } else if (n == 2) {
926  double xhold = sqrt3*(xh[0]+xh[1]);
927  xh[1] = sqrt3*(xh[0]-xh[1]);
928  xh[0] = xhold;
929  } else {
930  size_t np1 = n + 1;
931  size_t ns2 = n / 2;
932  x[0] = 0.0;
933  for (size_t k = 0; k < ns2; k++) {
934  size_t kc = np1 - k;
935  double t1 = xh[k] - xh[kc];
936  double t2 = was[k] * (xh[k] + xh[kc]);
937  x[k+1] = t1 + t2;
938  x[kc+1] = t2 - t1;
939  }
940  size_t modn = n % 2;
941  if (modn != 0) {
942  x[ns2+2] = 4.0 * xh[ns2+1];
943  }
944  rfftf1 (np1, x, xh, war, (size_t*)xxifac);
945  xh[0] *= 0.5;
946  for (size_t i = 2; i < n; i += 2) {
947  xh[i-1] = -x[i];
948  xh[i] = xh[i-2] + x[i-1];
949  }
950  if (modn == 0) {
951  xh[n-1] = -x[n];
952  }
953  }
954  for (size_t i = 0; i < n; i++) {
955  x[i] = war[i];
956  war[i] = xh[i];
957  }
958  }
959 
960  void sint (size_t n, double x[], double wsave[])
961  {
962  sint1 (
963  n,
964  x,
965  wsave,
966  wsave + n/2,
967  wsave + n/2 + n + 1,
968  wsave + n/2 + 2*n + 2
969  );
970  }
971 
972 #ifdef __cplusplus
973 }
974 #endif
975 
976 // vi: set et ts=4 sw=4 sts=4:
977 // Local Variables:
978 // mode:c
979 // c-basic-offset: 4
980 // indent-tabs-mode:nil
981 // End:
#define WA(x, i)
Definition: fftpack.cpp:93
void sint(size_t n, double x[], double wsave[])
Definition: fftpack.cpp:960
Tps< T > sin(const Tps< T > &x)
Sine.
Definition: TpsMath.h:111
const int nr
Definition: ClassicRandom.h:24
#define MULPM(a, b, c, d, e, f)
Definition: fftpack.cpp:102
#define SWAP(a, b, type)
Definition: fftpack.cpp:83
void cffti(size_t n, double wsave[])
Definition: fftpack.cpp:776
void cfftb(size_t n, double c[], double wsave[])
Definition: fftpack.cpp:705
#define CH2(a, b)
Definition: fftpack.cpp:277
#define DEALLOC(ptr)
Definition: fftpack.cpp:74
double r
Definition: fftpack.cpp:88
#define RALLOC(type, num)
Definition: fftpack.cpp:71
const double pi
Definition: fftpack.cpp:894
constexpr double c
The velocity of light in m/s.
Definition: Physics.h:52
void cfftf(size_t n, double c[], double wsave[])
Definition: fftpack.cpp:698
void rffti(size_t n, double wsave[])
Definition: fftpack.cpp:887
arg(a))
void sinti(size_t n, double wsave[])
Definition: fftpack.cpp:895
#define CH(a, b, c)
Definition: fftpack.cpp:388
void rfftb(size_t n, double r[], double wsave[])
Definition: fftpack.cpp:856
Tps< T > cos(const Tps< T > &x)
Cosine.
Definition: TpsMath.h:129
void rfftf(size_t n, double r[], double wsave[])
Definition: fftpack.cpp:853
void sint1(size_t n, double war[], double was[], double xh[], double x[], double xxifac[])
Definition: fftpack.cpp:914
#define C2(a, b)
Definition: fftpack.cpp:276
void * util_malloc_(size_t sz)
Definition: fftpack.cpp:55
#define C1(a, b, c)
Definition: fftpack.cpp:275
#define PM(a, b, c, d)
Definition: fftpack.cpp:96
void util_free_(void *ptr)
Definition: fftpack.cpp:77
#define CC(a, b, c)
Definition: fftpack.cpp:389