diff --git a/Workspace_msvc/decoder.vcxproj b/Workspace_msvc/decoder.vcxproj
index 98827b70c98f82dc4f0721e8a3c1ed4b3289d1f1..ca0d96f44247a24937132bda883936e7f5a3dde1 100644
--- a/Workspace_msvc/decoder.vcxproj
+++ b/Workspace_msvc/decoder.vcxproj
@@ -150,6 +150,7 @@
+
diff --git a/Workspace_msvc/lib_com.vcxproj b/Workspace_msvc/lib_com.vcxproj
index eeb61b2d94a7ab0fd8506de8ad4701e8f18e7f93..af6c1ed7b8462d7fbd979481512251471a24b70f 100644
--- a/Workspace_msvc/lib_com.vcxproj
+++ b/Workspace_msvc/lib_com.vcxproj
@@ -179,6 +179,7 @@
+
@@ -243,12 +244,14 @@
+
+
diff --git a/Workspace_msvc/lib_com.vcxproj.filters b/Workspace_msvc/lib_com.vcxproj.filters
index 8eac6b783caeabfbf62e0a8b1af1bf0ab3e2f361..4c72cb99e0b939aba1c8e9f459fd22056472d8be 100644
--- a/Workspace_msvc/lib_com.vcxproj.filters
+++ b/Workspace_msvc/lib_com.vcxproj.filters
@@ -451,6 +451,9 @@
common_ivas_c
+
+ common_ivas_c
+
common_ivas_c
diff --git a/apps/decoder.c b/apps/decoder.c
index dbbda43621d34ed562c6d1ec909cfcbc0638c877..a5b1949b0915fc71eea62977931bda03dd753c6c 100644
--- a/apps/decoder.c
+++ b/apps/decoder.c
@@ -130,6 +130,7 @@ static void usage_dec( void );
static ivas_error decodeG192( DecArguments arg, BS_READER_HANDLE hBsReader, RotFileReader *headRotReader, RotFileReader *externalOrientationFileReader, RotFileReader *refRotReader, Vector3PairFileReader *referenceVectorReader, IVAS_DEC_HANDLE hIvasDec, int16_t *pcmBuf );
static ivas_error decodeVoIP( DecArguments arg, BS_READER_HANDLE hBsReader, RotFileReader *headRotReader, RotFileReader *externalOrientationFileReader, RotFileReader *refRotReader, Vector3PairFileReader *referenceVectorReader, IVAS_DEC_HANDLE hIvasDec );
void run_fft_unit_test(void);
+void run_mdct_unit_test(void);
/*------------------------------------------------------------------------------------------*
* main()
@@ -173,6 +174,7 @@ int main(
if (run_unit_tests)
{
run_fft_unit_test();
+ run_mdct_unit_test();
return;
}
diff --git a/lib_com/fft_cldfb_fx.c b/lib_com/fft_cldfb_fx.c
new file mode 100644
index 0000000000000000000000000000000000000000..0aa46d6052da69fc98fb2e391e64143ba5beb8d8
--- /dev/null
+++ b/lib_com/fft_cldfb_fx.c
@@ -0,0 +1,1070 @@
+/******************************************************************************************************
+
+ (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository. All Rights Reserved.
+
+ This software is protected by copyright law and by international treaties.
+ The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository retain full ownership rights in their respective contributions in
+ the software. This notice grants no license of any kind, including but not limited to patent
+ license, nor is any license granted by implication, estoppel or otherwise.
+
+ Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+ contributions.
+
+ This software is provided "AS IS", without any express or implied warranties. The software is in the
+ development stage. It is intended exclusively for experts who have experience with such software and
+ solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+ and fitness for a particular purpose are hereby disclaimed and excluded.
+
+ Any dispute, controversy or claim arising under or in relation to providing this software shall be
+ submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+ accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+ the United Nations Convention on Contracts on the International Sales of Goods.
+
+*******************************************************************************************************/
+
+/*====================================================================================
+ EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
+ ====================================================================================*/
+
+#include <assert.h>
+#include "options.h"
+#include <stdlib.h>
+#include "prot.h"
+#include "ivas_cnst.h"
+#include "wmc_auto.h"
+#include "basop_util.h"
+#include "complex_basop.h"
+
+#define Mpy_32_xx Mpy_32_16_1
+
+#define FFTC(x) WORD322WORD16((Word32)x)
+
+#define C31 (FFTC(0x91261468)) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */
+
+#define C51 (FFTC(0x79bc3854)) /* FL2WORD32( 0.95105652) */
+#define C52 (FFTC(0x9d839db0)) /* FL2WORD32(-1.53884180/2) */
+#define C53 (FFTC(0xd18053ce)) /* FL2WORD32(-0.36327126) */
+#define C54 (FFTC(0x478dde64)) /* FL2WORD32( 0.55901699) */
+#define C55 (FFTC(0xb0000001)) /* FL2WORD32(-1.25/2) */
+
+#define C81 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) */
+#define C82 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) */
+
+#define C161 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) INV_SQRT2 */
+#define C162 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2 */
+
+#define C163 (FFTC(0x7641af3d)) /* FL2WORD32( 9.238795325112867e-1) COS_PI_DIV8 */
+#define C164 (FFTC(0x89be50c3)) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8 */
+
+#define C165 (FFTC(0x30fbc54d)) /* FL2WORD32( 3.826834323650898e-1) COS_3PI_DIV8 */
+#define C166 (FFTC(0xcf043ab3)) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */
+
+
+#define cplxMpy4_8_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),1); \
+ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),1);
+
+#define cplxMpy4_8_1(re,im,a,b) re = L_shr(a,1); \
+ im = L_shr(b,1);
+
+void fft16_with_cmplx_data(cmplx *pInp, Word16 bsacle);
+
+
+#if 0
+#define SCALEFACTOR5 ( 4)
+#define SCALEFACTOR8 ( 4)
+#define SCALEFACTOR10 ( 5)
+#define SCALEFACTOR16 ( 5)
+#define SCALEFACTOR20 ( 5)
+#define SCALEFACTOR30 ( 6)
+#define SCALEFACTOR30_1 ( 5)
+#define SCALEFACTOR30_2 ( 1)
+#else
+#undef SCALEFACTOR5
+#undef SCALEFACTOR8
+#undef SCALEFACTOR10
+#undef SCALEFACTOR16
+#undef SCALEFACTOR20
+#undef SCALEFACTOR30
+#undef SCALEFACTOR30_1
+#undef SCALEFACTOR30_2
+
+#define SCALEFACTOR5 ( 0)
+#define SCALEFACTOR8 ( 0)
+#define SCALEFACTOR10 ( 0)
+#define SCALEFACTOR16 ( 0)
+#define SCALEFACTOR20 ( 0)
+#define SCALEFACTOR30 ( 0)
+#define SCALEFACTOR30_1 ( 0)
+#define SCALEFACTOR30_2 ( 0)
+#endif
+
+cmplx CL_scale_t(cmplx x, Word16 y);
+cmplx CL_dscale_t(cmplx x, Word16 y1, Word16 y2);
+
+/**
+ * \brief Function performs a complex 8-point FFT
+ *        The FFT is performed inplace. The result of the FFT
+ *        is scaled by SCALEFACTOR8 bits.
+ *
+ * WOPS with 32x16 bit multiplications: 108 cycles
+ *
+ * \param [i/o] inp pointer to 8 complex (re/im) values, transformed in place
+ *
+ * \return void
+ */
+static void fft8_with_cmplx_data(cmplx *inp)
+{
+    cmplx x0, x1, x2, x3, x4, x5, x6, x7;
+    cmplx s0, s1, s2, s3, s4, s5, s6, s7;
+    cmplx t0, t1, t2, t3, t4, t5, t6, t7;
+
+    /* Pre-additions: input headroom scaling (SCALEFACTOR8 is currently 0, see top of file) */
+    x0 = CL_shr(inp[0], SCALEFACTOR8);
+    x1 = CL_shr(inp[1], SCALEFACTOR8);
+    x2 = CL_shr(inp[2], SCALEFACTOR8);
+    x3 = CL_shr(inp[3], SCALEFACTOR8);
+    x4 = CL_shr(inp[4], SCALEFACTOR8);
+    x5 = CL_shr(inp[5], SCALEFACTOR8);
+    x6 = CL_shr(inp[6], SCALEFACTOR8);
+    x7 = CL_shr(inp[7], SCALEFACTOR8);
+
+    /* loops are unrolled */
+    {
+        t0 = CL_add(x0,x4);
+        t1 = CL_sub(x0,x4);
+
+        t2 = CL_add(x1,x5);
+        t3 = CL_sub(x1,x5);
+
+        t4 = CL_add(x2,x6);
+        t5 = CL_sub(x2,x6);
+
+        t6 = CL_add(x3,x7);
+        t7 = CL_sub(x3,x7);
+    }
+
+    /* Pre-additions and core multiplications (C81/C82 = +/- 1/sqrt(2) twiddles) */
+
+    s0 = CL_add(t0, t4);
+    s2 = CL_sub(t0, t4);
+
+    s4 = CL_mac_j(t1, t5);
+    s5 = CL_msu_j(t1, t5);
+
+    s1 = CL_add(t2, t6);
+    s3 = CL_sub(t2, t6);
+    s3 = CL_mul_j(s3);
+
+    t0 = CL_add(t3, t7);
+    t1 = CL_sub(t3, t7);
+
+    s6 = CL_scale_t(CL_msu_j(t1, t0), C81);
+    s7 = CL_dscale_t(CL_swap_real_imag(CL_msu_j(t0, t1)), C81, C82);
+
+    /* Post-additions */
+
+    inp[0] = CL_add(s0, s1);
+    inp[4] = CL_sub(s0, s1);
+
+    inp[2] = CL_sub(s2, s3);
+    inp[6] = CL_add(s2, s3);
+
+    inp[3] = CL_add(s4, s7);
+    inp[7] = CL_sub(s4, s7);
+
+    inp[1] = CL_add(s5, s6);
+    inp[5] = CL_sub(s5, s6);
+#if (WMOPS)
+    multiCounter[currCounter].CL_move += 8;
+#endif
+}
+
+/**
+ * \brief Function performs a complex 5-point FFT
+ *        The FFT is performed inplace. The result of the FFT
+ *        is scaled by SCALEFACTOR5 bits.
+ *
+ * WOPS with 32x16 bit multiplications: 88 cycles
+ *
+ * \param [i/o] inp pointer to 5 complex (re/im) values, transformed in place
+ *
+ * \return void
+ */
+static void fft5_with_cmplx_data(cmplx *inp)
+{
+    cmplx x0,x1,x2,x3,x4;
+    cmplx y1,y2,y3,y4;
+    cmplx t;
+
+    /* input headroom scaling (SCALEFACTOR5 is currently 0, see top of file) */
+    x0 = CL_shr(inp[0],SCALEFACTOR5);
+    x1 = CL_shr(inp[1],SCALEFACTOR5);
+    x2 = CL_shr(inp[2],SCALEFACTOR5);
+    x3 = CL_shr(inp[3],SCALEFACTOR5);
+    x4 = CL_shr(inp[4],SCALEFACTOR5);
+
+    y1 = CL_add(x1,x4);
+    y4 = CL_sub(x1,x4);
+    y3 = CL_add(x2,x3);
+    y2 = CL_sub(x2,x3);
+    t = CL_scale_t(CL_sub(y1,y3),C54);
+    y1 = CL_add(y1,y3);
+    inp[0] = CL_add(x0,y1);
+
+    /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
+       the values as fracts */
+    y1 = CL_add(inp[0],(CL_shl(CL_scale_t(y1,C55),1)));
+    y3 = CL_sub(y1,t);
+    y1 = CL_add(y1,t);
+
+    t = CL_scale_t(CL_add(y4,y2),C51);
+    /* Bit shift left because of the constant C52 which was scaled with the factor 0.5 because of the representation of
+       the values as fracts */
+    y4 = CL_add(t,CL_shl(CL_scale_t(y4, C52),1));
+    y2 = CL_add(t,CL_scale_t(y2,C53));
+
+
+    /* combination */
+    inp[1] = CL_msu_j(y1,y2);
+    inp[4] = CL_mac_j(y1,y2);
+
+    inp[2] = CL_mac_j(y3,y4);
+    inp[3] = CL_msu_j(y3,y4);
+
+#if (WMOPS)
+    multiCounter[currCounter].CL_move += 5;
+#endif
+
+}
+
+/**
+ * \brief Function performs a complex 10-point FFT
+ *        The FFT is performed inplace. The result of the FFT
+ *        is scaled by SCALEFACTOR10 bits.
+ *
+ * WOPS with 32x16 bit multiplications: 196 cycles
+ *
+ * \param [i/o] inp_data pointer to 10 complex (re/im) values, transformed in place
+ *
+ * \return void
+ */
+static void fft10_with_cmplx_data(cmplx *inp_data)
+{
+    cmplx r1,r2,r3,r4;
+    cmplx x0,x1,x2,x3,x4,t;
+    cmplx y[10];
+
+    /* Two radix-5 butterflies on the decimated (stride-2) inputs */
+    /* FOR i=0 */
+    {
+        x0 = CL_shr(inp_data[0],SCALEFACTOR10);
+        x1 = CL_shr(inp_data[2],SCALEFACTOR10);
+        x2 = CL_shr(inp_data[4],SCALEFACTOR10);
+        x3 = CL_shr(inp_data[6],SCALEFACTOR10);
+        x4 = CL_shr(inp_data[8],SCALEFACTOR10);
+
+        r1 = CL_add(x3,x2);
+        r4 = CL_sub(x3,x2);
+        r3 = CL_add(x1,x4);
+        r2 = CL_sub(x1,x4);
+        t = CL_scale_t(CL_sub(r1,r3),C54);
+        r1 = CL_add(r1,r3);
+        y[0] = CL_add(x0,r1);
+        r1 = CL_add(y[0],(CL_shl(CL_scale_t(r1,C55),1)));
+        r3 = CL_sub(r1,t);
+        r1 = CL_add(r1,t);
+        t = CL_scale_t((CL_add(r4,r2)),C51);
+        r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1));
+        r2 = CL_add(t,CL_scale_t(r2,C53));
+
+
+        y[2] = CL_msu_j(r1,r2);
+        y[8] = CL_mac_j(r1,r2);
+        y[4] = CL_mac_j(r3,r4);
+        y[6] = CL_msu_j(r3,r4);
+    }
+    /* FOR i=1 */
+    {
+        x0 = CL_shr(inp_data[5],SCALEFACTOR10);
+        x1 = CL_shr(inp_data[1],SCALEFACTOR10);
+        x2 = CL_shr(inp_data[3],SCALEFACTOR10);
+        x3 = CL_shr(inp_data[7],SCALEFACTOR10);
+        x4 = CL_shr(inp_data[9],SCALEFACTOR10);
+
+        r1 = CL_add(x1,x4);
+        r4 = CL_sub(x1,x4);
+        r3 = CL_add(x3,x2);
+        r2 = CL_sub(x3,x2);
+        t = CL_scale_t(CL_sub(r1,r3),C54);
+        r1 = CL_add(r1,r3);
+        y[1] = CL_add(x0,r1);
+        r1 = CL_add(y[1],(CL_shl(CL_scale_t(r1,C55),1)));
+        r3 = CL_sub(r1,t);
+        r1 = CL_add(r1,t);
+        t = CL_scale_t((CL_add(r4,r2)),C51);
+        r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1));
+        r2 = CL_add(t,CL_scale_t(r2,C53));
+
+
+        y[3] = CL_msu_j(r1,r2);
+        y[9] = CL_mac_j(r1,r2);
+        y[5] = CL_mac_j(r3,r4);
+        y[7] = CL_msu_j(r3,r4);
+    }
+
+    /* Five radix-2 recombinations; outputs are written directly in final order */
+    /* FOR i=0 */
+    {
+        inp_data[0] = CL_add(y[0],y[1]);
+        inp_data[5] = CL_sub(y[0],y[1]);
+    }
+    /* FOR i=2 */
+    {
+        inp_data[2] = CL_add(y[2],y[3]);
+        inp_data[7] = CL_sub(y[2],y[3]);
+    }
+    /* FOR i=4 */
+    {
+        inp_data[4] = CL_add(y[4],y[5]);
+        inp_data[9] = CL_sub(y[4],y[5]);
+    }
+    /* FOR i=6 */
+    {
+        inp_data[6] = CL_add(y[6],y[7]);
+        inp_data[1] = CL_sub(y[6],y[7]);
+    }
+    /* FOR i=8 */
+    {
+        inp_data[8] = CL_add(y[8],y[9]);
+        inp_data[3] = CL_sub(y[8],y[9]);
+    }
+
+#if (WMOPS)
+    multiCounter[currCounter].CL_move += 10;
+#endif
+
+}
+
+/**
+ * \brief Function performs a complex 20-point FFT
+ *        The FFT is performed inplace. The result of the FFT
+ *        is scaled by SCALEFACTOR20 bits.
+ *
+ * WOPS with 32x16 bit multiplications: 432 cycles
+ *
+ * \param [i/o] inp_data pointer to 20 complex (re/im) values, transformed in place
+ *
+ * \return void
+ */
+static void fft20_with_cmplx_data(cmplx *inp_data)
+{
+    cmplx r1,r2,r3,r4;
+    cmplx x0,x1,x2,x3,x4;
+    cmplx t,t0,t1,t2,t3;
+    cmplx y[20];
+    cmplx *y0, *y1,*y2,*y3,*y4;
+
+    /* output offsets of the four radix-5 stages (digit-reversed placement) */
+    y0 = y;
+    y1 = &y[4];
+    y2 = &y[16];
+    y3 = &y[8];
+    y4 = &y[12];
+
+    /* 1. radix-5 butterfly (inputs decimated by 4) */
+    {
+        x0 = CL_shr(inp_data[0],SCALEFACTOR20);
+        x1 = CL_shr(inp_data[16],SCALEFACTOR20);
+        x2 = CL_shr(inp_data[12],SCALEFACTOR20);
+        x3 = CL_shr(inp_data[8],SCALEFACTOR20);
+        x4 = CL_shr(inp_data[4],SCALEFACTOR20);
+
+        r4 = CL_sub(x1,x4);
+        r2 = CL_sub(x2,x3);
+        r1 = CL_add(x1,x4);
+        r3 = CL_add(x2,x3);
+        t = CL_scale_t(CL_sub(r1,r3),C54);
+        r1 = CL_add(r1,r3);
+        y0[0] = CL_add(x0,r1);
+        r1 = CL_add(y0[0],(CL_shl(CL_scale_t(r1,C55),1)));
+        r3 = CL_sub(r1,t);
+        r1 = CL_add(r1,t);
+        t = CL_scale_t((CL_add(r4,r2)),C51);
+        r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1));
+        r2 = CL_add(t,CL_scale_t(r2,C53));
+
+
+        y1[0] = CL_msu_j(r1,r2);
+        y2[0] = CL_mac_j(r1,r2);
+        y3[0] = CL_mac_j(r3,r4);
+        y4[0] = CL_msu_j(r3,r4);
+    }
+    /* 2. radix-5 butterfly */
+    {
+        x0 = CL_shr(inp_data[5],SCALEFACTOR20);
+        x1 = CL_shr(inp_data[1],SCALEFACTOR20);
+        x2 = CL_shr(inp_data[17],SCALEFACTOR20);
+        x3 = CL_shr(inp_data[13],SCALEFACTOR20);
+        x4 = CL_shr(inp_data[9],SCALEFACTOR20);
+
+        r4 = CL_sub(x1,x4);
+        r2 = CL_sub(x2,x3);
+        r1 = CL_add(x1,x4);
+        r3 = CL_add(x2,x3);
+        t = CL_scale_t(CL_sub(r1,r3),C54);
+        r1 = CL_add(r1,r3);
+        y0[1] = CL_add(x0,r1);
+        r1 = CL_add(y0[1],(CL_shl(CL_scale_t(r1,C55),1)));
+        r3 = CL_sub(r1,t);
+        r1 = CL_add(r1,t);
+        t = CL_scale_t((CL_add(r4,r2)),C51);
+        r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1));
+        r2 = CL_add(t,CL_scale_t(r2,C53));
+
+
+        y1[1] = CL_msu_j(r1,r2);
+        y2[1] = CL_mac_j(r1,r2);
+        y3[1] = CL_mac_j(r3,r4);
+        y4[1] = CL_msu_j(r3,r4);
+    }
+    /* 3. radix-5 butterfly */
+    {
+        x0 = CL_shr(inp_data[10],SCALEFACTOR20);
+        x1 = CL_shr(inp_data[6],SCALEFACTOR20);
+        x2 = CL_shr(inp_data[2],SCALEFACTOR20);
+        x3 = CL_shr(inp_data[18],SCALEFACTOR20);
+        x4 = CL_shr(inp_data[14],SCALEFACTOR20);
+
+        r4 = CL_sub(x1,x4);
+        r2 = CL_sub(x2,x3);
+        r1 = CL_add(x1,x4);
+        r3 = CL_add(x2,x3);
+        t = CL_scale_t(CL_sub(r1,r3),C54);
+        r1 = CL_add(r1,r3);
+        y0[2] = CL_add(x0,r1);
+        r1 = CL_add(y0[2],(CL_shl(CL_scale_t(r1,C55),1)));
+        r3 = CL_sub(r1,t);
+        r1 = CL_add(r1,t);
+        t = CL_scale_t((CL_add(r4,r2)),C51);
+        r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1));
+        r2 = CL_add(t,CL_scale_t(r2,C53));
+
+
+        y1[2] = CL_msu_j(r1,r2);
+        y2[2] = CL_mac_j(r1,r2);
+        y3[2] = CL_mac_j(r3,r4);
+        y4[2] = CL_msu_j(r3,r4);
+    }
+    /* 4. radix-5 butterfly */
+    {
+        x0 = CL_shr(inp_data[15],SCALEFACTOR20);
+        x1 = CL_shr(inp_data[11],SCALEFACTOR20);
+        x2 = CL_shr(inp_data[7],SCALEFACTOR20);
+        x3 = CL_shr(inp_data[3],SCALEFACTOR20);
+        x4 = CL_shr(inp_data[19],SCALEFACTOR20);
+
+        r4 = CL_sub(x1,x4);
+        r2 = CL_sub(x2,x3);
+        r1 = CL_add(x1,x4);
+        r3 = CL_add(x2,x3);
+        t = CL_scale_t(CL_sub(r1,r3),C54);
+        r1 = CL_add(r1,r3);
+        y0[3] = CL_add(x0,r1);
+        r1 = CL_add(y0[3],(CL_shl(CL_scale_t(r1,C55),1)));
+        r3 = CL_sub(r1,t);
+        r1 = CL_add(r1,t);
+        t = CL_scale_t((CL_add(r4,r2)),C51);
+        r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1));
+        r2 = CL_add(t,CL_scale_t(r2,C53));
+
+
+        y1[3] = CL_msu_j(r1,r2);
+        y2[3] = CL_mac_j(r1,r2);
+        y3[3] = CL_mac_j(r3,r4);
+        y4[3] = CL_msu_j(r3,r4);
+    }
+
+    /* Five radix-4 recombinations; outputs are written directly in final order */
+    {
+        cmplx * ptr_y = y;
+        {
+            cmplx Cy0, Cy1, Cy2, Cy3;
+
+            Cy0 = *ptr_y++;
+            Cy1 = *ptr_y++;
+            Cy2 = *ptr_y++;
+            Cy3 = *ptr_y++;
+
+            /* Pre-additions */
+            t0 = CL_add(Cy0,Cy2);
+            t1 = CL_sub(Cy0,Cy2);
+            t2 = CL_add(Cy1,Cy3);
+            t3 = CL_sub(Cy1,Cy3);
+
+
+            inp_data[0] = CL_add(t0,t2);
+            inp_data[5] = CL_msu_j(t1,t3);
+            inp_data[10] = CL_sub(t0,t2);
+            inp_data[15] = CL_mac_j(t1,t3);
+        }
+
+        {
+            cmplx Cy0, Cy1, Cy2, Cy3;
+
+            Cy0 = *ptr_y++;
+            Cy1 = *ptr_y++;
+            Cy2 = *ptr_y++;
+            Cy3 = *ptr_y++;
+
+            /* Pre-additions */
+            t0 = CL_add(Cy0,Cy2);
+            t1 = CL_sub(Cy0,Cy2);
+            t2 = CL_add(Cy1,Cy3);
+            t3 = CL_sub(Cy1,Cy3);
+
+
+            inp_data[4] = CL_add(t0,t2);
+            inp_data[9] = CL_msu_j(t1,t3);
+            inp_data[14] = CL_sub(t0,t2);
+            inp_data[19] = CL_mac_j(t1,t3);
+        }
+
+        {
+            cmplx Cy0, Cy1, Cy2, Cy3;
+
+            Cy0 = *ptr_y++;
+            Cy1 = *ptr_y++;
+            Cy2 = *ptr_y++;
+            Cy3 = *ptr_y++;
+
+            /* Pre-additions */
+            t0 = CL_add(Cy0,Cy2);
+            t1 = CL_sub(Cy0,Cy2);
+            t2 = CL_add(Cy1,Cy3);
+            t3 = CL_sub(Cy1,Cy3);
+
+
+            inp_data[8] = CL_add(t0,t2);
+            inp_data[13] = CL_msu_j(t1,t3);
+            inp_data[18] = CL_sub(t0,t2);
+            inp_data[3] = CL_mac_j(t1,t3);
+        }
+
+        {
+            cmplx Cy0, Cy1, Cy2, Cy3;
+
+            Cy0 = *ptr_y++;
+            Cy1 = *ptr_y++;
+            Cy2 = *ptr_y++;
+            Cy3 = *ptr_y++;
+
+            /* Pre-additions */
+            t0 = CL_add(Cy0,Cy2);
+            t1 = CL_sub(Cy0,Cy2);
+            t2 = CL_add(Cy1,Cy3);
+            t3 = CL_sub(Cy1,Cy3);
+
+            inp_data[12] = CL_add(t0,t2);
+            inp_data[17] = CL_msu_j(t1,t3);
+            inp_data[2] = CL_sub(t0,t2);
+            inp_data[7] = CL_mac_j(t1,t3);
+        }
+
+        {
+            cmplx Cy0, Cy1, Cy2, Cy3;
+
+            Cy0 = *ptr_y++;
+            Cy1 = *ptr_y++;
+            Cy2 = *ptr_y++;
+            Cy3 = *ptr_y++;
+
+            /* Pre-additions */
+            t0 = CL_add(Cy0,Cy2);
+            t1 = CL_sub(Cy0,Cy2);
+            t2 = CL_add(Cy1,Cy3);
+            t3 = CL_sub(Cy1,Cy3);
+
+
+            inp_data[16] = CL_add(t0,t2);
+            inp_data[1] = CL_msu_j(t1,t3);
+            inp_data[6] = CL_sub(t0,t2);
+            inp_data[11] = CL_mac_j(t1,t3);
+        }
+    }
+#if (WMOPS)
+    multiCounter[currCounter].CL_move += 20;
+#endif
+
+}
+
+
+/**
+ * \brief Function performs a complex 30-point FFT
+ *        The FFT is performed inplace. The result of the FFT
+ *        is scaled by SCALEFACTOR30 bits.
+ *
+ *        Decomposition: two 15-point FFTs (each built from 3 radix-5
+ *        butterflies followed by 5 radix-3 butterflies) combined by
+ *        15 radix-2 butterflies.
+ *
+ * WOPS with 32x16 bit multiplications: 828 cycles
+ *
+ * \param [i/o] inp pointer to 30 complex (re/im) values, transformed in place
+ *
+ * \return void
+ */
+static void fft30_with_cmplx_data(cmplx * inp)
+{
+    cmplx *l = &inp[0];
+    cmplx *h = &inp[15];
+
+    cmplx z[30], y[15], x[15], rs1, rs2, rs3, rs4, t;
+
+    /* 1. FFT15 stage */
+
+    x[0] = CL_shr(inp[0],SCALEFACTOR30_1);
+    x[1] = CL_shr(inp[18],SCALEFACTOR30_1);
+    x[2] = CL_shr(inp[6],SCALEFACTOR30_1);
+    x[3] = CL_shr(inp[24],SCALEFACTOR30_1);
+    x[4] = CL_shr(inp[12],SCALEFACTOR30_1);
+
+    x[5] = CL_shr(inp[20],SCALEFACTOR30_1);
+    x[6] = CL_shr(inp[8],SCALEFACTOR30_1);
+    x[7] = CL_shr(inp[26],SCALEFACTOR30_1);
+    x[8] = CL_shr(inp[14],SCALEFACTOR30_1);
+    x[9] = CL_shr(inp[2],SCALEFACTOR30_1);
+
+    x[10] = CL_shr(inp[10],SCALEFACTOR30_1);
+    x[11] = CL_shr(inp[28],SCALEFACTOR30_1);
+    x[12] = CL_shr(inp[16],SCALEFACTOR30_1);
+    x[13] = CL_shr(inp[4],SCALEFACTOR30_1);
+    x[14] = CL_shr(inp[22],SCALEFACTOR30_1);
+
+
+    /* 1. FFT5 stage */
+    rs1 = CL_add(x[1],x[4]);
+    rs4 = CL_sub(x[1],x[4]);
+    rs3 = CL_add(x[2],x[3]);
+    rs2 = CL_sub(x[2],x[3]);
+    t = CL_scale_t(CL_sub(rs1,rs3),C54);
+    rs1 = CL_add(rs1,rs3);
+    y[0] = CL_add(x[0],rs1);
+    rs1 = CL_add(y[0],(CL_shl(CL_scale_t(rs1,C55),1)));
+    rs3 = CL_sub(rs1,t);
+    rs1 = CL_add(rs1,t);
+    t = CL_scale_t(CL_add(rs4,rs2),C51);
+    rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1));
+    rs2 = CL_add(t,CL_scale_t(rs2,C53));
+
+    /* combination */
+    y[1] = CL_msu_j(rs1,rs2);
+    y[4] = CL_mac_j(rs1,rs2);
+    y[2] = CL_mac_j(rs3,rs4);
+    y[3] = CL_msu_j(rs3,rs4);
+
+
+    /* 2. FFT5 stage */
+    rs1 = CL_add(x[6],x[9]);
+    rs4 = CL_sub(x[6],x[9]);
+    rs3 = CL_add(x[7],x[8]);
+    rs2 = CL_sub(x[7],x[8]);
+    t = CL_scale_t(CL_sub(rs1,rs3),C54);
+    rs1 = CL_add(rs1,rs3);
+    y[5] = CL_add(x[5],rs1);
+    rs1 = CL_add(y[5],(CL_shl(CL_scale_t(rs1,C55),1)));
+    rs3 = CL_sub(rs1,t);
+    rs1 = CL_add(rs1,t);
+    t = CL_scale_t(CL_add(rs4,rs2),C51);
+    rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1));
+    rs2 = CL_add(t,CL_scale_t(rs2,C53));
+
+    /* combination */
+    y[6] = CL_msu_j(rs1,rs2);
+    y[9] = CL_mac_j(rs1,rs2);
+    y[7] = CL_mac_j(rs3,rs4);
+    y[8] = CL_msu_j(rs3,rs4);
+
+
+    /* 3. FFT5 stage */
+    rs1 = CL_add(x[11],x[14]);
+    rs4 = CL_sub(x[11],x[14]);
+    rs3 = CL_add(x[12],x[13]);
+    rs2 = CL_sub(x[12],x[13]);
+    t = CL_scale_t(CL_sub(rs1,rs3),C54);
+    rs1 = CL_add(rs1,rs3);
+    y[10] = CL_add(x[10],rs1);
+    rs1 = CL_add(y[10],(CL_shl(CL_scale_t(rs1,C55),1)));
+    rs3 = CL_sub(rs1,t);
+    rs1 = CL_add(rs1,t);
+    t = CL_scale_t(CL_add(rs4,rs2),C51);
+    rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1));
+    rs2 = CL_add(t,CL_scale_t(rs2,C53));
+
+    /* combination */
+    y[11] = CL_msu_j(rs1,rs2);
+    y[14] = CL_mac_j(rs1,rs2);
+    y[12] = CL_mac_j(rs3,rs4);
+    y[13] = CL_msu_j(rs3,rs4);
+
+
+    /* 1. FFT3 stage */
+    /* real part */
+    rs1 = CL_add(y[5],y[10]);
+    rs2 = CL_scale_t(CL_sub(y[5],y[10]),C31);
+    z[0] = CL_add(y[0],rs1);
+    rs1 = CL_sub(y[0],CL_shr(rs1,1));
+
+    z[10] = CL_mac_j(rs1,rs2);
+    z[5] = CL_msu_j(rs1,rs2);
+
+    /* 2. FFT3 stage */
+    rs1 = CL_add(y[6],y[11]);
+    rs2 = CL_scale_t(CL_sub(y[6],y[11]),C31);
+    z[6] = CL_add(y[1],rs1);
+    rs1 = CL_sub(y[1],CL_shr(rs1,1));
+
+    z[1] = CL_mac_j(rs1,rs2);
+    z[11] = CL_msu_j(rs1,rs2);
+
+
+    /* 3. FFT3 stage */
+    rs1 = CL_add(y[7],y[12]);
+    rs2 = CL_scale_t(CL_sub(y[7],y[12]),C31);
+    z[12] = CL_add(y[2],rs1);
+    rs1 = CL_sub(y[2],CL_shr(rs1,1));
+
+    z[7] = CL_mac_j(rs1,rs2);
+    z[2] = CL_msu_j(rs1,rs2);
+
+
+    /* 4. FFT3 stage */
+    rs1 = CL_add(y[8],y[13]);
+    rs2 = CL_scale_t(CL_sub(y[8],y[13]),C31);
+    z[3] = CL_add(y[3],rs1);
+    rs1 = CL_sub(y[3],CL_shr(rs1,1));
+
+    z[13] = CL_mac_j(rs1,rs2);
+    z[8] = CL_msu_j(rs1,rs2);
+
+
+    /* 5. FFT3 stage */
+    rs1 = CL_add(y[9],y[14]);
+    rs2 = CL_scale_t(CL_sub(y[9],y[14]),C31);
+    z[9] = CL_add(y[4],rs1);
+    rs1 = CL_sub(y[4],CL_shr(rs1,1));
+
+    z[4] = CL_mac_j(rs1,rs2);
+    z[14] = CL_msu_j(rs1,rs2);
+
+
+    /* 2. FFT15 stage */
+
+    x[0] = CL_shr(inp[15],SCALEFACTOR30_1);
+    x[1] = CL_shr(inp[3],SCALEFACTOR30_1);
+    x[2] = CL_shr(inp[21],SCALEFACTOR30_1);
+    x[3] = CL_shr(inp[9],SCALEFACTOR30_1);
+    x[4] = CL_shr(inp[27],SCALEFACTOR30_1);
+
+    x[5] = CL_shr(inp[5],SCALEFACTOR30_1);
+    x[6] = CL_shr(inp[23],SCALEFACTOR30_1);
+    x[7] = CL_shr(inp[11],SCALEFACTOR30_1);
+    x[8] = CL_shr(inp[29],SCALEFACTOR30_1);
+    x[9] = CL_shr(inp[17],SCALEFACTOR30_1);
+
+    x[10] = CL_shr(inp[25],SCALEFACTOR30_1);
+    x[11] = CL_shr(inp[13],SCALEFACTOR30_1);
+    x[12] = CL_shr(inp[1],SCALEFACTOR30_1);
+    x[13] = CL_shr(inp[19],SCALEFACTOR30_1);
+    x[14] = CL_shr(inp[7],SCALEFACTOR30_1);
+
+    /* 1. FFT5 stage */
+    rs1 = CL_add(x[1],x[4]);
+    rs4 = CL_sub(x[1],x[4]);
+    rs3 = CL_add(x[2],x[3]);
+    rs2 = CL_sub(x[2],x[3]);
+    t = CL_scale_t(CL_sub(rs1,rs3),C54);
+    rs1 = CL_add(rs1,rs3);
+    y[0] = CL_add(x[0],rs1);
+    rs1 = CL_add(y[0],(CL_shl(CL_scale_t(rs1,C55),1)));
+    rs3 = CL_sub(rs1,t);
+    rs1 = CL_add(rs1,t);
+    t = CL_scale_t(CL_add(rs4,rs2),C51);
+    rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1));
+    rs2 = CL_add(t,CL_scale_t(rs2,C53));
+
+    /* combination */
+    y[1] = CL_msu_j(rs1,rs2);
+    y[4] = CL_mac_j(rs1,rs2);
+    y[2] = CL_mac_j(rs3,rs4);
+    y[3] = CL_msu_j(rs3,rs4);
+
+
+    /* 2. FFT5 stage */
+    rs1 = CL_add(x[6],x[9]);
+    rs4 = CL_sub(x[6],x[9]);
+    rs3 = CL_add(x[7],x[8]);
+    rs2 = CL_sub(x[7],x[8]);
+    t = CL_scale_t(CL_sub(rs1,rs3),C54);
+    rs1 = CL_add(rs1,rs3);
+    y[5] = CL_add(x[5],rs1);
+    rs1 = CL_add(y[5],(CL_shl(CL_scale_t(rs1,C55),1)));
+    rs3 = CL_sub(rs1,t);
+    rs1 = CL_add(rs1,t);
+    t = CL_scale_t(CL_add(rs4,rs2),C51);
+    rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1));
+    rs2 = CL_add(t,CL_scale_t(rs2,C53));
+
+    /* combination */
+    y[6] = CL_msu_j(rs1,rs2);
+    y[9] = CL_mac_j(rs1,rs2);
+    y[7] = CL_mac_j(rs3,rs4);
+    y[8] = CL_msu_j(rs3,rs4);
+
+
+    /* 3. FFT5 stage */
+    rs1 = CL_add(x[11],x[14]);
+    rs4 = CL_sub(x[11],x[14]);
+    rs3 = CL_add(x[12],x[13]);
+    rs2 = CL_sub(x[12],x[13]);
+    t = CL_scale_t(CL_sub(rs1,rs3),C54);
+    rs1 = CL_add(rs1,rs3);
+    y[10] = CL_add(x[10],rs1);
+    rs1 = CL_add(y[10],(CL_shl(CL_scale_t(rs1,C55),1)));
+    rs3 = CL_sub(rs1,t);
+    rs1 = CL_add(rs1,t);
+    t = CL_scale_t(CL_add(rs4,rs2),C51);
+    rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1));
+    rs2 = CL_add(t,CL_scale_t(rs2,C53));
+
+    /* combination */
+    y[11] = CL_msu_j(rs1,rs2);
+    y[14] = CL_mac_j(rs1,rs2);
+    y[12] = CL_mac_j(rs3,rs4);
+    y[13] = CL_msu_j(rs3,rs4);
+
+
+    /* 1. FFT3 stage */
+    /* real part */
+    rs1 = CL_add(y[5],y[10]);
+    rs2 = CL_scale_t(CL_sub(y[5],y[10]),C31);
+    z[15] = CL_add(y[0],rs1);
+    rs1 = CL_sub(y[0],CL_shr(rs1,1));
+
+    z[25] = CL_mac_j(rs1,rs2);
+    z[20] = CL_msu_j(rs1,rs2);
+
+    /* 2. FFT3 stage */
+    rs1 = CL_add(y[6],y[11]);
+    rs2 = CL_scale_t(CL_sub(y[6],y[11]),C31);
+    z[21] = CL_add(y[1],rs1);
+    rs1 = CL_sub(y[1],CL_shr(rs1,1));
+
+    z[16] = CL_mac_j(rs1,rs2);
+    z[26] = CL_msu_j(rs1,rs2);
+
+
+    /* 3. FFT3 stage */
+    rs1 = CL_add(y[7],y[12]);
+    rs2 = CL_scale_t(CL_sub(y[7],y[12]),C31);
+    z[27] = CL_add(y[2],rs1);
+    rs1 = CL_sub(y[2],CL_shr(rs1,1));
+
+    z[22] = CL_mac_j(rs1,rs2);
+    z[17] = CL_msu_j(rs1,rs2);
+
+
+    /* 4. FFT3 stage */
+    rs1 = CL_add(y[8],y[13]);
+    rs2 = CL_scale_t(CL_sub(y[8],y[13]),C31);
+    z[18] = CL_add(y[3],rs1);
+    rs1 = CL_sub(y[3],CL_shr(rs1,1));
+
+    z[28] = CL_mac_j(rs1,rs2);
+    z[23] = CL_msu_j(rs1,rs2);
+
+
+    /* 5. FFT3 stage */
+    rs1 = CL_add(y[9],y[14]);
+    rs2 = CL_scale_t(CL_sub(y[9],y[14]),C31);
+    z[24] = CL_add(y[4],rs1);
+    rs1 = CL_sub(y[4],CL_shr(rs1,1));
+
+    z[19] = CL_mac_j(rs1,rs2);
+    z[29] = CL_msu_j(rs1,rs2);
+
+
+    /* 15 radix-2 butterflies; note the add/sub destinations alternate
+       between *l and *h to realize the final output permutation */
+
+    /* 1. FFT2 stage */
+    rs1 = CL_shr(z[0], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[15],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 2. FFT2 stage */
+    rs1 = CL_shr(z[8], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[23],SCALEFACTOR30_2);
+    *h = CL_add(rs1,rs2);
+    *l = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+
+    /* 3. FFT2 stage */
+    rs1 = CL_shr(z[1], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[16],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+
+    /* 4. FFT2 stage */
+    rs1 = CL_shr(z[9], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[24],SCALEFACTOR30_2);
+    *h = CL_add(rs1,rs2);
+    *l = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 5. FFT2 stage */
+    rs1 = CL_shr(z[2], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[17],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 6. FFT2 stage */
+    rs1 = CL_shr(z[10], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[25],SCALEFACTOR30_2);
+    *h = CL_add(rs1,rs2);
+    *l = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 7. FFT2 stage */
+    rs1 = CL_shr(z[3], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[18],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 8. FFT2 stage */
+    rs1 = CL_shr(z[11], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[26],SCALEFACTOR30_2);
+    *h = CL_add(rs1,rs2);
+    *l = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 9. FFT2 stage */
+    rs1 = CL_shr(z[4], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[19],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 10. FFT2 stage */
+    rs1 = CL_shr(z[12], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[27],SCALEFACTOR30_2);
+    *h = CL_add(rs1,rs2);
+    *l = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 11. FFT2 stage */
+    rs1 = CL_shr(z[5], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[20],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 12. FFT2 stage */
+    rs1 = CL_shr(z[13], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[28],SCALEFACTOR30_2);
+    *h = CL_add(rs1,rs2);
+    *l = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 13. FFT2 stage */
+    rs1 = CL_shr(z[6], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[21],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 14. FFT2 stage */
+    rs1 = CL_shr(z[14], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[29],SCALEFACTOR30_2);
+    *h = CL_add(rs1,rs2);
+    *l = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+    /* 15. FFT2 stage */
+    rs1 = CL_shr(z[7], SCALEFACTOR30_2);
+    rs2 = CL_shr(z[22],SCALEFACTOR30_2);
+    *l = CL_add(rs1,rs2);
+    *h = CL_sub(rs1,rs2);
+    l+=1; h+=1;
+
+#if (WMOPS)
+    multiCounter[currCounter].CL_move += 30;
+#endif
+
+}
+
+/*-------------------------------------------------------------------*
+ * fft_cldfb_fx()
+ *
+ * Interface function for the CLDFB FFT subroutines.
+ * Dispatches to a dedicated inplace complex FFT of length 5, 8, 10,
+ * 16, 20 or 30; any other size asserts. data is interpreted as
+ * interleaved re/im pairs (cast to cmplx).
+ *--------------------------------------------------------------------*/
+void fft_cldfb_fx(
+    Word32 *data, /* i/o: input/output vector (interleaved re/im pairs) */
+    const Word16 size /* size of fft operation */
+)
+{
+
+    SWITCH ( size )
+    {
+        case 5:
+            fft5_with_cmplx_data( (cmplx *)data );
+            BREAK;
+        case 8:
+            fft8_with_cmplx_data( (cmplx *)data );
+            BREAK;
+        case 10:
+            fft10_with_cmplx_data( (cmplx *)data );
+            BREAK;
+        case 16:
+            /* second argument is the bscale parameter (no extra scaling here) */
+            fft16_with_cmplx_data( (cmplx *)data, 0);
+            BREAK;
+        case 20:
+            fft20_with_cmplx_data( (cmplx *)data );
+            BREAK;
+        case 30:
+            fft30_with_cmplx_data( (cmplx *)data );
+            BREAK;
+
+        default:
+            assert( 0 );
+            BREAK;
+    }
+
+    return;
+}
diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c
index efde7ba8dbf4932aaf69c648ae46910fc42d4e1b..32551b685142640864752627421e824f390244fe 100644
--- a/lib_com/fft_fx.c
+++ b/lib_com/fft_fx.c
@@ -46,10 +46,12 @@
#include "cnst.h"
//#include "prot.h"
#include "prot_fx1.h"
+#include "prot_fx2.h"
//#include "cnst_fx.h"
#include "rom_com.h"
#include "rom_com_fx.h"
#include "wmc_auto.h"
+#include "complex_basop.h"
#ifdef _MSC_VER
#pragma warning( disable : 4310 )
@@ -59,32 +61,6 @@
* Local constants
*-----------------------------------------------------------------*/
-#if 0
-
-#define FFT_15PONIT_WNK1 0.55901699f /* EDCT & EMDCT constants */
-#define FFT_15PONIT_WNK2 0.95105652f /* EDCT & EMDCT constants */
-#define FFT_15PONIT_WNK3 0.58778525f /* EDCT & EMDCT constants */
-#define FFT_15PONIT_WNK4 0.86602540f /* EDCT & EMDCT constants */
-#define FFT_15PONIT_WNK5 0.25000000f /* EDCT & EMDCT constants */
-
-/* FFT constants */
-#define FFT_C31 -0.8660254037f
-#define FFT_C51 0.9510565195f
-#define FFT_C52 -1.5388417989f
-#define FFT_C53 -0.3632712597f
-#define FFT_C54 0.5590169895f
-#define FFT_C55 -1.2500000000f
-#define FFT_C61 0.8660254036f
-#define FFT_C81 0.7071067811f
-#define FFT_C82 -0.7071067811f
-#define FFT_C161 0.7071067811f
-#define FFT_C162 -0.7071067811f
-#define FFT_C163 0.9238795325f
-#define FFT_C164 -0.9238795325f
-#define FFT_C165 0.3826834323f
-#define FFT_C166 -0.3826834323f
-
-#else
#define Mpy_32_xx Mpy_32_16_1
@@ -153,8 +129,6 @@
#define SCALEFACTOR480 (11)
#define SCALEFACTOR600 (10)
-#endif
-
/*-----------------------------------------------------------------*
* Local function prototypes
@@ -1694,12 +1668,12 @@ static void cftfsub(
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
l = 2;
- IF ( n > 8 )
+ IF ( GT_16(n, 8) )
{
cft1st( n, a, w );
l = 8;
- WHILE ( shl(l, 2 ) < n )
+ WHILE ( LT_16(shl(l, 2 ), n) )
{
cftmdl( n, l, a, w );
l = shl(l, 2);
@@ -2037,12 +2011,12 @@ static void cftbsub(
Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
l = 2;
- IF ( n > 8 )
+ IF ( GT_16(n, 8) )
{
cft1st( n, a, w );
l = 8;
- WHILE ( shl(l, 2) < n )
+ WHILE ( LT_16(shl(l, 2), n) )
{
cftmdl( n, l, a, w );
l = shl(l, 2);
@@ -2135,7 +2109,7 @@ static void rftbsub(
Word32 xr, xi, yr, yi;
a[1] = L_negate(a[1]);
- m = n >> 1;
+ m = shr(n, 1);
ks = 2 * nc / m;
kk = 0;
FOR ( j = 2; j < m; j += 2 )
@@ -2149,7 +2123,7 @@ static void rftbsub(
yr = L_add(Mpy_32_16_1(xr, wkr), Mpy_32_16_1(xi, wki));
yi = L_sub(Mpy_32_16_1(xi, wkr), Mpy_32_16_1(xr, wki));
a[j] = L_sub(a[j], yr);
- a[j + 1] = L_add(yi, a[j + 1]);
+ a[j + 1] = L_sub(yi, a[j + 1]);
a[k] = L_add(a[k], yr);
a[k + 1] = L_sub(yi, a[k + 1]);
}
@@ -2169,12 +2143,12 @@ static void dctsub(
Word16 wkr, wki;
Word32 xr;
- m = n >> 1;
+ m = shr(n, 1);
ks = nc / n;
kk = 0;
FOR ( j = 1; j < m; j++ )
{
- k = n - j;
+ k = sub(n, j);
kk += ks;
wkr = sub(c[kk], c[nc - kk]);
wki = add(c[kk], c[nc - kk]);
@@ -2208,18 +2182,18 @@ void edct2_fx_ivas(
Copy32(in, a, n);
nw = ip[0];
- IF ( n > ( nw << 2 ) )
+ IF ( GT_16(n, shl( nw, 2 ) ) )
{
- nw = n >> 2;
+ nw = shr(n, 2);
}
nc = ip[1];
- IF ( n > nc )
+ IF ( GT_16(n, nc) )
{
nc = n;
}
- IF ( isgn < 0 )
+ IF ( LT_16(isgn, 0) )
{
xr = a[n - 1];
FOR ( j = n - 2; j >= 2; j -= 2 )
@@ -2230,7 +2204,7 @@ void edct2_fx_ivas(
a[1] = L_sub(a[0], xr);
a[0] = L_add(a[0], xr);
- IF ( n > 4 )
+ IF ( GT_16(n, 4) )
{
rftbsub( n, a, nc, w + nw );
bitrv2_SR( n, ip + 2, a );
@@ -2242,16 +2216,16 @@ void edct2_fx_ivas(
}
}
- IF ( isgn >= 0 )
+ IF ( GE_16(isgn, 0) )
{
a[0] = L_shr(a[0], 1);
}
dctsub( n, a, nc, w + nw );
- IF ( isgn >= 0 )
+ IF ( GE_16(isgn, 0) )
{
- IF ( n > 4 )
+ IF ( GT_16(n, 4) )
{
bitrv2_SR( n, ip + 2, a );
cftfsub( n, a, w );
@@ -2328,6 +2302,571 @@ void DoRTFTn_fx_ivas(
return;
}
+void fft3_fx_ivas(
+ const Word32 X[],
+ Word32 Y[],
+ const Word16 n )
+{
+ Word32 Z[PH_ECU_SPEC_SIZE];
+ Word32 *Z0, *Z1, *Z2;
+ Word32 *z0, *z1, *z2;
+ const Word32 *x;
+ const Word16 *t_sin = sincos_t_rad3_fx;
+ Word16 m, step, order;
+ Word16 i, j;
+ Word16 c1_ind, s1_ind, c2_ind, s2_ind;
+ Word16 c1_step, s1_step, c2_step, s2_step;
+ Word32 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
+
+ /* Determine the order of the transform, the length of decimated */
+ /* transforms m, and the step for the sine and cosine tables. */
+ switch ( n )
+ {
+ case 1536:
+ order = 9;
+ m = 512;
+ step = 1;
+ break;
+ case 384:
+ order = 7;
+ m = 128;
+ step = 4;
+ break;
+ default:
+ order = 9;
+ m = 512;
+ step = 1;
+ }
+
+ /* Compose decimated sequences X[3i], X[3i+1],X[3i+2] */
+ /* compute their FFT of length m. */
+ Z0 = &Z[0];
+ z0 = &Z0[0];
+ Z1 = &Z0[m];
+ z1 = &Z1[0]; /* Z1 = &Z[ m]; */
+ Z2 = &Z1[m];
+ z2 = &Z2[0]; /* Z2 = &Z[2m]; */
+ x = &X[0];
+ FOR ( i = 0; i < n / 3; i++ )
+ {
+ *z0++ = *x++; /* Z0[i] = X[3i]; */
+ *z1++ = *x++; /* Z1[i] = X[3i+1]; */
+ *z2++ = *x++; /* Z2[i] = X[3i+2]; */
+ }
+
+ fft_rel_fx32( &Z0[0], m, order );
+ fft_rel_fx32( &Z1[0], m, order );
+ fft_rel_fx32( &Z2[0], m, order );
+
+ /* Butterflies of order 3. */
+ /* pointer initialization */
+ RY = &Y[0];
+ IY = &Y[n];
+ RZ0 = &Z0[0];
+ IZ0 = &Z0[m];
+ RZ1 = &Z1[0];
+ IZ1 = &Z1[m];
+ RZ2 = &Z2[0];
+ IZ2 = &Z2[m];
+
+ c1_step = negate(step);
+ s1_step = step;
+ c2_step = negate(shl(step, 1));
+ s2_step = shl(step, 1);
+ c1_ind = add(T_SIN_PI_2, c1_step);
+ s1_ind = s1_step;
+ c2_ind = add(T_SIN_PI_2, c2_step);
+ s2_ind = s2_step;
+
+ /* special case: i = 0 */
+ RY[0] = L_add(RZ0[0], L_add(RZ1[0], RZ2[0]));
+
+ /* first 3/12 */
+ for (i = 1; i < 3 * m / 8; i++, c1_ind = add(c1_ind, c1_step), s1_ind = add(s1_ind, s1_step), c2_ind = add(c2_ind,c2_step), s2_ind = add(s2_ind, s2_step))
+ {
+ RY[i] = L_add(RZ0[i], L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ L_add(Mpy_32_16_1(RZ2[i], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ2[-i], t_sin[s2_ind])))));
+ IY[-i] = L_sub(IZ0[-i], L_add(L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ1[-i], t_sin[c1_ind])),
+ L_sub(Mpy_32_16_1(RZ2[i], t_sin[s2_ind]),
+ Mpy_32_16_1(IZ2[-i], t_sin[c2_ind]))));
+ }
+
+ /* next 1/12 */
+ for ( ; i < 4 * m / 8; i++, c1_ind = add(c1_ind, c1_step), s1_ind = add(s1_ind, s1_step), c2_ind = sub(c2_ind, c2_step), s2_ind = sub(s2_ind, s2_step) )
+ {
+ RY[i] = L_add(RZ0[i], L_sub(L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ Mpy_32_16_1(IZ1[-i], t_sin[s1_ind])),
+ L_sub(Mpy_32_16_1(RZ2[i], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ2[-i], t_sin[s2_ind]))));
+ IY[-i] = L_sub(IZ0[-i], L_sub(Mpy_32_16_1(RZ1[ i], t_sin[s1_ind]),
+ L_sub(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(RZ2[ i], t_sin[s2_ind]),
+ Mpy_32_16_1(IZ2[-i], t_sin[c2_ind])))));
+ }
+
+ /* special case: i = m/2, i.e. 1/3 */
+ RY[i] = L_add(RZ0[i],
+ L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ2[i], t_sin[c2_ind])));
+ IY[-i] = L_negate(L_add(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(RZ2[i], t_sin[s2_ind])));
+ i++;
+
+ c1_ind = add(c1_ind, c1_step);
+ s1_ind = add(s1_ind, s1_step);
+ c2_ind = sub(c2_ind, c2_step);
+ s2_ind = sub(s2_ind, s2_step);
+
+ /* next 2/12 */
+ for ( j = i - 2; i < 6 * m / 8; i++, j--, c1_ind = add(c1_ind, c1_step), s1_ind = add(s1_ind, s1_step), c2_ind = sub(c2_ind, c2_step), s2_ind = sub(s2_ind, s2_step) )
+ {
+ RY[i] = L_add(RZ0[j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(IZ1[-j], t_sin[s1_ind]),
+ L_add(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[s2_ind])))));
+
+ IY[-i] = L_negate(L_add(IZ0[-j], L_add(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]),
+ L_add(Mpy_32_16_1(IZ1[-j], t_sin[c1_ind]),
+ L_sub(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[c2_ind]))))));
+ }
+
+ /*--------------------------half--------------------------*/
+ /* next 2/12 */
+ for ( ; i < 8 * m / 8; i++, j--, c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = add(c2_ind, c2_step), s2_ind = add(s2_ind, s2_step) )
+ {
+ RY[i] = L_sub(RZ0[j], L_add(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(IZ1[-j], t_sin[s1_ind]),
+ L_sub(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[s2_ind])))));
+ IY[-i] = L_negate(L_add(IZ0[-j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]),
+ L_add(Mpy_32_16_1(IZ1[-j], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[c2_ind]))))));
+ }
+
+ /* special case: i = m, i.e. 2/3 */
+ RY[i] = L_sub(RZ0[j], L_add(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ2[j], t_sin[c2_ind])));
+ IY[-i++] = L_sub(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]),
+ Mpy_32_16_1(RZ1[j], t_sin[s1_ind]));
+ c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = add(c2_ind, c2_step), s2_ind = add(s2_ind, s2_step);
+
+ /* next 1/12 */
+ for ( j = 1; i < 9 * m / 8; i++, j++, c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = add(c2_ind, c2_step), s2_ind = add(s2_ind, s2_step) )
+ {
+ RY[i] = L_sub(RZ0[j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]),
+ L_sub(Mpy_32_16_1(IZ1[-j], t_sin[s1_ind]),
+ L_add(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[s2_ind])))));
+ IY[-i] = L_sub(IZ0[-j], L_add(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]),
+ L_sub(Mpy_32_16_1(IZ1[-j], t_sin[c1_ind]),
+ L_sub(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[c2_ind])))));
+ }
+
+ /* last 3/12 */
+ for ( ; i < 12 * m / 8; i++, j++, c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = sub(c2_ind, c2_step), s2_ind = sub(s2_ind, s2_step) )
+ {
+ RY[i] = L_sub(RZ0[j], L_sub(L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]),
+ Mpy_32_16_1(IZ1[-j], t_sin[s1_ind])),
+ L_sub(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[s2_ind]))));
+ IY[-i] = L_sub(IZ0[-j], L_sub(L_add(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ1[-j], t_sin[c1_ind])),
+ L_add(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]),
+ Mpy_32_16_1(IZ2[-j], t_sin[c2_ind]))));
+ }
+
+ /* special case: i = 3*m/2 */
+ RY[i] = L_sub(RZ0[j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ2[j], t_sin[c2_ind])));
+
+ return;
+}
+
+void ifft3_fx_ivas(
+ const Word32 Z[],
+ Word32 X[],
+ const Word16 n )
+{
+ Word32 Y[PH_ECU_SPEC_SIZE];
+ const Word16 *t_sin = sincos_t_rad3_fx;
+ Word16 m, step, step2, order;
+ Word16 i;
+ Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind;
+ Word16 scale;
+ const Word32 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2;
+ Word32 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2;
+
+ /* Determine the order of the transform, the length of decimated */
+ /* transforms m, and the step for the sine and cosine tables. */
+ switch ( n )
+ {
+ case 1536:
+ order = 9;
+ m = 512;
+ step = 1;
+ break;
+ case 384:
+ order = 7;
+ m = 128;
+ step = 4;
+ break;
+ default:
+ order = 9;
+ m = 512;
+ step = 1;
+ }
+
+ /* pointer initialization */
+ RY0 = &Y[0];
+ IY0 = &RY0[m];
+ RY1 = &RY0[m];
+ IY1 = &RY1[m];
+ RY2 = &RY1[m];
+ IY2 = &RY2[m];
+
+ RZ0 = &Z[0];
+ RZ1 = RZ0 + m;
+ RZ2 = RZ0 + n / 2 - m / 2;
+ IZ0 = &Z[n];
+ IZ1 = IZ0 - m;
+ IZ2 = IZ0 - n / 2 + m / 2;
+
+ /* Inverse butterflies of order 3. */
+
+ /* Construction of Y0 */
+ RY0[0] = L_add(RZ0[0], L_add(RZ1[0], RZ2[0]));
+ FOR ( i = 1; i < m / 2; i++ )
+ {
+ RY0[i] = L_add(RZ0[i], L_add(RZ1[i], RZ2[-i]));
+ IY0[-i] = L_add(IZ0[-i], L_sub(IZ1[-i], IZ2[i]));
+ }
+
+ /* m/2 */
+ RY0[i] = L_add(RZ0[i], L_add(RZ1[i], RZ2[-i]));
+
+ /* Construction of Y1 */
+ c0_ind = T_SIN_PI_2;
+ s0_ind = 0;
+ c1_ind = T_SIN_PI_2 * 1 / 3;
+ s1_ind = T_SIN_PI_2 * 2 / 3;
+ c2_ind = T_SIN_PI_2 * 1 / 3;
+ s2_ind = T_SIN_PI_2 * 2 / 3;
+
+ RY1[0] = L_sub(Mpy_32_16_1(RZ0[0], t_sin[c0_ind]),
+ L_add(Mpy_32_16_1(RZ1[0], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(RZ2[0], t_sin[c2_ind]),
+ L_add(Mpy_32_16_1(IZ1[0], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[0], t_sin[s2_ind])))));
+
+ c0_ind = sub(c0_ind, step);
+ s0_ind = add(s0_ind, step);
+ c1_ind = add(c1_ind, step);
+ s1_ind = sub(s1_ind, step);
+ c2_ind = sub(c2_ind, step);
+ s2_ind = add(s2_ind, step);
+ for ( i = 1; i < m / 4; i++, c0_ind = sub(c0_ind, step), s0_ind = add(s0_ind, step), c1_ind = add(c1_ind, step), s1_ind = sub(s1_ind, step), c2_ind = sub(c2_ind, step), s2_ind = add(s2_ind, step) )
+ {
+ RY1[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]),
+ L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]),
+ L_add(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))))));
+ IY1[-i] = L_add(L_sub(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]),
+ Mpy_32_16_1(IZ1[-i], t_sin[c1_ind])),
+ L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]),
+ L_add(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]),
+ L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[s2_ind])))));
+ }
+
+ for ( ; i < m / 2; i++, c0_ind = sub(c0_ind, step), s0_ind = add(s0_ind, step), c1_ind = add(c1_ind, step), s1_ind = sub(s1_ind, step), c2_ind = add(c2_ind, step), s2_ind = sub(s2_ind, step) )
+ {
+ RY1[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]),
+ L_add(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[c2_ind])),
+ L_add(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))));
+ IY1[-i] = L_sub(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]),
+ L_sub(L_add(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[c2_ind])),
+ L_add(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]),
+ L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[s2_ind])))));
+ }
+
+ /* m/2 */
+ RY1[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]),
+ L_add(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[c2_ind])),
+ L_add(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))));
+
+ /* Construction of Y2 */
+ c0_ind = T_SIN_PI_2;
+ s0_ind = 0;
+ c1_ind = T_SIN_PI_2 * 1 / 3;
+ s1_ind = T_SIN_PI_2 * 2 / 3;
+ c2_ind = T_SIN_PI_2 * 1 / 3;
+ s2_ind = T_SIN_PI_2 * 2 / 3;
+ step2 = 2 * step;
+ RY2[0] = L_sub(Mpy_32_16_1(RZ0[0], t_sin[c0_ind]),
+ L_sub(L_add(Mpy_32_16_1(RZ1[0], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ2[0], t_sin[c2_ind])),
+ L_add(Mpy_32_16_1(IZ1[0], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[0], t_sin[s2_ind]))));
+
+ c0_ind = sub(c0_ind, step2);
+ s0_ind = add(s0_ind, step2);
+ c1_ind = sub(c1_ind, step2);
+ s1_ind = add(s1_ind, step2);
+ c2_ind = add(c2_ind, step2);
+ s2_ind = sub(s2_ind, step2);
+ for ( i = 1; i < m / 8; i++, c0_ind = sub(c0_ind, step2), s0_ind = add(s0_ind, step2), c1_ind = sub(c1_ind, step2), s1_ind = add(s1_ind, step2), c2_ind = add(c2_ind, step2), s2_ind = sub(s2_ind, step2) )
+ {
+ RY2[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]),
+ L_add(L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[c2_ind])),
+ L_sub(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))));
+ IY2[-i] = L_add(L_sub(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]),
+ Mpy_32_16_1(IZ1[-i], t_sin[c1_ind])),
+ L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]),
+ L_sub(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]),
+ L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[s2_ind])))));
+ }
+
+ for ( ; i < m / 4; i++, c0_ind = sub(c0_ind, step2), s0_ind = add(s0_ind, step2), c1_ind = add(c1_ind, step2), s1_ind = sub(s1_ind, step2), c2_ind = add(c2_ind, step2), s2_ind = sub(s2_ind, step2) )
+ {
+ RY2[i] = L_add(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]),
+ L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))));
+ IY2[-i] = L_add(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]),
+ L_sub(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]),
+ L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[s2_ind]))))));
+ }
+
+ for ( ; i < 3 * m / 8; i++, c0_ind = sub(c0_ind, step2), s0_ind = add(s0_ind, step2), c1_ind = add(c1_ind, step2), s1_ind = sub(s1_ind, step2), c2_ind = sub(c2_ind, step2), s2_ind = add(s2_ind, step2) )
+ {
+ RY2[i] = L_sub(L_add(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]),
+ Mpy_32_16_1(RZ1[i], t_sin[c1_ind])),
+ L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])),
+ L_sub(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))));
+ IY2[-i] = L_sub(L_add(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]),
+ L_add(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]),
+ L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]),
+ Mpy_32_16_1(RZ0[i], t_sin[s0_ind])))),
+ L_add(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[s2_ind])));
+ }
+
+ for ( ; i < m / 2; i++, c0_ind = add(c0_ind, step2), s0_ind = sub(s0_ind, step2), c1_ind = add(c1_ind, step2), s1_ind = sub(s1_ind, step2), c2_ind = sub(c2_ind, step2), s2_ind = add(s2_ind, step2) )
+ {
+ RY2[i] = L_sub(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ0[i], t_sin[c0_ind])),
+ L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])),
+ L_sub(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))));
+ IY2[-i] = L_add(L_sub(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]),
+ Mpy_32_16_1(IZ0[-i], t_sin[c0_ind])),
+ L_sub(L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]),
+ Mpy_32_16_1(RZ0[i], t_sin[s0_ind])),
+ L_add(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]),
+ Mpy_32_16_1(RZ2[-i], t_sin[s2_ind]))));
+ }
+
+ /* m/2 */
+ RY2[i] = L_sub(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]),
+ Mpy_32_16_1(RZ0[i], t_sin[c0_ind])),
+ L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]),
+ Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])),
+ L_sub(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]),
+ Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))));
+
+ /* Compute the inverse FFT for all 3 blocks. */
+ ifft_rel_fx32( RY0, m, order );
+ ifft_rel_fx32( RY1, m, order );
+ ifft_rel_fx32( RY2, m, order );
+
+ y0 = RY0;
+ y1 = RY1;
+ y2 = RY2;
+
+ /* Interlacing and scaling, scale = 1/3 */
+ scale = (Word16)(0x2AAB);
+ FOR ( i = 0; i < n; )
+ {
+ X[i++] = Mpy_32_16_1(( *y0++ ), scale);
+ X[i++] = Mpy_32_16_1(( *y1++ ), scale);
+ X[i++] = Mpy_32_16_1(( *y2++ ), scale);
+ }
+
+ return;
+}
+
+
+static void rfft_post(
+ const Word16 *sine_table,
+ Word32 *buf,
+ const Word16 len )
+{
+ Word32 tmp1, tmp2, tmp3, tmp4;
+ Word16 s, c;
+ Word16 i = 0;
+
+ tmp1 = L_add(buf[0], buf[1]);
+ buf[1] = L_sub(buf[0], buf[1]);
+ buf[0] = tmp1;
+
+ FOR ( i = 1; i <= ( len + 2 ) / 4; i++ )
+ {
+ s = sine_table[i]; /* sin(pi*i/(len/2)) */
+ c = sine_table[i + len / 4]; /* cos(pi*i/(len/2)) */
+
+ tmp1 = L_sub(buf[2 * i], buf[len - 2 * i]);
+ tmp2 = L_add(buf[2 * i + 1], buf[len - 2 * i + 1]);
+ tmp3 = L_sub(Mpy_32_16_1(tmp1, s), Mpy_32_16_1(tmp2, c)); /* real part of j*W(k,N)*[T(k) - T'(N-k)] */
+ tmp4 = L_add(Mpy_32_16_1(tmp1, c), Mpy_32_16_1(tmp2, s)); /* imag part of j*W(k,N)*[T(k) - T'(N-k)] */
+ tmp1 = L_add(buf[2 * i], buf[len - 2 * i]);
+ tmp2 = L_sub(buf[2 * i + 1], buf[len - 2 * i + 1]);
+
+ buf[2 * i] = L_shr(L_sub(tmp1, tmp3), 1);
+ buf[2 * i + 1] = L_shr(L_sub(tmp2, tmp4), 1);
+ buf[len - 2 * i] = L_shr(L_add(tmp1, tmp3), 1);
+ buf[len - 2 * i + 1] = L_negate(L_shr(L_add(tmp2, tmp4), 1));
+ }
+}
+
+static void rfft_pre(
+ const Word16 *sine_table,
+ Word32 *buf,
+ const Word16 len )
+{
+ const Word16 scale = (Word16)(1.0f / len * 32768.0);
+ Word32 tmp1, tmp2, tmp3, tmp4;
+ Word16 s, c;
+ Word16 i = 0;
+
+ tmp1 = L_add(buf[0], buf[1]);
+ buf[1] = Mpy_32_16_1(L_sub(buf[0], buf[1]), scale);
+ buf[0] = Mpy_32_16_1(tmp1, scale);
+
+ FOR ( i = 1; i <= ( len + 2 ) / 4; i++ )
+ {
+ s = sine_table[i]; /* sin(pi*i/(len/2)) */
+ c = sine_table[i + len / 4]; /* cos(pi*i/(len/2)) */
+
+ tmp1 = L_sub(buf[2 * i], buf[len - 2 * i]);
+ tmp2 = L_add(buf[2 * i + 1], buf[len - 2 * i + 1]);
+ tmp3 = L_add(Mpy_32_16_1(tmp1, s), Mpy_32_16_1(tmp2, c)); /* real part of j*W(k,N)*[T(k) - T'(N-k)] */
+ tmp4 = L_sub(Mpy_32_16_1(tmp2, s), Mpy_32_16_1(tmp1, c)); /* imag part of j*W(k,N)*[T(k) - T'(N-k)] */
+ tmp1 = L_add(buf[2 * i], buf[len - 2 * i]);
+ tmp2 = L_sub(buf[2 * i + 1], buf[len - 2 * i + 1]);
+
+ buf[2 * i] = Mpy_32_16_1(L_add(tmp1, tmp3), scale);
+ buf[2 * i + 1] = L_negate(Mpy_32_16_1(L_add(tmp2, tmp4), scale));
+ buf[len - 2 * i] = Mpy_32_16_1(L_sub(tmp1, tmp3), scale);
+ buf[len - 2 * i + 1] = Mpy_32_16_1(L_sub(tmp2, tmp4), scale);
+ }
+
+ return;
+}
+
+Word16 RFFTN_fx(
+ Word32 *data,
+ const Word16 *sine_table,
+ const Word16 len,
+ const Word16 sign )
+{
+ assert( len <= 640 && len > 0 );
+
+ IF ( EQ_16(len, 640) )
+ {
+ Word32 x[320], y[320];
+ Word16 i;
+
+ IF ( NE_16(sign, -1) )
+ {
+ rfft_pre( sine_table, data, len );
+ }
+
+ FOR ( i = 0; i < 320; i++ )
+ {
+ x[i] = data[2 * i];
+ y[i] = data[2 * i + 1];
+ }
+ DoRTFT320_fx( x, y );
+ FOR ( i = 0; i < 320; i++ )
+ {
+ data[2 * i] = x[i];
+ data[2 * i + 1] = y[i];
+ }
+
+ IF ( EQ_16(sign, -1) )
+ {
+ rfft_post( sine_table, data, len );
+ }
+ }
+ ELSE
+ {
+ IF ( EQ_16(len, 512) )
+ {
+ Word16 i;
+ const Word16 log2 = 9;
+ Word32 reordered_data[512];
+
+ IF ( EQ_16(sign, -1) )
+ {
+ fft_rel_fx32( data, len, log2 );
+ reordered_data[0] = data[0];
+ reordered_data[1] = data[len / 2];
+ FOR ( i = 1; i < len / 2; i++ )
+ {
+ reordered_data[2 * i] = data[i];
+ reordered_data[2 * i + 1] = data[len - i];
+ }
+ }
+ ELSE
+ {
+ reordered_data[0] = data[0];
+ reordered_data[len / 2] = data[1];
+ FOR ( i = 1; i < len / 2; i++ )
+ {
+ reordered_data[i] = data[2 * i];
+ reordered_data[len - i] = data[2 * i + 1];
+ }
+ ifft_rel_fx32( reordered_data, len, log2 );
+ }
+ Copy32( reordered_data, data, len );
+ }
+ ELSE
+ {
+ assert( !"Not supported FFT length!" );
+ }
+ }
+
+ return 0;
+}
+
static void butterfly(
const Word32 a,
const Word32 b,
@@ -2458,30 +2997,16 @@ static const Word16 C53 = 0x678D; /* 0.809016994374947f cos( PI/5); */
static const Word16 C54 = 0x4B3D; /* 0.587785252292473f sin( PI/5); */
static void fft5(
- Word32 *pInOut )
+ cmplx *pInOut )
{
- Word32 re1, im1;
- Word32 re2, im2;
- Word32 re3, im3;
- Word32 re4, im4;
- Word32 re5, im5;
+ cmplx x[5];
+ cmplx t[4];
- Word32 tmp1, tmp2;
- Word32 tmp3, tmp4;
- Word32 tmp5, tmp6;
- Word32 tmp7, tmp8;
-
-
- re1 = pInOut[0];
- im1 = pInOut[1];
- re2 = pInOut[2];
- im2 = pInOut[3];
- re3 = pInOut[4];
- im3 = pInOut[5];
- re4 = pInOut[6];
- im4 = pInOut[7];
- re5 = pInOut[8];
- im5 = pInOut[9];
+ x[0] = pInOut[0];
+ x[1] = pInOut[1];
+ x[2] = pInOut[2];
+ x[3] = pInOut[3];
+ x[4] = pInOut[4];
/*
1.0000 1.0000 1.0000 1.0000 1.0000
@@ -2491,27 +3016,18 @@ static void fft5(
1.0000 -0.8090 + 0.5878i 0.3090 - 0.9511i 0.3090 + 0.9511i -0.8090 - 0.5878i
1.0000 0.3090 + 0.9511i -0.8090 + 0.5878i -0.8090 - 0.5878i 0.3090 - 0.9511i
*/
- tmp1 = L_add( re2, re5 );
- tmp2 = L_sub( re2, re5 );
- tmp3 = L_add( im2, im5 );
- tmp4 = L_sub( im2, im5 );
- tmp5 = L_add( re3, re4 );
- tmp6 = L_sub( re3, re4 );
- tmp7 = L_add( im3, im4 );
- tmp8 = L_sub( im3, im4 );
-
-
- pInOut[0] = L_add( re1, L_add( tmp1, tmp5 ) );
- pInOut[1] = L_add( im1, L_add( tmp3, tmp7 ) );
-
- pInOut[2] = re1 + Mpy_32_16_1( tmp1, C51 ) - Mpy_32_16_1( tmp5, C53 ) + Mpy_32_16_1( tmp4, C52 ) + Mpy_32_16_1( tmp8, C54 );
- pInOut[8] = re1 + Mpy_32_16_1( tmp1, C51 ) - Mpy_32_16_1( tmp5, C53 ) - Mpy_32_16_1( tmp4, C52 ) - Mpy_32_16_1( tmp8, C54 );
- pInOut[3] = im1 - Mpy_32_16_1( tmp2, C52 ) - Mpy_32_16_1( tmp6, C54 ) + Mpy_32_16_1( tmp3, C51 ) - Mpy_32_16_1( tmp7, C53 );
- pInOut[9] = im1 + Mpy_32_16_1( tmp2, C52 ) + Mpy_32_16_1( tmp6, C54 ) + Mpy_32_16_1( tmp3, C51 ) - Mpy_32_16_1( tmp7, C53 );
- pInOut[4] = re1 - Mpy_32_16_1( tmp1, C53 ) + Mpy_32_16_1( tmp5, C51 ) + Mpy_32_16_1( tmp4, C54 ) - Mpy_32_16_1( tmp8, C52 );
- pInOut[6] = re1 - Mpy_32_16_1( tmp1, C53 ) + Mpy_32_16_1( tmp5, C51 ) - Mpy_32_16_1( tmp4, C54 ) + Mpy_32_16_1( tmp8, C52 );
- pInOut[5] = im1 - Mpy_32_16_1( tmp2, C54 ) + Mpy_32_16_1( tmp6, C52 ) - Mpy_32_16_1( tmp3, C53 ) + Mpy_32_16_1( tmp7, C51 );
- pInOut[7] = im1 + Mpy_32_16_1( tmp2, C54 ) - Mpy_32_16_1( tmp6, C52 ) - Mpy_32_16_1( tmp3, C53 ) + Mpy_32_16_1( tmp7, C51 );
+ t[0] = CL_add( x[1], x[4] );
+ t[1] = CL_sub( x[1], x[4] );
+ t[2] = CL_add( x[2], x[3] );
+ t[3] = CL_sub( x[2], x[3] );
+
+
+ pInOut[0] = CL_add( x[0], CL_add( t[0], t[2] ) );
+
+ pInOut[1] = CL_add( CL_add( x[0], CL_sub( CL_scale( t[0], C51 ), CL_scale( t[2], C53 ) ) ), CL_add( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C52 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C54 ) ) ) );
+ pInOut[4] = CL_add( x[0], CL_sub( CL_scale( t[0], C51 ), CL_add( CL_scale( t[2], C53 ), CL_add( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C52 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C54 ) ) ) ) ) );
+ pInOut[2] = CL_add( CL_sub( x[0], CL_scale( t[0], C53 ) ), CL_add( CL_scale( t[2], C51 ), CL_sub( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C54 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C52 ) ) ) ) );
+ pInOut[3] = CL_add( CL_sub( x[0], CL_scale( t[0], C53 ) ), CL_add( CL_sub( CL_scale( t[2], C51 ), CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C54 ) ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C52 ) ) ) );
return;
}
@@ -2582,6 +3098,7 @@ static void nextFFT(
Word32 *x,
const Word16 length )
{
+ cmplx val[5];
SWITCH ( length )
{
case 2:
@@ -2594,7 +3111,17 @@ static void nextFFT(
fft4( x );
BREAK;
case 5:
- fft5( x );
+ FOR ( Word32 i = 0; i < 5; i++ )
+ {
+ val[i].re = x[2 * i];
+ val[i].im = x[2 * i + 1];
+ }
+ fft5( val );
+ FOR ( Word32 i = 0; i < 5; i++ )
+ {
+ x[2 * i] = val[i].re;
+ x[2 * i + 1] = val[i].im;
+ }
BREAK;
case 8:
fft8_2( x );
@@ -2670,6 +3197,7 @@ static void cooleyTukeyFFT(
Word16 n1, n2;
Word16 cnt = 0;
Word32 *src, *dest;
+ cmplx val[5];
SWITCH ( length )
{
@@ -2685,7 +3213,17 @@ static void cooleyTukeyFFT(
fft4( x );
BREAK;
case 5:
- fft5( x );
+ FOR ( i = 0; i < 5; i++ )
+ {
+ val[i].re = x[2 * i];
+ val[i].im = x[2 * i + 1];
+ }
+ fft5( val );
+ FOR ( i = 0; i < 5; i++ )
+ {
+ x[2 * i] = val[i].re;
+ x[2 * i + 1] = val[i].im;
+ }
BREAK;
case 8:
fft8_2( x );
@@ -2693,7 +3231,7 @@ static void cooleyTukeyFFT(
default:
{
factor = findFactor( length );
- IF ( factor > 0 && ( length / factor > 1 ) )
+ IF ( GT_16(factor, 0) && GT_16( length / factor, 1 ) )
{
n1 = factor;
n2 = length / factor;
@@ -2773,7 +3311,7 @@ static void pfaDFT(
Word16 i, ii;
Word16 cnt;
- IF ( numFactors > 1 )
+ IF ( GT_16(numFactors, 1) )
{
Word32 *tmp = scratch1;
Word16 n1_inv = 1, n2_inv = 1;
@@ -2800,7 +3338,7 @@ static void pfaDFT(
tmp[cnt++] = x[2 * idx + 1];
idx += incr;
- IF ( idx > length )
+ IF ( GT_16(idx, length) )
{
idx -= length;
}
@@ -2835,7 +3373,7 @@ static void pfaDFT(
tmp[2 * idx] = x[cnt++];
tmp[2 * idx + 1] = x[cnt++];
idx += n2;
- IF ( idx > length )
+ IF ( GT_16(idx, length) )
{
idx -= length;
}
@@ -3030,2327 +3568,1228 @@ void DoFFT_fx(
*-----------------------------------------------------------------*/
static void fft_len5(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x )
{
- Word32 x0, x1, x2, x3, x4;
- Word32 r1, r2, r3, r4;
- Word32 s1, s2, s3, s4;
- Word32 t;
-
- x0 = re[s * 0];
- x1 = re[s * 1];
- x2 = re[s * 2];
- x3 = re[s * 3];
- x4 = re[s * 4];
-
- r1 = L_add( x1, x4 );
- r4 = L_sub( x1, x4 );
- r3 = L_add( x2, x3 );
- r2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- re[0] = L_add( x0, r1 );
-
- r1 = L_add( re[0], L_shl(Mpy_32_16_1( r1, FFT_C55 ), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1(L_add( r4, r2 ), FFT_C51 );
-
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- x0 = im[s * 0];
- x1 = im[s * 1];
- x2 = im[s * 2];
- x3 = im[s * 3];
- x4 = im[s * 4];
-
- s1 = L_add( x1, x4 );
- s4 = L_sub( x1, x4 );
- s3 = L_add( x2, x3 );
- s2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- im[0] = L_add( x0, s1 );
-
- s1 = L_add( im[0], L_shl(Mpy_32_16_1( s1, FFT_C55 ), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1(L_add( s4, s2 ), FFT_C51 );
-
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- re[s * 1] = L_add( r1, s2 );
- re[s * 4] = L_sub( r1, s2 );
- re[s * 2] = L_sub( r3, s4 );
- re[s * 3] = L_add( r3, s4 );
-
- im[s * 1] = L_sub( s1, r2 );
- im[s * 4] = L_add( s1, r2 );
- im[s * 2] = L_add( s3, r4 );
- im[s * 3] = L_sub( s3, r4 );
+ cmplx y1, y2, y3, y4;
+ cmplx t;
+
+ y1 = CL_add( x[1], x[4] );
+ y4 = CL_sub( x[1], x[4] );
+ y3 = CL_add( x[2], x[3] );
+ y2 = CL_sub( x[2], x[3] );
+ t = CL_scale( CL_sub( y1, y3 ), FFT_C54 );
+ y1 = CL_add( y1, y3 );
+ x[0] = CL_add( x[0], y1 );
+
+ y1 = CL_add( x[0], CL_shl( CL_scale( y1, FFT_C55 ), 1 ) );
+ y3 = CL_sub( y1, t );
+ y1 = CL_add( y1, t );
+ t = CL_scale( CL_add( y4, y2 ), FFT_C51 );
+
+ y4 = CL_add( t, CL_shl( CL_scale( y4, FFT_C52 ), 1 ) );
+ y2 = CL_add( t, CL_scale( y2, FFT_C53 ) );
+
+ x[1] = CL_msu_j( y1, y2 );
+ x[4] = CL_mac_j( y1, y2 );
+ x[2] = CL_mac_j( y3, y4 );
+ x[3] = CL_msu_j( y3, y4 );
return;
}
static void fft_len8(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x )
{
- Word32 x00, x01, x02, x03, x04, x05, x06, x07;
- Word32 x08, x09, x10, x11, x12, x13, x14, x15;
- Word32 t00, t01, t02, t03, t04, t05, t06, t07;
- Word32 t08, t09, t10, t11, t12, t13, t14, t15;
- Word32 s00, s01, s02, s03, s04, s05, s06, s07;
- Word32 s08, s09, s10, s11, s12, s13, s14, s15;
-
- x00 = re[s * 0];
- x01 = im[s * 0];
- x02 = re[s * 1];
- x03 = im[s * 1];
- x04 = re[s * 2];
- x05 = im[s * 2];
- x06 = re[s * 3];
- x07 = im[s * 3];
- x08 = re[s * 4];
- x09 = im[s * 4];
- x10 = re[s * 5];
- x11 = im[s * 5];
- x12 = re[s * 6];
- x13 = im[s * 6];
- x14 = re[s * 7];
- x15 = im[s * 7];
-
- t00 = L_add( x00, x08 );
- t02 = L_sub( x00, x08 );
- t01 = L_add( x01, x09 );
- t03 = L_sub( x01, x09 );
- t04 = L_add( x02, x10 );
- t06 = L_sub( x02, x10 );
- t05 = L_add( x03, x11 );
- t07 = L_sub( x03, x11 );
- t08 = L_add( x04, x12 );
- t10 = L_sub( x04, x12 );
- t09 = L_add( x05, x13 );
- t11 = L_sub( x05, x13 );
- t12 = L_add( x06, x14 );
- t14 = L_sub( x06, x14 );
- t13 = L_add( x07, x15 );
- t15 = L_sub( x07, x15 );
-
- s00 = L_add( t00, t08 );
- s04 = L_sub( t00, t08 );
- s01 = L_add( t01, t09 );
- s05 = L_sub( t01, t09 );
- s08 = L_sub( t02, t11 );
- s10 = L_add( t02, t11 );
- s09 = L_add( t03, t10 );
- s11 = L_sub( t03, t10 );
- s02 = L_add( t04, t12 );
- s07 = L_sub( t04, t12 );
- s03 = L_add( t05, t13 );
- s06 = L_sub( t13, t05 );
-
- t01 = L_add( t06, t14 );
- t02 = L_sub( t06, t14 );
- t00 = L_add( t07, t15 );
- t03 = L_sub( t07, t15 );
-
- s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 );
- s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 );
- s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 );
- s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 );
-
- re[s * 0] = L_add( s00, s02 );
- re[s * 4] = L_sub( s00, s02 );
- im[s * 0] = L_add( s01, s03 );
- im[s * 4] = L_sub( s01, s03 );
- re[s * 2] = L_sub( s04, s06 );
- re[s * 6] = L_add( s04, s06 );
- im[s * 2] = L_sub( s05, s07 );
- im[s * 6] = L_add( s05, s07 );
- re[s * 3] = L_add( s08, s14 );
- re[s * 7] = L_sub( s08, s14 );
- im[s * 3] = L_add( s09, s15 );
- im[s * 7] = L_sub( s09, s15 );
- re[s * 1] = L_add( s10, s12 );
- re[s * 5] = L_sub( s10, s12 );
- im[s * 1] = L_add( s11, s13 );
- im[s * 5] = L_sub( s11, s13 );
+ cmplx t[8], s[8];
+
+ t[0] = CL_add( x[0], x[4] );
+ t[1] = CL_sub( x[0], x[4] );
+ t[2] = CL_add( x[1], x[5] );
+ t[3] = CL_sub( x[1], x[5] );
+ t[4] = CL_add( x[2], x[6] );
+ t[5] = CL_sub( x[2], x[6] );
+ t[6] = CL_add( x[3], x[7] );
+ t[7] = CL_sub( x[3], x[7] );
+
+ s[0] = CL_add( t[0], t[4] );
+ s[2] = CL_sub( t[0], t[4] );
+ s[4] = CL_mac_j( t[1], t[5] );
+ s[5] = CL_msu_j( t[1], t[5] );
+ s[1] = CL_add( t[2], t[6] );
+ s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) );
+
+ t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) );
+ t[1] = CL_sub( t[3], t[7] );
+
+ s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 );
+ s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 );
+ s[7] = CL_conjugate( s[7] );
+
+ x[0] = CL_add( s[0], s[1] );
+ x[4] = CL_sub( s[0], s[1] );
+ x[2] = CL_sub( s[2], s[3] );
+ x[6] = CL_add( s[2], s[3] );
+ x[3] = CL_add( s[4], s[7] );
+ x[7] = CL_sub( s[4], s[7] );
+ x[1] = CL_add( s[5], s[6] );
+ x[5] = CL_sub( s[5], s[6] );
return;
}
static void fft_len10(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x ) /* in-place length-10 complex FFT on packed cmplx data (replaces split re[]/im[] arrays with stride s) */
{
- Word32 t;
- Word32 x0, x1, x2, x3, x4;
- Word32 r1, r2, r3, r4;
- Word32 s1, s2, s3, s4;
- Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
- Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
-
- x0 = re[s * 0];
- x1 = re[s * 2];
- x2 = re[s * 4];
- x3 = re[s * 6];
- x4 = re[s * 8];
-
- r1 = L_add( x3, x2 );
- r4 = L_sub( x3, x2 );
- r3 = L_add( x1, x4 );
- r2 = L_sub( x1, x4 );
- t = Mpy_32_16_1(L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y00 = L_add( x0, r1 );
- r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1));
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ) , FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- x0 = im[s * 0];
- x1 = im[s * 2];
- x2 = im[s * 4];
- x3 = im[s * 6];
- x4 = im[s * 8];
-
- s1 = L_add( x3, x2 );
- s4 = L_sub( x3, x2 );
- s3 = L_add( x1, x4 );
- s2 = L_sub( x1, x4 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y01 = L_add( x0, s1 );
- s1 = L_add( y01, L_shl(Mpy_32_16_1( s1, FFT_C55 ), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y04 = L_add( r1, s2 );
- y16 = L_sub( r1, s2 );
- y08 = L_sub( r3, s4 );
- y12 = L_add( r3, s4 );
-
- y05 = L_sub( s1, r2 );
- y17 = L_add( s1, r2 );
- y09 = L_add( s3, r4 );
- y13 = L_sub( s3, r4 );
-
- x0 = re[s * 5];
- x1 = re[s * 1];
- x2 = re[s * 3];
- x3 = re[s * 7];
- x4 = re[s * 9];
-
- r1 = L_add( x1, x4 );
- r4 = L_sub( x1, x4 );
- r3 = L_add( x3, x2 );
- r2 = L_sub( x3, x2 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y02 = L_add( x0, r1 );
- r1 = L_add( y02, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1(L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- x0 = im[s * 5];
- x1 = im[s * 1];
- x2 = im[s * 3];
- x3 = im[s * 7];
- x4 = im[s * 9];
-
- s1 = L_add( x1, x4 );
- s4 = L_sub( x1, x4 );
- s3 = L_add( x3, x2 );
- s2 = L_sub( x3, x2 );
- t = Mpy_32_16_1(L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y03 = L_add( x0, s1 );
- s1 = L_add( y03, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y06 = L_add( r1, s2 );
- y18 = L_sub( r1, s2 );
- y10 = L_sub( r3, s4 );
- y14 = L_add( r3, s4 );
-
- y07 = L_sub( s1, r2 );
- y19 = L_add( s1, r2 );
- y11 = L_add( s3, r4 );
- y15 = L_sub( s3, r4 );
-
- re[s * 0] = L_add( y00, y02 );
- im[s * 0] = L_add( y01, y03 );
- re[s * 5] = L_sub( y00, y02 );
- im[s * 5] = L_sub( y01, y03 );
-
- re[s * 2] = L_add( y04, y06 );
- im[s * 2] = L_add( y05, y07 );
- re[s * 7] = L_sub( y04, y06 );
- im[s * 7] = L_sub( y05, y07 );
-
- re[s * 4] = L_add( y08, y10 );
- im[s * 4] = L_add( y09, y11 );
- re[s * 9] = L_sub( y08, y10 );
- im[s * 9] = L_sub( y09, y11 );
-
- re[s * 6] = L_add( y12, y14 );
- im[s * 6] = L_add( y13, y15 );
- re[s * 1] = L_sub( y12, y14 );
- im[s * 1] = L_sub( y13, y15 );
-
- re[s * 8] = L_add( y16, y18 );
- im[s * 8] = L_add( y17, y19 );
- re[s * 3] = L_sub( y16, y18 );
- im[s * 3] = L_sub( y17, y19 );
+ cmplx t;
+ cmplx s[4];
+ cmplx y[10];
+
+ s[0] = CL_add( x[6], x[4] ); /* radix-5 sub-DFT over even-indexed inputs x[0,2,4,6,8] */
+ s[3] = CL_sub( x[6], x[4] );
+ s[2] = CL_add( x[2], x[8] );
+ s[1] = CL_sub( x[2], x[8] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[0] = CL_add( x[0], s[0] );
+ s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[2] = CL_msu_j( s[0], s[1] ); /* NOTE(review): CL_mac_j/CL_msu_j taken as a + j*b / a - j*b, folding the old re/im cross terms — confirm against CL basop definitions */
+ y[8] = CL_mac_j( s[0], s[1] );
+ y[4] = CL_mac_j( s[2], s[3] );
+ y[6] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( x[1], x[9] ); /* radix-5 sub-DFT over odd-indexed inputs x[5,1,7,3,9] */
+ s[3] = CL_sub( x[1], x[9] );
+ s[2] = CL_add( x[7], x[3] );
+ s[1] = CL_sub( x[7], x[3] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[1] = CL_add( x[5], s[0] );
+ s[0] = CL_add( y[1], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[3] = CL_msu_j( s[0], s[1] );
+ y[9] = CL_mac_j( s[0], s[1] );
+ y[5] = CL_mac_j( s[2], s[3] );
+ y[7] = CL_msu_j( s[2], s[3] );
+
+
+ x[0] = CL_add( y[0], y[1] ); /* final radix-2 recombination, results written back in place */
+ x[5] = CL_sub( y[0], y[1] );
+
+ x[2] = CL_add( y[2], y[3] );
+ x[7] = CL_sub( y[2], y[3] );
+
+ x[4] = CL_add( y[4], y[5] );
+ x[9] = CL_sub( y[4], y[5] );
+
+ x[6] = CL_add( y[6], y[7] );
+ x[1] = CL_sub( y[6], y[7] );
+
+ x[8] = CL_add( y[8], y[9] );
+ x[3] = CL_sub( y[8], y[9] );
return;
}
static void fft_len15(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x ) /* in-place length-15 complex FFT on packed cmplx data: three radix-5 sub-DFTs plus radix-3 recombination */
{
- Word32 t;
- Word32 r1, r2, r3, r4;
- Word32 s1, s2, s3, s4;
- Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09;
- Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19;
- Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29;
- Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
- Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
- Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
-
- x00 = re[s * 0];
- x01 = im[s * 0];
- x02 = re[s * 3];
- x03 = im[s * 3];
- x04 = re[s * 6];
- x05 = im[s * 6];
- x06 = re[s * 9];
- x07 = im[s * 9];
- x08 = re[s * 12];
- x09 = im[s * 12];
-
- x10 = re[s * 5];
- x11 = im[s * 5];
- x12 = re[s * 8];
- x13 = im[s * 8];
- x14 = re[s * 11];
- x15 = im[s * 11];
- x16 = re[s * 14];
- x17 = im[s * 14];
- x18 = re[s * 2];
- x19 = im[s * 2];
-
- x20 = re[s * 10];
- x21 = im[s * 10];
- x22 = re[s * 13];
- x23 = im[s * 13];
- x24 = re[s * 1];
- x25 = im[s * 1];
- x26 = re[s * 4];
- x27 = im[s * 4];
- x28 = re[s * 7];
- x29 = im[s * 7];
-
- r1 = L_add( x02, x08 );
- r4 = L_sub( x02, x08 );
- r3 = L_add( x04, x06 );
- r2 = L_sub( x04, x06 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y00 = L_add( x00, r1 );
- r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x03, x09 );
- s4 = L_sub( x03, x09 );
- s3 = L_add( x05, x07 );
- s2 = L_sub( x05, x07 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y01 = L_add( x01, s1 );
- s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y02 = L_add( r1, s2 );
- y08 = L_sub( r1, s2 );
- y04 = L_sub( r3, s4 );
- y06 = L_add( r3, s4 );
-
- y03 = L_sub( s1, r2 );
- y09 = L_add( s1, r2 );
- y05 = L_add( s3, r4 );
- y07 = L_sub( s3, r4 );
-
- r1 = L_add( x12, x18 );
- r4 = L_sub( x12, x18 );
- r3 = L_add( x14, x16 );
- r2 = L_sub( x14, x16 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y10 = L_add( x10, r1 );
- r1 = L_add( y10, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x13, x19 );
- s4 = L_sub( x13, x19 );
- s3 = L_add( x15, x17 );
- s2 = L_sub( x15, x17 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y11 = L_add( x11, s1 );
- s1 = L_add( y11, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y12 = L_add( r1, s2 );
- y18 = L_sub( r1, s2 );
- y14 = L_sub( r3, s4 );
- y16 = L_add( r3, s4 );
-
- y13 = L_sub( s1, r2 );
- y19 = L_add( s1, r2 );
- y15 = L_add( s3, r4 );
- y17 = L_sub( s3, r4 );
-
- r1 = L_add( x22, x28 );
- r4 = L_sub( x22, x28 );
- r3 = L_add( x24, x26 );
- r2 = L_sub( x24, x26 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y20 = L_add( x20, r1 );
- r1 = L_add( y20, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x23, x29 );
- s4 = L_sub( x23, x29 );
- s3 = L_add( x25, x27 );
- s2 = L_sub( x25, x27 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y21 = L_add( x21, s1 );
- s1 = L_add( y21, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y22 = L_add( r1, s2 );
- y28 = L_sub( r1, s2 );
- y24 = L_sub( r3, s4 );
- y26 = L_add( r3, s4 );
-
- y23 = L_sub( s1, r2 );
- y29 = L_add( s1, r2 );
- y25 = L_add( s3, r4 );
- y27 = L_sub( s3, r4 );
-
- r1 = L_add( y10, y20 );
- r2 = Mpy_32_16_1(L_sub( y10, y20 ), FFT_C31 );
- re[s * 0] = L_add( y00, r1 );
- r1 = L_sub( y00, L_shr(r1, 1));
-
- s1 = L_add( y11, y21 );
- s2 = Mpy_32_16_1( L_sub( y11, y21 ), FFT_C31 );
- im[s * 0] = L_add( y01, s1 );
- s1 = L_sub( y01, L_shr(s1, 1) );
-
- re[s * 10] = L_sub( r1, s2 );
- re[s * 5] = L_add( r1, s2 );
- im[s * 10] = L_add( s1, r2 );
- im[s * 5] = L_sub( s1, r2 );
-
- r1 = L_add( y12, y22 );
- r2 = Mpy_32_16_1(L_sub( y12, y22 ), FFT_C31 );
- re[s * 6] = L_add( y02, r1 );
- r1 = L_sub( y02, L_shr(r1, 1) );
-
- s1 = L_add( y13, y23 );
- s2 = Mpy_32_16_1( L_sub( y13, y23 ), FFT_C31 );
- im[s * 6] = L_add( y03, s1 );
- s1 = L_sub( y03, L_shr(s1, 1) );
-
- re[s * 1] = L_sub( r1, s2 );
- re[s * 11] = L_add( r1, s2 );
- im[s * 1] = L_add( s1, r2 );
- im[s * 11] = L_sub( s1, r2 );
-
- r1 = L_add( y14, y24 );
- r2 = Mpy_32_16_1(L_sub( y14, y24 ), FFT_C31 );
- re[s * 12] = L_add( y04, r1 );
- r1 = L_sub( y04, L_shr(r1, 1) );
-
- s1 = L_add( y15, y25 );
- s2 = Mpy_32_16_1( L_sub( y15, y25 ), FFT_C31 );
- im[s * 12] = L_add( y05, s1 );
- s1 = L_sub( y05, L_shr(s1, 1) );
-
- re[s * 7] = L_sub( r1, s2 );
- re[s * 2] = L_add( r1, s2 );
- im[s * 7] = L_add( s1, r2 );
- im[s * 2] = L_sub( s1, r2 );
-
- r1 = L_add( y16, y26 );
- r2 = Mpy_32_16_1( L_sub( y16, y26 ), FFT_C31 );
- re[s * 3] = L_add( y06, r1 );
- r1 = L_sub( y06, L_shr(r1, 1) );
-
- s1 = L_add( y17, y27 );
- s2 = Mpy_32_16_1(L_sub( y17, y27 ), FFT_C31 );
- im[s * 3] = L_add( y07, s1 );
- s1 = L_sub( y07, L_shr(s1, 1) );
-
- re[s * 13] = L_sub( r1, s2 );
- re[s * 8] = L_add( r1, s2 );
- im[s * 13] = L_add( s1, r2 );
- im[s * 8] = L_sub( s1, r2 );
-
- r1 = L_add( y18, y28 );
- r2 = Mpy_32_16_1( L_sub( y18, y28 ), FFT_C31 );
- re[s * 9] = L_add( y08, r1 );
- r1 = L_sub( y08, L_shr(r1, 1) );
-
- s1 = L_add( y19, y29 );
- s2 = Mpy_32_16_1( L_sub( y19, y29 ), FFT_C31 );
- im[s * 9] = L_add( y09, s1 );
- s1 = L_sub( y09, L_shr(s1, 1));
-
- re[s * 4] = L_sub( r1, s2 );
- re[s * 14] = L_add( r1, s2 );
- im[s * 4] = L_add( s1, r2 );
- im[s * 14] = L_sub( s1, r2 );
+ cmplx t;
+ cmplx s[5];
+ cmplx y[15];
+
+ s[0] = CL_add( x[3], x[12] ); /* radix-5 sub-DFT on x[0,3,6,9,12] */
+ s[3] = CL_sub( x[3], x[12] );
+ s[2] = CL_add( x[6], x[9] );
+ s[1] = CL_sub( x[6], x[9] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[0] = CL_add( x[0], s[0] );
+ s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[1] = CL_msu_j( s[0], s[1] ); /* NOTE(review): CL_mac_j/CL_msu_j taken as a + j*b / a - j*b, folding the old re/im cross terms — confirm against CL basop definitions */
+ y[4] = CL_mac_j( s[0], s[1] );
+ y[2] = CL_mac_j( s[2], s[3] );
+ y[3] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( x[8], x[2] ); /* radix-5 sub-DFT on x[5,8,11,14,2] */
+ s[3] = CL_sub( x[8], x[2] );
+ s[2] = CL_add( x[11], x[14] );
+ s[1] = CL_sub( x[11], x[14] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[5] = CL_add( x[5], s[0] );
+ s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[6] = CL_msu_j( s[0], s[1] );
+ y[9] = CL_mac_j( s[0], s[1] );
+ y[7] = CL_mac_j( s[2], s[3] );
+ y[8] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( x[13], x[7] ); /* radix-5 sub-DFT on x[10,13,1,4,7] */
+ s[3] = CL_sub( x[13], x[7] );
+ s[2] = CL_add( x[1], x[4] );
+ s[1] = CL_sub( x[1], x[4] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[10] = CL_add( x[10], s[0] );
+ s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[11] = CL_msu_j( s[0], s[1] );
+ y[14] = CL_mac_j( s[0], s[1] );
+ y[12] = CL_mac_j( s[2], s[3] );
+ y[13] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( y[5], y[10] ); /* radix-3 recombination: FFT_C31 weights the difference term, CL_shr halves the sum */
+ s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 );
+ x[0] = CL_add( y[0], s[0] );
+ s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) );
+
+ x[10] = CL_mac_j( s[0], s[1] );
+ x[5] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[6], y[11] );
+ s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 );
+ x[6] = CL_add( y[1], s[0] );
+ s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) );
+
+ x[1] = CL_mac_j( s[0], s[1] );
+ x[11] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[7], y[12] );
+ s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 );
+ x[12] = CL_add( y[2], s[0] );
+ s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) );
+
+ x[7] = CL_mac_j( s[0], s[1] );
+ x[2] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[8], y[13] );
+ s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 );
+ x[3] = CL_add( y[3], s[0] );
+ s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) );
+
+ x[13] = CL_mac_j( s[0], s[1] );
+ x[8] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[9], y[14] );
+ s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 );
+ x[9] = CL_add( y[4], s[0] );
+ s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) );
+
+ x[4] = CL_mac_j( s[0], s[1] );
+ x[14] = CL_msu_j( s[0], s[1] );
return;
}
static void fft_len16(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x ) /* in-place length-16 complex FFT (radix-4 x radix-4) on packed cmplx data; inputs pre-scaled by SCALEFACTOR16 */
{
- Word32 x0, x1, x2, x3, x4, x5, x6, x7;
- Word32 t0, t1, t2, t3, t4, t5, t6, t7;
- Word32 y00, y01, y02, y03, y04, y05, y06, y07;
- Word32 y08, y09, y10, y11, y12, y13, y14, y15;
- Word32 y16, y17, y18, y19, y20, y21, y22, y23;
- Word32 y24, y25, y26, y27, y28, y29, y30, y31;
-
- x0 = L_shr( re[s * 0], SCALEFACTOR16 );
- x1 = L_shr( im[s * 0], SCALEFACTOR16 );
- x2 = L_shr( re[s * 4], SCALEFACTOR16 );
- x3 = L_shr( im[s * 4], SCALEFACTOR16 );
- x4 = L_shr( re[s * 8], SCALEFACTOR16 );
- x5 = L_shr( im[s * 8], SCALEFACTOR16 );
- x6 = L_shr( re[s * 12], SCALEFACTOR16 );
- x7 = L_shr( im[s * 12], SCALEFACTOR16 );
-
- t0 = L_add( x0, x4 );
- t2 = L_sub( x0, x4 );
- t1 = L_add( x1, x5 );
- t3 = L_sub( x1, x5 );
- t4 = L_add( x2, x6 );
- t7 = L_sub( x2, x6 );
- t5 = L_add( x7, x3 );
- t6 = L_sub( x7, x3 );
-
- y00 = L_add( t0, t4 );
- y01 = L_add( t1, t5 );
- y02 = L_sub( t2, t6 );
- y03 = L_sub( t3, t7 );
- y04 = L_sub( t0, t4 );
- y05 = L_sub( t1, t5 );
- y06 = L_add( t2, t6 );
- y07 = L_add( t3, t7 );
-
- x0 = L_shr( re[s * 1], SCALEFACTOR16 );
- x1 = L_shr( im[s * 1], SCALEFACTOR16 );
- x2 = L_shr( re[s * 5], SCALEFACTOR16 );
- x3 = L_shr( im[s * 5], SCALEFACTOR16 );
- x4 = L_shr( re[s * 9], SCALEFACTOR16 );
- x5 = L_shr( im[s * 9], SCALEFACTOR16 );
- x6 = L_shr( re[s * 13], SCALEFACTOR16 );
- x7 = L_shr( im[s * 13], SCALEFACTOR16 );
-
- t0 = L_add( x0, x4 );
- t2 = L_sub( x0, x4 );
- t1 = L_add( x1, x5 );
- t3 = L_sub( x1, x5 );
- t4 = L_add( x2, x6 );
- t7 = L_sub( x2, x6 );
- t5 = L_add( x7, x3 );
- t6 = L_sub( x7, x3 );
-
- y08 = L_add( t0, t4 );
- y09 = L_add( t1, t5 );
- y10 = L_sub( t2, t6 );
- y11 = L_sub( t3, t7 );
- y12 = L_sub( t0, t4 );
- y13 = L_sub( t1, t5 );
- y14 = L_add( t2, t6 );
- y15 = L_add( t3, t7 );
-
- x0 = L_shr( re[s * 2], SCALEFACTOR16 );
- x1 = L_shr( im[s * 2], SCALEFACTOR16 );
- x2 = L_shr( re[s * 6], SCALEFACTOR16 );
- x3 = L_shr( im[s * 6], SCALEFACTOR16 );
- x4 = L_shr( re[s * 10], SCALEFACTOR16 );
- x5 = L_shr( im[s * 10], SCALEFACTOR16 );
- x6 = L_shr( re[s * 14], SCALEFACTOR16 );
- x7 = L_shr( im[s * 14], SCALEFACTOR16 );
-
- t0 = L_add( x0, x4 );
- t2 = L_sub( x0, x4 );
- t1 = L_add( x1, x5 );
- t3 = L_sub( x1, x5 );
- t4 = L_add( x2, x6 );
- t7 = L_sub( x2, x6 );
- t5 = L_add( x7, x3 );
- t6 = L_sub( x7, x3 );
-
- y16 = L_add( t0, t4 );
- y17 = L_add( t1, t5 );
- y18 = L_sub( t2, t6 );
- y19 = L_sub( t3, t7 );
- y20 = L_sub( t1, t5 );
- y21 = L_sub( t4, t0 );
- y22 = L_add( t2, t6 );
- y23 = L_add( t3, t7 );
-
- x0 = L_shr( re[s * 3], SCALEFACTOR16 );
- x1 = L_shr( im[s * 3], SCALEFACTOR16 );
- x2 = L_shr( re[s * 7], SCALEFACTOR16 );
- x3 = L_shr( im[s * 7], SCALEFACTOR16 );
- x4 = L_shr( re[s * 11], SCALEFACTOR16 );
- x5 = L_shr( im[s * 11], SCALEFACTOR16 );
- x6 = L_shr( re[s * 15], SCALEFACTOR16 );
- x7 = L_shr( im[s * 15], SCALEFACTOR16 );
-
- t0 = L_add( x0, x4 );
- t2 = L_sub( x0, x4 );
- t1 = L_add( x1, x5 );
- t3 = L_sub( x1, x5 );
- t4 = L_add( x2, x6 );
- t7 = L_sub( x2, x6 );
- t5 = L_add( x7, x3 );
- t6 = L_sub( x7, x3 );
-
- y24 = L_add( t0, t4 );
- y25 = L_add( t1, t5 );
- y26 = L_sub( t2, t6 );
- y27 = L_sub( t3, t7 );
- y28 = L_sub( t0, t4 );
- y29 = L_sub( t1, t5 );
- y30 = L_add( t2, t6 );
- y31 = L_add( t3, t7 );
-
- x0 = Mpy_32_16_1( y22, FFT_C162 );
- x1 = Mpy_32_16_1( y23, FFT_C162 );
- y22 = L_sub( x0, x1 );
- y23 = L_add( x0, x1 );
-
- x0 = Mpy_32_16_1( y28, FFT_C162 );
- x1 = Mpy_32_16_1( y29, FFT_C162 );
- y28 = L_sub( x0, x1 );
- y29 = L_add( x0, x1 );
-
- x0 = Mpy_32_16_1( y12, FFT_C161 );
- x1 = Mpy_32_16_1( y13, FFT_C161 );
- y12 = L_add( x0, x1 );
- y13 = L_sub( x1, x0 );
-
- x0 = Mpy_32_16_1( y18, FFT_C161 );
- x1 = Mpy_32_16_1( y19, FFT_C161 );
- y18 = L_add( x0, x1 );
- y19 = L_sub( x1, x0 );
-
- x0 = Mpy_32_16_1( y10, FFT_C163 );
- x1 = Mpy_32_16_1( y11, FFT_C166 );
- x2 = Mpy_32_16_1( y10, FFT_C166 );
- x3 = Mpy_32_16_1( y11, FFT_C163 );
- y10 = L_sub( x0, x1 );
- y11 = L_add( x2, x3 );
-
- x0 = Mpy_32_16_1( y14, FFT_C165 );
- x1 = Mpy_32_16_1( y15, FFT_C164 );
- x2 = Mpy_32_16_1( y14, FFT_C164 );
- x3 = Mpy_32_16_1( y15, FFT_C165 );
- y14 = L_sub( x0, x1 );
- y15 = L_add( x2, x3 );
-
- x0 = Mpy_32_16_1( y26, FFT_C165 );
- x1 = Mpy_32_16_1( y27, FFT_C164 );
- x2 = Mpy_32_16_1( y26, FFT_C164 );
- x3 = Mpy_32_16_1( y27, FFT_C165 );
- y26 = L_sub( x0, x1 );
- y27 = L_add( x2, x3 );
-
- x0 = Mpy_32_16_1( y30, FFT_C164 );
- x1 = Mpy_32_16_1( y31, FFT_C165 );
- x2 = Mpy_32_16_1( y30, FFT_C165 );
- x3 = Mpy_32_16_1( y31, FFT_C164 );
- y30 = L_sub( x0, x1 );
- y31 = L_add( x2, x3 );
-
- t0 = L_add( y00, y16 );
- t2 = L_sub( y00, y16 );
- t1 = L_add( y01, y17 );
- t3 = L_sub( y01, y17 );
- t4 = L_add( y08, y24 );
- t7 = L_sub( y08, y24 );
- t5 = L_add( y25, y09 );
- t6 = L_sub( y25, y09 );
-
- re[s * 0] = L_add( t0, t4 );
- im[s * 0] = L_add( t1, t5 );
- re[s * 4] = L_sub( t2, t6 );
- im[s * 4] = L_sub( t3, t7 );
- re[s * 8] = L_sub( t0, t4 );
- im[s * 8] = L_sub( t1, t5 );
- re[s * 12] = L_add( t2, t6 );
- im[s * 12] = L_add( t3, t7 );
-
- t0 = L_add( y02, y18 );
- t2 = L_sub( y02, y18 );
- t1 = L_add( y03, y19 );
- t3 = L_sub( y03, y19 );
- t4 = L_add( y10, y26 );
- t7 = L_sub( y10, y26 );
- t5 = L_add( y27, y11 );
- t6 = L_sub( y27, y11 );
-
- re[s * 1] = L_add( t0, t4 );
- im[s * 1] = L_add( t1, t5 );
- re[s * 5] = L_sub( t2, t6 );
- im[s * 5] = L_sub( t3, t7 );
- re[s * 9] = L_sub( t0, t4 );
- im[s * 9] = L_sub( t1, t5 );
- re[s * 13] = L_add( t2, t6 );
- im[s * 13] = L_add( t3, t7 );
-
- t0 = L_add( y04, y20 );
- t2 = L_sub( y04, y20 );
- t1 = L_add( y05, y21 );
- t3 = L_sub( y05, y21 );
- t4 = L_add( y12, y28 );
- t7 = L_sub( y12, y28 );
- t5 = L_add( y29, y13 );
- t6 = L_sub( y29, y13 );
-
- re[s * 2] = L_add( t0, t4 );
- im[s * 2] = L_add( t1, t5 );
- re[s * 6] = L_sub( t2, t6 );
- im[s * 6] = L_sub( t3, t7 );
- re[s * 10] = L_sub( t0, t4 );
- im[s * 10] = L_sub( t1, t5 );
- re[s * 14] = L_add( t2, t6 );
- im[s * 14] = L_add( t3, t7 );
-
- t0 = L_add( y06, y22 );
- t2 = L_sub( y06, y22 );
- t1 = L_add( y07, y23 );
- t3 = L_sub( y07, y23 );
- t4 = L_add( y14, y30 );
- t7 = L_sub( y14, y30 );
- t5 = L_add( y31, y15 );
- t6 = L_sub( y31, y15 );
-
- re[s * 3] = L_add( t0, t4 );
- im[s * 3] = L_add( t1, t5 );
- re[s * 7] = L_sub( t2, t6 );
- im[s * 7] = L_sub( t3, t7 );
- re[s * 11] = L_sub( t0, t4 );
- im[s * 11] = L_sub( t1, t5 );
- re[s * 15] = L_add( t2, t6 );
- im[s * 15] = L_add( t3, t7 );
+ cmplx s[4];
+ cmplx t[4];
+ cmplx y[16];
+
+ s[0] = CL_shr( x[0], SCALEFACTOR16 ); /* stage 1, group 0: inputs pre-shifted for fixed-point headroom */
+ s[1] = CL_shr( x[4], SCALEFACTOR16 );
+ s[2] = CL_shr( x[8], SCALEFACTOR16 );
+ s[3] = CL_shr( x[12], SCALEFACTOR16 );
+
+ t[0] = CL_add( s[0], s[2] );
+ t[1] = CL_sub( s[0], s[2] );
+ t[2] = CL_add( s[1], s[3] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); /* swap(conj(a)-conj(b)) appears to form j*(a-b), the radix-4 twiddle — confirm CL basop semantics */
+
+ y[0] = CL_add( t[0], t[2] );
+ y[1] = CL_sub( t[1], t[3] );
+ y[2] = CL_sub( t[0], t[2] );
+ y[3] = CL_add( t[1], t[3] );
+
+ s[0] = CL_shr( x[1], SCALEFACTOR16 ); /* stage 1, group 1 */
+ s[1] = CL_shr( x[5], SCALEFACTOR16 );
+ s[2] = CL_shr( x[9], SCALEFACTOR16 );
+ s[3] = CL_shr( x[13], SCALEFACTOR16 );
+
+ t[0] = CL_add( s[0], s[2] );
+ t[1] = CL_sub( s[0], s[2] );
+ t[2] = CL_add( s[1], s[3] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) );
+
+ y[4] = CL_add( t[0], t[2] );
+ y[5] = CL_sub( t[1], t[3] );
+ y[6] = CL_sub( t[0], t[2] );
+ y[7] = CL_add( t[1], t[3] );
+
+ s[0] = CL_shr( x[2], SCALEFACTOR16 ); /* stage 1, group 2 */
+ s[1] = CL_shr( x[6], SCALEFACTOR16 );
+ s[2] = CL_shr( x[10], SCALEFACTOR16 );
+ s[3] = CL_shr( x[14], SCALEFACTOR16 );
+
+ t[0] = CL_add( s[0], s[2] );
+ t[1] = CL_sub( s[0], s[2] );
+ t[2] = CL_add( s[1], s[3] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) );
+
+ y[8] = CL_add( t[0], t[2] );
+ y[9] = CL_sub( t[1], t[3] );
+ y[10] = CL_swap_real_imag( CL_sub( t[0], t[2] ) ); /* extra rotation folded in here, matching the old y20/y21 special case of this group */
+ y[10] = CL_conjugate( y[10] );
+ y[11] = CL_add( t[1], t[3] );
+
+ s[0] = CL_shr( x[3], SCALEFACTOR16 ); /* stage 1, group 3 */
+ s[1] = CL_shr( x[7], SCALEFACTOR16 );
+ s[2] = CL_shr( x[11], SCALEFACTOR16 );
+ s[3] = CL_shr( x[15], SCALEFACTOR16 );
+
+ t[0] = CL_add( s[0], s[2] );
+ t[1] = CL_sub( s[0], s[2] );
+ t[2] = CL_add( s[1], s[3] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) );
+
+ y[12] = CL_add( t[0], t[2] );
+ y[13] = CL_sub( t[1], t[3] );
+ y[14] = CL_sub( t[0], t[2] );
+ y[15] = CL_add( t[1], t[3] );
+
+ s[0] = CL_scale( y[11], FFT_C162 ); /* twiddle-factor rotations (FFT_C161..FFT_C166), replacing the old paired Mpy_32_16_1 real/imag products */
+ y[11] = CL_mac_j( s[0], s[0] );
+
+ s[0] = CL_scale( y[14], FFT_C162 );
+ y[14] = CL_mac_j( s[0], s[0] );
+
+ s[0] = CL_scale( y[6], FFT_C161 );
+ y[6] = CL_mac_j( s[0], s[0] );
+ y[6] = CL_swap_real_imag( y[6] );
+ y[6] = CL_conjugate( y[6] );
+
+ s[0] = CL_scale( y[9], FFT_C161 );
+ y[9] = CL_mac_j( s[0], s[0] );
+ y[9] = CL_swap_real_imag( y[9] );
+ y[9] = CL_conjugate( y[9] );
+
+ s[0] = CL_scale( y[5], FFT_C163 );
+ s[1] = CL_scale( y[5], FFT_C166 );
+ y[5] = CL_mac_j( s[0], s[1] );
+
+ s[0] = CL_scale( y[7], FFT_C165 );
+ s[1] = CL_scale( y[7], FFT_C164 );
+ y[7] = CL_mac_j( s[0], s[1] );
+
+ s[0] = CL_scale( y[13], FFT_C165 );
+ s[1] = CL_scale( y[13], FFT_C164 );
+ y[13] = CL_mac_j( s[0], s[1] );
+
+ s[0] = CL_scale( y[15], FFT_C164 );
+ s[1] = CL_scale( y[15], FFT_C165 );
+ y[15] = CL_mac_j( s[0], s[1] );
+
+ t[0] = CL_add( y[0], y[8] ); /* stage 2: radix-4 recombination, results written back to x */
+ t[1] = CL_sub( y[0], y[8] );
+ t[2] = CL_add( y[4], y[12] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[4] ), CL_conjugate( y[12] ) ) );
+
+ x[0] = CL_add( t[0], t[2] );
+ x[4] = CL_sub( t[1], t[3] );
+ x[8] = CL_sub( t[0], t[2] );
+ x[12] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[1], y[9] );
+ t[1] = CL_sub( y[1], y[9] );
+ t[2] = CL_add( y[5], y[13] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[5] ), CL_conjugate( y[13] ) ) );
+
+ x[1] = CL_add( t[0], t[2] );
+ x[5] = CL_sub( t[1], t[3] );
+ x[9] = CL_sub( t[0], t[2] );
+ x[13] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[2], y[10] );
+ t[1] = CL_sub( y[2], y[10] );
+ t[2] = CL_add( y[6], y[14] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[6] ), CL_conjugate( y[14] ) ) );
+
+ x[2] = CL_add( t[0], t[2] );
+ x[6] = CL_sub( t[1], t[3] );
+ x[10] = CL_sub( t[0], t[2] );
+ x[14] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[3], y[11] );
+ t[1] = CL_sub( y[3], y[11] );
+ t[2] = CL_add( y[7], y[15] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[7] ), CL_conjugate( y[15] ) ) );
+
+ x[3] = CL_add( t[0], t[2] );
+ x[7] = CL_sub( t[1], t[3] );
+ x[11] = CL_sub( t[0], t[2] );
+ x[15] = CL_add( t[1], t[3] );
return;
}
static void fft_len20_fx(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x )
{
- Word32 r1, r2, r3, r4;
- Word32 s1, s2, s3, s4;
- Word32 x0, x1, x2, x3, x4;
- Word32 t, t0, t1, t2, t3, t4, t5, t6, t7;
- Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
- Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
- Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
- Word32 y30, y31, y32, y33, y34, y35, y36, y37, y38, y39;
-
- x0 = L_shr( re[s * 0], SCALEFACTOR20 );
- x1 = L_shr( re[s * 16], SCALEFACTOR20 );
- x2 = L_shr( re[s * 12], SCALEFACTOR20 );
- x3 = L_shr( re[s * 8], SCALEFACTOR20 );
- x4 = L_shr( re[s * 4], SCALEFACTOR20 );
-
- r1 = L_add( x1, x4 );
- r4 = L_sub( x1, x4 );
- r3 = L_add( x2, x3 );
- r2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y00 = L_add( x0, r1 );
- r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- x0 = L_shr(im[s * 0], SCALEFACTOR20 );
- x1 = L_shr(im[s * 16], SCALEFACTOR20);
- x2 = L_shr(im[s * 12], SCALEFACTOR20);
- x3 = L_shr(im[s * 8], SCALEFACTOR20);
- x4 = L_shr(im[s * 4], SCALEFACTOR20);
-
- s1 = L_add( x1, x4 );
- s4 = L_sub( x1, x4 );
- s3 = L_add( x2, x3 );
- s2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y01 = L_add( x0, s1 );
- s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y08 = L_add( r1, s2 );
- y32 = L_sub( r1, s2 );
- y16 = L_sub( r3, s4 );
- y24 = L_add( r3, s4 );
-
- y09 = L_sub( s1, r2 );
- y33 = L_add( s1, r2 );
- y17 = L_add( s3, r4 );
- y25 = L_sub( s3, r4 );
-
- x0 = L_shr( re[s * 5], SCALEFACTOR20 );
- x1 = L_shr( re[s * 1], SCALEFACTOR20 );
- x2 = L_shr( re[s * 17], SCALEFACTOR20 );
- x3 = L_shr( re[s * 13], SCALEFACTOR20 );
- x4 = L_shr( re[s * 9], SCALEFACTOR20 );
-
- r1 = L_add( x1, x4 );
- r4 = L_sub( x1, x4 );
- r3 = L_add( x2, x3 );
- r2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y02 = L_add( x0, r1 );
- r1 = L_add( y02, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- x0 = L_shr( im[s * 5], SCALEFACTOR20 );
- x1 = L_shr( im[s * 1], SCALEFACTOR20 );
- x2 = L_shr( im[s * 17], SCALEFACTOR20 );
- x3 = L_shr( im[s * 13], SCALEFACTOR20 );
- x4 = L_shr( im[s * 9], SCALEFACTOR20 );
-
- s1 = L_add( x1, x4 );
- s4 = L_sub( x1, x4 );
- s3 = L_add( x2, x3 );
- s2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y03 = L_add( x0, s1 );
- s1 = L_add( y03, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y10 = L_add( r1, s2 );
- y34 = L_sub( r1, s2 );
- y18 = L_sub( r3, s4 );
- y26 = L_add( r3, s4 );
-
- y11 = L_sub( s1, r2 );
- y35 = L_add( s1, r2 );
- y19 = L_add( s3, r4 );
- y27 = L_sub( s3, r4 );
-
- x0 = L_shr( re[s * 10], SCALEFACTOR20 );
- x1 = L_shr( re[s * 6], SCALEFACTOR20 );
- x2 = L_shr( re[s * 2], SCALEFACTOR20 );
- x3 = L_shr( re[s * 18], SCALEFACTOR20 );
- x4 = L_shr( re[s * 14], SCALEFACTOR20 );
-
- r1 = L_add( x1, x4 );
- r4 = L_sub( x1, x4 );
- r3 = L_add( x2, x3 );
- r2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y04 = L_add( x0, r1 );
- r1 = L_add( y04, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- x0 = L_shr( im[s * 10], SCALEFACTOR20 );
- x1 = L_shr( im[s * 6], SCALEFACTOR20 );
- x2 = L_shr( im[s * 2], SCALEFACTOR20 );
- x3 = L_shr( im[s * 18], SCALEFACTOR20 );
- x4 = L_shr( im[s * 14], SCALEFACTOR20 );
-
- s1 = L_add( x1, x4 );
- s4 = L_sub( x1, x4 );
- s3 = L_add( x2, x3 );
- s2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y05 = L_add( x0, s1 );
- s1 = L_add( y05, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y12 = L_add( r1, s2 );
- y36 = L_sub( r1, s2 );
- y20 = L_sub( r3, s4 );
- y28 = L_add( r3, s4 );
-
- y13 = L_sub( s1, r2 );
- y37 = L_add( s1, r2 );
- y21 = L_add( s3, r4 );
- y29 = L_sub( s3, r4 );
-
- x0 = L_shr( re[s * 15], SCALEFACTOR20 );
- x1 = L_shr( re[s * 11], SCALEFACTOR20 );
- x2 = L_shr( re[s * 7], SCALEFACTOR20 );
- x3 = L_shr( re[s * 3], SCALEFACTOR20 );
- x4 = L_shr( re[s * 19], SCALEFACTOR20 );
-
- r1 = L_add( x1, x4 );
- r4 = L_sub( x1, x4 );
- r3 = L_add( x2, x3 );
- r2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y06 = L_add( x0, r1 );
- r1 = L_add( y06, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- x0 = L_shr( im[s * 15], SCALEFACTOR20 );
- x1 = L_shr( im[s * 11], SCALEFACTOR20 );
- x2 = L_shr( im[s * 7], SCALEFACTOR20 );
- x3 = L_shr( im[s * 3], SCALEFACTOR20 );
- x4 = L_shr( im[s * 19], SCALEFACTOR20 );
-
- s1 = L_add( x1, x4 );
- s4 = L_sub( x1, x4 );
- s3 = L_add( x2, x3 );
- s2 = L_sub( x2, x3 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y07 = L_add( x0, s1 );
- s1 = L_add( y07, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y14 = L_add( r1, s2 );
- y38 = L_sub( r1, s2 );
- y22 = L_sub( r3, s4 );
- y30 = L_add( r3, s4 );
-
- y15 = L_sub( s1, r2 );
- y39 = L_add( s1, r2 );
- y23 = L_add( s3, r4 );
- y31 = L_sub( s3, r4 );
-
- t0 = L_add( y00, y04 );
- t2 = L_sub( y00, y04 );
- t1 = L_add( y01, y05 );
- t3 = L_sub( y01, y05 );
- t4 = L_add( y02, y06 );
- t7 = L_sub( y02, y06 );
- t5 = L_add( y07, y03 );
- t6 = L_sub( y07, y03 );
-
- re[s * 0] = L_add( t0, t4 );
- im[s * 0] = L_add( t1, t5 );
- re[s * 5] = L_sub( t2, t6 );
- im[s * 5] = L_sub( t3, t7 );
- re[s * 10] = L_sub( t0, t4 );
- im[s * 10] = L_sub( t1, t5 );
- re[s * 15] = L_add( t2, t6 );
- im[s * 15] = L_add( t3, t7 );
-
- t0 = L_add( y08, y12 );
- t2 = L_sub( y08, y12 );
- t1 = L_add( y09, y13 );
- t3 = L_sub( y09, y13 );
- t4 = L_add( y10, y14 );
- t7 = L_sub( y10, y14 );
- t5 = L_add( y15, y11 );
- t6 = L_sub( y15, y11 );
-
- re[s * 4] = L_add( t0, t4 );
- im[s * 4] = L_add( t1, t5 );
- re[s * 9] = L_sub( t2, t6 );
- im[s * 9] = L_sub( t3, t7 );
- re[s * 14] = L_sub( t0, t4 );
- im[s * 14] = L_sub( t1, t5 );
- re[s * 19] = L_add( t2, t6 );
- im[s * 19] = L_add( t3, t7 );
-
- t0 = L_add( y16, y20 );
- t2 = L_sub( y16, y20 );
- t1 = L_add( y17, y21 );
- t3 = L_sub( y17, y21 );
- t4 = L_add( y18, y22 );
- t7 = L_sub( y18, y22 );
- t5 = L_add( y23, y19 );
- t6 = L_sub( y23, y19 );
-
- re[s * 8] = L_add( t0, t4 );
- im[s * 8] = L_add( t1, t5 );
- re[s * 13] = L_sub( t2, t6 );
- im[s * 13] = L_sub( t3, t7 );
- re[s * 18] = L_sub( t0, t4 );
- im[s * 18] = L_sub( t1, t5 );
- re[s * 3] = L_add( t2, t6 );
- im[s * 3] = L_add( t3, t7 );
-
- t0 = L_add( y24, y28 );
- t2 = L_sub( y24, y28 );
- t1 = L_add( y25, y29 );
- t3 = L_sub( y25, y29 );
- t4 = L_add( y26, y30 );
- t7 = L_sub( y26, y30 );
- t5 = L_add( y31, y27 );
- t6 = L_sub( y31, y27 );
-
- re[s * 12] = L_add( t0, t4 );
- im[s * 12] = L_add( t1, t5 );
- re[s * 17] = L_sub( t2, t6 );
- im[s * 17] = L_sub( t3, t7 );
- re[s * 2] = L_sub( t0, t4 );
- im[s * 2] = L_sub( t1, t5 );
- re[s * 7] = L_add( t2, t6 );
- im[s * 7] = L_add( t3, t7 );
-
- t0 = L_add( y32, y36 );
- t2 = L_sub( y32, y36 );
- t1 = L_add( y33, y37 );
- t3 = L_sub( y33, y37 );
- t4 = L_add( y34, y38 );
- t7 = L_sub( y34, y38 );
- t5 = L_add( y39, y35 );
- t6 = L_sub( y39, y35 );
-
- re[s * 16] = L_add( t0, t4 );
- im[s * 16] = L_add( t1, t5 );
- re[s * 1] = L_sub( t2, t6 );
- im[s * 1] = L_sub( t3, t7 );
- re[s * 6] = L_sub( t0, t4 );
- im[s * 6] = L_sub( t1, t5 );
- re[s * 11] = L_add( t2, t6 );
- im[s * 11] = L_add( t3, t7 );
+ cmplx s[4];
+ cmplx xx[5];
+ cmplx t;
+ cmplx tt[4];
+ cmplx y[20];
+
+ xx[0] = CL_shr( x[0], SCALEFACTOR20 );
+ xx[1] = CL_shr( x[16], SCALEFACTOR20 );
+ xx[2] = CL_shr( x[12], SCALEFACTOR20 );
+ xx[3] = CL_shr( x[8], SCALEFACTOR20 );
+ xx[4] = CL_shr( x[4], SCALEFACTOR20 );
+
+ s[0] = CL_add( xx[1], xx[4] );
+ s[3] = CL_sub( xx[1], xx[4] );
+ s[2] = CL_add( xx[2], xx[3] );
+ s[1] = CL_sub( xx[2], xx[3] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[0] = CL_add( xx[0], s[0] );
+ s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[4] = CL_msu_j( s[0], s[1] );
+ y[16] = CL_mac_j( s[0], s[1] );
+ y[8] = CL_mac_j( s[2], s[3] );
+ y[12] = CL_msu_j( s[2], s[3] );
+
+ xx[0] = CL_shr( x[5], SCALEFACTOR20 );
+ xx[1] = CL_shr( x[1], SCALEFACTOR20 );
+ xx[2] = CL_shr( x[17], SCALEFACTOR20 );
+ xx[3] = CL_shr( x[13], SCALEFACTOR20 );
+ xx[4] = CL_shr( x[9], SCALEFACTOR20 );
+
+ s[0] = CL_add( xx[1], xx[4] );
+ s[3] = CL_sub( xx[1], xx[4] );
+ s[2] = CL_add( xx[2], xx[3] );
+ s[1] = CL_sub( xx[2], xx[3] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[1] = CL_add( xx[0], s[0] );
+ s[0] = CL_add( y[1], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[5] = CL_msu_j( s[0], s[1] );
+ y[17] = CL_mac_j( s[0], s[1] );
+ y[9] = CL_mac_j( s[2], s[3] );
+ y[13] = CL_msu_j( s[2], s[3] );
+
+ xx[0] = CL_shr( x[10], SCALEFACTOR20 );
+ xx[1] = CL_shr( x[6], SCALEFACTOR20 );
+ xx[2] = CL_shr( x[2], SCALEFACTOR20 );
+ xx[3] = CL_shr( x[18], SCALEFACTOR20 );
+ xx[4] = CL_shr( x[14], SCALEFACTOR20 );
+
+ s[0] = CL_add( xx[1], xx[4] );
+ s[3] = CL_sub( xx[1], xx[4] );
+ s[2] = CL_add( xx[2], xx[3] );
+ s[1] = CL_sub( xx[2], xx[3] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[2] = CL_add( xx[0], s[0] );
+ s[0] = CL_add( y[2], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[6] = CL_msu_j( s[0], s[1] );
+ y[18] = CL_mac_j( s[0], s[1] );
+ y[10] = CL_mac_j( s[2], s[3] );
+ y[14] = CL_msu_j( s[2], s[3] );
+
+ xx[0] = CL_shr( x[15], SCALEFACTOR20 );
+ xx[1] = CL_shr( x[11], SCALEFACTOR20 );
+ xx[2] = CL_shr( x[7], SCALEFACTOR20 );
+ xx[3] = CL_shr( x[3], SCALEFACTOR20 );
+ xx[4] = CL_shr( x[19], SCALEFACTOR20 );
+
+ s[0] = CL_add( xx[1], xx[4] );
+ s[3] = CL_sub( xx[1], xx[4] );
+ s[2] = CL_add( xx[2], xx[3] );
+ s[1] = CL_sub( xx[2], xx[3] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[3] = CL_add( xx[0], s[0] );
+ s[0] = CL_add( y[3], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[7] = CL_msu_j( s[0], s[1] );
+ y[19] = CL_mac_j( s[0], s[1] );
+ y[11] = CL_mac_j( s[2], s[3] );
+ y[15] = CL_msu_j( s[2], s[3] );
+
+ tt[0] = CL_add( y[0], y[2] );
+ tt[1] = CL_sub( y[0], y[2] );
+ tt[2] = CL_add( y[1], y[3] );
+ tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[1], y[3] ) ) );
+
+ x[0] = CL_add( tt[0], tt[2] );
+ x[5] = CL_sub( tt[1], tt[3] );
+ x[10] = CL_sub( tt[0], tt[2] );
+ x[15] = CL_add( tt[1], tt[3] );
+
+ tt[0] = CL_add( y[4], y[6] );
+ tt[1] = CL_sub( y[4], y[6] );
+ tt[2] = CL_add( y[5], y[7] );
+ tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[5], y[7] ) ) );
+
+ x[4] = CL_add( tt[0], tt[2] );
+ x[9] = CL_sub( tt[1], tt[3] );
+ x[14] = CL_sub( tt[0], tt[2] );
+ x[19] = CL_add( tt[1], tt[3] );
+
+ tt[0] = CL_add( y[8], y[10] );
+ tt[1] = CL_sub( y[8], y[10] );
+ tt[2] = CL_add( y[9], y[11] );
+ tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[9], y[11] ) ) );
+
+ x[8] = CL_add( tt[0], tt[2] );
+ x[13] = CL_sub( tt[1], tt[3] );
+ x[18] = CL_sub( tt[0], tt[2] );
+ x[3] = CL_add( tt[1], tt[3] );
+
+ tt[0] = CL_add( y[12], y[14] );
+ tt[1] = CL_sub( y[12], y[14] );
+ tt[2] = CL_add( y[13], y[15] );
+ tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[13], y[15] ) ) );
+
+ x[12] = CL_add( tt[0], tt[2] );
+ x[17] = CL_sub( tt[1], tt[3] );
+ x[2] = CL_sub( tt[0], tt[2] );
+ x[7] = CL_add( tt[1], tt[3] );
+
+ tt[0] = CL_add( y[16], y[18] );
+ tt[1] = CL_sub( y[16], y[18] );
+ tt[2] = CL_add( y[17], y[19] );
+ tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[17], y[19] ) ) );
+
+ x[16] = CL_add( tt[0], tt[2] );
+ x[1] = CL_sub( tt[1], tt[3] );
+ x[6] = CL_sub( tt[0], tt[2] );
+ x[11] = CL_add( tt[1], tt[3] );
return;
}
static void fft_len30(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x )
{
- Word32 t;
- Word32 r1, r2, r3, r4;
- Word32 s1, s2, s3, s4;
- Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09;
- Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19;
- Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29;
-
- Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
- Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
- Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
-
- Word32 z00, z01, z02, z03, z04, z05, z06, z07, z08, z09;
- Word32 z10, z11, z12, z13, z14, z15, z16, z17, z18, z19;
- Word32 z20, z21, z22, z23, z24, z25, z26, z27, z28, z29;
- Word32 z30, z31, z32, z33, z34, z35, z36, z37, z38, z39;
- Word32 z40, z41, z42, z43, z44, z45, z46, z47, z48, z49;
- Word32 z50, z51, z52, z53, z54, z55, z56, z57, z58, z59;
-
- Word32 *rel, *reh, *iml, *imh;
-
- rel = &re[s * 0];
- reh = &re[s * 15];
- iml = &im[s * 0];
- imh = &im[s * 15];
-
- x00 = re[s * 0];
- x01 = im[s * 0];
- x02 = re[s * 18];
- x03 = im[s * 18];
- x04 = re[s * 6];
- x05 = im[s * 6];
- x06 = re[s * 24];
- x07 = im[s * 24];
- x08 = re[s * 12];
- x09 = im[s * 12];
-
- x10 = re[s * 20];
- x11 = im[s * 20];
- x12 = re[s * 8];
- x13 = im[s * 8];
- x14 = re[s * 26];
- x15 = im[s * 26];
- x16 = re[s * 14];
- x17 = im[s * 14];
- x18 = re[s * 2];
- x19 = im[s * 2];
-
- x20 = re[s * 10];
- x21 = im[s * 10];
- x22 = re[s * 28];
- x23 = im[s * 28];
- x24 = re[s * 16];
- x25 = im[s * 16];
- x26 = re[s * 4];
- x27 = im[s * 4];
- x28 = re[s * 22];
- x29 = im[s * 22];
-
- r1 = L_add( x02, x08 );
- r4 = L_sub( x02, x08 );
- r3 = L_add( x04, x06 );
- r2 = L_sub( x04, x06 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y00 = L_add( x00, r1 );
- r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x03, x09 );
- s4 = L_sub( x03, x09 );
- s3 = L_add( x05, x07 );
- s2 = L_sub( x05, x07 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y01 = L_add( x01, s1 );
- s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y02 = L_add( r1, s2 );
- y08 = L_sub( r1, s2 );
- y04 = L_sub( r3, s4 );
- y06 = L_add( r3, s4 );
-
- y03 = L_sub( s1, r2 );
- y09 = L_add( s1, r2 );
- y05 = L_add( s3, r4 );
- y07 = L_sub( s3, r4 );
-
- r1 = L_add( x12, x18 );
- r4 = L_sub( x12, x18 );
- r3 = L_add( x14, x16 );
- r2 = L_sub( x14, x16 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y10 = L_add( x10, r1 );
- r1 = L_add( y10, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ) , FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x13, x19 );
- s4 = L_sub( x13, x19 );
- s3 = L_add( x15, x17 );
- s2 = L_sub( x15, x17 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y11 = L_add( x11, s1 );
- s1 = L_add( y11, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y12 = L_add( r1, s2 );
- y18 = L_sub( r1, s2 );
- y14 = L_sub( r3, s4 );
- y16 = L_add( r3, s4 );
-
- y13 = L_sub( s1, r2 );
- y19 = L_add( s1, r2 );
- y15 = L_add( s3, r4 );
- y17 = L_sub( s3, r4 );
-
- r1 = L_add( x22, x28 );
- r4 = L_sub( x22, x28 );
- r3 = L_add( x24, x26 );
- r2 = L_sub( x24, x26 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y20 = L_add( x20, r1 );
- r1 = L_add( y20, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x23, x29 );
- s4 = L_sub( x23, x29 );
- s3 = L_add( x25, x27 );
- s2 = L_sub( x25, x27 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y21 = L_add( x21, s1 );
- s1 = L_add( y21, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y22 = L_add( r1, s2 );
- y28 = L_sub( r1, s2 );
- y24 = L_sub( r3, s4 );
- y26 = L_add( r3, s4 );
-
- y23 = L_sub( s1, r2 );
- y29 = L_add( s1, r2 );
- y25 = L_add( s3, r4 );
- y27 = L_sub( s3, r4 );
-
- r1 = L_add( y10, y20 );
- r2 = Mpy_32_16_1( L_sub( y10, y20 ), FFT_C31 );
- z00 = L_add( y00, r1 );
- r1 = L_sub( y00, L_shr(r1, 1) );
-
- s1 = L_add( y11, y21 );
- s2 = Mpy_32_16_1( L_sub( y11, y21 ), FFT_C31 );
- z01 = L_add( y01, s1 );
- s1 = L_sub( y01, L_shr(s1, 1) );
-
- z20 = L_sub( r1, s2 );
- z10 = L_add( r1, s2 );
- z21 = L_add( s1, r2 );
- z11 = L_sub( s1, r2 );
-
- r1 = L_add( y12, y22 );
- r2 = Mpy_32_16_1( L_sub( y12, y22 ), FFT_C31 );
- z12 = L_add( y02, r1 );
- r1 = L_sub( y02, L_shr(r1, 1) );
-
- s1 = L_add( y13, y23 );
- s2 = Mpy_32_16_1( L_sub( y13, y23 ), FFT_C31 );
- z13 = L_add( y03, s1 );
- s1 = L_sub( y03, L_shr(s1, 1));
-
- z02 = L_sub( r1, s2 );
- z22 = L_add( r1, s2 );
- z03 = L_add( s1, r2 );
- z23 = L_sub( s1, r2 );
-
- r1 = L_add( y14, y24 );
- r2 = Mpy_32_16_1( L_sub( y14, y24 ), FFT_C31 );
- z24 = L_add( y04, r1 );
- r1 = L_sub( y04, L_shr(r1, 1) );
-
- s1 = L_add( y15, y25 );
- s2 = Mpy_32_16_1( L_sub( y15, y25 ), FFT_C31 );
- z25 = L_add( y05, s1 );
- s1 = L_sub( y05, L_shr(s1, 1) );
-
- z14 = L_sub( r1, s2 );
- z04 = L_add( r1, s2 );
- z15 = L_add( s1, r2 );
- z05 = L_sub( s1, r2 );
-
- r1 = L_add( y16, y26 );
- r2 = Mpy_32_16_1( L_sub( y16, y26 ), FFT_C31 );
- z06 = L_add( y06, r1 );
- r1 = L_sub( y06, L_shr(r1, 1) );
-
- s1 = L_add( y17, y27 );
- s2 = Mpy_32_16_1( L_sub( y17, y27 ), FFT_C31 );
- z07 = L_add( y07, s1 );
- s1 = L_sub( y07, L_shr(s1, 1) );
-
- z26 = L_sub( r1, s2 );
- z16 = L_add( r1, s2 );
- z27 = L_add( s1, r2 );
- z17 = L_sub( s1, r2 );
-
- r1 = L_add( y18, y28 );
- r2 = Mpy_32_16_1( L_sub( y18, y28 ), FFT_C31 );
- z18 = L_add( y08, r1 );
- r1 = L_sub( y08, L_shr(r1, 1) );
-
- s1 = L_add( y19, y29 );
- s2 = Mpy_32_16_1( L_sub( y19, y29 ), FFT_C31 );
- z19 = L_add( y09, s1 );
- s1 = L_sub(y09, L_shr(s1, 1));
-
- z08 = L_sub( r1, s2 );
- z28 = L_add( r1, s2 );
- z09 = L_add( s1, r2 );
- z29 = L_sub( s1, r2 );
-
- x00 = re[s * 15];
- x01 = im[s * 15];
- x02 = re[s * 3];
- x03 = im[s * 3];
- x04 = re[s * 21];
- x05 = im[s * 21];
- x06 = re[s * 9];
- x07 = im[s * 9];
- x08 = re[s * 27];
- x09 = im[s * 27];
-
- x10 = re[s * 5];
- x11 = im[s * 5];
- x12 = re[s * 23];
- x13 = im[s * 23];
- x14 = re[s * 11];
- x15 = im[s * 11];
- x16 = re[s * 29];
- x17 = im[s * 29];
- x18 = re[s * 17];
- x19 = im[s * 17];
-
- x20 = re[s * 25];
- x21 = im[s * 25];
- x22 = re[s * 13];
- x23 = im[s * 13];
- x24 = re[s * 1];
- x25 = im[s * 1];
- x26 = re[s * 19];
- x27 = im[s * 19];
- x28 = re[s * 7];
- x29 = im[s * 7];
-
- r1 = L_add( x02, x08 );
- r4 = L_sub( x02, x08 );
- r3 = L_add( x04, x06 );
- r2 = L_sub( x04, x06 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y00 = L_add( x00, r1 );
- r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x03, x09 );
- s4 = L_sub( x03, x09 );
- s3 = L_add( x05, x07 );
- s2 = L_sub( x05, x07 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y01 = L_add( x01, s1 );
- s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y02 = L_add( r1, s2 );
- y08 = L_sub( r1, s2 );
- y04 = L_sub( r3, s4 );
- y06 = L_add( r3, s4 );
-
- y03 = L_sub( s1, r2 );
- y09 = L_add( s1, r2 );
- y05 = L_add( s3, r4 );
- y07 = L_sub( s3, r4 );
-
- r1 = L_add( x12, x18 );
- r4 = L_sub( x12, x18 );
- r3 = L_add( x14, x16 );
- r2 = L_sub( x14, x16 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y10 = L_add( x10, r1 );
- r1 = L_add( y10, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x13, x19 );
- s4 = L_sub( x13, x19 );
- s3 = L_add( x15, x17 );
- s2 = L_sub( x15, x17 );
- t = Mpy_32_16_1(L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y11 = L_add( x11, s1 );
- s1 = L_add( y11, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y12 = L_add( r1, s2 );
- y18 = L_sub( r1, s2 );
- y14 = L_sub( r3, s4 );
- y16 = L_add( r3, s4 );
-
- y13 = L_sub( s1, r2 );
- y19 = L_add( s1, r2 );
- y15 = L_add( s3, r4 );
- y17 = L_sub( s3, r4 );
-
- r1 = L_add( x22, x28 );
- r4 = L_sub( x22, x28 );
- r3 = L_add( x24, x26 );
- r2 = L_sub( x24, x26 );
- t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 );
- r1 = L_add( r1, r3 );
- y20 = L_add( x20, r1 );
- r1 = L_add( y20, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) );
- r3 = L_sub( r1, t );
- r1 = L_add( r1, t );
- t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 );
- r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) );
- r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) );
-
- s1 = L_add( x23, x29 );
- s4 = L_sub( x23, x29 );
- s3 = L_add( x25, x27 );
- s2 = L_sub( x25, x27 );
- t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 );
- s1 = L_add( s1, s3 );
- y21 = L_add( x21, s1 );
- s1 = L_add( y21, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) );
- s3 = L_sub( s1, t );
- s1 = L_add( s1, t );
- t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 );
- s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) );
- s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) );
-
- y22 = L_add( r1, s2 );
- y28 = L_sub( r1, s2 );
- y24 = L_sub( r3, s4 );
- y26 = L_add( r3, s4 );
-
- y23 = L_sub( s1, r2 );
- y29 = L_add( s1, r2 );
- y25 = L_add( s3, r4 );
- y27 = L_sub( s3, r4 );
-
- r1 = L_add( y10, y20 );
- r2 = Mpy_32_16_1( L_sub( y10, y20 ), FFT_C31 );
- z30 = L_add( y00, r1 );
- r1 = L_sub( y00, L_shr(r1, 1) );
-
- s1 = L_add( y11, y21 );
- s2 = Mpy_32_16_1( L_sub( y11, y21 ), FFT_C31 );
- z31 = L_add( y01, s1 );
- s1 = L_sub( y01, L_shr(s1, 1) );
-
- z50 = L_sub( r1, s2 );
- z40 = L_add( r1, s2 );
- z51 = L_add( s1, r2 );
- z41 = L_sub( s1, r2 );
-
- r1 = L_add( y12, y22 );
- r2 = Mpy_32_16_1( L_sub( y12, y22 ), FFT_C31 );
- z42 = L_add( y02, r1 );
- r1 = L_sub( y02, L_shr(r1, 1) );
-
- s1 = L_add( y13, y23 );
- s2 = Mpy_32_16_1( L_sub( y13, y23 ), FFT_C31 );
- z43 = L_add( y03, s1 );
- s1 = L_sub( y03, L_shr(s1, 1) );
-
- z32 = L_sub( r1, s2 );
- z52 = L_add( r1, s2 );
- z33 = L_add( s1, r2 );
- z53 = L_sub( s1, r2 );
-
- r1 = L_add( y14, y24 );
- r2 = Mpy_32_16_1( L_sub( y14, y24 ), FFT_C31 );
- z54 = L_add( y04, r1 );
- r1 = L_sub( y04, L_shr(r1, 1) );
-
- s1 = L_add( y15, y25 );
- s2 = Mpy_32_16_1( L_sub( y15, y25 ), FFT_C31 );
- z55 = L_add( y05, s1 );
- s1 = L_sub( y05, L_shr(s1, 1) );
-
- z44 = L_sub( r1, s2 );
- z34 = L_add( r1, s2 );
- z45 = L_add( s1, r2 );
- z35 = L_sub( s1, r2 );
-
- r1 = L_add( y16, y26 );
- r2 = Mpy_32_16_1( L_sub( y16, y26 ), FFT_C31 );
- z36 = L_add( y06, r1 );
- r1 = L_sub( y06, L_shr(r1, 1) );
-
- s1 = L_add( y17, y27 );
- s2 = Mpy_32_16_1( L_sub( y17, y27 ), FFT_C31 );
- z37 = L_add( y07, s1 );
- s1 = L_sub( y07, L_shr(s1, 1) );
-
- z56 = L_sub( r1, s2 );
- z46 = L_add( r1, s2 );
- z57 = L_add( s1, r2 );
- z47 = L_sub( s1, r2 );
-
- r1 = L_add( y18, y28 );
- r2 = Mpy_32_16_1( L_sub( y18, y28 ), FFT_C31 );
- z48 = L_add( y08, r1 );
- r1 = L_sub( y08, L_shr(r1, 1) );
-
- s1 = L_add( y19, y29 );
- s2 = Mpy_32_16_1( L_sub( y19, y29 ), FFT_C31 );
- z49 = L_add( y09, s1 );
- s1 = L_sub( y09, L_shr(s1, 1) );
-
- z38 = L_sub( r1, s2 );
- z58 = L_add( r1, s2 );
- z39 = L_add( s1, r2 );
- z59 = L_sub( s1, r2 );
-
- r1 = z00;
- r2 = z30;
- r3 = z01;
- r4 = z31;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z16;
- r2 = z46;
- r3 = z17;
- r4 = z47;
- *reh = L_add( r1, r2 );
- *rel = L_sub( r1, r2 );
- *imh = L_add( r3, r4 );
- *iml = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z02;
- r2 = z32;
- r3 = z03;
- r4 = z33;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z18;
- r2 = z48;
- r3 = z19;
- r4 = z49;
- *reh = L_add( r1, r2 );
- *rel = L_sub( r1, r2 );
- *imh = L_add( r3, r4 );
- *iml = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z04;
- r2 = z34;
- r3 = z05;
- r4 = z35;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z20;
- r2 = z50;
- r3 = z21;
- r4 = z51;
- *reh = L_add( r1, r2 );
- *rel = L_sub( r1, r2 );
- *imh = L_add( r3, r4 );
- *iml = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z06;
- r2 = z36;
- r3 = z07;
- r4 = z37;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z22;
- r2 = z52;
- r3 = z23;
- r4 = z53;
- *reh = L_add( r1, r2 );
- *rel = L_sub( r1, r2 );
- *imh = L_add( r3, r4 );
- *iml = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z08;
- r2 = z38;
- r3 = z09;
- r4 = z39;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z24;
- r2 = z54;
- r3 = z25;
- r4 = z55;
- *reh = L_add( r1, r2 );
- *rel = L_sub( r1, r2 );
- *imh = L_add( r3, r4 );
- *iml = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z10;
- r2 = z40;
- r3 = z11;
- r4 = z41;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z26;
- r2 = z56;
- r3 = z27;
- r4 = z57;
- *reh = L_add( r1, r2 );
- *rel = L_sub( r1, r2 );
- *imh = L_add( r3, r4 );
- *iml = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z12;
- r2 = z42;
- r3 = z13;
- r4 = z43;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z28;
- r2 = z58;
- r3 = z29;
- r4 = z59;
- *reh = L_add( r1, r2 );
- *rel = L_sub( r1, r2 );
- *imh = L_add( r3, r4 );
- *iml = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
-
- r1 = z14;
- r2 = z44;
- r3 = z15;
- r4 = z45;
- *rel = L_add( r1, r2 );
- *reh = L_sub( r1, r2 );
- *iml = L_add( r3, r4 );
- *imh = L_sub( r3, r4 );
- rel += s, reh += s, iml += s;
- imh += s;
+ cmplx t;
+ cmplx s[4];
+ cmplx xx[15];
+ cmplx y[15];
+ cmplx z[30];
+ cmplx *l, *h;
+
+ l = &x[0];
+ h = &x[15];
+
+ xx[0] = x[0];
+ xx[1] = x[18];
+ xx[2] = x[6];
+ xx[3] = x[24];
+ xx[4] = x[12];
+
+ xx[5] = x[20];
+ xx[6] = x[8];
+ xx[7] = x[26];
+ xx[8] = x[14];
+ xx[9] = x[2];
+
+ xx[10] = x[10];
+ xx[11] = x[28];
+ xx[12] = x[16];
+ xx[13] = x[4];
+ xx[14] = x[22];
+
+ s[0] = CL_add( xx[1], xx[4] );
+ s[3] = CL_sub( xx[1], xx[4] );
+ s[2] = CL_add( xx[2], xx[3] );
+ s[1] = CL_sub( xx[2], xx[3] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[0] = CL_add( xx[0], s[0] );
+ s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[1] = CL_msu_j( s[0], s[1] );
+ y[4] = CL_mac_j( s[0], s[1] );
+ y[2] = CL_mac_j( s[2], s[3] );
+ y[3] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( xx[6], xx[9] );
+ s[3] = CL_sub( xx[6], xx[9] );
+ s[2] = CL_add( xx[7], xx[8] );
+ s[1] = CL_sub( xx[7], xx[8] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[5] = CL_add( xx[5], s[0] );
+ s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[6] = CL_msu_j( s[0], s[1] );
+ y[9] = CL_mac_j( s[0], s[1] );
+ y[7] = CL_mac_j( s[2], s[3] );
+ y[8] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( xx[11], xx[14] );
+ s[3] = CL_sub( xx[11], xx[14] );
+ s[2] = CL_add( xx[12], xx[13] );
+ s[1] = CL_sub( xx[12], xx[13] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[10] = CL_add( xx[10], s[0] );
+ s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[11] = CL_msu_j( s[0], s[1] );
+ y[14] = CL_mac_j( s[0], s[1] );
+ y[12] = CL_mac_j( s[2], s[3] );
+ y[13] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( y[5], y[10] );
+ s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 );
+ z[0] = CL_add( y[0], s[0] );
+ s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) );
+
+ z[10] = CL_mac_j( s[0], s[1] );
+ z[5] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[6], y[11] );
+ s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 );
+ z[6] = CL_add( y[1], s[0] );
+ s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) );
+
+ z[1] = CL_mac_j( s[0], s[1] );
+ z[11] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[7], y[12] );
+ s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 );
+ z[12] = CL_add( y[2], s[0] );
+ s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) );
+
+ z[7] = CL_mac_j( s[0], s[1] );
+ z[2] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[8], y[13] );
+ s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 );
+ z[3] = CL_add( y[3], s[0] );
+ s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) );
+
+ z[13] = CL_mac_j( s[0], s[1] );
+ z[8] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[9], y[14] );
+ s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 );
+ z[9] = CL_add( y[4], s[0] );
+ s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) );
+
+ z[4] = CL_mac_j( s[0], s[1] );
+ z[14] = CL_msu_j( s[0], s[1] );
+
+ xx[0] = x[15];
+ xx[1] = x[3];
+ xx[2] = x[21];
+ xx[3] = x[9];
+ xx[4] = x[27];
+
+ xx[5] = x[5];
+ xx[6] = x[23];
+ xx[7] = x[11];
+ xx[8] = x[29];
+ xx[9] = x[17];
+
+ xx[10] = x[25];
+ xx[11] = x[13];
+ xx[12] = x[1];
+ xx[13] = x[19];
+ xx[14] = x[7];
+
+ s[0] = CL_add( xx[1], xx[4] );
+ s[3] = CL_sub( xx[1], xx[4] );
+ s[2] = CL_add( xx[2], xx[3] );
+ s[1] = CL_sub( xx[2], xx[3] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[0] = CL_add( xx[0], s[0] );
+ s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[1] = CL_msu_j( s[0], s[1] );
+ y[4] = CL_mac_j( s[0], s[1] );
+ y[2] = CL_mac_j( s[2], s[3] );
+ y[3] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( xx[6], xx[9] );
+ s[3] = CL_sub( xx[6], xx[9] );
+ s[2] = CL_add( xx[7], xx[8] );
+ s[1] = CL_sub( xx[7], xx[8] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[5] = CL_add( xx[5], s[0] );
+ s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[6] = CL_msu_j( s[0], s[1] );
+ y[9] = CL_mac_j( s[0], s[1] );
+ y[7] = CL_mac_j( s[2], s[3] );
+ y[8] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( xx[11], xx[14] );
+ s[3] = CL_sub( xx[11], xx[14] );
+ s[2] = CL_add( xx[12], xx[13] );
+ s[1] = CL_sub( xx[12], xx[13] );
+ t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 );
+ s[0] = CL_add( s[0], s[2] );
+ y[10] = CL_add( xx[10], s[0] );
+ s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) );
+ s[2] = CL_sub( s[0], t );
+ s[0] = CL_add( s[0], t );
+ t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 );
+ s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) );
+ s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) );
+
+ y[11] = CL_msu_j( s[0], s[1] );
+ y[14] = CL_mac_j( s[0], s[1] );
+ y[12] = CL_mac_j( s[2], s[3] );
+ y[13] = CL_msu_j( s[2], s[3] );
+
+ s[0] = CL_add( y[5], y[10] );
+ s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 );
+ z[15] = CL_add( y[0], s[0] );
+ s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) );
+
+ z[25] = CL_mac_j( s[0], s[1] );
+ z[20] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[6], y[11] );
+ s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 );
+ z[21] = CL_add( y[1], s[0] );
+ s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) );
+
+ z[16] = CL_mac_j( s[0], s[1] );
+ z[26] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[7], y[12] );
+ s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 );
+ z[27] = CL_add( y[2], s[0] );
+ s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) );
+
+ z[22] = CL_mac_j( s[0], s[1] );
+ z[17] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[8], y[13] );
+ s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 );
+ z[18] = CL_add( y[3], s[0] );
+ s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) );
+
+ z[28] = CL_mac_j( s[0], s[1] );
+ z[23] = CL_msu_j( s[0], s[1] );
+
+ s[0] = CL_add( y[9], y[14] );
+ s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 );
+ z[24] = CL_add( y[4], s[0] );
+ s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) );
+
+ z[19] = CL_mac_j( s[0], s[1] );
+ z[29] = CL_msu_j( s[0], s[1] );
+
+ s[0] = z[0];
+ s[1] = z[15];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[8];
+ s[1] = z[23];
+ *h = CL_add( s[0], s[1] );
+ *l = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[1];
+ s[1] = z[16];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[9];
+ s[1] = z[24];
+ *h = CL_add( s[0], s[1] );
+ *l = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[2];
+ s[1] = z[17];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[10];
+ s[1] = z[25];
+ *h = CL_add( s[0], s[1] );
+ *l = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[3];
+ s[1] = z[18];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[11];
+ s[1] = z[26];
+ *h = CL_add( s[0], s[1] );
+ *l = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[4];
+ s[1] = z[19];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[12];
+ s[1] = z[27];
+ *h = CL_add( s[0], s[1] );
+ *l = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[5];
+ s[1] = z[20];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[13];
+ s[1] = z[28];
+ *h = CL_add( s[0], s[1] );
+ *l = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[6];
+ s[1] = z[21];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[14];
+ s[1] = z[29];
+ *h = CL_add( s[0], s[1] );
+ *l = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
+
+ s[0] = z[7];
+ s[1] = z[22];
+ *l = CL_add( s[0], s[1] );
+ *h = CL_sub( s[0], s[1] );
+ l += 1, h += 1;
return;
}
static void fft_len32(
- Word32 *re,
- Word32 *im,
- const Word16 s )
+ cmplx *x )
{
- Word32 as, bs;
- Word32 x00, x01, x02, x03, x04, x05, x06, x07;
- Word32 x08, x09, x10, x11, x12, x13, x14, x15;
- Word32 t00, t01, t02, t03, t04, t05, t06, t07;
- Word32 t08, t09, t10, t11, t12, t13, t14, t15;
- Word32 s00, s01, s02, s03, s04, s05, s06, s07;
- Word32 s08, s09, s10, s11, s12, s13, s14, s15;
-
- Word32 y00, y01, y02, y03, y04, y05, y06, y07;
- Word32 y08, y09, y10, y11, y12, y13, y14, y15;
- Word32 y16, y17, y18, y19, y20, y21, y22, y23;
- Word32 y24, y25, y26, y27, y28, y29, y30, y31;
- Word32 y32, y33, y34, y35, y36, y37, y38, y39;
- Word32 y40, y41, y42, y43, y44, y45, y46, y47;
- Word32 y48, y49, y50, y51, y52, y53, y54, y55;
- Word32 y56, y57, y58, y59, y60, y61, y62, y63;
-
- x00 = re[s * 0];
- x01 = im[s * 0];
- x02 = re[s * 4];
- x03 = im[s * 4];
- x04 = re[s * 8];
- x05 = im[s * 8];
- x06 = re[s * 12];
- x07 = im[s * 12];
- x08 = re[s * 16];
- x09 = im[s * 16];
- x10 = re[s * 20];
- x11 = im[s * 20];
- x12 = re[s * 24];
- x13 = im[s * 24];
- x14 = re[s * 28];
- x15 = im[s * 28];
-
- t00 = L_add( x00, x08 );
- t02 = L_sub( x00, x08 );
- t01 = L_add( x01, x09 );
- t03 = L_sub( x01, x09 );
- t04 = L_add( x02, x10 );
- t06 = L_sub( x02, x10 );
- t05 = L_add( x03, x11 );
- t07 = L_sub( x03, x11 );
- t08 = L_add( x04, x12 );
- t10 = L_sub( x04, x12 );
- t09 = L_add( x05, x13 );
- t11 = L_sub( x05, x13 );
- t12 = L_add( x06, x14 );
- t14 = L_sub( x06, x14 );
- t13 = L_add( x07, x15 );
- t15 = L_sub( x07, x15 );
-
- s00 = L_add( t00, t08 );
- s04 = L_sub( t00, t08 );
- s01 = L_add( t01, t09 );
- s05 = L_sub( t01, t09 );
- s08 = L_sub( t02, t11 );
- s10 = L_add( t02, t11 );
- s09 = L_add( t03, t10 );
- s11 = L_sub( t03, t10 );
- s02 = L_add( t04, t12 );
- s07 = L_sub( t04, t12 );
- s03 = L_add( t05, t13 );
- s06 = L_sub( t13, t05 );
- t01 = L_add( t06, t14 );
- t02 = L_sub( t06, t14 );
- t00 = L_add( t07, t15 );
- t03 = L_sub( t07, t15 );
+ cmplx t[8], s[8], xx[8];
+ cmplx y[32];
+ cmplx ab;
+
+ xx[0] = x[0];
+ xx[1] = x[4];
+ xx[2] = x[8];
+ xx[3] = x[12];
+ xx[4] = x[16];
+ xx[5] = x[20];
+ xx[6] = x[24];
+ xx[7] = x[28];
+
+ t[0] = CL_add( xx[0], xx[4] );
+ t[1] = CL_sub( xx[0], xx[4] );
+ t[2] = CL_add( xx[1], xx[5] );
+ t[3] = CL_sub( xx[1], xx[5] );
+ t[4] = CL_add( xx[2], xx[6] );
+ t[5] = CL_sub( xx[2], xx[6] );
+ t[6] = CL_add( xx[3], xx[7] );
+ t[7] = CL_sub( xx[3], xx[7] );
+
+ s[0] = CL_add( t[0], t[4] );
+ s[2] = CL_sub( t[0], t[4] );
+ s[4] = CL_mac_j( t[1], t[5] );
+ s[5] = CL_msu_j( t[1], t[5] );
+ s[1] = CL_add( t[2], t[6] );
+ s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) );
+ t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) );
+ t[1] = CL_sub( t[3], t[7] );
{
- s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 );
- s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 );
- s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 );
- s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 );
+ s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 );
+ s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 );
+ s[7] = CL_conjugate( s[7] );
};
- y00 = L_add( s00, s02 );
- y08 = L_sub( s00, s02 );
- y01 = L_add( s01, s03 );
- y09 = L_sub( s01, s03 );
- y04 = L_sub( s04, s06 );
- y12 = L_add( s04, s06 );
- y05 = L_sub( s05, s07 );
- y13 = L_add( s05, s07 );
- y06 = L_add( s08, s14 );
- y14 = L_sub( s08, s14 );
- y07 = L_add( s09, s15 );
- y15 = L_sub( s09, s15 );
- y02 = L_add( s10, s12 );
- y10 = L_sub( s10, s12 );
- y03 = L_add( s11, s13 );
- y11 = L_sub( s11, s13 );
-
- x00 = re[s * 1];
- x01 = im[s * 1];
- x02 = re[s * 5];
- x03 = im[s * 5];
- x04 = re[s * 9];
- x05 = im[s * 9];
- x06 = re[s * 13];
- x07 = im[s * 13];
- x08 = re[s * 17];
- x09 = im[s * 17];
- x10 = re[s * 21];
- x11 = im[s * 21];
- x12 = re[s * 25];
- x13 = im[s * 25];
- x14 = re[s * 29];
- x15 = im[s * 29];
-
- t00 = L_add( x00, x08 );
- t02 = L_sub( x00, x08 );
- t01 = L_add( x01, x09 );
- t03 = L_sub( x01, x09 );
- t04 = L_add( x02, x10 );
- t06 = L_sub( x02, x10 );
- t05 = L_add( x03, x11 );
- t07 = L_sub( x03, x11 );
- t08 = L_add( x04, x12 );
- t10 = L_sub( x04, x12 );
- t09 = L_add( x05, x13 );
- t11 = L_sub( x05, x13 );
- t12 = L_add( x06, x14 );
- t14 = L_sub( x06, x14 );
- t13 = L_add( x07, x15 );
- t15 = L_sub( x07, x15 );
-
- s00 = L_add( t00, t08 );
- s04 = L_sub( t00, t08 );
- s01 = L_add( t01, t09 );
- s05 = L_sub( t01, t09 );
- s08 = L_sub( t02, t11 );
- s10 = L_add( t02, t11 );
- s09 = L_add( t03, t10 );
- s11 = L_sub( t03, t10 );
- s02 = L_add( t04, t12 );
- s07 = L_sub( t04, t12 );
- s03 = L_add( t05, t13 );
- s06 = L_sub( t13, t05 );
- t01 = L_add( t06, t14 );
- t02 = L_sub( t06, t14 );
- t00 = L_add( t07, t15 );
- t03 = L_sub( t07, t15 );
+ y[0] = CL_add( s[0], s[1] );
+ y[4] = CL_sub( s[0], s[1] );
+ y[2] = CL_sub( s[2], s[3] );
+ y[6] = CL_add( s[2], s[3] );
+ y[3] = CL_add( s[4], s[7] );
+ y[7] = CL_sub( s[4], s[7] );
+ y[1] = CL_add( s[5], s[6] );
+ y[5] = CL_sub( s[5], s[6] );
+
+ xx[0] = x[1];
+ xx[1] = x[5];
+ xx[2] = x[9];
+ xx[3] = x[13];
+ xx[4] = x[17];
+ xx[5] = x[21];
+ xx[6] = x[25];
+ xx[7] = x[29];
+
+ t[0] = CL_add( xx[0], xx[4] );
+ t[1] = CL_sub( xx[0], xx[4] );
+ t[2] = CL_add( xx[1], xx[5] );
+ t[3] = CL_sub( xx[1], xx[5] );
+ t[4] = CL_add( xx[2], xx[6] );
+ t[5] = CL_sub( xx[2], xx[6] );
+ t[6] = CL_add( xx[3], xx[7] );
+ t[7] = CL_sub( xx[3], xx[7] );
+
+ s[0] = CL_add( t[0], t[4] );
+ s[2] = CL_sub( t[0], t[4] );
+ s[4] = CL_mac_j( t[1], t[5] );
+ s[5] = CL_msu_j( t[1], t[5] );
+ s[1] = CL_add( t[2], t[6] );
+ s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) );
+ t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) );
+ t[1] = CL_sub( t[3], t[7] );
{
- s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 );
- s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 );
- s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 );
- s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 );
+ s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 );
+ s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 );
+ s[7] = CL_conjugate( s[7] );
};
- y16 = L_add( s00, s02 );
- y24 = L_sub( s00, s02 );
- y17 = L_add( s01, s03 );
- y25 = L_sub( s01, s03 );
- y20 = L_sub( s04, s06 );
- y28 = L_add( s04, s06 );
- y21 = L_sub( s05, s07 );
- y29 = L_add( s05, s07 );
- y22 = L_add( s08, s14 );
- y30 = L_sub( s08, s14 );
- y23 = L_add( s09, s15 );
- y31 = L_sub( s09, s15 );
- y18 = L_add( s10, s12 );
- y26 = L_sub( s10, s12 );
- y19 = L_add( s11, s13 );
- y27 = L_sub( s11, s13 );
-
- x00 = re[s * 2];
- x01 = im[s * 2];
- x02 = re[s * 6];
- x03 = im[s * 6];
- x04 = re[s * 10];
- x05 = im[s * 10];
- x06 = re[s * 14];
- x07 = im[s * 14];
- x08 = re[s * 18];
- x09 = im[s * 18];
- x10 = re[s * 22];
- x11 = im[s * 22];
- x12 = re[s * 26];
- x13 = im[s * 26];
- x14 = re[s * 30];
- x15 = im[s * 30];
-
- t00 = L_add( x00, x08 );
- t02 = L_sub( x00, x08 );
- t01 = L_add( x01, x09 );
- t03 = L_sub( x01, x09 );
- t04 = L_add( x02, x10 );
- t06 = L_sub( x02, x10 );
- t05 = L_add( x03, x11 );
- t07 = L_sub( x03, x11 );
- t08 = L_add( x04, x12 );
- t10 = L_sub( x04, x12 );
- t09 = L_add( x05, x13 );
- t11 = L_sub( x05, x13 );
- t12 = L_add( x06, x14 );
- t14 = L_sub( x06, x14 );
- t13 = L_add( x07, x15 );
- t15 = L_sub( x07, x15 );
-
- s00 = L_add( t00, t08 );
- s04 = L_sub( t00, t08 );
- s01 = L_add( t01, t09 );
- s05 = L_sub( t01, t09 );
- s08 = L_sub( t02, t11 );
- s10 = L_add( t02, t11 );
- s09 = L_add( t03, t10 );
- s11 = L_sub( t03, t10 );
- s02 = L_add( t04, t12 );
- s07 = L_sub( t04, t12 );
- s03 = L_add( t05, t13 );
- s06 = L_sub( t13, t05 );
- t01 = L_add( t06, t14 );
- t02 = L_sub( t06, t14 );
- t00 = L_add( t07, t15 );
- t03 = L_sub( t07, t15 );
+ y[8] = CL_add( s[0], s[1] );
+ y[12] = CL_sub( s[0], s[1] );
+ y[10] = CL_sub( s[2], s[3] );
+ y[14] = CL_add( s[2], s[3] );
+ y[11] = CL_add( s[4], s[7] );
+ y[15] = CL_sub( s[4], s[7] );
+ y[9] = CL_add( s[5], s[6] );
+ y[13] = CL_sub( s[5], s[6] );
+
+ xx[0] = x[2];
+ xx[1] = x[6];
+ xx[2] = x[10];
+ xx[3] = x[14];
+ xx[4] = x[18];
+ xx[5] = x[22];
+ xx[6] = x[26];
+ xx[7] = x[30];
+
+ t[0] = CL_add( xx[0], xx[4] );
+ t[1] = CL_sub( xx[0], xx[4] );
+ t[2] = CL_add( xx[1], xx[5] );
+ t[3] = CL_sub( xx[1], xx[5] );
+ t[4] = CL_add( xx[2], xx[6] );
+ t[5] = CL_sub( xx[2], xx[6] );
+ t[6] = CL_add( xx[3], xx[7] );
+ t[7] = CL_sub( xx[3], xx[7] );
+
+ s[0] = CL_add( t[0], t[4] );
+ s[2] = CL_sub( t[0], t[4] );
+ s[4] = CL_mac_j( t[1], t[5] );
+ s[5] = CL_msu_j( t[1], t[5] );
+ s[1] = CL_add( t[2], t[6] );
+ s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) );
+ t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) );
+ t[1] = CL_sub( t[3], t[7] );
{
- s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 );
- s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 );
- s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 );
- s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 );
+ s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 );
+ s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 );
+ s[7] = CL_conjugate( s[7] );
};
- y32 = L_add( s00, s02 );
- y40 = L_sub( s00, s02 );
- y33 = L_add( s01, s03 );
- y41 = L_sub( s01, s03 );
- y36 = L_sub( s04, s06 );
- y44 = L_add( s04, s06 );
- y37 = L_sub( s05, s07 );
- y45 = L_add( s05, s07 );
- y38 = L_add( s08, s14 );
- y46 = L_sub( s08, s14 );
- y39 = L_add( s09, s15 );
- y47 = L_sub( s09, s15 );
- y34 = L_add( s10, s12 );
- y42 = L_sub( s10, s12 );
- y35 = L_add( s11, s13 );
- y43 = L_sub( s11, s13 );
-
- x00 = re[s * 3];
- x01 = im[s * 3];
- x02 = re[s * 7];
- x03 = im[s * 7];
- x04 = re[s * 11];
- x05 = im[s * 11];
- x06 = re[s * 15];
- x07 = im[s * 15];
- x08 = re[s * 19];
- x09 = im[s * 19];
- x10 = re[s * 23];
- x11 = im[s * 23];
- x12 = re[s * 27];
- x13 = im[s * 27];
- x14 = re[s * 31];
- x15 = im[s * 31];
-
- t00 = L_add( x00, x08 );
- t02 = L_sub( x00, x08 );
- t01 = L_add( x01, x09 );
- t03 = L_sub( x01, x09 );
- t04 = L_add( x02, x10 );
- t06 = L_sub( x02, x10 );
- t05 = L_add( x03, x11 );
- t07 = L_sub( x03, x11 );
- t08 = L_add( x04, x12 );
- t10 = L_sub( x04, x12 );
- t09 = L_add( x05, x13 );
- t11 = L_sub( x05, x13 );
- t12 = L_add( x06, x14 );
- t14 = L_sub( x06, x14 );
- t13 = L_add( x07, x15 );
- t15 = L_sub( x07, x15 );
-
- s00 = L_add( t00, t08 );
- s04 = L_sub( t00, t08 );
- s01 = L_add( t01, t09 );
- s05 = L_sub( t01, t09 );
- s08 = L_sub( t02, t11 );
- s10 = L_add( t02, t11 );
- s09 = L_add( t03, t10 );
- s11 = L_sub( t03, t10 );
- s02 = L_add( t04, t12 );
- s07 = L_sub( t04, t12 );
- s03 = L_add( t05, t13 );
- s06 = L_sub( t13, t05 );
- t01 = L_add( t06, t14 );
- t02 = L_sub( t06, t14 );
- t00 = L_add( t07, t15 );
- t03 = L_sub( t07, t15 );
+ y[16] = CL_add( s[0], s[1] );
+ y[20] = CL_sub( s[0], s[1] );
+ y[18] = CL_sub( s[2], s[3] );
+ y[22] = CL_add( s[2], s[3] );
+ y[19] = CL_add( s[4], s[7] );
+ y[23] = CL_sub( s[4], s[7] );
+ y[17] = CL_add( s[5], s[6] );
+ y[21] = CL_sub( s[5], s[6] );
+
+ xx[0] = x[3];
+ xx[1] = x[7];
+ xx[2] = x[11];
+ xx[3] = x[15];
+ xx[4] = x[19];
+ xx[5] = x[23];
+ xx[6] = x[27];
+ xx[7] = x[31];
+
+ t[0] = CL_add( xx[0], xx[4] );
+ t[1] = CL_sub( xx[0], xx[4] );
+ t[2] = CL_add( xx[1], xx[5] );
+ t[3] = CL_sub( xx[1], xx[5] );
+ t[4] = CL_add( xx[2], xx[6] );
+ t[5] = CL_sub( xx[2], xx[6] );
+ t[6] = CL_add( xx[3], xx[7] );
+ t[7] = CL_sub( xx[3], xx[7] );
+
+ s[0] = CL_add( t[0], t[4] );
+ s[2] = CL_sub( t[0], t[4] );
+ s[4] = CL_mac_j( t[1], t[5] );
+ s[5] = CL_msu_j( t[1], t[5] );
+ s[1] = CL_add( t[2], t[6] );
+ s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) );
+ t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) );
+ t[1] = CL_sub( t[3], t[7] );
{
- s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 );
- s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 );
- s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 );
- s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 );
+ s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 );
+ s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 );
+ s[7] = CL_conjugate( s[7] );
};
- y48 = L_add( s00, s02 );
- y56 = L_sub( s00, s02 );
- y49 = L_add( s01, s03 );
- y57 = L_sub( s01, s03 );
- y52 = L_sub( s04, s06 );
- y60 = L_add( s04, s06 );
- y53 = L_sub( s05, s07 );
- y61 = L_add( s05, s07 );
- y54 = L_add( s08, s14 );
- y62 = L_sub( s08, s14 );
- y55 = L_add( s09, s15 );
- y63 = L_sub( s09, s15 );
- y50 = L_add( s10, s12 );
- y58 = L_sub( s10, s12 );
- y51 = L_add( s11, s13 );
- y59 = L_sub( s11, s13 );
-
+ y[24] = CL_add( s[0], s[1] );
+ y[28] = CL_sub( s[0], s[1] );
+ y[26] = CL_sub( s[2], s[3] );
+ y[30] = CL_add( s[2], s[3] );
+ y[27] = CL_add( s[4], s[7] );
+ y[31] = CL_sub( s[4], s[7] );
+ y[25] = CL_add( s[5], s[6] );
+ y[29] = CL_sub( s[5], s[6] );
{
- as = y18;
- bs = y19;
- y18 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 0 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 0 + 1] ) );
- y19 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 0 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 0 + 0] ) );
+ ab = y[9];
+ y[9] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[0] ), CL_scale( ab, FFT_RotVector_32_fx[1] ) );
};
{
- as = y20;
- bs = y21;
- y20 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 1 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 1 + 1] ) );
- y21 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 1 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 1 + 0] ) );
+ ab = y[10];
+ y[10] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[2] ), CL_scale( ab, FFT_RotVector_32_fx[3] ) );
};
{
- as = y22;
- bs = y23;
- y22 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 2 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 2 + 1] ) );
- y23 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 2 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 2 + 0] ) );
+ ab = y[11];
+ y[11] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[4] ), CL_scale( ab, FFT_RotVector_32_fx[5] ) );
};
{
- as = y24;
- bs = y25;
- y24 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 3 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 3 + 1] ) );
- y25 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 3 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 3 + 0] ) );
+ ab = y[12];
+ y[12] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[6] ), CL_scale( ab, FFT_RotVector_32_fx[7] ) );
};
{
- as = y26;
- bs = y27;
- y26 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 4 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 4 + 1] ) );
- y27 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 4 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 4 + 0] ) );
+ ab = y[13];
+ y[13] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[8] ), CL_scale( ab, FFT_RotVector_32_fx[9] ) );
};
{
- as = y28;
- bs = y29;
- y28 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 5 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 5 + 1] ) );
- y29 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 5 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 5 + 0] ) );
+ ab = y[14];
+ y[14] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[10] ), CL_scale( ab, FFT_RotVector_32_fx[11] ) );
};
{
- as = y30;
- bs = y31;
- y30 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 6 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 6 + 1] ) );
- y31 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 6 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 6 + 0] ) );
+ ab = y[15];
+ y[15] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[12] ), CL_scale( ab, FFT_RotVector_32_fx[13] ) );
};
{
- as = y34;
- bs = y35;
- y34 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 7 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 7 + 1] ) );
- y35 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 7 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 7 + 0] ) );
+ ab = y[17];
+ y[17] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[14] ), CL_scale( ab, FFT_RotVector_32_fx[15] ) );
};
{
- as = y36;
- bs = y37;
- y36 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 8 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 8 + 1] ) );
- y37 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 8 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 8 + 0] ) );
+ ab = y[18];
+ y[18] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[16] ), CL_scale( ab, FFT_RotVector_32_fx[17] ) );
};
{
- as = y38;
- bs = y39;
- y38 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 9 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 9 + 1] ) );
- y39 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 9 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 9 + 0] ) );
+ ab = y[19];
+ y[19] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[18] ), CL_scale( ab, FFT_RotVector_32_fx[19] ) );
};
{
- as = y42;
- bs = y43;
- y42 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 10 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 10 + 1] ) );
- y43 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 10 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 10 + 0] ) );
+ ab = y[21];
+ y[21] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[20] ), CL_scale( ab, FFT_RotVector_32_fx[21] ) );
};
{
- as = y44;
- bs = y45;
- y44 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 11 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 11 + 1] ) );
- y45 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 11 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 11 + 0] ) );
+ ab = y[22];
+ y[22] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[22] ), CL_scale( ab, FFT_RotVector_32_fx[23] ) );
};
{
- as = y46;
- bs = y47;
- y46 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 12 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 12 + 1] ) );
- y47 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 12 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 12 + 0] ) );
+ ab = y[23];
+ y[23] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[24] ), CL_scale( ab, FFT_RotVector_32_fx[25] ) );
};
{
- as = y50;
- bs = y51;
- y50 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 13 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 13 + 1] ) );
- y51 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 13 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 13 + 0] ) );
+ ab = y[25];
+ y[25] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[26] ), CL_scale( ab, FFT_RotVector_32_fx[27] ) );
};
{
- as = y52;
- bs = y53;
- y52 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 14 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 14 + 1] ) );
- y53 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 14 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 14 + 0] ) );
+ ab = y[26];
+ y[26] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[28] ), CL_scale( ab, FFT_RotVector_32_fx[29] ) );
};
{
- as = y54;
- bs = y55;
- y54 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 15 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 15 + 1] ) );
- y55 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 15 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 15 + 0] ) );
+ ab = y[27];
+ y[27] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[30] ), CL_scale( ab, FFT_RotVector_32_fx[31] ) );
};
{
- as = y56;
- bs = y57;
- y56 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 16 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 16 + 1] ) );
- y57 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 16 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 16 + 0] ) );
+ ab = y[28];
+ y[28] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[32] ), CL_scale( ab, FFT_RotVector_32_fx[33] ) );
};
{
- as = y58;
- bs = y59;
- y58 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 17 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 17 + 1] ) );
- y59 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 17 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 17 + 0] ) );
+ ab = y[29];
+ y[29] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[34] ), CL_scale( ab, FFT_RotVector_32_fx[35] ) );
};
{
- as = y60;
- bs = y61;
- y60 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 18 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 18 + 1] ) );
- y61 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 18 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 18 + 0] ) );
+ ab = y[30];
+ y[30] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[36] ), CL_scale( ab, FFT_RotVector_32_fx[37] ) );
};
{
- as = y62;
- bs = y63;
- y62 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 19 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 19 + 1] ) );
- y63 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 19 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 19 + 0] ) );
+ ab = y[31];
+ y[31] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[38] ), CL_scale( ab, FFT_RotVector_32_fx[39] ) );
};
- t00 = L_add( y00, y32 );
- t02 = L_sub( y00, y32 );
- t01 = L_add( y01, y33 );
- t03 = L_sub( y01, y33 );
- t04 = L_add( y16, y48 );
- t07 = L_sub( y16, y48 );
- t05 = L_add( y49, y17 );
- t06 = L_sub( y49, y17 );
-
- re[s * 0] = L_add( t00, t04 );
- im[s * 0] = L_add( t01, t05 );
- re[s * 8] = L_sub( t02, t06 );
- im[s * 8] = L_sub( t03, t07 );
- re[s * 16] = L_sub( t00, t04 );
- im[s * 16] = L_sub( t01, t05 );
- re[s * 24] = L_add( t02, t06 );
- im[s * 24] = L_add( t03, t07 );
-
- t00 = L_add( y02, y34 );
- t02 = L_sub( y02, y34 );
- t01 = L_add( y03, y35 );
- t03 = L_sub( y03, y35 );
- t04 = L_add( y18, y50 );
- t07 = L_sub( y18, y50 );
- t05 = L_add( y51, y19 );
- t06 = L_sub( y51, y19 );
-
- re[s * 1] = L_add( t00, t04 );
- im[s * 1] = L_add( t01, t05 );
- re[s * 9] = L_sub( t02, t06 );
- im[s * 9] = L_sub( t03, t07 );
- re[s * 17] = L_sub( t00, t04 );
- im[s * 17] = L_sub( t01, t05 );
- re[s * 25] = L_add( t02, t06 );
- im[s * 25] = L_add( t03, t07 );
-
- t00 = L_add( y04, y36 );
- t02 = L_sub( y04, y36 );
- t01 = L_add( y05, y37 );
- t03 = L_sub( y05, y37 );
- t04 = L_add( y20, y52 );
- t07 = L_sub( y20, y52 );
- t05 = L_add( y53, y21 );
- t06 = L_sub( y53, y21 );
-
- re[s * 2] = L_add( t00, t04 );
- im[s * 2] = L_add( t01, t05 );
- re[s * 10] = L_sub( t02, t06 );
- im[s * 10] = L_sub( t03, t07 );
- re[s * 18] = L_sub( t00, t04 );
- im[s * 18] = L_sub( t01, t05 );
- re[s * 26] = L_add( t02, t06 );
- im[s * 26] = L_add( t03, t07 );
-
- t00 = L_add( y06, y38 );
- t02 = L_sub( y06, y38 );
- t01 = L_add( y07, y39 );
- t03 = L_sub( y07, y39 );
- t04 = L_add( y22, y54 );
- t07 = L_sub( y22, y54 );
- t05 = L_add( y55, y23 );
- t06 = L_sub( y55, y23 );
-
- re[s * 3] = L_add( t00, t04 );
- im[s * 3] = L_add( t01, t05 );
- re[s * 11] = L_sub( t02, t06 );
- im[s * 11] = L_sub( t03, t07 );
- re[s * 19] = L_sub( t00, t04 );
- im[s * 19] = L_sub( t01, t05 );
- re[s * 27] = L_add( t02, t06 );
- im[s * 27] = L_add( t03, t07 );
-
- t00 = L_add( y08, y41 );
- t02 = L_sub( y08, y41 );
- t01 = L_sub( y09, y40 );
- t03 = L_add( y09, y40 );
- t04 = L_add( y24, y56 );
- t07 = L_sub( y24, y56 );
- t05 = L_add( y57, y25 );
- t06 = L_sub( y57, y25 );
-
- re[s * 4] = L_add( t00, t04 );
- im[s * 4] = L_add( t01, t05 );
- re[s * 12] = L_sub( t02, t06 );
- im[s * 12] = L_sub( t03, t07 );
- re[s * 20] = L_sub( t00, t04 );
- im[s * 20] = L_sub( t01, t05 );
- re[s * 28] = L_add( t02, t06 );
- im[s * 28] = L_add( t03, t07 );
-
- t00 = L_add( y10, y42 );
- t02 = L_sub( y10, y42 );
- t01 = L_add( y11, y43 );
- t03 = L_sub( y11, y43 );
- t04 = L_add( y26, y58 );
- t07 = L_sub( y26, y58 );
- t05 = L_add( y59, y27 );
- t06 = L_sub( y59, y27 );
-
- re[s * 5] = L_add( t00, t04 );
- im[s * 5] = L_add( t01, t05 );
- re[s * 13] = L_sub( t02, t06 );
- im[s * 13] = L_sub( t03, t07 );
- re[s * 21] = L_sub( t00, t04 );
- im[s * 21] = L_sub( t01, t05 );
- re[s * 29] = L_add( t02, t06 );
- im[s * 29] = L_add( t03, t07 );
-
- t00 = L_add( y12, y44 );
- t02 = L_sub( y12, y44 );
- t01 = L_add( y13, y45 );
- t03 = L_sub( y13, y45 );
- t04 = L_add( y28, y60 );
- t07 = L_sub( y28, y60 );
- t05 = L_add( y61, y29 );
- t06 = L_sub( y61, y29 );
-
- re[s * 6] = L_add( t00, t04 );
- im[s * 6] = L_add( t01, t05 );
- re[s * 14] = L_sub( t02, t06 );
- im[s * 14] = L_sub( t03, t07 );
- re[s * 22] = L_sub( t00, t04 );
- im[s * 22] = L_sub( t01, t05 );
- re[s * 30] = L_add( t02, t06 );
- im[s * 30] = L_add( t03, t07 );
-
- t00 = L_add( y14, y46 );
- t02 = L_sub( y14, y46 );
- t01 = L_add( y15, y47 );
- t03 = L_sub( y15, y47 );
- t04 = L_add( y30, y62 );
- t07 = L_sub( y30, y62 );
- t05 = L_add( y63, y31 );
- t06 = L_sub( y63, y31 );
-
- re[s * 7] = L_add( t00, t04 );
- im[s * 7] = L_add( t01, t05 );
- re[s * 15] = L_sub( t02, t06 );
- im[s * 15] = L_sub( t03, t07 );
- re[s * 23] = L_sub( t00, t04 );
- im[s * 23] = L_sub( t01, t05 );
- re[s * 31] = L_add( t02, t06 );
- im[s * 31] = L_add( t03, t07 );
+ t[0] = CL_add( y[0], y[16] );
+ t[1] = CL_sub( y[0], y[16] );
+ t[2] = CL_add( y[8], y[24] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[8] ), CL_conjugate( y[24] ) ) );
+
+ x[0] = CL_add( t[0], t[2] );
+ x[8] = CL_sub( t[1], t[3] );
+ x[16] = CL_sub( t[0], t[2] );
+ x[24] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[1], y[17] );
+ t[1] = CL_sub( y[1], y[17] );
+ t[2] = CL_add( y[9], y[25] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[9] ), CL_conjugate( y[25] ) ) );
+
+ x[1] = CL_add( t[0], t[2] );
+ x[9] = CL_sub( t[1], t[3] );
+ x[17] = CL_sub( t[0], t[2] );
+ x[25] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[2], y[18] );
+ t[1] = CL_sub( y[2], y[18] );
+ t[2] = CL_add( y[10], y[26] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[10] ), CL_conjugate( y[26] ) ) );
+
+ x[2] = CL_add( t[0], t[2] );
+ x[10] = CL_sub( t[1], t[3] );
+ x[18] = CL_sub( t[0], t[2] );
+ x[26] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[3], y[19] );
+ t[1] = CL_sub( y[3], y[19] );
+ t[2] = CL_add( y[11], y[27] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[11] ), CL_conjugate( y[27] ) ) );
+
+ x[3] = CL_add( t[0], t[2] );
+ x[11] = CL_sub( t[1], t[3] );
+ x[19] = CL_sub( t[0], t[2] );
+ x[27] = CL_add( t[1], t[3] );
+
+ t[0] = CL_msu_j( y[4], y[20] );
+ t[1] = CL_mac_j( y[4], y[20] );
+ t[2] = CL_add( y[12], y[28] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[12] ), CL_conjugate( y[28] ) ) );
+
+ x[4] = CL_add( t[0], t[2] );
+ x[12] = CL_sub( t[1], t[3] );
+ x[20] = CL_sub( t[0], t[2] );
+ x[28] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[5], y[21] );
+ t[1] = CL_sub( y[5], y[21] );
+ t[2] = CL_add( y[13], y[29] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[13] ), CL_conjugate( y[29] ) ) );
+
+ x[5] = CL_add( t[0], t[2] );
+ x[13] = CL_sub( t[1], t[3] );
+ x[21] = CL_sub( t[0], t[2] );
+ x[29] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[6], y[22] );
+ t[1] = CL_sub( y[6], y[22] );
+ t[2] = CL_add( y[14], y[30] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[14] ), CL_conjugate( y[30] ) ) );
+
+ x[6] = CL_add( t[0], t[2] );
+ x[14] = CL_sub( t[1], t[3] );
+ x[22] = CL_sub( t[0], t[2] );
+ x[30] = CL_add( t[1], t[3] );
+
+ t[0] = CL_add( y[7], y[23] );
+ t[1] = CL_sub( y[7], y[23] );
+ t[2] = CL_add( y[15], y[31] );
+ t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[15] ), CL_conjugate( y[31] ) ) );
+
+ x[7] = CL_add( t[0], t[2] );
+ x[15] = CL_sub( t[1], t[3] );
+ x[23] = CL_sub( t[0], t[2] );
+ x[31] = CL_add( t[1], t[3] );
return;
}
static void fft_lenN(
- Word32 *re,
- Word32 *im,
+ cmplx *x,
const Word16 *W,
const Word16 len,
const Word16 dim1,
@@ -5360,226 +4799,178 @@ static void fft_lenN(
const Word16 Woff )
{
Word16 i, j;
- Word32 x[L_FRAME_MAX * 2];
+ cmplx xx[L_FRAME_MAX];
FOR ( i = 0; i < dim2; i++ )
{
FOR ( j = 0; j < dim1; j++ )
{
- x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2];
- x[2 * i * dim1 + 2 * j + 1] = im[sx * i + sx * j * dim2];
+ xx[i * dim1 + j].re = x[sx * i + sx * j * dim2].re;
+ xx[i * dim1 + j].im = x[sx * i + sx * j * dim2].im;
}
}
- SWITCH ( dim1 )
+ SWITCH( dim1 )
{
case 5:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len5( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len5( &xx[i * dim1] );
}
BREAK;
case 8:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len8( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len8( &xx[i * dim1] );
}
BREAK;
case 10:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len10( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len10( &xx[i * dim1] );
}
BREAK;
case 15:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len15( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len15( &xx[i * dim1] );
}
BREAK;
case 16:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len16( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len16( &xx[i * dim1] );
}
BREAK;
case 20:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len20_fx( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len20_fx( &xx[i * dim1] );
}
BREAK;
case 30:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len30( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len30( &xx[i * dim1] );
}
BREAK;
case 32:
FOR ( i = 0; i < dim2; i++ )
{
- fft_len32( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
+ fft_len32( &xx[i * dim1] );
}
BREAK;
}
-
- SWITCH ( dim2 )
+ SWITCH( dim2 )
{
case 8:
{
- Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15;
- Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15;
- Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15;
+ cmplx t[8];
+ cmplx s[8];
+ cmplx y[8];
- IF (EQ_16(dim1, 30) || EQ_16(dim1, 20) || EQ_16(dim1, 15) || EQ_16(dim1, 10) || EQ_16(dim1, 5))
+ IF( EQ_16( dim1, 30 ) || EQ_16( dim1, 20 ) || EQ_16( dim1, 15 ) || EQ_16( dim1, 10 ) || EQ_16( dim1, 5 ) )
{
FOR ( i = 0; i < dim1; i++ )
{
{
- x00 = x[2 * i + 2 * 0 * dim1];
- x01 = x[2 * i + 2 * 0 * dim1 + 1];
+ y[0] = xx[i + 0 * dim1];
};
- IF (EQ_16(i, 0))
+ IF( EQ_16( i, 0 ) )
{
{
- x02 = x[2 * i + 2 * 1 * dim1];
- x03 = x[2 * i + 2 * 1 * dim1 + 1];
+ y[1] = xx[i + 1 * dim1];
};
{
- x04 = x[2 * i + 2 * 2 * dim1];
- x05 = x[2 * i + 2 * 2 * dim1 + 1];
+ y[2] = xx[i + 2 * dim1];
};
{
- x06 = x[2 * i + 2 * 3 * dim1];
- x07 = x[2 * i + 2 * 3 * dim1 + 1];
+ y[3] = xx[i + 3 * dim1];
};
{
- x08 = x[2 * i + 2 * 4 * dim1];
- x09 = x[2 * i + 2 * 4 * dim1 + 1];
+ y[4] = xx[i + 4 * dim1];
};
{
- x10 = x[2 * i + 2 * 5 * dim1];
- x11 = x[2 * i + 2 * 5 * dim1 + 1];
+ y[5] = xx[i + 5 * dim1];
};
{
- x12 = x[2 * i + 2 * 6 * dim1];
- x13 = x[2 * i + 2 * 6 * dim1 + 1];
+ y[6] = xx[i + 6 * dim1];
};
{
- x14 = x[2 * i + 2 * 7 * dim1];
- x15 = x[2 * i + 2 * 7 * dim1 + 1];
+ y[7] = xx[i + 7 * dim1];
};
}
ELSE
{
{
- x02 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 * 2 + 1 - Woff]));
- x03 = L_add( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 * 2 - Woff] ));
+ y[1] = CL_mac_j( CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 - Woff] ),
+ CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 + 1 - Woff] ) );
};
{
- x04 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 * 2 + 1 - Woff]));
- x05 = L_add( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 * 2 - Woff] ));
+ y[2] = CL_mac_j( CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 - Woff] ),
+ CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 + 1 - Woff] ) );
};
{
- x06 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 * 2 + 1 - Woff] ));
- x07 = L_add( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 * 2 - Woff] ));
+ y[3] = CL_mac_j( CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 - Woff] ),
+ CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 + 1 - Woff] ) );
};
{
- x08 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 * 2 + 1 - Woff] ));
- x09 = L_add( Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 * 2 - Woff] ));
+ y[4] = CL_mac_j( CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 - Woff] ),
+ CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 + 1 - Woff] ) );
};
{
- x10 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 - Woff]),
- Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 * 2 + 1 - Woff] ));
- x11 = L_add( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 * 2 - Woff] ));
+ y[5] = CL_mac_j( CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 - Woff] ),
+ CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 + 1 - Woff] ) );
};
{
- x12 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 * 2 + 1 - Woff] ));
- x13 = L_add( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 * 2 - Woff] ));
+ y[6] = CL_mac_j( CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 - Woff] ),
+ CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 + 1 - Woff] ) );
};
{
- x14 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 * 2 + 1 - Woff] ));
- x15 = L_add( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 * 2 - Woff] ));
+ y[7] = CL_mac_j( CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 - Woff] ),
+ CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 + 1 - Woff] ) );
};
}
- t00 = L_add( x00, x08 );
- t02 = L_sub( x00, x08 );
- t01 = L_add( x01, x09 );
- t03 = L_sub( x01, x09 );
- t04 = L_add( x02, x10 );
- t06 = L_sub( x02, x10 );
- t05 = L_add( x03, x11 );
- t07 = L_sub( x03, x11 );
- t08 = L_add( x04, x12 );
- t10 = L_sub( x04, x12 );
- t09 = L_add( x05, x13 );
- t11 = L_sub( x05, x13 );
- t12 = L_add( x06, x14 );
- t14 = L_sub( x06, x14 );
- t13 = L_add( x07, x15 );
- t15 = L_sub( x07, x15 );
-
- s00 = L_add( t00, t08 );
- s04 = L_sub( t00, t08 );
- s01 = L_add( t01, t09 );
- s05 = L_sub( t01, t09 );
- s08 = L_sub( t02, t11 );
- s10 = L_add( t02, t11 );
- s09 = L_add( t03, t10 );
- s11 = L_sub( t03, t10 );
- s02 = L_add( t04, t12 );
- s07 = L_sub( t04, t12 );
- s03 = L_add( t05, t13 );
- s06 = L_sub( t13, t05 );
-
- t01 = L_add( t06, t14 );
- t02 = L_sub( t06, t14 );
- t00 = L_add( t07, t15 );
- t03 = L_sub( t07, t15 );
-
- s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 );
- s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 );
- s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 );
- s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 );
-
- re[sx * i + sx * 0 * dim1] = L_add( s00, s02 );
- im[sx * i + sx * 0 * dim1] = L_add( s01, s03 );
- re[sx * i + sx * 1 * dim1] = L_add( s10, s12 );
- im[sx * i + sx * 1 * dim1] = L_add( s11, s13 );
- re[sx * i + sx * 2 * dim1] = L_sub( s04, s06 );
- im[sx * i + sx * 2 * dim1] = L_sub( s05, s07 );
- re[sx * i + sx * 3 * dim1] = L_add( s08, s14 );
- im[sx * i + sx * 3 * dim1] = L_add( s09, s15 );
- re[sx * i + sx * 4 * dim1] = L_sub( s00, s02 );
- im[sx * i + sx * 4 * dim1] = L_sub( s01, s03 );
- re[sx * i + sx * 5 * dim1] = L_sub( s10, s12 );
- im[sx * i + sx * 5 * dim1] = L_sub( s11, s13 );
- re[sx * i + sx * 6 * dim1] = L_add( s04, s06 );
- im[sx * i + sx * 6 * dim1] = L_add( s05, s07 );
- re[sx * i + sx * 7 * dim1] = L_sub( s08, s14 );
- im[sx * i + sx * 7 * dim1] = L_sub( s09, s15 );
+ t[0] = CL_add( y[0], y[4] );
+ t[1] = CL_sub( y[0], y[4] );
+ t[2] = CL_add( y[1], y[5] );
+ t[3] = CL_sub( y[1], y[5] );
+ t[4] = CL_add( y[2], y[6] );
+ t[5] = CL_sub( y[2], y[6] );
+ t[6] = CL_add( y[3], y[7] );
+ t[7] = CL_sub( y[3], y[7] );
+
+ s[0] = CL_add( t[0], t[4] );
+ s[2] = CL_sub( t[0], t[4] );
+ s[4] = CL_mac_j( t[1], t[5] );
+ s[5] = CL_msu_j( t[1], t[5] );
+ s[1] = CL_add( t[2], t[6] );
+ s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) );
+
+ t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) );
+ t[1] = CL_sub( t[3], t[7] );
+
+ s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 );
+ s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 );
+ s[7] = CL_conjugate( s[7] );
+
+ x[sx * i + sx * 0 * dim1] = CL_add( s[0], s[1] );
+ x[sx * i + sx * 1 * dim1] = CL_add( s[5], s[6] );
+ x[sx * i + sx * 2 * dim1] = CL_sub( s[2], s[3] );
+ x[sx * i + sx * 3 * dim1] = CL_add( s[4], s[7] );
+ x[sx * i + sx * 4 * dim1] = CL_sub( s[0], s[1] );
+ x[sx * i + sx * 5 * dim1] = CL_sub( s[5], s[6] );
+ x[sx * i + sx * 6 * dim1] = CL_add( s[2], s[3] );
+ x[sx * i + sx * 7 * dim1] = CL_sub( s[4], s[7] );
}
}
ELSE
@@ -5587,142 +4978,95 @@ static void fft_lenN(
FOR ( i = 0; i < dim1; i++ )
{
{
- x00 = x[2 * i + 2 * 0 * dim1];
- x01 = x[2 * i + 2 * 0 * dim1 + 1];
+ y[0] = xx[i + 0 * dim1];
};
- IF (EQ_16(i, 0))
+ IF( EQ_16( i, 0 ) )
{
{
- x02 = x[2 * i + 2 * 1 * dim1];
- x03 = x[2 * i + 2 * 1 * dim1 + 1];
+ y[1] = xx[i + 1 * dim1];
};
{
- x04 = x[2 * i + 2 * 2 * dim1];
- x05 = x[2 * i + 2 * 2 * dim1 + 1];
+ y[2] = xx[i + 2 * dim1];
};
{
- x06 = x[2 * i + 2 * 3 * dim1];
- x07 = x[2 * i + 2 * 3 * dim1 + 1];
+ y[3] = xx[i + 3 * dim1];
};
{
- x08 = x[2 * i + 2 * 4 * dim1];
- x09 = x[2 * i + 2 * 4 * dim1 + 1];
+ y[4] = xx[i + 4 * dim1];
};
{
- x10 = x[2 * i + 2 * 5 * dim1];
- x11 = x[2 * i + 2 * 5 * dim1 + 1];
+ y[5] = xx[i + 5 * dim1];
};
{
- x12 = x[2 * i + 2 * 6 * dim1];
- x13 = x[2 * i + 2 * 6 * dim1 + 1];
+ y[6] = xx[i + 6 * dim1];
};
{
- x14 = x[2 * i + 2 * 7 * dim1];
- x15 = x[2 * i + 2 * 7 * dim1 + 1];
+ y[7] = xx[i + 7 * dim1];
};
}
ELSE
{
{
- x02 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 + 1 - Woff]));
- x03 = L_add( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 - Woff] ));
+ y[1] = CL_mac_j( CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 - Woff] ),
+ CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 + 1 - Woff] ) );
};
{
- x04 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 + 1 - Woff]));
- x05 = L_add( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 + 1 - Woff] ),
- Mpy_32_16_1( x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 - Woff]));
+ y[2] = CL_mac_j( CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 - Woff] ),
+ CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 + 1 - Woff] ) );
};
{
- x06 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 + 1 - Woff] ));
- x07 = L_add( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 - Woff] ));
+ y[3] = CL_mac_j( CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 - Woff] ),
+ CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 + 1 - Woff] ) );
};
{
- x08 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 + 1 - Woff]));
- x09 = L_add( Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 - Woff] ));
+ y[4] = CL_mac_j( CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 - Woff] ),
+ CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 + 1 - Woff] ) );
};
{
- x10 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 + 1 - Woff] ));
- x11 = L_add( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 - Woff] ));
+ y[5] = CL_mac_j( CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 - Woff] ),
+ CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 + 1 - Woff] ) );
};
{
- x12 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 + 1 - Woff] ));
- x13 = L_add( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 - Woff] ));
+ y[6] = CL_mac_j( CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 - Woff] ),
+ CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 + 1 - Woff] ) );
};
{
- x14 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], W[sc * i + sc * 7 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 + 1 - Woff] ));
- x15 = L_add( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], W[sc * i + sc * 7 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 - Woff] ));
+ y[7] = CL_mac_j( CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 - Woff] ),
+ CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 + 1 - Woff] ) );
};
}
- t00 = L_add( x00, x08 );
- t02 = L_sub( x00, x08 );
- t01 = L_add( x01, x09 );
- t03 = L_sub( x01, x09 );
- t04 = L_add( x02, x10 );
- t06 = L_sub( x02, x10 );
- t05 = L_add( x03, x11 );
- t07 = L_sub( x03, x11 );
- t08 = L_add( x04, x12 );
- t10 = L_sub( x04, x12 );
- t09 = L_add( x05, x13 );
- t11 = L_sub( x05, x13 );
- t12 = L_add( x06, x14 );
- t14 = L_sub( x06, x14 );
- t13 = L_add( x07, x15 );
- t15 = L_sub( x07, x15 );
-
- s00 = L_add( t00, t08 );
- s04 = L_sub( t00, t08 );
- s01 = L_add( t01, t09 );
- s05 = L_sub( t01, t09 );
- s08 = L_sub( t02, t11 );
- s10 = L_add( t02, t11 );
- s09 = L_add( t03, t10 );
- s11 = L_sub( t03, t10 );
- s02 = L_add( t04, t12 );
- s07 = L_sub( t04, t12 );
- s03 = L_add( t05, t13 );
- s06 = L_sub( t13, t05 );
-
- t01 = L_add( t06, t14 );
- t02 = L_sub( t06, t14 );
- t00 = L_add( t07, t15 );
- t03 = L_sub( t07, t15 );
-
- s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 );
- s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 );
- s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 );
- s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 );
-
- re[sx * i + sx * 0 * dim1] = L_add( s00, s02 );
- im[sx * i + sx * 0 * dim1] = L_add( s01, s03 );
- re[sx * i + sx * 1 * dim1] = L_add( s10, s12 );
- im[sx * i + sx * 1 * dim1] = L_add( s11, s13 );
- re[sx * i + sx * 2 * dim1] = L_sub( s04, s06 );
- im[sx * i + sx * 2 * dim1] = L_sub( s05, s07 );
- re[sx * i + sx * 3 * dim1] = L_add( s08, s14 );
- im[sx * i + sx * 3 * dim1] = L_add( s09, s15 );
- re[sx * i + sx * 4 * dim1] = L_sub( s00, s02 );
- im[sx * i + sx * 4 * dim1] = L_sub( s01, s03 );
- re[sx * i + sx * 5 * dim1] = L_sub( s10, s12 );
- im[sx * i + sx * 5 * dim1] = L_sub( s11, s13 );
- re[sx * i + sx * 6 * dim1] = L_add( s04, s06 );
- im[sx * i + sx * 6 * dim1] = L_add( s05, s07 );
- re[sx * i + sx * 7 * dim1] = L_sub( s08, s14 );
- im[sx * i + sx * 7 * dim1] = L_sub( s09, s15 );
+ t[0] = CL_add( y[0], y[4] );
+ t[1] = CL_sub( y[0], y[4] );
+ t[2] = CL_add( y[1], y[5] );
+ t[3] = CL_sub( y[1], y[5] );
+ t[4] = CL_add( y[2], y[6] );
+ t[5] = CL_sub( y[2], y[6] );
+ t[6] = CL_add( y[3], y[7] );
+ t[7] = CL_sub( y[3], y[7] );
+
+ s[0] = CL_add( t[0], t[4] );
+ s[2] = CL_sub( t[0], t[4] );
+ s[4] = CL_mac_j( t[1], t[5] );
+ s[5] = CL_msu_j( t[1], t[5] );
+ s[1] = CL_add( t[2], t[6] );
+ s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) );
+
+ t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) );
+ t[1] = CL_sub( t[3], t[7] );
+
+ s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 );
+ s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 );
+ s[7] = CL_conjugate( s[7] );
+
+ x[sx * i + sx * 0 * dim1] = CL_add( s[0], s[1] );
+ x[sx * i + sx * 1 * dim1] = CL_add( s[5], s[6] );
+ x[sx * i + sx * 2 * dim1] = CL_sub( s[2], s[3] );
+ x[sx * i + sx * 3 * dim1] = CL_add( s[4], s[7] );
+ x[sx * i + sx * 4 * dim1] = CL_sub( s[0], s[1] );
+ x[sx * i + sx * 5 * dim1] = CL_sub( s[5], s[6] );
+ x[sx * i + sx * 6 * dim1] = CL_add( s[2], s[3] );
+ x[sx * i + sx * 7 * dim1] = CL_sub( s[4], s[7] );
}
}
BREAK;
@@ -5730,42 +5074,36 @@ static void fft_lenN(
case 10:
{
- Word32 y[2 * 10];
+ cmplx y[10];
FOR ( j = 0; j < dim2; j++ )
{
{
- y[2 * j] = x[2 * 0 + 2 * j * dim1];
- y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1];
+ y[j] = xx[0 + j * dim1];
};
}
- fft_len10( &y[0], &y[1], 2 );
+ fft_len10( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * 0 + sx * j * dim1] = y[2 * j];
- im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
+ x[sx * 0 + sx * j * dim1] = y[j];
}
FOR ( i = 1; i < dim1; i++ )
{
{
- y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1];
- y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1];
+ y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1];
}
FOR ( j = 1; j < dim2; j++ )
{
{
- y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 + 1 - Woff] ));
- y[2 * ( j + 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 - Woff] ));
+ y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ),
+ CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ) );
}
}
- fft_len10( &y[0], &y[1], 2 );
+ fft_len10( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * i + sx * j * dim1] = y[2 * j];
- im[sx * i + sx * j * dim1] = y[2 * j + 1];
+ x[sx * i + sx * j * dim1] = y[j];
}
}
BREAK;
@@ -5773,42 +5111,36 @@ static void fft_lenN(
case 16:
{
- Word32 y[2 * 16];
+ cmplx y[16];
FOR ( j = 0; j < dim2; j++ )
{
{
- y[2 * j] = x[2 * 0 + 2 * j * dim1];
- y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1];
+ y[j] = xx[0 + j * dim1];
};
}
- fft_len16( &y[0], &y[1], 2 );
+ fft_len16( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * 0 + sx * j * dim1] = y[2 * j];
- im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
+ x[sx * 0 + sx * j * dim1] = y[j];
}
FOR ( i = 1; i < dim1; i++ )
{
{
- y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1];
- y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1];
+ y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1];
}
FOR ( j = 1; j < dim2; j++ )
{
{
- y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 + 1 - Woff] ));
- y[2 * ( j + 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 - Woff] ));
+ y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ),
+ CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ) );
}
}
- fft_len16( &y[0], &y[1], 2 );
+ fft_len16( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * i + sx * j * dim1] = y[2 * j];
- im[sx * i + sx * j * dim1] = y[2 * j + 1];
+ x[sx * i + sx * j * dim1] = y[j];
}
}
BREAK;
@@ -5816,54 +5148,44 @@ static void fft_lenN(
case 20:
{
- Word32 y[2 * 20];
+ cmplx y[20];
FOR ( j = 0; j < dim2; j++ )
{
{
- y[2 * j] = x[2 * 0 + 2 * j * dim1];
- y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1];
+ y[j] = xx[0 + j * dim1];
};
}
- fft_len20_fx( &y[0], &y[1], 2 );
+ fft_len20_fx( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * 0 + sx * j * dim1] = y[2 * j];
- im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
+ x[sx * 0 + sx * j * dim1] = y[j];
}
FOR ( i = 1; i < dim1; i++ )
{
{
- y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1];
- y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1];
+ y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1];
}
{
- y[2 * ( 0 + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 + 1 - Woff]));
- y[2 * ( 0 + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 - Woff]));
+ y[( 0 + 1 )] = CL_mac_j( CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ),
+ CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ) );
}
FOR ( j = 2; j < dim2; j = j + 2 )
{
{
- y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 + 1 - Woff] ));
- y[2 * ( j + 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 - Woff] ));
+ y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ),
+ CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ) );
}
{
- y[2 * ( j + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 + 1 - Woff] ));
- y[2 * ( j + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 - Woff] ));
+ y[( j + 1 )] = CL_mac_j( CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ),
+ CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ) );
}
}
- fft_len20_fx( &y[0], &y[1], 2 );
+ fft_len20_fx( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * i + sx * j * dim1] = y[2 * j];
- im[sx * i + sx * j * dim1] = y[2 * j + 1];
+ x[sx * i + sx * j * dim1] = y[j];
}
}
BREAK;
@@ -5871,64 +5193,52 @@ static void fft_lenN(
case 32:
{
- Word32 y[2 * 32];
+ cmplx y[32];
FOR ( j = 0; j < dim2; j++ )
{
{
- y[2 * j] = x[2 * 0 + 2 * j * dim1];
- y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1];
+ y[j] = xx[0 + j * dim1];
};
}
- fft_len32( &y[0], &y[1], 2 );
+ fft_len32( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * 0 + sx * j * dim1] = y[2 * j];
- im[sx * 0 + sx * j * dim1] = y[2 * j + 1];
+ x[sx * 0 + sx * j * dim1] = y[j];
}
FOR ( i = 1; i < dim1; i++ )
{
{
- y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1];
- y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1];
+ y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1];
}
{
- y[2 * ( 0 + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 + 1 - Woff] ));
- y[2 * ( 0 + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 - Woff] ));
+ y[( 0 + 1 )] = CL_mac_j( CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ),
+ CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ) );
}
FOR ( j = 2; j < dim2; j = j + 2 )
{
{
- y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 + 1 - Woff] ));
- y[2 * ( j + 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 - Woff] ));
+ y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ),
+ CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ) );
}
{
- y[2 * ( j + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 + 1 - Woff] ));
- y[2 * ( j + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ),
- Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 - Woff] ));
+ y[( j + 1 )] = CL_mac_j( CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ),
+ CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ) );
}
}
- fft_len32( &y[0], &y[1], 2 );
+ fft_len32( &y[0] );
FOR ( j = 0; j < dim2; j++ )
{
- re[sx * i + sx * j * dim1] = y[2 * j];
- im[sx * i + sx * j * dim1] = y[2 * j + 1];
+ x[sx * i + sx * j * dim1] = y[j];
}
}
BREAK;
}
}
-
return;
}
-
/*-----------------------------------------------------------------*
* fft_fx()
*
@@ -5936,72 +5246,89 @@ static void fft_lenN(
*-----------------------------------------------------------------*/
void fft_fx(
- Word32 *re, /* i/o: real part */
- Word32 *im, /* i/o: imag part */
+ Word32 *re, /* i/o: real part */
+ Word32 *im, /* i/o: imag part */
const Word16 length, /* i : length of fft */
const Word16 s /* i : sign */
)
{
- SWITCH ( length )
+ cmplx x[960];
+
+ FOR ( Word32 j = 0; j < length; j++ )
+ {
+ x[j].re = re[s * j];
+ x[j].im = im[s * j];
+ }
+
+ SWITCH( length )
{
case 20:
- fft_len20_fx( re, im, s );
+ fft_len20_fx( x );
BREAK;
case 40:
- fft_lenN( re, im, FFT_RotVector_640_fx, 640, 5, 8, s, 8, 40 );
+ fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, s, 8, 40 );
BREAK;
case 64:
- fft_lenN( re, im, FFT_RotVector_256_fx, 256, 8, 8, s, 8, 64 );
+ fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, s, 8, 64 );
BREAK;
case 80:
- fft_lenN( re, im, FFT_RotVector_640_fx, 640, 10, 8, s, 4, 40 );
+ fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, s, 4, 40 );
BREAK;
case 100:
- fft_lenN( re, im, FFT_RotVector_400_fx, 400, 10, 10, s, 4, 40 );
+ fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, s, 4, 40 );
BREAK;
case 120:
- fft_lenN( re, im, FFT_RotVector_960_fx, 960, 15, 8, s, 4, 60 );
+ fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, s, 4, 60 );
BREAK;
case 128:
- fft_lenN( re, im, FFT_RotVector_256_fx, 256, 16, 8, s, 4, 64 );
+ fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, s, 4, 64 );
BREAK;
case 160:
- fft_lenN( re, im, FFT_RotVector_640_fx, 640, 20, 8, s, 2, 40 );
+ fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, s, 2, 40 );
BREAK;
case 200:
- fft_lenN( re, im, FFT_RotVector_400_fx, 400, 20, 10, s, 2, 40 );
+ fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, s, 2, 40 );
BREAK;
case 240:
- fft_lenN( re, im, FFT_RotVector_960_fx, 960, 30, 8, s, 2, 60 );
+ fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, s, 2, 60 );
BREAK;
case 256:
- fft_lenN( re, im, FFT_RotVector_256_fx, 256, 32, 8, s, 2, 64 );
+ fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, s, 2, 64 );
BREAK;
case 320:
- fft_lenN( re, im, FFT_RotVector_640_fx, 640, 20, 16, s, 2, 40 );
+ fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, s, 2, 40 );
BREAK;
case 400:
- fft_lenN( re, im, FFT_RotVector_400_fx, 400, 20, 20, s, 2, 40 );
+ fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, s, 2, 40 );
BREAK;
case 480:
- fft_lenN( re, im, FFT_RotVector_960_fx, 960, 30, 16, s, 2, 60 );
+ fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, s, 2, 60 );
BREAK;
case 600:
- fft_lenN( re, im, FFT_RotVector_600_fx, 600, 30, 20, s, 2, 60 );
+ fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, s, 2, 60 );
BREAK;
case 640:
- fft_lenN( re, im, FFT_RotVector_640_fx, 640, 20, 32, s, 2, 40 );
+ fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, s, 2, 40 );
BREAK;
case 960:
- fft_lenN( re, im, FFT_RotVector_960_fx, 960, 30, 32, s, 2, 60 );
+ fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, s, 2, 60 );
BREAK;
default:
assert( !"fft length is not supported!" );
}
+ FOR ( Word32 j = 0; j < length; j++ )
+ {
+ re[s * j] = x[j].re;
+ im[s * j] = x[j].im;
+ }
+
return;
}
+#if 0
+/* Functions are already in fixed point and available in fft.c file */
+
#define WMC_TOOL_SKIP
#define SHC( x ) ( (Word16) x )
@@ -6170,9 +5497,9 @@ static void BASOP_fftN2(
Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15;
Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15;
- FOR( i = 0; i < dim2; i++ )
+ FOR ( i = 0; i < dim2; i++ )
{
- FOR( j = 0; j < dim1; j++ )
+ FOR ( j = 0; j < dim1; j++ )
{
x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2];
move32();
@@ -6182,13 +5509,13 @@ static void BASOP_fftN2(
}
/* dim1 == 8 */
- FOR( i = 0; i < dim2; i++ )
+ FOR ( i = 0; i < dim2; i++ )
{
BASOP_fft8( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 );
}
/* dim2 == 8 */
- FOR( i = 0; i < dim1; i++ )
+ FOR ( i = 0; i < dim1; i++ )
{
cplxMpy4_8_1( x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1] );
@@ -6314,5 +5641,5 @@ void BASOP_cfft_fx(
return;
}
-
#undef WMC_TOOL_SKIP
+#endif
diff --git a/lib_com/fft_rel.c b/lib_com/fft_rel.c
index 810c3a7e2fb5564f5699554ead3f62284a126bd6..3de7219079986bbbd2132c43452238c35b500cde 100644
--- a/lib_com/fft_rel.c
+++ b/lib_com/fft_rel.c
@@ -479,4 +479,412 @@ void fft_rel_fx(
}
return;
-}
\ No newline at end of file
+}
+
+#if 0
+void fft_rel_fx32(
+ Word32 x[], /* i/o: input/output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+)
+{
+ Word16 i, j, k, n1, n2, n4;
+ Word16 step;
+ Word32 xt, t1, t2;
+ Word32 *x0, *x1, *x2;
+ const Word16 *s, *c;
+ Word32 *xi2, *xi3, *xi4, *xi1;
+#ifdef BASOP_NOGLOB_DECLARE_LOCAL
+ Flag Overflow = 0;
+#endif
+
+
+ /*-----------------------------------------------------------------*
+ * Digit reverse counter
+ *-----------------------------------------------------------------*/
+
+ j = 0;
+ move16();
+ x0 = &x[0];
+ move32();
+ FOR(i = 0; i < n - 1; i++)
+ {
+ IF(LT_16(i, j))
+ {
+ xt = x[j];
+ move32();
+ x[j] = *x0;
+ move32();
+ *x0 = xt;
+ move32();
+ }
+ x0++;
+ k = shr(n, 1);
+ WHILE(LE_16(k, j))
+ {
+ j = sub(j, k);
+ k = shr(k, 1);
+ }
+ j = add(j, k);
+ }
+
+ /*-----------------------------------------------------------------*
+ * Length two butterflies
+ *-----------------------------------------------------------------*/
+
+ x0 = &x[0];
+ move32();
+ x1 = &x[1];
+ move32();
+ FOR(i = 0; i < n / 2; i++)
+ {
+ xt = *x0;
+ move32();
+#ifdef BASOP_NOGLOB
+ *x0 = L_add_o(xt, *x1, &Overflow);
+#else
+ *x0 = L_add(xt, *x1);
+#endif
+ move32();
+#ifdef BASOP_NOGLOB
+ *x1 = L_sub_o(xt, *x1, &Overflow);
+#else
+ *x1 = L_sub(xt, *x1);
+#endif
+ move32();
+ x0++;
+ x0++;
+ x1++;
+ x1++;
+ }
+
+ /*-----------------------------------------------------------------*
+ * Other butterflies
+ *
+ * The implementation described in [1] has been changed by using
+ * table lookup for evaluating sine and cosine functions. The
+ * variable ind and its increment step are needed to access table
+ * entries. Note that this implementation assumes n4 to be so
+ * small that ind will never exceed the table. Thus the input
+ * argument n and the constant N_MAX_SAS must be set properly.
+ *-----------------------------------------------------------------*/
+
+ n2 = 1;
+ move16();
+ /* step = N_MAX_SAS/4; */
+ FOR(k = 2; k <= m; k++)
+ {
+ n4 = n2;
+ move16();
+ n2 = shl(n4, 1);
+ n1 = shl(n2, 1);
+
+ step = N_MAX_SAS / n1;
+
+ x0 = x;
+ x1 = x + n2;
+ x2 = x + add(n2, n4);
+ FOR(i = 0; i < n; i += n1)
+ {
+ xt = *x0;
+ move32(); /* xt = x[i]; */
+#ifdef BASOP_NOGLOB
+ *x0 = L_add_o(xt, *x1, &Overflow);
+#else /* BASOP_NOGLOB */
+ *x0 = L_add(xt, *x1);
+#endif /* BASOP_NOGLOB */
+ move32(); /* x[i] = xt + x[i+n2]; */
+#ifdef BASOP_NOGLOB
+ *x1 = L_sub_o(xt, *x1, &Overflow);
+#else /* BASOP_NOGLOB */
+ *x1 = L_sub(xt, *x1);
+#endif /* BASOP_NOGLOB */
+ move32(); /* x[i+n2] = xt - x[i+n2]; */
+ *x2 = L_negate(*x2);
+ move32(); /* x[i+n2+n4] = -x[i+n2+n4]; */
+
+
+ s = sincos_t_fx + step;
+ c = s + 64;
+ xi1 = x + add(i, 1);
+ xi3 = xi1 + n2;
+ xi2 = xi3 - 2;
+ xi4 = xi1 + sub(n1, 2);
+
+ FOR(j = 1; j < n4; j++)
+ {
+#ifdef BASOP_NOGLOB
+ t1 = L_add_o(Mpy_32_16_1(*xi3, *c), Mpy_32_16_1(*xi4, *s), &Overflow); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); */
+ t2 = L_sub_o(Mpy_32_16_1(*xi3, *s), Mpy_32_16_1(*xi4, *c), &Overflow); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */
+ *xi4 = L_sub_o(*xi2, t2, &Overflow);
+#else /* BASOP_NOGLOB */
+ t1 = L_add(Mpy_32_16_1(*xi3, *c), Mpy_32_16_1(*xi4, *s)); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); */
+ t2 = L_sub(Mpy_32_16_1(*xi3, *s), Mpy_32_16_1(*xi4, *c)); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */
+ *xi4 = L_sub(*xi2, t2);
+#endif /* BASOP_NOGLOB */
+ move16();
+#ifdef BASOP_NOGLOB
+ *xi3 = L_negate(L_add_o(*xi2, t2, &Overflow));
+#else /* BASOP_NOGLOB */
+ *xi3 = L_negate(L_add(*xi2, t2));
+#endif /* BASOP_NOGLOB */
+ move32();
+#ifdef BASOP_NOGLOB
+ *xi2 = L_sub_o(*xi1, t1, &Overflow);
+#else /* BASOP_NOGLOB */
+ *xi2 = L_sub(*xi1, t1);
+#endif /* BASOP_NOGLOB */
+ move32();
+#ifdef BASOP_NOGLOB
+ *xi1 = L_add_o(*xi1, t1, &Overflow);
+#else /* BASOP_NOGLOB */
+ *xi1 = L_add(*xi1, t1);
+#endif /* BASOP_NOGLOB */
+ move32();
+
+ xi4--;
+ xi2--;
+ xi3++;
+ xi1++;
+ c += step;
+ s += step; /* autoincrement by ar0 */
+ }
+
+ x0 += n1;
+ x1 += n1;
+ x2 += n1;
+ }
+ /* step = shr(step, 1); */
+ }
+
+ return;
+}
+#else
+void fft_rel_fx32(
+ Word32 x[], /* i/o: input/output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+)
+{
+ Word16 i, j, k, n1, n2, n4;
+ Word16 step;
+ Word32 xt, t1, t2;
+ Word32 *x0, *x1, *x2;
+ Word32 *xi2, *xi3, *xi4, *xi1;
+ const Word16 *s, *c;
+ const Word16 *idx;
+
+ /* !!!! NMAX = 256 is hardcoded here (similar optimizations should be done for NMAX > 256) !!! */
+
+ Word32 *x2even, *x2odd;
+ Word32 temp[512];
+
+ IF ( EQ_16(n, 128) || EQ_16(n, 256) || EQ_16(n, 512) )
+ {
+ idx = fft256_read_indexes;
+
+ /* Combined Digit reverse counter & Length two butterflies */
+ IF ( EQ_16(n, 128) )
+ {
+ x2 = temp;
+ FOR ( i = 0; i < 64; i++ )
+ {
+ j = *idx++;
+ k = *idx++;
+
+ *x2++ = L_add(x[shr(j, 1)], x[shr(k, 1)]);
+ *x2++ = L_sub(x[shr(j, 1)], x[shr(k, 1)]);
+ }
+ }
+ ELSE IF ( EQ_16(n, 256) )
+ {
+ x2 = temp;
+ FOR ( i = 0; i < 128; i++ )
+ {
+ j = *idx++;
+ k = *idx++;
+
+ *x2++ = L_add(x[j], x[k]);
+ *x2++ = L_sub(x[j], x[k]);
+ }
+ }
+ ELSE IF ( EQ_16(n, 512) )
+ {
+ x2even = temp;
+ x2odd = temp + 256;
+
+ FOR ( i = 0; i < 128; i++ )
+ {
+ j = shl(*idx, 1); idx++;
+ k = shl(*idx, 1); idx++;
+
+ *x2even++ = L_add(x[j], x[k]);
+ *x2even++ = L_sub(x[j], x[k]);
+ j++; k++;
+ *x2odd++ = L_add(x[j], x[k]);
+ *x2odd++ = L_sub(x[j], x[k]);
+ }
+ }
+
+ /*-----------------------------------------------------------------*
+ * 1st Stage Loop has been Unrolled because n4 is '1' and that
+ * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
+ * and the associated pointers initialization.
+ * Also, it allows to Put the Data from 'temp' back into 'x' due
+ * to the previous Combined Digit Reverse and Length two butterflies
+ *-----------------------------------------------------------------*/
+
+ /*for_ (k = 2; k < 3; k++)*/
+ {
+ x0 = temp;
+ x1 = x0 + 2;
+ x2 = x;
+
+ FOR ( i = 0; i < n; i += 4 )
+ {
+ *x2++ = L_add(*x0++, *x1); /* x[i] = xt + x[i+n2]; */
+ *x2++ = *x0;
+ x0--;
+ *x2++ = L_sub(*x0, *x1); /* x[i+n2] = xt - x[i+n2]; */
+ x1++;
+ *x2++ = L_negate(*x1); /* x[i+n2+n4] = -x[i+n2+n4]; */
+
+ x0 += 4;
+ x1 += 3; /* x1 has already advanced */
+ }
+ }
+ }
+ ELSE
+ {
+ /*-----------------------------------------------------------------*
+ * Digit reverse counter
+ *-----------------------------------------------------------------*/
+
+ j = 0;
+ x0 = &x[0];
+ FOR ( i = 0; i < n - 1; i++ )
+ {
+ IF ( LT_16(i, j) )
+ {
+ xt = x[j];
+ x[j] = *x0;
+ *x0 = xt;
+ }
+ x0++;
+ k = n / 2;
+ WHILE ( LE_16(k, j) )
+ {
+ j -= k;
+ k = shr(k, 1);
+ }
+ j += k;
+ }
+
+ /*-----------------------------------------------------------------*
+ * Length two butterflies
+ *-----------------------------------------------------------------*/
+
+ x0 = &x[0];
+ x1 = &x[1];
+ FOR ( i = 0; i < n / 2; i++ )
+ {
+ *x1 = L_sub(*x0, *x1);
+ *x0 = L_sub(L_shl(*x0, 1), *x1);
+
+ x0++;
+ x0++;
+ x1++;
+ x1++;
+ }
+
+ /*-----------------------------------------------------------------*
+ * 1st Stage Loop has been Unrolled because n4 is '1' and that
+ * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop
+ * and the associated pointers initialization.
+ *-----------------------------------------------------------------*/
+
+ /* for_ (k = 2; k < 3; k++) */
+ {
+ x0 = x;
+ x1 = x0 + 2;
+
+ FOR ( i = 0; i < n; i += 4 )
+ {
+ *x1 = L_sub(*x0, *x1); /* x[i+n2] = xt - x[i+n2]; */
+ *x0 = L_sub(L_shl(*x0, 1), *x1++); /* x[i] = xt + x[i+n2]; */
+ *x1 = L_negate(*x1); /* x[i+n2+n4] = -x[i+n2+n4]; */
+
+ x0 += 4;
+ x1 += 3; /* x1 has already advanced */
+ }
+ }
+ }
+
+ /*-----------------------------------------------------------------*
+ * Other butterflies
+ *
+ * The implementation described in [1] has been changed by using
+ * table lookup for evaluating sine and cosine functions. The
+ * variable ind and its increment step are needed to access table
+ * entries. Note that this implementation assumes n4 to be so
+ * small that ind will never exceed the table. Thus the input
+ * argument n and the constant N_MAX_FFT must be set properly.
+ *-----------------------------------------------------------------*/
+
+ n4 = 1;
+ n2 = 2;
+ n1 = 4;
+
+ step = N_MAX_DIV4;
+
+ FOR ( k = 3; k <= m; k++ )
+ {
+ step = shr(step, 1);
+ n4 = shl(n4, 1);
+ n2 = shl(n2, 1);
+ n1 = shl(n1, 1);
+
+ x0 = x;
+ x1 = x0 + n2;
+ x2 = x1 + n4;
+
+ FOR ( i = 0; i < n; i += n1 )
+ {
+ *x1 = L_sub(*x0, *x1); /* x[i+n2] = xt - x[i+n2]; */
+ *x0 = L_sub(L_shl(*x0, 1), *x1); /* x[i] = xt + x[i+n2]; */
+ *x2 = L_negate(*x2); /* x[i+n2+n4] = -x[i+n2+n4]; */
+
+ s = sincos_t_ext_fx;
+ c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 */
+ xi1 = x0;
+ xi3 = xi1 + n2;
+ xi2 = xi3;
+ x0 += n1;
+ xi4 = x0;
+
+ FOR ( j = 1; j < n4; j++ )
+ {
+ xi3++;
+ xi1++;
+ xi4--;
+ xi2--;
+ c += step;
+ s += step; /* autoincrement by ar0 */
+
+ t1 = L_add(Mpy_32_16_1(*xi3, *c), Mpy_32_16_1(*xi4, *s)); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); */
+ t2 = L_sub(Mpy_32_16_1(*xi3, *s), Mpy_32_16_1(*xi4, *c)); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */
+
+ *xi4 = L_sub(*xi2, t2);
+ *xi2 = L_sub(*xi1, t1);
+ *xi1 = L_sub(L_shl(*xi1, 1), *xi2);
+ *xi3 = L_negate(L_add(L_shl(t2, 1), *xi4));
+ }
+
+ x1 += n1;
+ x2 += n1;
+ }
+ }
+
+ return;
+}
+#endif
\ No newline at end of file
diff --git a/lib_com/ifft_rel.c b/lib_com/ifft_rel.c
index 1a08533e7354033051da1602d1323fce8ecb16b9..6da82af5088211e9d96dd2495cf2345a5da340f0 100644
--- a/lib_com/ifft_rel.c
+++ b/lib_com/ifft_rel.c
@@ -650,3 +650,609 @@ void ifft_rel_fx(
return;
}
+
+#if 0
+void ifft_rel_fx32( /* reference (disabled) inverse FFT of a real sequence, in-place, 1-based internally */
+    Word32 io[],    /* i/o: input/output vector   */
+    const Word16 n, /* i  : vector length         */
+    const Word16 m  /* i  : log2 of vector length */
+)
+{
+    Word16 i, j, k;
+    Word16 step;
+    Word16 n2, n4, n8, i0;
+    Word16 is, id;
+    Word32 *x, *xi0, *xi1, *xi2, *xi3, *xi4, *xup1, *xdn6, *xup3, *xdn8;
+    Word32 xt;
+    Word32 r1;
+    Word32 t1, t2, t3, t4, t5;
+    const Word16 *s, *c, *s3, *c3;
+
+    Word16 cc1, cc3, ss1, ss3;
+    Word16 tmp;
+#ifdef BASOP_NOGLOB_DECLARE_LOCAL
+    Flag Overflow = 0;
+#endif
+
+
+    /*-----------------------------------------------------------------*
+     * ifft
+     *-----------------------------------------------------------------*/
+
+    x = &io[-1]; /* 1-based indexing into io[] */
+    move32();
+    n2 = shl(n, 1);
+    FOR(k = 1; k < m; k++)
+    {
+        is = 0;
+        move16();
+        id = n2;
+        move16();
+        n2 = shr(n2, 1);
+        move16();
+        n4 = shr(n2, 2);
+        move16();
+        n8 = shr(n4, 1);
+        move16();
+        tmp = sub(n, 1);
+        WHILE(LT_16(is, tmp))
+        {
+            xi1 = x + is + 1;
+            move32();
+            xi2 = xi1 + n4;
+            move32();
+            xi3 = xi2 + n4;
+            move32();
+            xi4 = xi3 + n4;
+            move32();
+
+            FOR(i = is; i < n; i += id)
+            {
+#ifdef BASOP_NOGLOB
+                t1 = L_sub_o(*xi1, *xi3, &Overflow);
+                *xi1 = L_add_o(*xi1, *xi3, &Overflow);
+#else /* BASOP_NOGLOB */
+                t1 = L_sub(*xi1, *xi3);
+                *xi1 = L_add(*xi1, *xi3);
+#endif /* BASOP_NOGLOB */
+                move32();
+#ifdef BASOP_NOGLOB
+                *xi2 = L_shl_o(*xi2, 1, &Overflow);
+#else /* BASOP_NOGLOB */
+                *xi2 = L_shl(*xi2, 1);
+#endif /* BASOP_NOGLOB */
+                move32();
+#ifdef BASOP_NOGLOB
+                *xi3 = L_sub_o(t1, L_shl_o(*xi4, 1, &Overflow), &Overflow);
+#else /* BASOP_NOGLOB */
+                *xi3 = L_sub(t1, L_shl(*xi4, 1));
+#endif /* BASOP_NOGLOB */
+                move32();
+#ifdef BASOP_NOGLOB
+                *xi4 = L_add_o(t1, L_shl_o(*xi4, 1, &Overflow), &Overflow);
+#else /* BASOP_NOGLOB */
+                *xi4 = L_add(t1, L_shl(*xi4, 1));
+#endif /* BASOP_NOGLOB */
+                move32();
+
+                IF(NE_16(n4, 1))
+                {
+#ifdef BASOP_NOGLOB
+                    t1 = Mpy_32_16_1(L_sub_o(*(xi2 + n8), *(xi1 + n8), &Overflow), INV_SQR2_FX);
+                    t2 = Mpy_32_16_1(L_add_o(*(xi4 + n8), *(xi3 + n8), &Overflow), INV_SQR2_FX);
+#else /* BASOP_NOGLOB */
+                    t1 = Mpy_32_16_1(L_sub(*(xi2 + n8), *(xi1 + n8)), INV_SQR2_FX);
+                    t2 = Mpy_32_16_1(L_add(*(xi4 + n8), *(xi3 + n8)), INV_SQR2_FX);
+#endif /* BASOP_NOGLOB */
+
+#ifdef BASOP_NOGLOB
+                    *(xi1 + n8) = L_add_o(*(xi1 + n8), *(xi2 + n8), &Overflow);
+#else /* BASOP_NOGLOB */
+                    *(xi1 + n8) = L_add(*(xi1 + n8), *(xi2 + n8)); /* fix: was 16-bit add() on Word32 data */
+#endif /* BASOP_NOGLOB */
+                    move32();
+#ifdef BASOP_NOGLOB
+                    *(xi2 + n8) = L_sub_o(*(xi4 + n8), *(xi3 + n8), &Overflow);
+#else /* BASOP_NOGLOB */
+                    *(xi2 + n8) = L_sub(*(xi4 + n8), *(xi3 + n8));
+#endif /* BASOP_NOGLOB */
+                    move32();
+#ifdef BASOP_NOGLOB
+                    *(xi3 + n8) = L_negate(L_shl_o(L_add_o(t2, t1, &Overflow), 1, &Overflow));
+#else /* BASOP_NOGLOB */
+                    *(xi3 + n8) = L_negate(L_shl(L_add(t2, t1), 1)); /* fix: was shl(add(..)) - 16-bit ops on Word32 */
+#endif /* BASOP_NOGLOB */
+                    move32();
+#ifdef BASOP_NOGLOB
+                    *(xi4 + n8) = L_shl_o(L_sub_o(t1, t2, &Overflow), 1, &Overflow);
+#else /* BASOP_NOGLOB */
+                    *(xi4 + n8) = L_shl(L_sub(t1, t2), 1);
+#endif /* BASOP_NOGLOB */
+                    move32();
+                }
+                xi1 += id;
+                move32();
+                xi2 += id;
+                move32();
+                xi3 += id;
+                move32();
+                xi4 += id;
+                move32();
+            }
+            is = sub(shl(id, 1), n2);
+            id = shl(id, 2);
+        }
+        /*Can be acheived with a shr */
+        step = N_MAX_SAS / n2;
+        move16();
+
+        s = sincos_t_fx + step;
+        move16();
+        c = s + 64; /* NOTE(review): assumes cos entries start 64 past sin in sincos_t_fx - confirm table layout */
+        move16();
+        s3 = sincos_t_fx + i_mult2(step, 3);
+        move16();
+        c3 = s3 + 64;
+        move16();
+        FOR(j = 2; j <= n8; j++)
+        {
+            cc1 = *c;
+            move16();
+            ss1 = *s;
+            move16();
+            cc3 = *c3;
+            move16();
+            ss3 = *s3;
+            move16();
+
+            is = 0;
+            move16();
+            id = shl(n2, 1);
+
+            c += step;
+            move16();
+            s += step;
+            move16();
+
+            c3 += 3 * step;
+            move16();
+            s3 += 3 * step;
+            move16();
+            WHILE(LT_16(is, sub(n, 1)))
+            {
+                xup1 = x + j + is;
+                move32();
+                xup3 = xup1 + shl(n4, 1);
+                move32();
+                xdn6 = xup3 - shl(j, 1) + 2;
+                move32();
+
+                xdn8 = xdn6 + shl(n4, 1);
+                move32();
+
+                FOR(i = is; i < n; i += id)
+                {
+#ifdef BASOP_NOGLOB
+                    t1 = L_sub_o(*xup1, *xdn6, &Overflow);
+                    *xup1 = L_add_o(*xup1, *xdn6, &Overflow);
+#else /* BASOP_NOGLOB */
+                    t1 = L_sub(*xup1, *xdn6);
+                    *xup1 = L_add(*xup1, *xdn6);
+#endif /* BASOP_NOGLOB */
+                    move32();
+                    xup1 += n4;
+                    move32();
+                    xdn6 -= n4;
+                    move32();
+
+#ifdef BASOP_NOGLOB
+                    t2 = L_sub_o(*xdn6, *xup1, &Overflow);
+                    *xdn6 = L_add_o(*xup1, *xdn6, &Overflow);
+#else /* BASOP_NOGLOB */
+                    t2 = L_sub(*xdn6, *xup1);
+                    *xdn6 = L_add(*xup1, *xdn6);
+#endif /* BASOP_NOGLOB */
+                    move32();
+
+                    xdn6 += n4;
+                    move32();
+#ifdef BASOP_NOGLOB
+                    t3 = L_add_o(*xdn8, *xup3, &Overflow);
+                    *xdn6 = L_sub_o(*xdn8, *xup3, &Overflow);
+#else /* BASOP_NOGLOB */
+                    t3 = L_add(*xdn8, *xup3);
+                    *xdn6 = L_sub(*xdn8, *xup3);
+#endif /* BASOP_NOGLOB */
+                    move32();
+
+                    xup3 += n4;
+                    move32();
+                    xdn8 -= n4;
+                    move32();
+
+#ifdef BASOP_NOGLOB
+                    t4 = L_add_o(*xup3, *xdn8, &Overflow);
+                    *xup1 = L_sub_o(*xup3, *xdn8, &Overflow);
+#else /* BASOP_NOGLOB */
+                    t4 = L_add(*xup3, *xdn8);
+                    *xup1 = L_sub(*xup3, *xdn8);
+#endif /* BASOP_NOGLOB */
+                    move32();
+
+#ifdef BASOP_NOGLOB
+                    t5 = L_sub_o(t1, t4, &Overflow);
+                    t1 = L_add_o(t1, t4, &Overflow);
+                    t4 = L_sub_o(t2, t3, &Overflow);
+                    t2 = L_add_o(t2, t3, &Overflow);
+                    *xup3 = L_sub_o(Mpy_32_16_1(t1, cc3), Mpy_32_16_1(t2, ss3), &Overflow);
+#else /* BASOP_NOGLOB */
+                    t5 = L_sub(t1, t4);
+                    t1 = L_add(t1, t4);
+                    t4 = L_sub(t2, t3);
+                    t2 = L_add(t2, t3);
+                    *xup3 = L_sub(Mpy_32_16_1(t1, cc3), Mpy_32_16_1(t2, ss3));
+#endif /* BASOP_NOGLOB */
+                    move32();
+                    xup3 -= n4;
+                    move32();
+#ifdef BASOP_NOGLOB
+                    *xup3 = L_add_o(Mpy_32_16_1(t5, cc1), Mpy_32_16_1(t4, ss1), &Overflow);
+#else /* BASOP_NOGLOB */
+                    *xup3 = L_add(Mpy_32_16_1(t5, cc1), Mpy_32_16_1(t4, ss1));
+#endif /* BASOP_NOGLOB */
+                    move32();
+#ifdef BASOP_NOGLOB
+                    *xdn8 = L_sub_o(Mpy_32_16_1(t5, ss1), Mpy_32_16_1(t4, cc1), &Overflow);
+#else /* BASOP_NOGLOB */
+                    *xdn8 = L_sub(Mpy_32_16_1(t5, ss1), Mpy_32_16_1(t4, cc1));
+#endif /* BASOP_NOGLOB */
+                    move32();
+
+                    xdn8 += n4;
+                    move32();
+#ifdef BASOP_NOGLOB
+                    *xdn8 = L_add_o(Mpy_32_16_1(t2, cc3), Mpy_32_16_1(t1, ss3), &Overflow);
+#else /* BASOP_NOGLOB */
+                    *xdn8 = L_add(Mpy_32_16_1(t2, cc3), Mpy_32_16_1(t1, ss3));
+#endif /* BASOP_NOGLOB */
+                    move32();
+
+                    xup1 -= n4;
+                    move32();
+                    xup1 += id;
+                    move32();
+                    xup3 += id;
+                    move32();
+                    xdn6 += id;
+                    move32();
+                    xdn8 += id;
+                    move32();
+                }
+                is = sub(shl(id, 1), n2);
+                id = shl(id, 2);
+            }
+        }
+    }
+
+    /*-----------------------------------------------------------------*
+     * Length two butterflies
+     *-----------------------------------------------------------------*/
+
+    is = 1;
+    move16();
+    id = 4;
+    move16();
+    WHILE(is < n)
+    {
+        xi0 = x + is;
+        move32();
+        xi1 = xi0 + 1;
+        move32();
+
+        FOR(i0 = is; i0 <= n; i0 += id)
+        {
+            r1 = *xi0;
+            move32();
+#ifdef BASOP_NOGLOB
+            *xi0 = L_add_o(r1, *xi1, &Overflow);
+#else /* BASOP_NOGLOB */
+            *xi0 = L_add(r1, *xi1); /* fix: was 16-bit add() on Word32 data */
+#endif /* BASOP_NOGLOB */
+            move32();
+#ifdef BASOP_NOGLOB
+            *xi1 = L_sub_o(r1, *xi1, &Overflow);
+#else /* BASOP_NOGLOB */
+            *xi1 = L_sub(r1, *xi1); /* fix: was 16-bit sub() on Word32 data */
+#endif
+            move32();
+            xi0 += id;
+            move32();
+            xi1 += id;
+            move32();
+        }
+        is = sub(shl(id, 1), 1);
+        id = shl(id, 2);
+    }
+
+    /*-----------------------------------------------------------------*
+     * Digit reverse counter
+     *-----------------------------------------------------------------*/
+
+    j = 1;
+    move16();
+    FOR(i = 1; i < n; i++)
+    {
+        IF(LT_16(i, j))
+        {
+            xt = x[j];
+            move32();
+            x[j] = x[i];
+            move32();
+            x[i] = xt;
+            move32();
+        }
+        k = shr(n, 1);
+        WHILE(LT_16(k, j))
+        {
+            j = sub(j, k);
+            k = shr(k, 1);
+        }
+        j = add(j, k);
+    }
+
+    /*-----------------------------------------------------------------*
+     * Normalization
+     *-----------------------------------------------------------------*/
+
+    tmp = div_s(1, n); /*Q15 */
+    FOR(i = 1; i <= n; i++)
+    {
+        x[i] = Mpy_32_16_1(x[i], tmp); /* scale by 1/n */
+        move32();
+    }
+
+    return;
+}
+#else
+#define INV_SQRT_2_16 (Word16)(0x5A82)
+void ifft_rel_fx32( /* in-place inverse FFT of a real sequence (active variant) */
+    Word32 io[],    /* i/o: input/output vector   */
+    const Word16 n, /* i  : vector length         */
+    const Word16 m  /* i  : log2 of vector length */
+)
+{
+    Word16 i, j, k;
+    Word16 step;
+    Word16 n2, n4, n8, i0;
+    Word16 is, id;
+    Word32 *x, *xi0, *xi1, *xi2, *xi3, *xi4, *xup1, *xdn6, *xup3, *xdn8;
+    Word32 xt;
+    Word32 r1;
+    Word32 t1, t2, t3, t4, t5;
+    Word16 cc1, cc3, ss1, ss3;
+    const Word16 *s, *s3, *c, *c3;
+    const Word16 *idx;
+    Word32 temp[512]; /* scratch for LUT-based reordering; sized for max n == 512 */
+    Word16 n_inv = 128; /* 32768/n in Q15; selected below */
+
+    SWITCH (n)
+    {
+        case 128:
+            n_inv = 256; /* 32768/128 */
+            BREAK;
+        case 256:
+            n_inv = 128; /* 32768/256 */
+            BREAK;
+        case 512:
+            n_inv = 64; /* 32768/512 */
+            BREAK;
+        default:
+            assert(0); /* only n in {128, 256, 512} supported by the LUT reorder below */
+            BREAK;
+    }
+
+    /*-----------------------------------------------------------------*
+     * IFFT
+     *-----------------------------------------------------------------*/
+
+    x = &io[-1]; /* 1-based indexing into io[] */
+    n2 = shl(n, 1);
+    FOR ( k = 1; k < m; k++ ) /* split-radix stages, largest first */
+    {
+        is = 0;
+        id = n2;
+        n2 = shr(n2, 1);
+        n4 = shr(n2, 2);
+        n8 = shr(n4, 1);
+        WHILE ( LT_16(is, n - 1) )
+        {
+            xi1 = x + is + 1;
+            xi2 = xi1 + n4;
+            xi3 = xi2 + n4;
+            xi4 = xi3 + n4;
+
+            FOR ( i = is; i < n; i += id ) /* trivial (twiddle-free) butterflies */
+            {
+                t1 = L_sub(*xi1, *xi3);
+                *xi1 = L_add(*xi1, *xi3);
+                *xi2 = L_shl(*xi2, 1);
+                *xi3 = L_sub(t1, L_shl(*xi4, 1));
+                *xi4 = L_add(t1, L_shl(*xi4, 1));
+                IF ( n4 != 1 )
+                {
+                    /* mid-point (pi/4) butterfly uses 1/sqrt(2) instead of table lookup */
+                    t1 = Mpy_32_16_1(L_sub( *( xi2 + n8 ), *( xi1 + n8 ) ), INV_SQRT_2_16);
+                    t2 = Mpy_32_16_1(L_add( *( xi4 + n8 ), *( xi3 + n8 ) ), INV_SQRT_2_16);
+
+                    *( xi1 + n8 ) = L_add(*( xi1 + n8 ), *( xi2 + n8 ));
+                    *( xi2 + n8 ) = L_sub(*( xi4 + n8 ), *( xi3 + n8 ));
+                    *( xi3 + n8 ) = L_shl(L_negate(L_add(t2, t1)), 1 );
+                    *( xi4 + n8 ) = L_shl(L_sub(t1, t2), 1 );
+                }
+                xi1 += id;
+                xi2 += id;
+                xi3 += id;
+                xi4 += id;
+            }
+            is = 2 * id - n2;
+            id = 4 * id;
+        }
+        step = N_MAX_FFT / n2; /* twiddle table stride for this stage */
+
+        s = sincos_t_ext_fx + step;
+        c = s + N_MAX_FFT / 4; /* cos entries start a quarter period after sin */
+        s3 = sincos_t_ext_fx + 3 * step;
+        c3 = s3 + N_MAX_FFT / 4;
+        FOR ( j = 2; j <= n8; j++ ) /* general butterflies with twiddles w^j and w^3j */
+        {
+            cc1 = *c;
+            ss1 = *s;
+            cc3 = *c3;
+            ss3 = *s3;
+
+            is = 0;
+            id = 2 * n2;
+
+            c += step;
+            s += step;
+
+            c3 += 3 * step;
+            s3 += 3 * step;
+            WHILE ( LT_16(is, n - 1) )
+            {
+                xup1 = x + j + is;
+                xup3 = xup1 + 2 * n4;
+                xdn6 = xup3 - 2 * j + 2;
+                xdn8 = xdn6 + 2 * n4;
+
+                FOR ( i = is; i < n; i += id )
+                {
+                    t1 = L_sub(*xup1, *xdn6);
+                    *xup1 = L_add(*xup1, *xdn6);
+                    xup1 += n4;
+                    xdn6 -= n4;
+
+                    t2 = L_sub(*xdn6, *xup1);
+                    *xdn6 = L_add(*xup1, *xdn6);
+
+                    xdn6 += n4;
+                    t3 = L_add(*xdn8, *xup3);
+                    *xdn6 = L_sub(*xdn8, *xup3);
+
+                    xup3 += n4;
+                    xdn8 -= n4;
+
+                    t4 = L_add(*xup3, *xdn8);
+                    *xup1 = L_sub(*xup3, *xdn8);
+
+                    t5 = L_sub(t1, t4);
+                    t1 = L_add(t1, t4);
+                    t4 = L_sub(t2, t3);
+                    t2 = L_add(t2, t3);
+                    *xup3 = L_sub(Mpy_32_16_1(t1, cc3), Mpy_32_16_1(t2, ss3)); /* rotate by w^3j */
+                    xup3 -= n4;
+                    *xup3 = L_add(Mpy_32_16_1(t5, cc1), Mpy_32_16_1(t4, ss1)); /* rotate by w^j */
+                    *xdn8 = L_sub(Mpy_32_16_1(t5, ss1), Mpy_32_16_1(t4, cc1));
+
+                    xdn8 += n4;
+                    *xdn8 = L_add(Mpy_32_16_1(t2, cc3), Mpy_32_16_1(t1, ss3));
+
+                    xup1 -= n4;
+                    xup1 += id;
+                    xup3 += id;
+                    xdn6 += id;
+                    xdn8 += id;
+                }
+                is = shl(id, 1) - n2;
+                id = shl(id, 2);
+            }
+        }
+    }
+
+    /*-----------------------------------------------------------------*
+     * Length two butterflies
+     *-----------------------------------------------------------------*/
+
+    is = 1;
+    id = 4;
+    WHILE ( LT_16(is, n) )
+    {
+        xi0 = x + is;
+        xi1 = xi0 + 1;
+
+        FOR ( i0 = is; i0 <= n; i0 += id )
+        {
+            r1 = *xi0;
+            *xi0 = L_add(r1, *xi1);
+            *xi1 = L_sub(r1, *xi1);
+            xi0 += id;
+            xi1 += id;
+        }
+        is = shl(id, 1) - 1;
+        id = shl(id, 2);
+    }
+
+    /*-----------------------------------------------------------------*
+     * Digit reverse counter
+     *-----------------------------------------------------------------*/
+
+    idx = fft256_read_indexes; /* bit-reverse LUT for 256 points, re-scaled for 128/512 */
+    xi0 = &temp[0] - 1; /* 1-based view of temp[] for the normalization loop below */
+    IF ( EQ_16(n, 128) )
+    {
+        FOR ( i = 0; i < n; i++ )
+        {
+            j = *idx++;
+            temp[i] = x[1 + shr( j, 1 )]; /* halve LUT index for n == 128 */
+        }
+    }
+    ELSE IF ( EQ_16(n, 256) )
+    {
+        FOR ( i = 0; i < n; i++ )
+        {
+            j = *idx++;
+            temp[i] = x[1 + j];
+        }
+    }
+    ELSE IF ( EQ_16(n, 512) )
+    {
+        FOR ( i = 0; i < 256; i++ )
+        {
+            j = *idx++;
+            temp[i] = x[1 + 2 * j]; /* even positions */
+            temp[i + 256] = x[2 + 2 * j]; /* odd positions */
+        }
+    }
+    ELSE
+    {
+        /* generic in-place swap; normalization then reads from x directly */
+        xi0 = x;
+        j = 1;
+        FOR ( i = 1; i < n; i++ )
+        {
+            IF ( LT_16(i, j) )
+            {
+                xt = x[j];
+                x[j] = x[i];
+                x[i] = xt;
+            }
+            k = shr(n, 1);
+            WHILE ( LT_16(k, j) )
+            {
+                j = sub(j, k);
+                k = shr(k, 1);
+            }
+            j = add(j, k);
+        }
+    }
+
+    /*-----------------------------------------------------------------*
+     * Normalization
+     *-----------------------------------------------------------------*/
+
+    FOR ( i = 1; i <= n; i++ )
+    {
+        x[i] = Mpy_32_16_1(xi0[i], n_inv); /* copy back from reorder source, scaled by 1/n */
+    }
+
+    return;
+}
+#endif
\ No newline at end of file
diff --git a/lib_com/ivas_mdct_imdct_fx.c b/lib_com/ivas_mdct_imdct_fx.c
new file mode 100644
index 0000000000000000000000000000000000000000..82e22a6b820639a6f166530906677a3674308354
--- /dev/null
+++ b/lib_com/ivas_mdct_imdct_fx.c
@@ -0,0 +1,399 @@
+/******************************************************************************************************
+
+ (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository. All Rights Reserved.
+
+ This software is protected by copyright law and by international treaties.
+ The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository retain full ownership rights in their respective contributions in
+ the software. This notice grants no license of any kind, including but not limited to patent
+ license, nor is any license granted by implication, estoppel or otherwise.
+
+ Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+ contributions.
+
+ This software is provided "AS IS", without any express or implied warranties. The software is in the
+ development stage. It is intended exclusively for experts who have experience with such software and
+ solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+ and fitness for a particular purpose are hereby disclaimed and excluded.
+
+ Any dispute, controversy or claim arising under or in relation to providing this software shall be
+ submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+ accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+ the United Nations Convention on Contracts on the International Sales of Goods.
+
+*******************************************************************************************************/
+
+#include <assert.h>
+#include "options.h"
+#include "prot.h"
+#include "prot_fx1.h"
+#include "ivas_rom_com_fx.h"
+#ifdef DEBUGGING
+#include "debug.h"
+#endif
+#include "ivas_stat_com.h"
+#include "wmc_auto.h"
+
+void ivas_get_twid_factors_fx1(
+ const Word16 length,
+ const Word16 **pTwid_re,
+ const Word16 **pTwid_im );
+
+void DoFFT_fx(
+ Word32 *re2,
+ Word32 *im2,
+ const Word16 length );
+
+Word32 ivas_get_mdct_scaling_gain_fx(
+ const Word16 dct_len_by_2 );
+
+
+/*------------------------------------------------------------------------------------------*
+ * Local constants
+ *------------------------------------------------------------------------------------------*/
+
+#define IVAS_MDCT_SCALING_GAIN_48k_Q31 0x00001086 /* 1.9699011974118126e-06f */
+#define IVAS_MDCT_SCALING_GAIN_48k_Q46 0x08432A51
+#define IVAS_MDCT_SCALING_GAIN_32k_Q31 0x000018C9 /* 2.9548517961177197e-06f */
+#define IVAS_MDCT_SCALING_GAIN_32k_Q46 0x0C64BF7A
+#define IVAS_MDCT_SCALING_GAIN_16k_Q31 0X00003193 /* 5.909703592235439e-06f */
+#define IVAS_MDCT_SCALING_GAIN_16k_Q46 0x18C97EF4
+
+
+#define IVAS_ONE_BY_IMDCT_SCALING_GAIN_Q16 0x08432A51 /* 1 / 2115.165304808f */
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_tda_fx()
+ *
+ * Time domain aliasing
+ *-----------------------------------------------------------------------------------------*/
+
+void ivas_tda_fx(
+    const Word32 *pIn,  /* i : time domain buffer of size 2*length  */
+    Word32 *pOut,       /* o : time domain buffer of size length    */
+    const Word16 length /* i : length of time aliased signal buffer */
+)
+{
+    Word16 k;
+    const Word16 half = shr(length, 1);
+
+    /* Fold the 2*length input samples down to length samples (time-domain aliasing). */
+    FOR ( k = 0; k < half; k++ )
+    {
+        pOut[k] = L_sub(pIn[half + k], pIn[half - k - 1]);
+        pOut[half + k] = L_add(pIn[2 * length - k - 1], pIn[length + k]);
+    }
+
+    return;
+}
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_dct_windowing_fx()
+ *
+ * Windowing block, input is passed through Fielder window
+ *-----------------------------------------------------------------------------------------*/
+
+void ivas_dct_windowing_fx(
+    const Word16 fade_len,        /* i : length of the window fade region               */
+    const Word16 full_len,        /* i : total assembled/windowed output length         */
+    const Word16 dct_len,         /* i : number of payload samples taken from pTemp_lfe */
+    const Word16 zero_pad_len,    /* i : leading zero-padding length                    */
+    const Word32 *pWindow_coeffs, /* i : window coefficients (fade_len entries read)    */
+    const Word16 frame_len,       /* i : output frame length (tail beyond full_len is zeroed) */
+    Word32 *pOut_buf,             /* o : assembled and windowed output buffer           */
+    Word32 *pBuffer_prev,         /* i/o: overlap memory from the previous call, updated for the next */
+    Word32 *pTemp_lfe )           /* i : input samples to be windowed                   */
+{
+    Word16 i;
+    Word16 rem_len = 0;
+
+    /* Assemble the frame: [zeros | previous overlap | payload] */
+    Copy32( pBuffer_prev, pOut_buf + zero_pad_len, fade_len );
+
+    Copy32( pTemp_lfe, ( pOut_buf + fade_len + zero_pad_len ), dct_len );
+
+    set_l(pOut_buf, zero_pad_len, 0);
+
+    /* Save the tail as overlap memory for the next call.
+       NOTE(review): saved before the fade-out below is applied - confirm intended. */
+    Copy32( ( pOut_buf + full_len - fade_len ), pBuffer_prev, fade_len );
+
+    /* Fade-in: apply the window forward over the first fade_len samples after the padding */
+    FOR ( i = 0; i < fade_len; i++ )
+    {
+        pOut_buf[zero_pad_len + i] = Mult_32_32(pOut_buf[zero_pad_len + i], pWindow_coeffs[i]);
+    }
+
+    /* Fade-out: apply the window in reverse over the remaining samples */
+    rem_len = full_len - ( zero_pad_len * 3 + fade_len );
+
+    FOR ( i = 0; i < rem_len; i++ )
+    {
+        pOut_buf[zero_pad_len * 3 + fade_len + i] = Mult_32_32(pOut_buf[zero_pad_len * 3 + fade_len + i], pWindow_coeffs[fade_len - i - 1]);
+    }
+
+    /* Zero the unused tail of the frame */
+    set_l(&pOut_buf[full_len], frame_len - full_len, 0);
+
+    return;
+}
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_mdct_fx()
+ *
+ * MDCT implementation
+ *-----------------------------------------------------------------------------------------*/
+
+void ivas_mdct_fx(
+    const Word32 *pIn,   /* i : input time-domain block (2*length samples) */
+    Word32 *pOut,        /* o : MDCT coefficients (length samples)         */
+    const Word16 length, /* i : MDCT length                                */
+    Word16 *q_out)       /* i/o: Q factor of the output, updated here      */
+{
+    const Word16 *pTwid_re, *pTwid_im;
+    Word16 i, len_by_2;
+    Word32 re[IVAS_480_PT_LEN], im[IVAS_480_PT_LEN];
+    Word32 ivas_mdct_scaling_gain;
+
+    len_by_2 = shr(length, 1);
+    ivas_mdct_scaling_gain = ivas_get_mdct_scaling_gain_fx( len_by_2 ); /* Q46 gain */
+
+    ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im );
+
+    /* Time-domain aliasing: fold the 2*length input into length samples.
+       This loop duplicated ivas_tda_fx() verbatim, so reuse it instead. */
+    ivas_tda_fx( pIn, pOut, length );
+
+    /* Pre-twiddle: pair even/odd-indexed samples into a complex sequence */
+    FOR ( i = 0; i < len_by_2; i++ )
+    {
+        re[i] = L_negate(L_add(Mpy_32_16_1( pOut[2 * i], pTwid_re[i]), Mpy_32_16_1(pOut[length - 1 - 2 * i], pTwid_im[i])));
+        im[i] = L_sub(Mpy_32_16_1(pOut[length - 1 - 2 * i], pTwid_re[i]), Mpy_32_16_1(pOut[2 * i], pTwid_im[i]));
+    }
+
+    DoFFT_fx( &re[0], &im[0], len_by_2 );
+
+    /* Apply the MDCT scaling gain to every bin */
+    FOR ( i = 0; i < len_by_2; i++ )
+    {
+        re[i] = Mult_32_32(re[i], ivas_mdct_scaling_gain);
+        im[i] = Mult_32_32(im[i], ivas_mdct_scaling_gain);
+    }
+    *q_out = *q_out + Q15; /* NOTE(review): net Q change from the Q46 gain through Mult_32_32 - confirm */
+
+    /* Post-twiddle */
+    FOR ( i = 0; i < len_by_2; i++ )
+    {
+        Word32 tmp;
+        tmp = L_sub(Mpy_32_16_1(re[i], pTwid_re[i]), Mpy_32_16_1(im[i], pTwid_im[i]));
+        im[i] = L_add(Mpy_32_16_1(im[i], pTwid_re[i]), Mpy_32_16_1(re[i], pTwid_im[i]));
+        re[i] = tmp;
+    }
+
+    /* Interleave the complex result into MDCT output order */
+    FOR ( i = 0; i < len_by_2; i++ )
+    {
+        pOut[length - 2 * i - 1] = re[i];
+        pOut[2 * i] = im[i];
+    }
+
+    return;
+}
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_ifft_cplx()
+ *
+ * Complex IFFT implementation using DoFFT
+ *-----------------------------------------------------------------------------------------*/
+
+static void ivas_ifft_cplx(
+    Word32 *re,
+    Word32 *im,
+    const Word16 length )
+{
+    Word16 idx;
+    const Word16 half = shr(length, 1);
+    const Word32 inv_gain = IVAS_ONE_BY_IMDCT_SCALING_GAIN_Q16;
+
+    /* Scale every bin by 1/gain and reverse the spectrum order (bins 1..length-1)
+       so that the forward FFT below computes an inverse transform. */
+    re[0] = Mult_32_32(re[0], inv_gain);
+    im[0] = Mult_32_32(im[0], inv_gain);
+
+    FOR ( idx = 1; idx <= half; idx++ )
+    {
+        Word32 swap;
+
+        swap = Mult_32_32(re[length - idx], inv_gain);
+        re[length - idx] = Mult_32_32(re[idx], inv_gain);
+        re[idx] = swap;
+
+        swap = Mult_32_32(im[length - idx], inv_gain);
+        im[length - idx] = Mult_32_32(im[idx], inv_gain);
+        im[idx] = swap;
+    }
+
+    DoFFT_fx( re, im, length );
+
+    return;
+}
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_itda()
+ *
+ * Inverse time domain alias implementation
+ *-----------------------------------------------------------------------------------------*/
+
+void ivas_itda_fx(
+    const Word32 *re,   /* i : time aliased signal after IDCT       */
+    Word32 *pOut,       /* o : time domain buffer of size 2*length  */
+    const Word16 length /* i : length of time aliased signal buffer */
+)
+{
+    Word16 k;
+    const Word16 half = shr(length, 1);
+
+    /* Expand length samples to 2*length by sign-flipped / plain mirroring (inverse TDA). */
+    FOR ( k = 0; k < half; k++ )
+    {
+        pOut[k] = -re[half - k - 1];
+        pOut[half + k] = re[k];
+        pOut[length + k] = re[half + k];
+        pOut[3 * half + k] = re[length - k - 1];
+    }
+
+    return;
+}
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_imdct_fx()
+ *
+ * IMDCT implementation
+ *-----------------------------------------------------------------------------------------*/
+
+void ivas_imdct_fx(
+    const Word32 *pIn,   /* i : MDCT coefficients (length samples)    */
+    Word32 *pOut,        /* o : time-domain output (2*length samples) */
+    const Word16 length, /* i : IMDCT length                          */
+    Word16 *q_out)       /* i/o: Q factor of the output, updated here */
+{
+    const Word16 *pTwid_re, *pTwid_im;
+    Word16 len_by_2 = shr(length, 1);
+    Word16 i;
+    Word32 re[IVAS_480_PT_LEN];
+    Word32 im[IVAS_480_PT_LEN];
+
+    ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im );
+
+    /* Pre-twiddle: de-interleave the spectrum into a complex sequence */
+    FOR ( i = 0; i < len_by_2; i++ )
+    {
+        re[i] = L_add(Mpy_32_16_1(pIn[length - 2 * i - 1], pTwid_re[i]), Mpy_32_16_1(pIn[2 * i], pTwid_im[i]));
+        im[i] = L_sub( Mpy_32_16_1( pIn[2 * i], pTwid_re[i] ), Mpy_32_16_1( pIn[length - 2 * i - 1], pTwid_im[i] ) );
+    }
+
+    ivas_ifft_cplx( &re[0], &im[0], len_by_2 );
+    IF (GT_16(len_by_2, 0))
+    {
+        *q_out = *q_out - Q15; /* NOTE(review): accounts for Mult_32_32 scaling inside ivas_ifft_cplx - confirm */
+    }
+
+    /* Post-twiddle */
+    FOR ( i = 0; i < len_by_2; i++ )
+    {
+        Word32 tmp;
+        tmp = L_add(Mpy_32_16_1(re[i], pTwid_re[i]), Mpy_32_16_1(im[i], pTwid_im[i]));
+        im[i] = L_sub(Mpy_32_16_1(im[i], pTwid_re[i]), Mpy_32_16_1(re[i], pTwid_im[i]));
+        re[i] = tmp;
+    }
+
+    /* Re-interleave into time-aliased order; iterate downward so re[i] is read
+       before any iteration overwrites it (2*i >= i for all i) */
+    FOR ( i = ( len_by_2 - 1 ); i >= 0; i-- )
+    {
+        re[2 * i + 1] = im[( len_by_2 - 1 ) - i];
+        re[2 * i] = -re[i];
+    }
+
+    /* Inverse time-domain aliasing: this loop duplicated ivas_itda_fx() verbatim,
+       so reuse it instead. */
+    ivas_itda_fx( re, pOut, length );
+
+    return;
+}
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_get_twid_factors_fx1()
+ *
+ * Sets/Maps the fft twiddle tables based on fft length
+ *-----------------------------------------------------------------------------------------*/
+
+void ivas_get_twid_factors_fx1(
+    const Word16 length,
+    const Word16 **pTwid_re,
+    const Word16 **pTwid_im )
+{
+    /* Select the cosine/sine twiddle tables matching the requested FFT length.
+       Outputs are left untouched for unsupported lengths (assert fires in debug). */
+    IF ( EQ_16(length, 480) )
+    {
+        *pTwid_re = ivas_cos_twiddle_480_fx;
+        *pTwid_im = ivas_sin_twiddle_480_fx;
+    }
+    ELSE IF ( EQ_16(length, 320) )
+    {
+        *pTwid_re = ivas_cos_twiddle_320_fx;
+        *pTwid_im = ivas_sin_twiddle_320_fx;
+    }
+    ELSE IF ( EQ_16(length, 160) )
+    {
+        *pTwid_re = ivas_cos_twiddle_160_fx;
+        *pTwid_im = ivas_sin_twiddle_160_fx;
+    }
+    ELSE IF ( EQ_16(length, 80) )
+    {
+        *pTwid_re = ivas_cos_twiddle_80_fx;
+        *pTwid_im = ivas_sin_twiddle_80_fx;
+    }
+    ELSE
+    {
+        assert( !"Not supported FFT length!" );
+    }
+
+    return;
+}
+
+
+/*-----------------------------------------------------------------------------------------*
+ * Function ivas_get_mdct_scaling_gain_fx()
+ *
+ * Get scaling gain for MDCT functions
+ *-----------------------------------------------------------------------------------------*/
+
+Word32 ivas_get_mdct_scaling_gain_fx(
+    const Word16 dct_len_by_2 )
+{
+    /* Return the Q46 MDCT scaling gain for the supported half-DCT lengths. */
+    Word32 gain = 0;
+
+    IF ( EQ_16(dct_len_by_2, L_FRAME48k >> 2) )
+    {
+        gain = IVAS_MDCT_SCALING_GAIN_48k_Q46;
+    }
+    ELSE IF ( EQ_16(dct_len_by_2, L_FRAME32k >> 2) )
+    {
+        gain = IVAS_MDCT_SCALING_GAIN_32k_Q46;
+    }
+    ELSE IF ( EQ_16(dct_len_by_2, L_FRAME16k >> 2) )
+    {
+        gain = IVAS_MDCT_SCALING_GAIN_16k_Q46;
+    }
+    ELSE
+    {
+        assert( !"Unsupported frame length!" );
+    }
+
+    return gain;
+}
diff --git a/lib_com/ivas_rom_com_fx.c b/lib_com/ivas_rom_com_fx.c
new file mode 100644
index 0000000000000000000000000000000000000000..d5da101f333c2601d8c3801ed07770849c2c5870
--- /dev/null
+++ b/lib_com/ivas_rom_com_fx.c
@@ -0,0 +1,199 @@
+/******************************************************************************************************
+
+ (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository. All Rights Reserved.
+
+ This software is protected by copyright law and by international treaties.
+ The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository retain full ownership rights in their respective contributions in
+ the software. This notice grants no license of any kind, including but not limited to patent
+ license, nor is any license granted by implication, estoppel or otherwise.
+
+ Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+ contributions.
+
+ This software is provided "AS IS", without any express or implied warranties. The software is in the
+ development stage. It is intended exclusively for experts who have experience with such software and
+ solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+ and fitness for a particular purpose are hereby disclaimed and excluded.
+
+ Any dispute, controversy or claim arising under or in relation to providing this software shall be
+ submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+ accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+ the United Nations Convention on Contracts on the International Sales of Goods.
+
+*******************************************************************************************************/
+
+#include
+#include "options.h"
+#include
+#include "cnst.h"
+#include "ivas_cnst.h"
+#include "ivas_rom_com_fx.h"
+#include "wmc_auto.h"
+
/* SHC(x): reinterpret a 16-bit hex literal as a signed Word16 (two's complement).
   The argument is parenthesized per macro hygiene rules (CERT PRE01-C) so that
   expression arguments cannot change the cast's binding. */
#define SHC( x ) ( (Word16) ( x ) )
+
+/* clang-format off */
/* Sine twiddle factors for the 480-point transform: IVAS_480_PT_LEN >> 1 entries,
   16-bit fixed point. All entries are negative in two's complement, i.e. the sine
   values are stored negated, ramping from ~0 down to ~-1.0 (Q15 assumed —
   NOTE(review): confirm sign/Q convention against the consuming FFT code). */
const Word16 ivas_sin_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ] = {
    SHC( 0xffe6 ), SHC( 0xff0f ), SHC( 0xfe39 ), SHC( 0xfd62 ), SHC( 0xfc8c ), SHC( 0xfbb6 ), SHC( 0xfadf ), SHC( 0xfa09 ),
    SHC( 0xf933 ), SHC( 0xf85d ), SHC( 0xf787 ), SHC( 0xf6b1 ), SHC( 0xf5db ), SHC( 0xf505 ), SHC( 0xf42f ), SHC( 0xf35a ),
    SHC( 0xf285 ), SHC( 0xf1af ), SHC( 0xf0da ), SHC( 0xf006 ), SHC( 0xef31 ), SHC( 0xee5c ), SHC( 0xed88 ), SHC( 0xecb4 ),
    SHC( 0xebe0 ), SHC( 0xeb0c ), SHC( 0xea39 ), SHC( 0xe966 ), SHC( 0xe893 ), SHC( 0xe7c0 ), SHC( 0xe6ed ), SHC( 0xe61b ),
    SHC( 0xe549 ), SHC( 0xe478 ), SHC( 0xe3a7 ), SHC( 0xe2d6 ), SHC( 0xe205 ), SHC( 0xe135 ), SHC( 0xe065 ), SHC( 0xdf95 ),
    SHC( 0xdec6 ), SHC( 0xddf7 ), SHC( 0xdd28 ), SHC( 0xdc5a ), SHC( 0xdb8c ), SHC( 0xdabf ), SHC( 0xd9f2 ), SHC( 0xd925 ),
    SHC( 0xd859 ), SHC( 0xd78d ), SHC( 0xd6c2 ), SHC( 0xd5f7 ), SHC( 0xd52d ), SHC( 0xd463 ), SHC( 0xd39a ), SHC( 0xd2d1 ),
    SHC( 0xd208 ), SHC( 0xd141 ), SHC( 0xd079 ), SHC( 0xcfb2 ), SHC( 0xceec ), SHC( 0xce26 ), SHC( 0xcd61 ), SHC( 0xcc9c ),
    SHC( 0xcbd8 ), SHC( 0xcb15 ), SHC( 0xca52 ), SHC( 0xc98f ), SHC( 0xc8cd ), SHC( 0xc80c ), SHC( 0xc74c ), SHC( 0xc68c ),
    SHC( 0xc5cc ), SHC( 0xc50e ), SHC( 0xc450 ), SHC( 0xc392 ), SHC( 0xc2d5 ), SHC( 0xc219 ), SHC( 0xc15e ), SHC( 0xc0a3 ),
    SHC( 0xbfe9 ), SHC( 0xbf30 ), SHC( 0xbe77 ), SHC( 0xbdc0 ), SHC( 0xbd08 ), SHC( 0xbc52 ), SHC( 0xbb9c ), SHC( 0xbae7 ),
    SHC( 0xba33 ), SHC( 0xb980 ), SHC( 0xb8cd ), SHC( 0xb81b ), SHC( 0xb76a ), SHC( 0xb6ba ), SHC( 0xb60b ), SHC( 0xb55c ),
    SHC( 0xb4ae ), SHC( 0xb401 ), SHC( 0xb355 ), SHC( 0xb2aa ), SHC( 0xb1ff ), SHC( 0xb156 ), SHC( 0xb0ad ), SHC( 0xb005 ),
    SHC( 0xaf5e ), SHC( 0xaeb8 ), SHC( 0xae13 ), SHC( 0xad6e ), SHC( 0xaccb ), SHC( 0xac28 ), SHC( 0xab87 ), SHC( 0xaae6 ),
    SHC( 0xaa47 ), SHC( 0xa9a8 ), SHC( 0xa90a ), SHC( 0xa86d ), SHC( 0xa7d1 ), SHC( 0xa736 ), SHC( 0xa69c ), SHC( 0xa603 ),
    SHC( 0xa56b ), SHC( 0xa4d4 ), SHC( 0xa43e ), SHC( 0xa3a9 ), SHC( 0xa315 ), SHC( 0xa282 ), SHC( 0xa1f0 ), SHC( 0xa15f ),
    SHC( 0xa0cf ), SHC( 0xa040 ), SHC( 0x9fb3 ), SHC( 0x9f26 ), SHC( 0x9e9a ), SHC( 0x9e0f ), SHC( 0x9d86 ), SHC( 0x9cfd ),
    SHC( 0x9c76 ), SHC( 0x9bf0 ), SHC( 0x9b6b ), SHC( 0x9ae7 ), SHC( 0x9a64 ), SHC( 0x99e2 ), SHC( 0x9961 ), SHC( 0x98e1 ),
    SHC( 0x9863 ), SHC( 0x97e6 ), SHC( 0x9769 ), SHC( 0x96ee ), SHC( 0x9674 ), SHC( 0x95fc ), SHC( 0x9584 ), SHC( 0x950e ),
    SHC( 0x9498 ), SHC( 0x9424 ), SHC( 0x93b1 ), SHC( 0x9340 ), SHC( 0x92cf ), SHC( 0x9260 ), SHC( 0x91f2 ), SHC( 0x9185 ),
    SHC( 0x9119 ), SHC( 0x90af ), SHC( 0x9045 ), SHC( 0x8fdd ), SHC( 0x8f77 ), SHC( 0x8f11 ), SHC( 0x8ead ), SHC( 0x8e4a ),
    SHC( 0x8de8 ), SHC( 0x8d87 ), SHC( 0x8d28 ), SHC( 0x8cca ), SHC( 0x8c6d ), SHC( 0x8c12 ), SHC( 0x8bb7 ), SHC( 0x8b5e ),
    SHC( 0x8b07 ), SHC( 0x8ab0 ), SHC( 0x8a5b ), SHC( 0x8a07 ), SHC( 0x89b5 ), SHC( 0x8963 ), SHC( 0x8913 ), SHC( 0x88c5 ),
    SHC( 0x8877 ), SHC( 0x882b ), SHC( 0x87e1 ), SHC( 0x8797 ), SHC( 0x874f ), SHC( 0x8708 ), SHC( 0x86c3 ), SHC( 0x867f ),
    SHC( 0x863c ), SHC( 0x85fb ), SHC( 0x85ba ), SHC( 0x857c ), SHC( 0x853e ), SHC( 0x8502 ), SHC( 0x84c7 ), SHC( 0x848e ),
    SHC( 0x8456 ), SHC( 0x841f ), SHC( 0x83ea ), SHC( 0x83b6 ), SHC( 0x8384 ), SHC( 0x8352 ), SHC( 0x8323 ), SHC( 0x82f4 ),
    SHC( 0x82c7 ), SHC( 0x829b ), SHC( 0x8271 ), SHC( 0x8248 ), SHC( 0x8220 ), SHC( 0x81fa ), SHC( 0x81d5 ), SHC( 0x81b2 ),
    SHC( 0x8190 ), SHC( 0x816f ), SHC( 0x8150 ), SHC( 0x8132 ), SHC( 0x8115 ), SHC( 0x80fa ), SHC( 0x80e0 ), SHC( 0x80c8 ),
    SHC( 0x80b1 ), SHC( 0x809c ), SHC( 0x8088 ), SHC( 0x8075 ), SHC( 0x8063 ), SHC( 0x8053 ), SHC( 0x8045 ), SHC( 0x8038 ),
    SHC( 0x802c ), SHC( 0x8022 ), SHC( 0x8019 ), SHC( 0x8011 ), SHC( 0x800b ), SHC( 0x8006 ), SHC( 0x8003 ), SHC( 0x8001 ),
};
+
/* Cosine twiddle factors for the 480-point transform: IVAS_480_PT_LEN >> 1 entries,
   16-bit fixed point, monotonically decreasing from ~1.0 (0x7fff) toward 0
   (Q15 assumed — NOTE(review): confirm Q convention against the consuming FFT code). */
const Word16 ivas_cos_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ] = {
    SHC( 0x7fff ), SHC( 0x7fff ), SHC( 0x7ffc ), SHC( 0x7ff9 ), SHC( 0x7ff4 ), SHC( 0x7fed ), SHC( 0x7fe5 ), SHC( 0x7fdc ),
    SHC( 0x7fd1 ), SHC( 0x7fc5 ), SHC( 0x7fb8 ), SHC( 0x7fa9 ), SHC( 0x7f98 ), SHC( 0x7f87 ), SHC( 0x7f74 ), SHC( 0x7f5f ),
    SHC( 0x7f49 ), SHC( 0x7f32 ), SHC( 0x7f19 ), SHC( 0x7eff ), SHC( 0x7ee4 ), SHC( 0x7ec7 ), SHC( 0x7ea9 ), SHC( 0x7e89 ),
    SHC( 0x7e68 ), SHC( 0x7e45 ), SHC( 0x7e22 ), SHC( 0x7dfc ), SHC( 0x7dd6 ), SHC( 0x7dae ), SHC( 0x7d85 ), SHC( 0x7d5a ),
    SHC( 0x7d2e ), SHC( 0x7d00 ), SHC( 0x7cd2 ), SHC( 0x7ca1 ), SHC( 0x7c70 ), SHC( 0x7c3d ), SHC( 0x7c09 ), SHC( 0x7bd3 ),
    SHC( 0x7b9c ), SHC( 0x7b64 ), SHC( 0x7b2a ), SHC( 0x7aef ), SHC( 0x7ab2 ), SHC( 0x7a75 ), SHC( 0x7a36 ), SHC( 0x79f5 ),
    SHC( 0x79b3 ), SHC( 0x7970 ), SHC( 0x792c ), SHC( 0x78e6 ), SHC( 0x789f ), SHC( 0x7857 ), SHC( 0x780d ), SHC( 0x77c2 ),
    SHC( 0x7775 ), SHC( 0x7728 ), SHC( 0x76d9 ), SHC( 0x7689 ), SHC( 0x7637 ), SHC( 0x75e4 ), SHC( 0x7590 ), SHC( 0x753a ),
    SHC( 0x74e4 ), SHC( 0x748c ), SHC( 0x7432 ), SHC( 0x73d8 ), SHC( 0x737c ), SHC( 0x731f ), SHC( 0x72c0 ), SHC( 0x7261 ),
    SHC( 0x7200 ), SHC( 0x719e ), SHC( 0x713a ), SHC( 0x70d6 ), SHC( 0x7070 ), SHC( 0x7009 ), SHC( 0x6fa0 ), SHC( 0x6f37 ),
    SHC( 0x6ecc ), SHC( 0x6e60 ), SHC( 0x6df3 ), SHC( 0x6d84 ), SHC( 0x6d15 ), SHC( 0x6ca4 ), SHC( 0x6c32 ), SHC( 0x6bbf ),
    SHC( 0x6b4a ), SHC( 0x6ad5 ), SHC( 0x6a5e ), SHC( 0x69e6 ), SHC( 0x696d ), SHC( 0x68f3 ), SHC( 0x6878 ), SHC( 0x67fb ),
    SHC( 0x677e ), SHC( 0x66ff ), SHC( 0x667f ), SHC( 0x65fe ), SHC( 0x657c ), SHC( 0x64f9 ), SHC( 0x6474 ), SHC( 0x63ef ),
    SHC( 0x6368 ), SHC( 0x62e0 ), SHC( 0x6258 ), SHC( 0x61ce ), SHC( 0x6143 ), SHC( 0x60b7 ), SHC( 0x602a ), SHC( 0x5f9c ),
    SHC( 0x5f0d ), SHC( 0x5e7d ), SHC( 0x5dec ), SHC( 0x5d59 ), SHC( 0x5cc6 ), SHC( 0x5c32 ), SHC( 0x5b9d ), SHC( 0x5b06 ),
    SHC( 0x5a6f ), SHC( 0x59d7 ), SHC( 0x593d ), SHC( 0x58a3 ), SHC( 0x5808 ), SHC( 0x576c ), SHC( 0x56cf ), SHC( 0x5631 ),
    SHC( 0x5592 ), SHC( 0x54f2 ), SHC( 0x5451 ), SHC( 0x53af ), SHC( 0x530c ), SHC( 0x5269 ), SHC( 0x51c4 ), SHC( 0x511f ),
    SHC( 0x5078 ), SHC( 0x4fd1 ), SHC( 0x4f29 ), SHC( 0x4e80 ), SHC( 0x4dd6 ), SHC( 0x4d2b ), SHC( 0x4c80 ), SHC( 0x4bd4 ),
    SHC( 0x4b26 ), SHC( 0x4a78 ), SHC( 0x49c9 ), SHC( 0x491a ), SHC( 0x4869 ), SHC( 0x47b8 ), SHC( 0x4706 ), SHC( 0x4653 ),
    SHC( 0x45a0 ), SHC( 0x44eb ), SHC( 0x4436 ), SHC( 0x4380 ), SHC( 0x42ca ), SHC( 0x4213 ), SHC( 0x415b ), SHC( 0x40a2 ),
    SHC( 0x3fe8 ), SHC( 0x3f2e ), SHC( 0x3e73 ), SHC( 0x3db8 ), SHC( 0x3cfb ), SHC( 0x3c3f ), SHC( 0x3b81 ), SHC( 0x3ac3 ),
    SHC( 0x3a04 ), SHC( 0x3944 ), SHC( 0x3884 ), SHC( 0x37c4 ), SHC( 0x3702 ), SHC( 0x3640 ), SHC( 0x357e ), SHC( 0x34bb ),
    SHC( 0x33f7 ), SHC( 0x3333 ), SHC( 0x326e ), SHC( 0x31a8 ), SHC( 0x30e2 ), SHC( 0x301c ), SHC( 0x2f55 ), SHC( 0x2e8e ),
    SHC( 0x2dc5 ), SHC( 0x2cfd ), SHC( 0x2c34 ), SHC( 0x2b6a ), SHC( 0x2aa0 ), SHC( 0x29d6 ), SHC( 0x290b ), SHC( 0x2840 ),
    SHC( 0x2774 ), SHC( 0x26a8 ), SHC( 0x25db ), SHC( 0x250e ), SHC( 0x2440 ), SHC( 0x2373 ), SHC( 0x22a4 ), SHC( 0x21d6 ),
    SHC( 0x2107 ), SHC( 0x2037 ), SHC( 0x1f67 ), SHC( 0x1e97 ), SHC( 0x1dc7 ), SHC( 0x1cf6 ), SHC( 0x1c25 ), SHC( 0x1b54 ),
    SHC( 0x1a82 ), SHC( 0x19b0 ), SHC( 0x18de ), SHC( 0x180b ), SHC( 0x1739 ), SHC( 0x1666 ), SHC( 0x1592 ), SHC( 0x14bf ),
    SHC( 0x13eb ), SHC( 0x1317 ), SHC( 0x1243 ), SHC( 0x116f ), SHC( 0x109a ), SHC( 0x0fc5 ), SHC( 0x0ef0 ), SHC( 0x0e1b ),
    SHC( 0x0d46 ), SHC( 0x0c71 ), SHC( 0x0b9b ), SHC( 0x0ac5 ), SHC( 0x09f0 ), SHC( 0x091a ), SHC( 0x0844 ), SHC( 0x076e ),
    SHC( 0x0698 ), SHC( 0x05c1 ), SHC( 0x04eb ), SHC( 0x0415 ), SHC( 0x033e ), SHC( 0x0268 ), SHC( 0x0192 ), SHC( 0x00bb ),
};
+
/* Sine twiddle factors for the 320-point transform: IVAS_320_PT_LEN >> 1 entries,
   16-bit fixed point, stored negated (all values negative, ramping ~0 to ~-1.0;
   Q15 assumed — NOTE(review): confirm sign/Q convention against the consumer). */
const Word16 ivas_sin_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ] = {
    SHC( 0xffd8 ), SHC( 0xfe97 ), SHC( 0xfd55 ), SHC( 0xfc13 ), SHC( 0xfad2 ), SHC( 0xf990 ), SHC( 0xf84f ), SHC( 0xf70e ),
    SHC( 0xf5cd ), SHC( 0xf48d ), SHC( 0xf34d ), SHC( 0xf20d ), SHC( 0xf0cd ), SHC( 0xef8e ), SHC( 0xee4f ), SHC( 0xed11 ),
    SHC( 0xebd3 ), SHC( 0xea95 ), SHC( 0xe958 ), SHC( 0xe81c ), SHC( 0xe6e0 ), SHC( 0xe5a5 ), SHC( 0xe46b ), SHC( 0xe331 ),
    SHC( 0xe1f8 ), SHC( 0xe0c0 ), SHC( 0xdf88 ), SHC( 0xde51 ), SHC( 0xdd1b ), SHC( 0xdbe6 ), SHC( 0xdab2 ), SHC( 0xd97f ),
    SHC( 0xd84c ), SHC( 0xd71b ), SHC( 0xd5eb ), SHC( 0xd4bb ), SHC( 0xd38d ), SHC( 0xd260 ), SHC( 0xd134 ), SHC( 0xd009 ),
    SHC( 0xcee0 ), SHC( 0xcdb7 ), SHC( 0xcc90 ), SHC( 0xcb6a ), SHC( 0xca45 ), SHC( 0xc922 ), SHC( 0xc800 ), SHC( 0xc6df ),
    SHC( 0xc5c0 ), SHC( 0xc4a3 ), SHC( 0xc386 ), SHC( 0xc26c ), SHC( 0xc152 ), SHC( 0xc03b ), SHC( 0xbf24 ), SHC( 0xbe10 ),
    SHC( 0xbcfd ), SHC( 0xbbec ), SHC( 0xbadc ), SHC( 0xb9ce ), SHC( 0xb8c2 ), SHC( 0xb7b8 ), SHC( 0xb6af ), SHC( 0xb5a8 ),
    SHC( 0xb4a3 ), SHC( 0xb3a0 ), SHC( 0xb29f ), SHC( 0xb1a0 ), SHC( 0xb0a2 ), SHC( 0xafa7 ), SHC( 0xaeae ), SHC( 0xadb6 ),
    SHC( 0xacc1 ), SHC( 0xabcd ), SHC( 0xaadc ), SHC( 0xa9ed ), SHC( 0xa900 ), SHC( 0xa815 ), SHC( 0xa72c ), SHC( 0xa646 ),
    SHC( 0xa562 ), SHC( 0xa480 ), SHC( 0xa3a0 ), SHC( 0xa2c2 ), SHC( 0xa1e7 ), SHC( 0xa10e ), SHC( 0xa037 ), SHC( 0x9f63 ),
    SHC( 0x9e91 ), SHC( 0x9dc2 ), SHC( 0x9cf5 ), SHC( 0x9c2a ), SHC( 0x9b62 ), SHC( 0x9a9d ), SHC( 0x99da ), SHC( 0x9919 ),
    SHC( 0x985b ), SHC( 0x97a0 ), SHC( 0x96e7 ), SHC( 0x9630 ), SHC( 0x957d ), SHC( 0x94cc ), SHC( 0x941d ), SHC( 0x9371 ),
    SHC( 0x92c8 ), SHC( 0x9222 ), SHC( 0x917e ), SHC( 0x90dd ), SHC( 0x903f ), SHC( 0x8fa3 ), SHC( 0x8f0b ), SHC( 0x8e75 ),
    SHC( 0x8de2 ), SHC( 0x8d51 ), SHC( 0x8cc4 ), SHC( 0x8c39 ), SHC( 0x8bb2 ), SHC( 0x8b2d ), SHC( 0x8aab ), SHC( 0x8a2c ),
    SHC( 0x89af ), SHC( 0x8936 ), SHC( 0x88c0 ), SHC( 0x884c ), SHC( 0x87dc ), SHC( 0x876e ), SHC( 0x8704 ), SHC( 0x869c ),
    SHC( 0x8638 ), SHC( 0x85d6 ), SHC( 0x8578 ), SHC( 0x851c ), SHC( 0x84c4 ), SHC( 0x846e ), SHC( 0x841c ), SHC( 0x83cd ),
    SHC( 0x8381 ), SHC( 0x8337 ), SHC( 0x82f1 ), SHC( 0x82ae ), SHC( 0x826e ), SHC( 0x8231 ), SHC( 0x81f8 ), SHC( 0x81c1 ),
    SHC( 0x818e ), SHC( 0x815d ), SHC( 0x8130 ), SHC( 0x8106 ), SHC( 0x80df ), SHC( 0x80bb ), SHC( 0x809a ), SHC( 0x807d ),
    SHC( 0x8062 ), SHC( 0x804b ), SHC( 0x8037 ), SHC( 0x8026 ), SHC( 0x8018 ), SHC( 0x800e ), SHC( 0x8006 ), SHC( 0x8002 ),
};
+
/* Cosine twiddle factors for the 320-point transform: IVAS_320_PT_LEN >> 1 entries,
   16-bit fixed point, decreasing from ~1.0 (0x7fff) toward 0 (Q15 assumed —
   NOTE(review): confirm Q convention against the consuming FFT code). */
const Word16 ivas_cos_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ] = {
    SHC( 0x7fff ), SHC( 0x7ffe ), SHC( 0x7ff8 ), SHC( 0x7ff0 ), SHC( 0x7fe5 ), SHC( 0x7fd6 ), SHC( 0x7fc4 ), SHC( 0x7faf ),
    SHC( 0x7f97 ), SHC( 0x7f7c ), SHC( 0x7f5e ), SHC( 0x7f3c ), SHC( 0x7f18 ), SHC( 0x7ef0 ), SHC( 0x7ec5 ), SHC( 0x7e97 ),
    SHC( 0x7e66 ), SHC( 0x7e31 ), SHC( 0x7dfa ), SHC( 0x7dc0 ), SHC( 0x7d82 ), SHC( 0x7d41 ), SHC( 0x7cfe ), SHC( 0x7cb7 ),
    SHC( 0x7c6d ), SHC( 0x7c20 ), SHC( 0x7bd0 ), SHC( 0x7b7c ), SHC( 0x7b26 ), SHC( 0x7acd ), SHC( 0x7a71 ), SHC( 0x7a12 ),
    SHC( 0x79af ), SHC( 0x794a ), SHC( 0x78e2 ), SHC( 0x7876 ), SHC( 0x7808 ), SHC( 0x7797 ), SHC( 0x7723 ), SHC( 0x76ac ),
    SHC( 0x7632 ), SHC( 0x75b5 ), SHC( 0x7535 ), SHC( 0x74b2 ), SHC( 0x742d ), SHC( 0x73a4 ), SHC( 0x7319 ), SHC( 0x728b ),
    SHC( 0x71fa ), SHC( 0x7166 ), SHC( 0x70cf ), SHC( 0x7036 ), SHC( 0x6f9a ), SHC( 0x6efb ), SHC( 0x6e59 ), SHC( 0x6db5 ),
    SHC( 0x6d0e ), SHC( 0x6c64 ), SHC( 0x6bb8 ), SHC( 0x6b08 ), SHC( 0x6a57 ), SHC( 0x69a2 ), SHC( 0x68eb ), SHC( 0x6832 ),
    SHC( 0x6776 ), SHC( 0x66b7 ), SHC( 0x65f6 ), SHC( 0x6532 ), SHC( 0x646c ), SHC( 0x63a3 ), SHC( 0x62d8 ), SHC( 0x620a ),
    SHC( 0x613a ), SHC( 0x6068 ), SHC( 0x5f93 ), SHC( 0x5ebc ), SHC( 0x5de2 ), SHC( 0x5d07 ), SHC( 0x5c29 ), SHC( 0x5b48 ),
    SHC( 0x5a66 ), SHC( 0x5981 ), SHC( 0x589a ), SHC( 0x57b0 ), SHC( 0x56c5 ), SHC( 0x55d7 ), SHC( 0x54e8 ), SHC( 0x53f6 ),
    SHC( 0x5302 ), SHC( 0x520c ), SHC( 0x5114 ), SHC( 0x501a ), SHC( 0x4f1e ), SHC( 0x4e21 ), SHC( 0x4d21 ), SHC( 0x4c1f ),
    SHC( 0x4b1c ), SHC( 0x4a16 ), SHC( 0x490f ), SHC( 0x4806 ), SHC( 0x46fb ), SHC( 0x45ee ), SHC( 0x44e0 ), SHC( 0x43d0 ),
    SHC( 0x42be ), SHC( 0x41ab ), SHC( 0x4096 ), SHC( 0x3f80 ), SHC( 0x3e68 ), SHC( 0x3d4e ), SHC( 0x3c33 ), SHC( 0x3b16 ),
    SHC( 0x39f8 ), SHC( 0x38d8 ), SHC( 0x37b8 ), SHC( 0x3695 ), SHC( 0x3572 ), SHC( 0x344d ), SHC( 0x3326 ), SHC( 0x31ff ),
    SHC( 0x30d6 ), SHC( 0x2fac ), SHC( 0x2e81 ), SHC( 0x2d55 ), SHC( 0x2c27 ), SHC( 0x2af9 ), SHC( 0x29c9 ), SHC( 0x2899 ),
    SHC( 0x2767 ), SHC( 0x2635 ), SHC( 0x2501 ), SHC( 0x23cd ), SHC( 0x2297 ), SHC( 0x2161 ), SHC( 0x202a ), SHC( 0x1ef2 ),
    SHC( 0x1dba ), SHC( 0x1c81 ), SHC( 0x1b47 ), SHC( 0x1a0c ), SHC( 0x18d1 ), SHC( 0x1795 ), SHC( 0x1658 ), SHC( 0x151b ),
    SHC( 0x13de ), SHC( 0x12a0 ), SHC( 0x1161 ), SHC( 0x1022 ), SHC( 0x0ee3 ), SHC( 0x0da3 ), SHC( 0x0c63 ), SHC( 0x0b23 ),
    SHC( 0x09e2 ), SHC( 0x08a2 ), SHC( 0x0760 ), SHC( 0x061f ), SHC( 0x04de ), SHC( 0x039c ), SHC( 0x025b ), SHC( 0x0119 ),
};
+
/* Sine twiddle factors for the 160-point transform: IVAS_160_PT_LEN >> 1 entries,
   16-bit fixed point, stored negated (all values negative; Q15 assumed —
   NOTE(review): confirm sign/Q convention against the consumer). */
const Word16 ivas_sin_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ] = {
    SHC( 0xffb0 ), SHC( 0xfd2d ), SHC( 0xfaaa ), SHC( 0xf827 ), SHC( 0xf5a5 ), SHC( 0xf325 ), SHC( 0xf0a5 ), SHC( 0xee27 ),
    SHC( 0xebab ), SHC( 0xe931 ), SHC( 0xe6b9 ), SHC( 0xe443 ), SHC( 0xe1d1 ), SHC( 0xdf61 ), SHC( 0xdcf5 ), SHC( 0xda8c ),
    SHC( 0xd826 ), SHC( 0xd5c5 ), SHC( 0xd368 ), SHC( 0xd10f ), SHC( 0xceba ), SHC( 0xcc6b ), SHC( 0xca21 ), SHC( 0xc7dc ),
    SHC( 0xc59d ), SHC( 0xc363 ), SHC( 0xc12f ), SHC( 0xbf02 ), SHC( 0xbcdb ), SHC( 0xbaba ), SHC( 0xb8a1 ), SHC( 0xb68e ),
    SHC( 0xb483 ), SHC( 0xb27f ), SHC( 0xb083 ), SHC( 0xae8f ), SHC( 0xaca2 ), SHC( 0xaabe ), SHC( 0xa8e3 ), SHC( 0xa710 ),
    SHC( 0xa545 ), SHC( 0xa384 ), SHC( 0xa1cc ), SHC( 0xa01d ), SHC( 0x9e77 ), SHC( 0x9cdc ), SHC( 0x9b4a ), SHC( 0x99c1 ),
    SHC( 0x9843 ), SHC( 0x96d0 ), SHC( 0x9566 ), SHC( 0x9407 ), SHC( 0x92b3 ), SHC( 0x916a ), SHC( 0x902b ), SHC( 0x8ef8 ),
    SHC( 0x8dd0 ), SHC( 0x8cb3 ), SHC( 0x8ba1 ), SHC( 0x8a9b ), SHC( 0x89a0 ), SHC( 0x88b1 ), SHC( 0x87ce ), SHC( 0x86f7 ),
    SHC( 0x862c ), SHC( 0x856c ), SHC( 0x84b9 ), SHC( 0x8412 ), SHC( 0x8377 ), SHC( 0x82e9 ), SHC( 0x8267 ), SHC( 0x81f1 ),
    SHC( 0x8187 ), SHC( 0x812b ), SHC( 0x80da ), SHC( 0x8097 ), SHC( 0x805f ), SHC( 0x8035 ), SHC( 0x8017 ), SHC( 0x8005 ),
};
+
/* Cosine twiddle factors for the 160-point transform: IVAS_160_PT_LEN >> 1 entries,
   16-bit fixed point, decreasing from ~1.0 (0x7fff) toward 0 (Q15 assumed —
   NOTE(review): confirm Q convention against the consuming FFT code). */
const Word16 ivas_cos_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ] = {
    SHC( 0x7fff ), SHC( 0x7ff8 ), SHC( 0x7fe3 ), SHC( 0x7fc2 ), SHC( 0x7f94 ), SHC( 0x7f5a ), SHC( 0x7f13 ), SHC( 0x7ebf ),
    SHC( 0x7e5f ), SHC( 0x7df3 ), SHC( 0x7d7a ), SHC( 0x7cf5 ), SHC( 0x7c63 ), SHC( 0x7bc5 ), SHC( 0x7b1b ), SHC( 0x7a65 ),
    SHC( 0x79a3 ), SHC( 0x78d4 ), SHC( 0x77fa ), SHC( 0x7714 ), SHC( 0x7622 ), SHC( 0x7525 ), SHC( 0x741c ), SHC( 0x7307 ),
    SHC( 0x71e7 ), SHC( 0x70bc ), SHC( 0x6f86 ), SHC( 0x6e45 ), SHC( 0x6cf9 ), SHC( 0x6ba2 ), SHC( 0x6a40 ), SHC( 0x68d4 ),
    SHC( 0x675e ), SHC( 0x65dd ), SHC( 0x6453 ), SHC( 0x62be ), SHC( 0x6120 ), SHC( 0x5f78 ), SHC( 0x5dc7 ), SHC( 0x5c0d ),
    SHC( 0x5a49 ), SHC( 0x587d ), SHC( 0x56a7 ), SHC( 0x54ca ), SHC( 0x52e3 ), SHC( 0x50f5 ), SHC( 0x4eff ), SHC( 0x4d01 ),
    SHC( 0x4afb ), SHC( 0x48ee ), SHC( 0x46da ), SHC( 0x44be ), SHC( 0x429c ), SHC( 0x4073 ), SHC( 0x3e44 ), SHC( 0x3c0f ),
    SHC( 0x39d4 ), SHC( 0x3793 ), SHC( 0x354d ), SHC( 0x3302 ), SHC( 0x30b1 ), SHC( 0x2e5c ), SHC( 0x2c02 ), SHC( 0x29a3 ),
    SHC( 0x2741 ), SHC( 0x24db ), SHC( 0x2271 ), SHC( 0x2003 ), SHC( 0x1d93 ), SHC( 0x1b1f ), SHC( 0x18a9 ), SHC( 0x1631 ),
    SHC( 0x13b6 ), SHC( 0x1139 ), SHC( 0x0ebb ), SHC( 0x0c3b ), SHC( 0x09ba ), SHC( 0x0738 ), SHC( 0x04b6 ), SHC( 0x0232 ),
};
+
/* Sine twiddle factors for the 80-point transform: IVAS_80_PT_LEN >> 1 entries,
   16-bit fixed point, stored negated (all values negative; Q15 assumed —
   NOTE(review): confirm sign/Q convention against the consumer). */
const Word16 ivas_sin_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = {
    SHC( 0xff60 ), SHC( 0xfa59 ), SHC( 0xf555 ), SHC( 0xf055 ), SHC( 0xeb5c ), SHC( 0xe66a ), SHC( 0xe183 ), SHC( 0xdca7 ),
    SHC( 0xd7da ), SHC( 0xd31c ), SHC( 0xce70 ), SHC( 0xc9d8 ), SHC( 0xc555 ), SHC( 0xc0e9 ), SHC( 0xbc96 ), SHC( 0xb85e ),
    SHC( 0xb442 ), SHC( 0xb044 ), SHC( 0xac65 ), SHC( 0xa8a8 ), SHC( 0xa50d ), SHC( 0xa195 ), SHC( 0x9e43 ), SHC( 0x9b18 ),
    SHC( 0x9814 ), SHC( 0x953a ), SHC( 0x9289 ), SHC( 0x9004 ), SHC( 0x8dab ), SHC( 0x8b7f ), SHC( 0x8982 ), SHC( 0x87b3 ),
    SHC( 0x8613 ), SHC( 0x84a3 ), SHC( 0x8365 ), SHC( 0x8257 ), SHC( 0x817b ), SHC( 0x80d1 ), SHC( 0x8059 ), SHC( 0x8014 ),
};
+
/* Cosine twiddle factors for the 80-point transform: IVAS_80_PT_LEN >> 1 entries,
   16-bit fixed point, decreasing from ~1.0 (0x7fff) toward 0 (Q15 assumed —
   NOTE(review): confirm Q convention against the consuming FFT code). */
const Word16 ivas_cos_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = {
    SHC( 0x7fff ), SHC( 0x7fe0 ), SHC( 0x7f8d ), SHC( 0x7f09 ), SHC( 0x7e53 ), SHC( 0x7d6a ), SHC( 0x7c50 ), SHC( 0x7b05 ),
    SHC( 0x798a ), SHC( 0x77de ), SHC( 0x7603 ), SHC( 0x73fa ), SHC( 0x71c3 ), SHC( 0x6f5f ), SHC( 0x6cce ), SHC( 0x6a13 ),
    SHC( 0x672f ), SHC( 0x6421 ), SHC( 0x60ec ), SHC( 0x5d90 ), SHC( 0x5a10 ), SHC( 0x566c ), SHC( 0x52a6 ), SHC( 0x4ebf ),
    SHC( 0x4aba ), SHC( 0x4696 ), SHC( 0x4257 ), SHC( 0x3dfe ), SHC( 0x398c ), SHC( 0x3504 ), SHC( 0x3067 ), SHC( 0x2bb6 ),
    SHC( 0x26f4 ), SHC( 0x2223 ), SHC( 0x1d45 ), SHC( 0x185a ), SHC( 0x1367 ), SHC( 0x0e6b ), SHC( 0x096a ), SHC( 0x0465 ),
};
+
+
+/* clang-format on */
diff --git a/lib_com/ivas_rom_com_fx.h b/lib_com/ivas_rom_com_fx.h
new file mode 100644
index 0000000000000000000000000000000000000000..b696c2c5717b227fa5d2247aef374c12c23ee84e
--- /dev/null
+++ b/lib_com/ivas_rom_com_fx.h
@@ -0,0 +1,52 @@
+/******************************************************************************************************
+
+ (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository. All Rights Reserved.
+
+ This software is protected by copyright law and by international treaties.
+ The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository retain full ownership rights in their respective contributions in
+ the software. This notice grants no license of any kind, including but not limited to patent
+ license, nor is any license granted by implication, estoppel or otherwise.
+
+ Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+ contributions.
+
+ This software is provided "AS IS", without any express or implied warranties. The software is in the
+ development stage. It is intended exclusively for experts who have experience with such software and
+ solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+ and fitness for a particular purpose are hereby disclaimed and excluded.
+
+ Any dispute, controversy or claim arising under or in relation to providing this software shall be
+ submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+ accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+ the United Nations Convention on Contracts on the International Sales of Goods.
+
+*******************************************************************************************************/
+
#ifndef IVAS_ROM_COM_FX_H
#define IVAS_ROM_COM_FX_H

/* NOTE(review): the next bare "#include" lost its target — the angle-bracket
   header name appears to have been stripped (extraction/merge damage).
   Restore the intended system header before building. */
#include
#include "options.h"
#include "cnst.h"
#include "ivas_cnst.h"
#include "stat_com.h"
#include "ivas_stat_com.h"

/* Q15 twiddle-factor tables for the 480/320/160/80-point transforms, defined in
   ivas_rom_com_fx.c. Each table holds LEN/2 entries; the sin tables are stored
   with all-negative values (negated sine) — see the definitions for details. */
extern const Word16 ivas_sin_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ];
extern const Word16 ivas_cos_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ];
extern const Word16 ivas_sin_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ];
extern const Word16 ivas_cos_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ];
extern const Word16 ivas_sin_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ];
extern const Word16 ivas_cos_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ];
extern const Word16 ivas_sin_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ];
extern const Word16 ivas_cos_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ];

#endif /* IVAS_ROM_COM_FX_H */
\ No newline at end of file
diff --git a/lib_com/prot_fx2.h b/lib_com/prot_fx2.h
index 37708c0f6a5cfdab9c82a332f2303214af19b2c6..041bc3068be3d63dc2daf2bbf2323923fc6caaf0 100644
--- a/lib_com/prot_fx2.h
+++ b/lib_com/prot_fx2.h
@@ -1195,12 +1195,22 @@ void fft_rel_fx(
const Word16 n, /* i : vector length */
const Word16 m /* i : log2 of vector length */
);
+void fft_rel_fx32(
+ Word32 x[], /* i/o: i /output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+);
//ifft_rel.c
void ifft_rel_fx(
Word16 io[], /* i/o: i /output vector */
const Word16 n, /* i : vector length */
const Word16 m /* i : log2 of vector length */
);
+void ifft_rel_fx32(
+ Word32 io[], /* i/o: i /output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+);
//gs_gains_fx.c
Word16 gsc_gaindec_fx( /* o : average frequency gain */
Decoder_State *st_fx, /* i/o: decoder state structure */
@@ -3700,6 +3710,15 @@ void DoRTFT128_16fx(
void fft3_fx(const Word16[], Word16[], const Word16);
void ifft3_fx(const Word16[], Word16[], const Word16);
+void fft3_fx_ivas(
+ const Word32 X[],
+ Word32 Y[],
+ const Word16 n );
+void ifft3_fx_ivas(
+ const Word32 X[],
+ Word32 Y[],
+ const Word16 n );
+
//fft_rel_fx.c
void r_fft_fx_lc(
const Word16 *phs_tbl, /* i : Table of phase */
diff --git a/lib_com/rom_com.c b/lib_com/rom_com.c
index c5fa590b446e7a0f8fc9417678b5653edf6dfc8e..2787dffcd4d4a92234d8340a5ec0c9f048eb1bbe 100644
--- a/lib_com/rom_com.c
+++ b/lib_com/rom_com.c
@@ -37747,6 +37747,137 @@ const Word16 cos_diff_table[512] =
};
/* NOTE(review): a 641-entry Word16 sine/cosine lookup table (sincos_t_ext_fx)
 * was committed here wrapped in "#if 0", i.e. permanently excluded from
 * compilation. Dead code guarded out with "#if 0" should not be checked in —
 * it bit-rots silently and bloats the file; version control history preserves
 * it if it is ever needed again. The disabled block has therefore been removed. */
+
+const Word16 fftSineTab640_fx[ 321 ] = {
+ SHC( 0x0000 ), SHC( 0x0141 ), SHC( 0x0283 ), SHC( 0x03c4 ), SHC( 0x0506 ), SHC( 0x0647 ), SHC( 0x0789 ), SHC( 0x08ca ),
+ SHC( 0x0a0a ), SHC( 0x0b4b ), SHC( 0x0c8b ), SHC( 0x0dcb ), SHC( 0x0f0b ), SHC( 0x104a ), SHC( 0x1189 ), SHC( 0x12c8 ),
+ SHC( 0x1406 ), SHC( 0x1543 ), SHC( 0x1680 ), SHC( 0x17bc ), SHC( 0x18f8 ), SHC( 0x1a33 ), SHC( 0x1b6e ), SHC( 0x1ca8 ),
+ SHC( 0x1de1 ), SHC( 0x1f19 ), SHC( 0x2051 ), SHC( 0x2188 ), SHC( 0x22be ), SHC( 0x23f3 ), SHC( 0x2528 ), SHC( 0x265b ),
+ SHC( 0x278d ), SHC( 0x28bf ), SHC( 0x29ef ), SHC( 0x2b1f ), SHC( 0x2c4d ), SHC( 0x2d7a ), SHC( 0x2ea6 ), SHC( 0x2fd1 ),
+ SHC( 0x30fb ), SHC( 0x3224 ), SHC( 0x334b ), SHC( 0x3471 ), SHC( 0x3596 ), SHC( 0x36ba ), SHC( 0x37dc ), SHC( 0x38fd ),
+ SHC( 0x3a1c ), SHC( 0x3b3a ), SHC( 0x3c56 ), SHC( 0x3d71 ), SHC( 0x3e8b ), SHC( 0x3fa3 ), SHC( 0x40b9 ), SHC( 0x41ce ),
+ SHC( 0x42e1 ), SHC( 0x43f2 ), SHC( 0x4502 ), SHC( 0x4610 ), SHC( 0x471c ), SHC( 0x4827 ), SHC( 0x4930 ), SHC( 0x4a37 ),
+ SHC( 0x4b3c ), SHC( 0x4c3f ), SHC( 0x4d41 ), SHC( 0x4e40 ), SHC( 0x4f3e ), SHC( 0x503a ), SHC( 0x5133 ), SHC( 0x522b ),
+ SHC( 0x5321 ), SHC( 0x5414 ), SHC( 0x5506 ), SHC( 0x55f5 ), SHC( 0x56e2 ), SHC( 0x57ce ), SHC( 0x58b7 ), SHC( 0x599d ),
+ SHC( 0x5a82 ), SHC( 0x5b64 ), SHC( 0x5c44 ), SHC( 0x5d22 ), SHC( 0x5dfe ), SHC( 0x5ed7 ), SHC( 0x5fae ), SHC( 0x6082 ),
+ SHC( 0x6154 ), SHC( 0x6224 ), SHC( 0x62f2 ), SHC( 0x63bc ), SHC( 0x6485 ), SHC( 0x654b ), SHC( 0x660e ), SHC( 0x66cf ),
+ SHC( 0x678d ), SHC( 0x6849 ), SHC( 0x6902 ), SHC( 0x69b9 ), SHC( 0x6a6d ), SHC( 0x6b1f ), SHC( 0x6bcd ), SHC( 0x6c79 ),
+ SHC( 0x6d23 ), SHC( 0x6dca ), SHC( 0x6e6e ), SHC( 0x6f0f ), SHC( 0x6fad ), SHC( 0x7049 ), SHC( 0x70e2 ), SHC( 0x7179 ),
+ SHC( 0x720c ), SHC( 0x729d ), SHC( 0x732a ), SHC( 0x73b5 ), SHC( 0x743e ), SHC( 0x74c3 ), SHC( 0x7545 ), SHC( 0x75c5 ),
+ SHC( 0x7641 ), SHC( 0x76bb ), SHC( 0x7732 ), SHC( 0x77a5 ), SHC( 0x7816 ), SHC( 0x7884 ), SHC( 0x78ef ), SHC( 0x7957 ),
+ SHC( 0x79bc ), SHC( 0x7a1e ), SHC( 0x7a7d ), SHC( 0x7ad8 ), SHC( 0x7b31 ), SHC( 0x7b87 ), SHC( 0x7bda ), SHC( 0x7c29 ),
+ SHC( 0x7c76 ), SHC( 0x7cc0 ), SHC( 0x7d06 ), SHC( 0x7d4a ), SHC( 0x7d8a ), SHC( 0x7dc7 ), SHC( 0x7e01 ), SHC( 0x7e38 ),
+ SHC( 0x7e6c ), SHC( 0x7e9d ), SHC( 0x7eca ), SHC( 0x7ef5 ), SHC( 0x7f1c ), SHC( 0x7f41 ), SHC( 0x7f62 ), SHC( 0x7f80 ),
+ SHC( 0x7f9a ), SHC( 0x7fb2 ), SHC( 0x7fc7 ), SHC( 0x7fd8 ), SHC( 0x7fe6 ), SHC( 0x7ff1 ), SHC( 0x7ff9 ), SHC( 0x7ffe ),
+ SHC( 0x7fff ), SHC( 0x7ffe ), SHC( 0x7ff9 ), SHC( 0x7ff1 ), SHC( 0x7fe6 ), SHC( 0x7fd8 ), SHC( 0x7fc7 ), SHC( 0x7fb2 ),
+ SHC( 0x7f9a ), SHC( 0x7f80 ), SHC( 0x7f62 ), SHC( 0x7f41 ), SHC( 0x7f1c ), SHC( 0x7ef5 ), SHC( 0x7eca ), SHC( 0x7e9d ),
+ SHC( 0x7e6c ), SHC( 0x7e38 ), SHC( 0x7e01 ), SHC( 0x7dc7 ), SHC( 0x7d8a ), SHC( 0x7d4a ), SHC( 0x7d06 ), SHC( 0x7cc0 ),
+ SHC( 0x7c76 ), SHC( 0x7c29 ), SHC( 0x7bda ), SHC( 0x7b87 ), SHC( 0x7b31 ), SHC( 0x7ad8 ), SHC( 0x7a7d ), SHC( 0x7a1e ),
+ SHC( 0x79bc ), SHC( 0x7957 ), SHC( 0x78ef ), SHC( 0x7884 ), SHC( 0x7816 ), SHC( 0x77a5 ), SHC( 0x7732 ), SHC( 0x76bb ),
+ SHC( 0x7641 ), SHC( 0x75c5 ), SHC( 0x7545 ), SHC( 0x74c3 ), SHC( 0x743e ), SHC( 0x73b5 ), SHC( 0x732a ), SHC( 0x729d ),
+ SHC( 0x720c ), SHC( 0x7179 ), SHC( 0x70e2 ), SHC( 0x7049 ), SHC( 0x6fad ), SHC( 0x6f0f ), SHC( 0x6e6e ), SHC( 0x6dca ),
+ SHC( 0x6d23 ), SHC( 0x6c79 ), SHC( 0x6bcd ), SHC( 0x6b1f ), SHC( 0x6a6d ), SHC( 0x69b9 ), SHC( 0x6902 ), SHC( 0x6849 ),
+ SHC( 0x678d ), SHC( 0x66cf ), SHC( 0x660e ), SHC( 0x654b ), SHC( 0x6485 ), SHC( 0x63bc ), SHC( 0x62f2 ), SHC( 0x6224 ),
+ SHC( 0x6154 ), SHC( 0x6082 ), SHC( 0x5fae ), SHC( 0x5ed7 ), SHC( 0x5dfe ), SHC( 0x5d22 ), SHC( 0x5c44 ), SHC( 0x5b64 ),
+ SHC( 0x5a82 ), SHC( 0x599d ), SHC( 0x58b7 ), SHC( 0x57ce ), SHC( 0x56e2 ), SHC( 0x55f5 ), SHC( 0x5506 ), SHC( 0x5414 ),
+ SHC( 0x5321 ), SHC( 0x522b ), SHC( 0x5133 ), SHC( 0x503a ), SHC( 0x4f3e ), SHC( 0x4e40 ), SHC( 0x4d41 ), SHC( 0x4c3f ),
+ SHC( 0x4b3c ), SHC( 0x4a37 ), SHC( 0x4930 ), SHC( 0x4827 ), SHC( 0x471c ), SHC( 0x4610 ), SHC( 0x4502 ), SHC( 0x43f2 ),
+ SHC( 0x42e1 ), SHC( 0x41ce ), SHC( 0x40b9 ), SHC( 0x3fa3 ), SHC( 0x3e8b ), SHC( 0x3d71 ), SHC( 0x3c56 ), SHC( 0x3b3a ),
+ SHC( 0x3a1c ), SHC( 0x38fd ), SHC( 0x37dc ), SHC( 0x36ba ), SHC( 0x3596 ), SHC( 0x3471 ), SHC( 0x334b ), SHC( 0x3224 ),
+ SHC( 0x30fb ), SHC( 0x2fd1 ), SHC( 0x2ea6 ), SHC( 0x2d7a ), SHC( 0x2c4d ), SHC( 0x2b1f ), SHC( 0x29ef ), SHC( 0x28bf ),
+ SHC( 0x278d ), SHC( 0x265b ), SHC( 0x2528 ), SHC( 0x23f3 ), SHC( 0x22be ), SHC( 0x2188 ), SHC( 0x2051 ), SHC( 0x1f19 ),
+ SHC( 0x1de1 ), SHC( 0x1ca8 ), SHC( 0x1b6e ), SHC( 0x1a33 ), SHC( 0x18f8 ), SHC( 0x17bc ), SHC( 0x1680 ), SHC( 0x1543 ),
+ SHC( 0x1406 ), SHC( 0x12c8 ), SHC( 0x1189 ), SHC( 0x104a ), SHC( 0x0f0b ), SHC( 0x0dcb ), SHC( 0x0c8b ), SHC( 0x0b4b ),
+ SHC( 0x0a0a ), SHC( 0x08ca ), SHC( 0x0789 ), SHC( 0x0647 ), SHC( 0x0506 ), SHC( 0x03c4 ), SHC( 0x0283 ), SHC( 0x0141 ),
+ SHC( 0x0000 ),
+};
+
+
const Word32 bwMode2fs[4] = { 8000, 16000, 32000, 48000 };
const Word16 swb_lsp_prev_interp_init[10] = { 32767, 31164, 26510, 19261, 10126, 0, -10126, -19261, -26510, -31164 };
diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h
index 5cf66078c3ca683e9650c571d7bc60553085077c..a00dcd342f5ec0a616f10222bdbcdfb415def953 100644
--- a/lib_com/rom_com.h
+++ b/lib_com/rom_com.h
@@ -1647,6 +1647,7 @@ extern const float *const cdk_37bits_flt[];
extern const float *const cdk_37bits_ivas[];
extern Word16 const * const cdk_37bits[];
extern const float fftSineTab640[321];
+extern const Word16 fftSineTab640_fx[321];
extern const float olapWinAna512[512];
extern const float olapWinAna640[640];
diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c
index 7c749975fa6b960715c68a48ff166ae901cebe2f..63532eaabef8b88bab24b8b682d4735263993e76 100644
--- a/lib_com/rom_com_fx.c
+++ b/lib_com/rom_com_fx.c
@@ -873,5 +873,21 @@ const Word16 sin_twiddle_table_16_8_2[ 16 ] = {
SHC( 0x0000 ), SHC( 0xcf05 ),
};
+#if 0
+#define Flt2Word16(a) SHC((Word16) (a * 32768.0))
+const Word16 w_edct2_64_fx[80] =
+{
+ SHC(0x7FFF), Flt2Word16(0.00000000f), Flt2Word16(0.70710678f), Flt2Word16(0.70710678f), Flt2Word16(0.92387953f), Flt2Word16(0.38268343f), Flt2Word16(0.38268343f), Flt2Word16(0.92387953f),
+ Flt2Word16(0.98078528f), Flt2Word16(0.19509032f), Flt2Word16(0.55557023f), Flt2Word16(0.83146961f), Flt2Word16(0.83146961f), Flt2Word16(0.55557023f), Flt2Word16(0.19509032f), Flt2Word16(0.98078528f),
+ Flt2Word16(0.70710678f), Flt2Word16(0.49984940f), Flt2Word16(0.49939772f), Flt2Word16(0.49864522f), Flt2Word16(0.49759236f), Flt2Word16(0.49623976f), Flt2Word16(0.49458825f), Flt2Word16(0.49263882f),
+ Flt2Word16(0.49039264f), Flt2Word16(0.48785106f), Flt2Word16(0.48501562f), Flt2Word16(0.48188803f), Flt2Word16(0.47847016f), Flt2Word16(0.47476409f), Flt2Word16(0.47077203f), Flt2Word16(0.46649639f),
+ Flt2Word16(0.46193976f), Flt2Word16(0.45710487f), Flt2Word16(0.45199464f), Flt2Word16(0.44661215f), Flt2Word16(0.44096063f), Flt2Word16(0.43504349f), Flt2Word16(0.42886430f), Flt2Word16(0.42242678f),
+ Flt2Word16(0.41573480f), Flt2Word16(0.40879240f), Flt2Word16(0.40160376f), Flt2Word16(0.39417321f), Flt2Word16(0.38650522f), Flt2Word16(0.37860442f), Flt2Word16(0.37047556f), Flt2Word16(0.36212354f),
+ Flt2Word16(0.35355339f), Flt2Word16(0.34477027f), Flt2Word16(0.33577947f), Flt2Word16(0.32658642f), Flt2Word16(0.31719664f), Flt2Word16(0.30761579f), Flt2Word16(0.29784965f), Flt2Word16(0.28790409f),
+ Flt2Word16(0.27778511f), Flt2Word16(0.26749880f), Flt2Word16(0.25705137f), Flt2Word16(0.24644909f), Flt2Word16(0.23569836f), Flt2Word16(0.22480566f), Flt2Word16(0.21377754f), Flt2Word16(0.20262065f),
+ Flt2Word16(0.19134171f), Flt2Word16(0.17994751f), Flt2Word16(0.16844492f), Flt2Word16(0.15684087f), Flt2Word16(0.14514233f), Flt2Word16(0.13335637f), Flt2Word16(0.12149008f), Flt2Word16(0.10955062f),
+ Flt2Word16(0.09754516f), Flt2Word16(0.08548094f), Flt2Word16(0.07336523f), Flt2Word16(0.06120533f), Flt2Word16(0.04900857f), Flt2Word16(0.03678228f), Flt2Word16(0.02453383f), Flt2Word16(0.01227061f)
+};
+#endif
diff --git a/lib_util/test_fft.c b/lib_util/test_fft.c
index 264ea41b444b40fda3c4c2653bab8cff15e76e15..3a8d97f8aeb324fbe6bfa1151f8e50a4214d438b 100644
--- a/lib_util/test_fft.c
+++ b/lib_util/test_fft.c
@@ -34,23 +34,34 @@
#include
#include
#include
+#include
#include
#include
-#include "options.h"
-#include "test_fft.h"
+#include "assert.h"
+#include "basop32.h"
typedef int Word32;
typedef short Word16;
typedef short int16_t;
-#define ALLOWED_DEVIATION (0.005)
-#define Q31 (2147483647.0f)
+#define ALLOWED_DEVIATION ( 0.005 )
+#define Q31 ( 2147483647.0f )
+
+#ifndef min
+#define min( a, b ) ( ( ( a ) < ( b ) ) ? ( a ) : ( b ) )
+#endif
+
+#ifndef max
+#define max( a, b ) ( ( ( a ) > ( b ) ) ? ( a ) : ( b ) )
+#endif
+
+extern const float fftSineTab640[321];
void DoFFT_fx(
Word32 *re2,
Word32 *im2,
- const int16_t length );
-
+ const Word16 length );
+
void DoFFT(
float *re2,
float *im2,
@@ -64,152 +75,758 @@ void fft_fx(
);
void fft(
- float *re, /* i/o: real part */
- float *im, /* i/o: imag part */
+ float *re, /* i/o: real part */
+ float *im, /* i/o: imag part */
const int16_t length, /* i : length of fft */
const int16_t s /* i : sign */
);
-#define print_output(fRe, fIm, iRe, iIm) \
- printf("Real = %.2f %d Imag = %.2f %d\n", fRe, iRe, fIm, iIm)
+void fft_rel(
+ float x[], /* i/o: input/output vector */
+ const int16_t n, /* i : vector length */
+ const int16_t m /* i : log2 of vector length */
+);
+
+void ifft_rel(
+ float x[], /* i/o: input/output vector */
+ const int16_t n, /* i : vector length */
+ const int16_t m /* i : log2 of vector length */
+);
+
+void fft_rel_fx(
+ Word16 x[], /* i/o: input/output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+);
+
+void fft3_fx_ivas(
+ const Word32 X[],
+ Word32 Y[],
+ const Word16 n );
+
+void fft3(
+ const float X[],
+ float Y[],
+ const int16_t n );
+
+void ifft3_fx_ivas(
+ const Word32 X[],
+ Word32 Y[],
+ const Word16 n );
+
+void ifft3(
+ const float X[], /* i : input frame */
+ float Y[], /* o : iDFT of input frame */
+ const int16_t n /* i : block length (must be radix 3) */
+);
+
+extern const Word16 fftSineTab640_fx[321];
+
+#if 0
+void fft_rel_fx32(
+ Word32 x[], /* i/o: input/output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+);
+#else
+void fft_rel_fx32(
+ Word32 *x, /* i/o: input/output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+);
+void ifft_rel_fx32(
+ Word32 x[], /* i/o: input/output vector */
+ const Word16 n, /* i : vector length */
+ const Word16 m /* i : log2 of vector length */
+);
+#endif
+
+void fft_cldfb(
+ float *data, /* i/o: input/output vector */
+ const int16_t size /* size of fft operation */
+);
+
+void fft_cldfb_fx(
+ Word32 *data, /* i/o: input/output vector */
+ const int16_t size /* size of fft operation */
+);
+
+int16_t RFFTN(
+ float *afftData,
+ const float *trigPtr,
+ const int16_t len,
+ const int16_t isign );
+
+int16_t RFFTN_fx(
+ Word32 *data,
+ const Word16 *sine_table,
+ const Word16 len,
+ const Word16 sign );
+
+void BASOP_cfft(
+ float *re, /* i/o: real part */
+ float *im, /* i/o: imag part */
+ Word16 s, /* i : stride real and imag part */
+ Word16 *scale /* i : scalefactor */
+);
+
+void BASOP_cfft_fx(
+ Word32 *re, /* i/o: real part */
+ Word32 *im, /* i/o: imag part */
+ Word16 s, /* i : stride real and imag part */
+ Word16 *scale /* i : scalefactor */
+);
+
+#define print_output( fRe, fIm, iRe, iIm ) \
+ printf( "Real = %.2f %d Imag = %.2f %d\n", fRe, iRe, fIm, iIm )
+
+#define TYPE_DoFFT ( 0 )
+#define TYPE_fft ( 1 )
+#define TYPE_fft_rel ( 2 )
+#define TYPE_fft_rel32 ( 3 )
+#define TYPE_ifft_rel32 ( 4 )
+#define TYPE_fft_cldfb ( 5 )
+#define TYPE_fft_RFFTN ( 6 )
+#define TYPE_fft_fft3 ( 7 )
+#define TYPE_fft_ifft3 ( 8 )
+#define TYPE_BASOP_cfft ( 9 )
-float test_fixed_fft(Word16 N, Word32 isDoFFT)
+Word16 find_guarded_bits_fx( Word32 n )
{
- Word32 *iRe = malloc(N * sizeof(iRe[0]));
- Word32 *iIm = malloc(N * sizeof(iRe[0]));
- float *fRe = malloc(N * sizeof(fRe[0]));
- float *fIm = malloc(N * sizeof(fRe[0]));
- float max_deviation = 0.0f;
- Word32 max_val_in = 0;
- Word32 max_val_out = 0;
-
- if ((iRe == NULL) || (iIm == NULL) || (fRe == NULL) || (fIm == NULL))
- {
- printf("!!!!Malloc failed!!!!");
- exit(1);
- }
-
- do {
- srand(N);
- for(int i = 0; i < N; i++)
+ return n <= 1 ? 0 : n <= 2 ? 1
+ : n <= 4 ? 2
+ : n <= 8 ? 3
+ : n <= 16 ? 4
+ : n <= 32 ? 5
+ : n <= 64 ? 6
+ : n <= 128 ? 7
+ : n <= 256 ? 8
+ : n <= 512 ? 9
+ : n <= 1024 ? 10
+ : 11;
+}
+
+Word16 L_norm_arr( Word32 *arr, int size )
+{
+ Word16 q = 31;
+ for ( int i = 0; i < size; i++ )
+ if ( arr[i] != 0 )
+ {
+ q = min( q, norm_l( arr[i] ) );
+ }
+ return q;
+}
+
+Word16 norm_arr( Word16 *arr, int size )
+{
+ Word16 q = 15;
+ for ( int i = 0; i < size; i++ )
+ if ( arr[i] != 0 )
+ {
+ q = min( q, norm_s( arr[i] ) );
+ }
+ return q;
+}
+
+
+void populate_input_interleave( Word32 *in32, float *fIn, Word16 N )
+{
+ Word32 max_val_in = 0;
+ Word16 r_shift, l_shift;
+ do
+ {
+ srand( N );
+ for ( int i = 0; i < N; i++ )
+ {
+ in32[2 * i + 0] = rand();
+ in32[2 * i + 1] = rand();
+ if ( max_val_in < abs( in32[2 * i + 0] ) )
+ max_val_in = abs( in32[2 * i + 0] );
+ if ( max_val_in < abs( in32[2 * i + 1] ) )
+ max_val_in = abs( in32[2 * i + 1] );
+ fIn[2 * i + 0] = (float) in32[2 * i + 0];
+ fIn[2 * i + 1] = (float) in32[2 * i + 1];
+ }
+ } while ( max_val_in == 0 );
+ r_shift = find_guarded_bits_fx( 2 * N );
+ l_shift = L_norm_arr( in32, 2 * N );
+ if ( l_shift > r_shift )
+ {
+ Word16 shift_val = l_shift - r_shift;
+ for ( int i = 0; i < N; i++ )
+ {
+ in32[2 * i + 0] = in32[2 * i + 0] << shift_val;
+ fIn[2 * i + 0] = (float) in32[2 * i + 0];
+ in32[2 * i + 1] = in32[2 * i + 1] << shift_val;
+ fIn[2 * i + 1] = (float) in32[2 * i + 1];
+ }
+ }
+ if ( l_shift < r_shift )
{
- iRe[i] = rand();
- iIm[i] = rand();
- if (max_val_in < abs(iRe[i]))
- max_val_in = abs(iRe[i]);
- if (max_val_in < abs(iIm[i]))
- max_val_in = abs(iIm[i]);
- fRe[i] = (float)iRe[i];
- fIm[i] = (float)iIm[i];
+ Word16 shift_val = r_shift - l_shift;
+ for ( int i = 0; i < N; i++ )
+ {
+ in32[2 * i + 0] = in32[2 * i + 0] >> shift_val;
+ fIn[2 * i + 0] = (float) in32[2 * i + 0];
+ in32[2 * i + 1] = in32[2 * i + 1] >> shift_val;
+ fIn[2 * i + 1] = (float) in32[2 * i + 1];
+ }
}
- } while(max_val_in == 0);
-
- if (isDoFFT)
- {
- DoFFT(fRe, fIm, N);
- DoFFT_fx(iRe, iIm, N);
- }
- else
- {
- fft(fRe, fIm, N, 1);
- fft_fx(iRe, iIm, N, 1);
- }
-
- for(int i = 0; i < N; i++)
- {
- float OutRe = fRe[i];
- float OutIm = fIm[i];
- float dev_val_real = (float)(fabs(OutRe - (float)iRe[i]));
- float dev_val_imag = (float)(fabs(OutIm - (float)iIm[i]));
- if (max_val_out < iRe[i])
- max_val_out = iRe[i];
- if (max_val_out < iIm[i])
- max_val_out = iIm[i];
- //print_output(OutRe, OutIm, iRe[i], iIm[i]);
- if (dev_val_real > max_deviation)
- max_deviation = dev_val_real;
- if (dev_val_imag > max_deviation)
- max_deviation = dev_val_imag;
- }
- free( iRe );
- free( iIm );
- free( fRe );
- free( fIm );
- return (( max_deviation / max_val_out) * 100);
}
-void test_DoFFT(Word16 *fft_lengths, Word16 num_lengths)
+void populate_input_interleave_16( Word16 *in16, float *fIn, Word16 N )
{
- Word32 num_tests = 0, num_passed = 0, num_failed = 0;
- printf("\033[0;33m");
- printf("============== Starting DoFFT tests ================\n");
- for ( int i = num_lengths - 1; i >=0 ; i-- )
- {
- float max_deviation = test_fixed_fft( fft_lengths[i], 1);
- num_tests++;
- if (max_deviation < ALLOWED_DEVIATION)
+ Word16 max_val_in = 0;
+ Word16 r_shift, l_shift;
+ do
+ {
+ srand( N );
+ for ( int i = 0; i < N; i++ )
+ {
+ in16[2 * i + 0] = (Word16) rand();
+ in16[2 * i + 1] = (Word16) rand();
+ if ( max_val_in < abs( in16[2 * i + 0] ) )
+ max_val_in = (Word16) abs( in16[2 * i + 0] );
+ if ( max_val_in < abs( in16[2 * i + 1] ) )
+ max_val_in = (Word16) abs( in16[2 * i + 1] );
+ fIn[2 * i + 0] = (float) in16[2 * i + 0];
+ fIn[2 * i + 1] = (float) in16[2 * i + 1];
+ }
+ } while ( max_val_in == 0 );
+
+ r_shift = find_guarded_bits_fx( 2 * N );
+ l_shift = norm_arr( in16, 2 * N );
+ if ( l_shift > r_shift )
{
- printf("\033[0;32m");
- printf("[PASSED] FFT test of length %d\n", fft_lengths[i]);
- num_passed++;
+ Word16 shift_val = l_shift - r_shift;
+ for ( int i = 0; i < N; i++ )
+ {
+ in16[2 * i + 0] = in16[2 * i + 0] << shift_val;
+ fIn[2 * i + 0] = (float) in16[2 * i + 0];
+ in16[2 * i + 1] = in16[2 * i + 1] << shift_val;
+ fIn[2 * i + 1] = (float) in16[2 * i + 1];
+ }
}
- else
+ if ( l_shift < r_shift )
{
- printf("\033[0;31m");
- printf("[FAILED] FFT test of length %d Max_deviation = %.6f\n", fft_lengths[i], max_deviation);
- num_failed++;
+ Word16 shift_val = r_shift - l_shift;
+ for ( int i = 0; i < N; i++ )
+ {
+ in16[2 * i + 0] = in16[2 * i + 0] >> shift_val;
+ fIn[2 * i + 0] = (float) in16[2 * i + 0];
+ in16[2 * i + 1] = in16[2 * i + 1] >> shift_val;
+ fIn[2 * i + 1] = (float) in16[2 * i + 1];
+ }
}
- }
- printf("\033[0;33m");
- printf("============== Completed DoFFT tests ================\n\n");
- printf("Summary of FFT unit tests:\n");
- printf("--------------------------\n");
- printf("Total tests: %d\n", num_tests);
- printf("Passed: %d\n", num_passed);
- printf("\033[0;31m");
- printf("Failed: %d\n\n", num_failed);
- printf("\033[0m");
}
+static void populate_input_deinterleave( Word32 *iRe, Word32 *iIm, float *fRe, float *fIm, Word16 N )
+{
+ Word32 max_val_in = 0;
+ Word16 r_shift, l_shift;
+ do
+ {
+ srand( N );
+ for ( int i = 0; i < N; i++ )
+ {
+ iRe[i] = rand();
+ iIm[i] = rand();
+ if ( max_val_in < abs( iRe[i] ) )
+ max_val_in = abs( iRe[i] );
+ if ( max_val_in < abs( iIm[i] ) )
+ max_val_in = abs( iIm[i] );
+ fRe[i] = (float) iRe[i];
+ fIm[i] = (float) iIm[i];
+ }
+ } while ( max_val_in == 0 );
+ r_shift = find_guarded_bits_fx( 2 * N );
+ l_shift = L_norm_arr( iRe, N );
+ l_shift = min( L_norm_arr( iIm, N ), l_shift );
+ if ( l_shift > r_shift )
+ {
+ Word16 shift_val = l_shift - r_shift;
+ for ( int i = 0; i < N; i++ )
+ {
+ iRe[i] = iRe[i] << shift_val;
+ fRe[i] = (float) iRe[i];
+ iIm[i] = iIm[i] << shift_val;
+ fIm[i] = (float) iIm[i];
+ }
+ }
+ if ( l_shift < r_shift )
+ {
+ Word16 shift_val = r_shift - l_shift;
+ for ( int i = 0; i < N; i++ )
+ {
+ iRe[i] = iRe[i] >> shift_val;
+ fRe[i] = (float) iRe[i];
+ iIm[i] = iIm[i] >> shift_val;
+ fIm[i] = (float) iIm[i];
+ }
+ }
+}
-void test_fft(Word16 *fft_lengths, Word16 num_lengths)
+static float test_fixed_fft( Word16 N, Word32 test_type )
{
- Word32 num_tests = 0, num_passed = 0, num_failed = 0;
- printf("\033[0;33m");
- printf("============== Starting fft tests ================\n");
- for ( int i = num_lengths - 1; i >=0 ; i-- )
- {
- float max_deviation = test_fixed_fft( fft_lengths[i], 0);
- num_tests++;
- if (max_deviation < ALLOWED_DEVIATION)
+ float max_deviation = 0.0f;
+ Word32 max_val_out = 0;
+
+ switch ( test_type )
{
- printf("\033[0;32m");
- printf("[PASSED] FFT test of length %d\n", fft_lengths[i]);
- num_passed++;
+ case TYPE_DoFFT:
+ {
+ Word32 *iRe = calloc( N, sizeof( iRe[0] ) );
+ Word32 *iIm = calloc( N, sizeof( iIm[0] ) );
+ float *fRe = calloc( N, sizeof( fRe[0] ) );
+ float *fIm = calloc( N, sizeof( fIm[0] ) );
+ if ( ( iRe == NULL ) || ( iIm == NULL ) || ( fRe == NULL ) || ( fIm == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_deinterleave( iRe, iIm, fRe, fIm, N );
+ }
+ DoFFT( fRe, fIm, N );
+ DoFFT_fx( iRe, iIm, N );
+
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fRe[i];
+ float OutIm = fIm[i];
+ float dev_val_real = (float) ( fabs( OutRe - (float) iRe[i] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) iIm[i] ) );
+ if ( max_val_out < iRe[i] )
+ max_val_out = iRe[i];
+ if ( max_val_out < iIm[i] )
+ max_val_out = iIm[i];
+ if ( dev_val_real > max_deviation )
+ max_deviation = dev_val_real;
+ if ( dev_val_imag > max_deviation )
+ max_deviation = dev_val_imag;
+ }
+ free( iRe );
+ free( iIm );
+ free( fRe );
+ free( fIm );
+ break;
+ }
+ case TYPE_fft:
+ {
+ Word32 *iRe = calloc( N, sizeof( iRe[0] ) );
+ Word32 *iIm = calloc( N, sizeof( iIm[0] ) );
+ float *fRe = calloc( N, sizeof( fRe[0] ) );
+ float *fIm = calloc( N, sizeof( fIm[0] ) );
+ if ( ( iRe == NULL ) || ( iIm == NULL ) || ( fRe == NULL ) || ( fIm == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_deinterleave( iRe, iIm, fRe, fIm, N );
+ }
+ fft( fRe, fIm, N, 1 );
+ fft_fx( iRe, iIm, N, 1 );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fRe[i];
+ float OutIm = fIm[i];
+ float dev_val_real = (float) ( fabs( OutRe - (float) iRe[i] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) iIm[i] ) );
+ if ( max_val_out < iRe[i] )
+ max_val_out = iRe[i];
+ if ( max_val_out < iIm[i] )
+ max_val_out = iIm[i];
+ if ( dev_val_real > max_deviation )
+ max_deviation = dev_val_real;
+ if ( dev_val_imag > max_deviation )
+ max_deviation = dev_val_imag;
+ }
+ free( iRe );
+ free( iIm );
+ free( fRe );
+ free( fIm );
+ break;
+ }
+ case TYPE_fft_rel:
+ {
+ Word16 *in16 = calloc( 2 * N, sizeof( in16[0] ) );
+ float *fIn = calloc( 2 * N, sizeof( fIn[0] ) );
+ Word16 guard_bits = 9;
+ if ( N == 256 )
+ {
+ guard_bits = 8;
+ }
+ else if ( N == 128 )
+ {
+ guard_bits = 7;
+ }
+ else if ( N != 512 )
+ {
+ assert( !"Unsupported length for fft_rel!" );
+ }
+ if ( ( in16 == NULL ) || ( fIn == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_interleave_16( in16, fIn, N );
+ }
+ fft_rel( fIn, N, guard_bits );
+ fft_rel_fx( in16, N, guard_bits );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fIn[2 * i + 0];
+ float OutIm = fIn[2 * i + 1];
+ float dev_val_real = (float) ( fabs( OutRe - (float) in16[2 * i + 0] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) in16[2 * i + 1] ) );
+ if ( max_val_out < abs( in16[2 * i + 0] ) )
+ max_val_out = abs( in16[2 * i + 0] );
+ if ( max_val_out < abs( in16[2 * i + 1] ) )
+ max_val_out = abs( in16[2 * i + 1] );
+ if ( dev_val_real > max_deviation )
+ max_deviation = dev_val_real;
+ if ( dev_val_imag > max_deviation )
+ max_deviation = dev_val_imag;
+ }
+ free( in16 );
+ free( fIn );
+ break;
+ }
+ case TYPE_fft_rel32:
+ {
+ Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) );
+ float *fIn = calloc( 2 * N, sizeof( fIn[0] ) );
+ Word16 guard_bits = 9;
+ if ( N == 256 )
+ {
+ guard_bits = 8;
+ }
+ else if ( N == 128 )
+ {
+ guard_bits = 7;
+ }
+ else if ( N != 512 )
+ {
+ assert( !"Unsupported length for fft_rel!" );
+ }
+ if ( ( in32 == NULL ) || ( fIn == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_interleave( in32, fIn, N );
+ }
+ fft_rel( fIn, N, guard_bits );
+ fft_rel_fx32( in32, N, guard_bits );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fIn[2 * i + 0];
+ float OutIm = fIn[2 * i + 1];
+ float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) );
+ if ( max_val_out < abs( in32[2 * i + 0] ) )
+ max_val_out = abs( in32[2 * i + 0] );
+ if ( max_val_out < abs( in32[2 * i + 1] ) )
+ max_val_out = abs( in32[2 * i + 1] );
+ if ( dev_val_real > max_deviation )
+ {
+ max_deviation = dev_val_real;
+ }
+ if ( dev_val_imag > max_deviation )
+ {
+ max_deviation = dev_val_imag;
+ }
+ }
+ free( in32 );
+ free( fIn );
+ break;
+ }
+ case TYPE_ifft_rel32:
+ {
+ Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) );
+ float *fIn = calloc( 2 * N, sizeof( fIn[0] ) );
+ Word16 guard_bits = 9;
+ if ( N == 256 )
+ {
+ guard_bits = 8;
+ }
+ else if ( N == 128 )
+ {
+ guard_bits = 7;
+ }
+ else if ( N != 512 )
+ {
+ assert( !"Unsupported length for fft_rel!" );
+ }
+ if ( ( in32 == NULL ) || ( fIn == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_interleave( in32, fIn, N );
+ }
+ ifft_rel( fIn, N, guard_bits );
+ ifft_rel_fx32( in32, N, guard_bits );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fIn[2 * i + 0];
+ float OutIm = fIn[2 * i + 1];
+ float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) );
+ if ( max_val_out < abs( in32[2 * i + 0] ) )
+ max_val_out = abs( in32[2 * i + 0] );
+ if ( max_val_out < abs( in32[2 * i + 1] ) )
+ max_val_out = abs( in32[2 * i + 1] );
+ if ( dev_val_real > max_deviation )
+ {
+ max_deviation = dev_val_real;
+ }
+ if ( dev_val_imag > max_deviation )
+ {
+ max_deviation = dev_val_imag;
+ }
+ }
+ free( in32 );
+ free( fIn );
+ break;
+ }
+ case TYPE_fft_fft3:
+ {
+ Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) );
+ Word32 *out32 = calloc( 2 * N, sizeof( out32[0] ) );
+ float *fIn = calloc( 2 * N, sizeof( fIn[0] ) );
+ float *fOut = calloc( 2 * N, sizeof( fOut[0] ) );
+ if ( ( in32 == NULL ) || ( fIn == NULL ) || ( out32 == NULL ) || ( fOut == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_interleave( in32, fIn, N );
+ }
+ fft3( fIn, fOut, N );
+ fft3_fx_ivas( in32, out32, N );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fOut[2 * i + 0];
+ float OutIm = fOut[2 * i + 1];
+ float dev_val_real = (float) ( fabs( OutRe - (float) out32[2 * i + 0] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) out32[2 * i + 1] ) );
+ if ( max_val_out < abs( out32[2 * i + 0] ) )
+ max_val_out = abs( out32[2 * i + 0] );
+ if ( max_val_out < abs( out32[2 * i + 1] ) )
+ max_val_out = abs( out32[2 * i + 1] );
+ if ( dev_val_real > max_deviation )
+ {
+ max_deviation = dev_val_real;
+ }
+ if ( dev_val_imag > max_deviation )
+ {
+ max_deviation = dev_val_imag;
+ }
+ }
+ free( in32 );
+ free( fIn );
+ free( out32 );
+ free( fOut );
+ break;
+ }
+ case TYPE_fft_ifft3:
+ {
+ Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) );
+ Word32 *out32 = calloc( 2 * N, sizeof( out32[0] ) );
+ float *fIn = calloc( 2 * N, sizeof( fIn[0] ) );
+ float *fOut = calloc( 2 * N, sizeof( fOut[0] ) );
+ if ( ( in32 == NULL ) || ( fIn == NULL ) || ( out32 == NULL ) || ( fOut == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_interleave( in32, fIn, N );
+ }
+ ifft3( fIn, fOut, N );
+ ifft3_fx_ivas( in32, out32, N );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fOut[2 * i + 0];
+ float OutIm = fOut[2 * i + 1];
+ float dev_val_real = (float) ( fabs( OutRe - (float) out32[2 * i + 0] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) out32[2 * i + 1] ) );
+ if ( max_val_out < abs( out32[2 * i + 0] ) )
+ max_val_out = abs( out32[2 * i + 0] );
+ if ( max_val_out < abs( out32[2 * i + 1] ) )
+ max_val_out = abs( out32[2 * i + 1] );
+ if ( dev_val_real > max_deviation )
+ {
+ max_deviation = dev_val_real;
+ }
+ if ( dev_val_imag > max_deviation )
+ {
+ max_deviation = dev_val_imag;
+ }
+ }
+ free( in32 );
+ free( fIn );
+ free( out32 );
+ free( fOut );
+ break;
+ }
+ case TYPE_fft_cldfb:
+ {
+ Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) );
+ float *fIn = calloc( 2 * N, sizeof( fIn[0] ) );
+ if ( ( in32 == NULL ) || ( fIn == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_interleave( in32, fIn, N );
+ }
+ fft_cldfb( fIn, N );
+ fft_cldfb_fx( in32, N );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fIn[2 * i + 0];
+ float OutIm = fIn[2 * i + 1];
+ float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) );
+ if ( max_val_out < abs( in32[2 * i + 0] ) )
+ max_val_out = abs( in32[2 * i + 0] );
+ if ( max_val_out < abs( in32[2 * i + 1] ) )
+ max_val_out = abs( in32[2 * i + 1] );
+ if ( dev_val_real > max_deviation )
+ max_deviation = dev_val_real;
+ if ( dev_val_imag > max_deviation )
+ max_deviation = dev_val_imag;
+ }
+ free( fIn );
+ free( in32 );
+ break;
+ }
+ case TYPE_fft_RFFTN:
+ {
+ Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) );
+ float *fIn = calloc( 2 * N, sizeof( fIn[0] ) );
+ if ( ( in32 == NULL ) || ( fIn == NULL ) )
+ {
+ printf( "!!!!Malloc failed!!!!" );
+ exit( 1 );
+ }
+ else
+ {
+ populate_input_interleave( in32, fIn, N );
+ }
+ RFFTN( fIn, fftSineTab640, N, 1 );
+ RFFTN_fx( in32, fftSineTab640_fx, N, 1 );
+ for ( int i = 0; i < N; i++ )
+ {
+ float OutRe = fIn[2 * i + 0];
+ float OutIm = fIn[2 * i + 1];
+ float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) );
+ float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) );
+ if ( max_val_out < abs( in32[2 * i + 0] ) )
+ max_val_out = abs( in32[2 * i + 0] );
+ if ( max_val_out < abs( in32[2 * i + 1] ) )
+ max_val_out = abs( in32[2 * i + 1] );
+ if ( dev_val_real > max_deviation )
+ {
+ max_deviation = dev_val_real;
+ }
+ if ( dev_val_imag > max_deviation )
+ {
+ max_deviation = dev_val_imag;
+ }
+ }
+ free( in32 );
+ free( fIn );
+ break;
+ }
}
- else
+
+
+ return ( ( max_deviation / max_val_out ) * 100 );
+}
+
+static void test_main( Word16 *fft_lengths, Word16 num_lengths, Word16 test_type )
+{
+ char *test_name[] = { "DoFFT", "fft", "fft_rel", "fft_rel32", "ifft_rel32", "fft_cldfb", "fft_RFFTN", "fft_fft3", "fft_ifft3" };
+ Word32 num_tests = 0, num_passed = 0, num_failed = 0;
+ printf( "\033[0;33m" );
+ printf( "============== Starting %s tests ================\n", test_name[test_type] );
+ for ( int i = num_lengths - 1; i >= 0; i-- )
{
- printf("\033[0;31m");
- printf("[FAILED] FFT test of length %d Max_deviation = %.6f\n", fft_lengths[i], max_deviation);
- num_failed++;
+ float max_deviation = test_fixed_fft( fft_lengths[i], test_type );
+ num_tests++;
+ if ( max_deviation < ALLOWED_DEVIATION )
+ {
+ printf( "\033[0;32m" );
+ printf( "[PASSED] %s test of length %d\n", test_name[test_type], fft_lengths[i] );
+ num_passed++;
+ }
+ else
+ {
+ printf( "\033[0;31m" );
+ printf( "[FAILED] %s test of length %d Max_deviation = %.6f\n", test_name[test_type], fft_lengths[i], max_deviation );
+ num_failed++;
+ }
}
- }
- printf("\033[0;33m");
- printf("============== Completed fft tests ================\n\n");
- printf("Summary of FFT unit tests:\n");
- printf("--------------------------\n");
- printf("Total tests: %d\n", num_tests);
- printf("Passed: %d\n", num_passed);
- printf("\033[0;31m");
- printf("Failed: %d\n\n", num_failed);
- printf("\033[0m");
+ printf( "\033[0;33m" );
+ printf( "============== Completed %s tests ================\n\n", test_name[test_type] );
+ printf( "Summary of %s unit tests:\n", test_name[test_type] );
+ printf( "--------------------------\n" );
+ printf( "Total tests: %d\n", num_tests );
+ printf( "Passed: %d\n", num_passed );
+ printf( "\033[0;31m" );
+ printf( "Failed: %d\n\n", num_failed );
+ printf( "\033[0m" );
}
-void run_fft_unit_test(void)
-{
- Word16 fft_lengths[] = {600, 480, 400, 320, 256, 240, 200, 160, 128, 120, 100, 80, 64, 40, 20,};
- Word16 fft_lengths_2[] = {960, 640, 600, 480, 400, 320, 256, 240, 200, 160, 128, 120, 100, 80, 64, 40, 20};
- test_DoFFT(&fft_lengths[0], sizeof(fft_lengths) / sizeof(fft_lengths[0]));
- test_fft(&fft_lengths_2[0], sizeof(fft_lengths_2) / sizeof(fft_lengths_2[0]));
- return;
-}
\ No newline at end of file
+void run_fft_unit_test( void )
+{
+ Word16 DoFFT_lengths[] = {
+ 600,
+ 480,
+ 400,
+ 320,
+ 256,
+ 240,
+ 200,
+ 160,
+ 128,
+ 120,
+ 100,
+ 80,
+ 64,
+ 40,
+ 20,
+ };
+ Word16 fft_lengths[] = { 960, 640, 600, 480, 400, 320, 256, 240, 200, 160, 128, 120, 100, 80, 64, 40, 20 };
+ Word16 fft_rel_lengths[] = { 128, 256, 512 };
+ Word16 fft_cldfb_lengths[] = { 5, 8, 10, 16, 20, 30 };
+ Word16 fft_RFFTN_lengths[] = { 640, 512 };
+ Word16 fft_fft3_lengths[] = { 1536, 384 };
+ test_main( &DoFFT_lengths[0], sizeof( DoFFT_lengths ) / sizeof( DoFFT_lengths[0] ), TYPE_DoFFT );
+ test_main( &fft_lengths[0], sizeof( fft_lengths ) / sizeof( fft_lengths[0] ), TYPE_fft );
+ test_main( &fft_rel_lengths[0], sizeof( fft_rel_lengths ) / sizeof( fft_rel_lengths[0] ), TYPE_fft_rel );
+ test_main( &fft_rel_lengths[0], sizeof( fft_rel_lengths ) / sizeof( fft_rel_lengths[0] ), TYPE_fft_rel32 );
+ test_main( &fft_rel_lengths[0], sizeof( fft_rel_lengths ) / sizeof( fft_rel_lengths[0] ), TYPE_ifft_rel32 );
+ test_main( &fft_cldfb_lengths[0], sizeof( fft_cldfb_lengths ) / sizeof( fft_cldfb_lengths[0] ), TYPE_fft_cldfb );
+ test_main( &fft_RFFTN_lengths[0], sizeof( fft_RFFTN_lengths ) / sizeof( fft_RFFTN_lengths[0] ), TYPE_fft_RFFTN );
+ test_main( &fft_fft3_lengths[0], sizeof( fft_fft3_lengths ) / sizeof( fft_fft3_lengths[0] ), TYPE_fft_fft3 );
+ test_main( &fft_fft3_lengths[0], sizeof( fft_fft3_lengths ) / sizeof( fft_fft3_lengths[0] ), TYPE_fft_ifft3 );
+ return;
+}
diff --git a/lib_util/test_mdct.c b/lib_util/test_mdct.c
new file mode 100644
index 0000000000000000000000000000000000000000..13ada376a4ca1c5133f4d238baee07c65fd5d45d
--- /dev/null
+++ b/lib_util/test_mdct.c
@@ -0,0 +1,222 @@
+/******************************************************************************************************
+
+ (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository. All Rights Reserved.
+
+ This software is protected by copyright law and by international treaties.
+ The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
+ Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
+ Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
+ Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
+ contributors to this repository retain full ownership rights in their respective contributions in
+ the software. This notice grants no license of any kind, including but not limited to patent
+ license, nor is any license granted by implication, estoppel or otherwise.
+
+ Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
+ contributions.
+
+ This software is provided "AS IS", without any express or implied warranties. The software is in the
+ development stage. It is intended exclusively for experts who have experience with such software and
+ solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
+ and fitness for a particular purpose are hereby disclaimed and excluded.
+
+ Any dispute, controversy or claim arising under or in relation to providing this software shall be
+ submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
+ accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
+ the United Nations Convention on Contracts on the International Sales of Goods.
+
+*******************************************************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+typedef int Word32;
+typedef short Word16;
+typedef short int16_t;
+
+#define TYPE_MDCT (0)
+#define TYPE_IMDCT (1)
+#define TYPE_EDCT2 (2)
+#define TYPE_NEG_EDCT2 (3)
+
+#define ALLOWED_DEVIATION (0.05)
+#define Q31 (2147483647.0f)
+
+extern const int16_t ip_edct2_64[6];
+extern const float w_edct2_64[80];
+extern const Word16 w_edct2_64_fx[80];
+
+void edct2(
+ const int16_t n,
+ const int16_t isgn,
+ float *in,
+ float *a,
+ const int16_t *ip,
+ const float *w );
+
+void edct2_fx(
+ const Word16 n,
+ const Word16 isgn,
+ Word16 *in,
+ Word32 *a,
+ Word16 *q,
+ const Word16 *ip,
+ const Word16 *w);
+
+void ivas_mdct(
+ const float *pIn,
+ float *pOut,
+ const int16_t length );
+
+void ivas_mdct_fx(
+ const Word32 *pIn,
+ Word32 *pOut,
+ const Word16 length,
+ Word16 *q_out);
+
+void ivas_imdct(
+ const float *pIn,
+ float *pOut,
+ const int16_t length );
+
+void ivas_imdct_fx(
+ const Word32 *pIn,
+ Word32 *pOut,
+ const Word16 length,
+ Word16 *q_out);
+
+void populate_input_interleave(Word32 *in32, float *fIn, Word16 N);
+void populate_input_interleave_16(Word16 *in16, float *fIn, Word16 N);
+
+static float test_ivas_mdct_imdct(Word16 N, Word32 test_type)
+{
+ Word32 *in = calloc(2 * N, sizeof(in[0]));
+ Word16 *in16 = calloc(2 * N, sizeof(in16[0]));
+ float *fIn = calloc(2 * N, sizeof(fIn[0]));
+ Word32 *out = calloc(2 * N, sizeof(out[0]));
+ float *fOut = calloc(2 * N, sizeof(fOut[0]));
+ float max_deviation = 0.0f;
+ Word32 max_val_out = 0;
+ Word16 q_out = 0;
+ float div_fac = 1.0f;
+
+ if ((in == NULL) || (fIn == NULL) || (out == NULL) || (fOut == NULL))
+ {
+ printf("!!!!Malloc failed!!!!");
+ exit(1);
+ }
+
+ switch(test_type)
+ {
+ case (TYPE_MDCT):
+ {
+ populate_input_interleave(in, fIn, N);
+ ivas_mdct(fIn, fOut, N);
+ ivas_mdct_fx(in, out, N, &q_out);
+ break;
+ }
+ case (TYPE_IMDCT):
+ {
+ populate_input_interleave(in, fIn, N);
+ ivas_imdct(fIn, fOut, N);
+ ivas_imdct_fx(in, out, N, &q_out);
+ break;
+ }
+ case (TYPE_EDCT2):
+ {
+ populate_input_interleave_16(in16, fIn, N);
+ edct2(64, 1, fIn, fOut, ip_edct2_64, w_edct2_64);
+ edct2_fx(64, 1, in16, out, &q_out, ip_edct2_64, w_edct2_64_fx);
+ break;
+ }
+ case (TYPE_NEG_EDCT2):
+ {
+ populate_input_interleave_16(in16, fIn, N);
+ edct2(64, -1, fIn, fOut, ip_edct2_64, w_edct2_64);
+ edct2_fx(64, -1, in16, out, &q_out, ip_edct2_64, w_edct2_64_fx);
+ break;
+ }
+ }
+ if (q_out >= 0)
+ {
+ div_fac = (float)1.0f / (float)(1 << q_out);
+ }
+ else
+ {
+ div_fac = (float)(1 << (-q_out));
+ }
+
+ for(int i = 0; i < (N >> 1); i++)
+ {
+ float OutRe = fOut[2 * i + 0];
+ float OutIm = fOut[2 * i + 1];
+ float dev_val_real = (float)(fabs((OutRe / div_fac) - (float)(out[2 * i + 0])));
+ float dev_val_imag = (float)(fabs((OutIm / div_fac) - (float)(out[2 * i + 1])));
+ if (max_val_out < abs(out[2 * i + 0]))
+ max_val_out = abs(out[2 * i + 0]);
+ if (max_val_out < abs(out[2 * i + 1]))
+ max_val_out = abs(out[2 * i + 1]);
+ if (dev_val_real > max_deviation)
+ max_deviation = dev_val_real;
+ if (dev_val_imag > max_deviation)
+ max_deviation = dev_val_imag;
+ }
+ free( in );
+ free( fIn );
+ free( out );
+ free( fOut );
+ return (( max_deviation / max_val_out) * 100);
+}
+
+static void test_main(Word16 *imdct_lengths, Word16 num_lengths, Word16 test_type)
+{
+ char *test_name[]= {"IVAS MDCT", "IVAS IMDCT", "IVAS EDCT2", "IVAS NEG_EDCT2"};
+ Word32 num_tests = 0, num_passed = 0, num_failed = 0;
+ printf("\033[0;33m");
+ printf("============== Starting %s tests ================\n", test_name[test_type]);
+ for ( int i = num_lengths - 1; i >=0 ; i-- )
+ {
+ float max_deviation = test_ivas_mdct_imdct( imdct_lengths[i], test_type);
+ num_tests++;
+ if (max_deviation < ALLOWED_DEVIATION)
+ {
+ printf("\033[0;32m");
+ printf("[PASSED] %s test of length %d\n", test_name[test_type], imdct_lengths[i]);
+ num_passed++;
+ }
+ else
+ {
+ printf("\033[0;31m");
+ printf("[FAILED] %s test of length %d Max_deviation = %.6f\n", test_name[test_type], imdct_lengths[i], max_deviation);
+ num_failed++;
+ }
+ }
+ printf("\033[0;33m");
+ printf("============== Completed %s tests ================\n\n", test_name[test_type]);
+ printf("Summary of %s unit tests:\n", test_name[test_type]);
+ printf("--------------------------\n");
+ printf("Total tests: %d\n", num_tests);
+ printf("Passed: %d\n", num_passed);
+ printf("\033[0;31m");
+ printf("Failed: %d\n\n", num_failed);
+ printf("\033[0m");
+}
+
+void run_mdct_unit_test(void)
+{
+ Word16 ivas_mdct_lengths[] = {480, 320, 160,};
+ Word16 ivas_imdct_lengths[] = {480, 320, 160, 80,};
+ Word16 edct2_lengths[] = {64};
+ test_main(&ivas_mdct_lengths[0], sizeof(ivas_mdct_lengths) / sizeof(ivas_mdct_lengths[0]), TYPE_MDCT);
+ test_main(&ivas_imdct_lengths[0], sizeof(ivas_imdct_lengths) / sizeof(ivas_imdct_lengths[0]), TYPE_IMDCT);
+ test_main(&edct2_lengths[0], 1, TYPE_EDCT2);
+ test_main(&edct2_lengths[0], 1, TYPE_NEG_EDCT2);
+ return;
+}
\ No newline at end of file