From 1f3c7e5fe2173e45d80e2eda11fccf3b0c0462b7 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Thu, 21 Dec 2023 21:55:01 +0530 Subject: [PATCH] ivas mdct, imdct, rfft, fft3, ifft3 updates Significance: ============= [x] Changes pertaining to MDCT/IMDCT fixed point conversion [x] Added new unit tests under FFT - includes testing of fft_rel function [x] The version of fft_rel_fx taken from EVS fails conformance [x] Two alternate versions of fft_rel_fx have been created version 1: By extending 16-bit input of evs fft_rel_fx to 32-bit. This fails conformance for 512 point FFT. version 2: Through fixed point conversion of fft_rel function in IVAS This passes conformance for all lengths supported by fft_rel. [x] Converted ifft_rel to fixed point as well. [x] Input scaling is needed sometimes for the random input generated as the input values generated are at times very small causing relative error in output to exceed 0.005% (threshold for unit tests currently) [x] Changed real basops to complex basops in a few FFT functions. Testing: ======== [x] Unit tests of FFT and MDCT Known Issues: ============= [x] Unit test of RFFTN for 640 length is failing with random input. 
--- Workspace_msvc/decoder.vcxproj | 1 + Workspace_msvc/lib_com.vcxproj | 3 + Workspace_msvc/lib_com.vcxproj.filters | 3 + apps/decoder.c | 2 + lib_com/fft_cldfb_fx.c | 1070 ++++++ lib_com/fft_fx.c | 4589 ++++++++++-------------- lib_com/fft_rel.c | 410 ++- lib_com/ifft_rel.c | 606 ++++ lib_com/ivas_mdct_imdct_fx.c | 399 ++ lib_com/ivas_rom_com_fx.c | 199 + lib_com/ivas_rom_com_fx.h | 52 + lib_com/prot_fx2.h | 19 + lib_com/rom_com.c | 131 + lib_com/rom_com.h | 1 + lib_com/rom_com_fx.c | 16 + lib_util/test_fft.c | 877 ++++- lib_util/test_mdct.c | 222 ++ 17 files changed, 5838 insertions(+), 2762 deletions(-) create mode 100644 lib_com/fft_cldfb_fx.c create mode 100644 lib_com/ivas_mdct_imdct_fx.c create mode 100644 lib_com/ivas_rom_com_fx.c create mode 100644 lib_com/ivas_rom_com_fx.h create mode 100644 lib_util/test_mdct.c diff --git a/Workspace_msvc/decoder.vcxproj b/Workspace_msvc/decoder.vcxproj index 98827b70c..ca0d96f44 100644 --- a/Workspace_msvc/decoder.vcxproj +++ b/Workspace_msvc/decoder.vcxproj @@ -150,6 +150,7 @@ + diff --git a/Workspace_msvc/lib_com.vcxproj b/Workspace_msvc/lib_com.vcxproj index eeb61b2d9..af6c1ed7b 100644 --- a/Workspace_msvc/lib_com.vcxproj +++ b/Workspace_msvc/lib_com.vcxproj @@ -179,6 +179,7 @@ + @@ -243,12 +244,14 @@ + + diff --git a/Workspace_msvc/lib_com.vcxproj.filters b/Workspace_msvc/lib_com.vcxproj.filters index 8eac6b783..4c72cb99e 100644 --- a/Workspace_msvc/lib_com.vcxproj.filters +++ b/Workspace_msvc/lib_com.vcxproj.filters @@ -451,6 +451,9 @@ common_ivas_c + + common_ivas_c + common_ivas_c diff --git a/apps/decoder.c b/apps/decoder.c index dbbda4362..a5b1949b0 100644 --- a/apps/decoder.c +++ b/apps/decoder.c @@ -130,6 +130,7 @@ static void usage_dec( void ); static ivas_error decodeG192( DecArguments arg, BS_READER_HANDLE hBsReader, RotFileReader *headRotReader, RotFileReader *externalOrientationFileReader, RotFileReader *refRotReader, Vector3PairFileReader *referenceVectorReader, IVAS_DEC_HANDLE hIvasDec, int16_t 
*pcmBuf ); static ivas_error decodeVoIP( DecArguments arg, BS_READER_HANDLE hBsReader, RotFileReader *headRotReader, RotFileReader *externalOrientationFileReader, RotFileReader *refRotReader, Vector3PairFileReader *referenceVectorReader, IVAS_DEC_HANDLE hIvasDec ); void run_fft_unit_test(void); +void run_mdct_unit_test(void); /*------------------------------------------------------------------------------------------* * main() @@ -173,6 +174,7 @@ int main( if (run_unit_tests) { run_fft_unit_test(); + run_mdct_unit_test(); return; } diff --git a/lib_com/fft_cldfb_fx.c b/lib_com/fft_cldfb_fx.c new file mode 100644 index 000000000..0aa46d605 --- /dev/null +++ b/lib_com/fft_cldfb_fx.c @@ -0,0 +1,1070 @@ +/****************************************************************************************************** + + (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. 
This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. + + Any dispute, controversy or claim arising under or in relation to providing this software shall be + submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in + accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and + the United Nations Convention on Contracts on the International Sales of Goods. + +*******************************************************************************************************/ + +/*==================================================================================== + EVS Codec 3GPP TS26.443 Nov 04, 2021. 
Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0 + ====================================================================================*/ + +#include +#include "options.h" +#include +#include "prot.h" +#include "ivas_cnst.h" +#include "wmc_auto.h" +#include "basop_util.h" +#include "complex_basop.h" + +#define Mpy_32_xx Mpy_32_16_1 + +#define FFTC(x) WORD322WORD16((Word32)x) + +#define C31 (FFTC(0x91261468)) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */ + +#define C51 (FFTC(0x79bc3854)) /* FL2WORD32( 0.95105652) */ +#define C52 (FFTC(0x9d839db0)) /* FL2WORD32(-1.53884180/2) */ +#define C53 (FFTC(0xd18053ce)) /* FL2WORD32(-0.36327126) */ +#define C54 (FFTC(0x478dde64)) /* FL2WORD32( 0.55901699) */ +#define C55 (FFTC(0xb0000001)) /* FL2WORD32(-1.25/2) */ + +#define C81 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) */ +#define C82 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) */ + +#define C161 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) INV_SQRT2 */ +#define C162 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2 */ + +#define C163 (FFTC(0x7641af3d)) /* FL2WORD32( 9.238795325112867e-1) COS_PI_DIV8 */ +#define C164 (FFTC(0x89be50c3)) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8 */ + +#define C165 (FFTC(0x30fbc54d)) /* FL2WORD32( 3.826834323650898e-1) COS_3PI_DIV8 */ +#define C166 (FFTC(0xcf043ab3)) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */ + + +#define cplxMpy4_8_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),1); \ + im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),1); + +#define cplxMpy4_8_1(re,im,a,b) re = L_shr(a,1); \ + im = L_shr(b,1); + +void fft16_with_cmplx_data(cmplx *pInp, Word16 bsacle); + + +#if 0 +#define SCALEFACTOR5 ( 4) +#define SCALEFACTOR8 ( 4) +#define SCALEFACTOR10 ( 5) +#define SCALEFACTOR16 ( 5) +#define SCALEFACTOR20 ( 5) +#define SCALEFACTOR30 ( 6) +#define SCALEFACTOR30_1 ( 5) +#define SCALEFACTOR30_2 ( 1) +#else +#undef SCALEFACTOR5 +#undef SCALEFACTOR8 +#undef 
SCALEFACTOR10 +#undef SCALEFACTOR16 +#undef SCALEFACTOR20 +#undef SCALEFACTOR30 +#undef SCALEFACTOR30_1 +#undef SCALEFACTOR30_2 + +#define SCALEFACTOR5 ( 0) +#define SCALEFACTOR8 ( 0) +#define SCALEFACTOR10 ( 0) +#define SCALEFACTOR16 ( 0) +#define SCALEFACTOR20 ( 0) +#define SCALEFACTOR30 ( 0) +#define SCALEFACTOR30_1 ( 0) +#define SCALEFACTOR30_2 ( 0) +#endif + +cmplx CL_scale_t(cmplx x, Word16 y); +cmplx CL_dscale_t(cmplx x, Word16 y1, Word16 y2); + +/** + * \brief Function performs a complex 8-point FFT + * The FFT is performed inplace. The result of the FFT + * is scaled by SCALEFACTOR8 bits. + * + * WOPS with 32x16 bit multiplications: 108 cycles + * + * \param [i/o] re real input / output + * \param [i/o] im imag input / output + * \param [i ] s stride real and imag input / output + * + * \return void + */ +static void fft8_with_cmplx_data(cmplx *inp) +{ + cmplx x0, x1, x2, x3, x4, x5, x6, x7; + cmplx s0, s1, s2, s3, s4, s5, s6, s7; + cmplx t0, t1, t2, t3, t4, t5, t6, t7; + + /* Pre-additions */ + x0 = CL_shr(inp[0], SCALEFACTOR8); + x1 = CL_shr(inp[1], SCALEFACTOR8); + x2 = CL_shr(inp[2], SCALEFACTOR8); + x3 = CL_shr(inp[3], SCALEFACTOR8); + x4 = CL_shr(inp[4], SCALEFACTOR8); + x5 = CL_shr(inp[5], SCALEFACTOR8); + x6 = CL_shr(inp[6], SCALEFACTOR8); + x7 = CL_shr(inp[7], SCALEFACTOR8); + + /* loops are unrolled */ + { + t0 = CL_add(x0,x4); + t1 = CL_sub(x0,x4); + + t2 = CL_add(x1,x5); + t3 = CL_sub(x1,x5); + + t4 = CL_add(x2,x6); + t5 = CL_sub(x2,x6); + + t6 = CL_add(x3,x7); + t7 = CL_sub(x3,x7); + } + + /* Pre-additions and core multiplications */ + + s0 = CL_add(t0, t4); + s2 = CL_sub(t0, t4); + + s4 = CL_mac_j(t1, t5); + s5 = CL_msu_j(t1, t5); + + s1 = CL_add(t2, t6); + s3 = CL_sub(t2, t6); + s3 = CL_mul_j(s3); + + t0 = CL_add(t3, t7); + t1 = CL_sub(t3, t7); + + s6 = CL_scale_t(CL_msu_j(t1, t0), C81); + s7 = CL_dscale_t(CL_swap_real_imag(CL_msu_j(t0, t1)), C81, C82); + + /* Post-additions */ + + inp[0] = CL_add(s0, s1); + inp[4] = CL_sub(s0, s1); + + 
inp[2] = CL_sub(s2, s3); + inp[6] = CL_add(s2, s3); + + inp[3] = CL_add(s4, s7); + inp[7] = CL_sub(s4, s7); + + inp[1] = CL_add(s5, s6); + inp[5] = CL_sub(s5, s6); +#if (WMOPS) + multiCounter[currCounter].CL_move += 8; +#endif +} + +/** + * \brief Function performs a complex 5-point FFT + * The FFT is performed inplace. The result of the FFT + * is scaled by SCALEFACTOR5 bits. + * + * WOPS with 32x16 bit multiplications: 88 cycles + * + * \param [i/o] re real input / output + * \param [i/o] im imag input / output + * \param [i ] s stride real and imag input / output + * + * \return void + */ +static void fft5_with_cmplx_data(cmplx *inp) +{ + cmplx x0,x1,x2,x3,x4; + cmplx y1,y2,y3,y4; + cmplx t; + + x0 = CL_shr(inp[0],SCALEFACTOR5); + x1 = CL_shr(inp[1],SCALEFACTOR5); + x2 = CL_shr(inp[2],SCALEFACTOR5); + x3 = CL_shr(inp[3],SCALEFACTOR5); + x4 = CL_shr(inp[4],SCALEFACTOR5); + + y1 = CL_add(x1,x4); + y4 = CL_sub(x1,x4); + y3 = CL_add(x2,x3); + y2 = CL_sub(x2,x3); + t = CL_scale_t(CL_sub(y1,y3),C54); + y1 = CL_add(y1,y3); + inp[0] = CL_add(x0,y1); + + /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of + the values as fracts */ + y1 = CL_add(inp[0],(CL_shl(CL_scale_t(y1,C55),1))); + y3 = CL_sub(y1,t); + y1 = CL_add(y1,t); + + t = CL_scale_t(CL_add(y4,y2),C51); + /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of + the values as fracts */ + y4 = CL_add(t,CL_shl(CL_scale_t(y4, C52),1)); + y2 = CL_add(t,CL_scale_t(y2,C53)); + + + /* combination */ + inp[1] = CL_msu_j(y1,y2); + inp[4] = CL_mac_j(y1,y2); + + inp[2] = CL_mac_j(y3,y4); + inp[3] = CL_msu_j(y3,y4); + +#if (WMOPS) + multiCounter[currCounter].CL_move += 5; +#endif + +} + +/** + * \brief Function performs a complex 10-point FFT + * The FFT is performed inplace. The result of the FFT + * is scaled by SCALEFACTOR10 bits. 
+ * + * WOPS with 32x16 bit multiplications: 196 cycles + * + * \param [i/o] re real input / output + * \param [i/o] im imag input / output + * \param [i ] s stride real and imag input / output + * + * \return void + */ +static void fft10_with_cmplx_data(cmplx *inp_data) +{ + cmplx r1,r2,r3,r4; + cmplx x0,x1,x2,x3,x4,t; + cmplx y[10]; + + /* FOR i=0 */ + { + x0 = CL_shr(inp_data[0],SCALEFACTOR10); + x1 = CL_shr(inp_data[2],SCALEFACTOR10); + x2 = CL_shr(inp_data[4],SCALEFACTOR10); + x3 = CL_shr(inp_data[6],SCALEFACTOR10); + x4 = CL_shr(inp_data[8],SCALEFACTOR10); + + r1 = CL_add(x3,x2); + r4 = CL_sub(x3,x2); + r3 = CL_add(x1,x4); + r2 = CL_sub(x1,x4); + t = CL_scale_t(CL_sub(r1,r3),C54); + r1 = CL_add(r1,r3); + y[0] = CL_add(x0,r1); + r1 = CL_add(y[0],(CL_shl(CL_scale_t(r1,C55),1))); + r3 = CL_sub(r1,t); + r1 = CL_add(r1,t); + t = CL_scale_t((CL_add(r4,r2)),C51); + r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1)); + r2 = CL_add(t,CL_scale_t(r2,C53)); + + + y[2] = CL_msu_j(r1,r2); + y[8] = CL_mac_j(r1,r2); + y[4] = CL_mac_j(r3,r4); + y[6] = CL_msu_j(r3,r4); + } + /* FOR i=1 */ + { + x0 = CL_shr(inp_data[5],SCALEFACTOR10); + x1 = CL_shr(inp_data[1],SCALEFACTOR10); + x2 = CL_shr(inp_data[3],SCALEFACTOR10); + x3 = CL_shr(inp_data[7],SCALEFACTOR10); + x4 = CL_shr(inp_data[9],SCALEFACTOR10); + + r1 = CL_add(x1,x4); + r4 = CL_sub(x1,x4); + r3 = CL_add(x3,x2); + r2 = CL_sub(x3,x2); + t = CL_scale_t(CL_sub(r1,r3),C54); + r1 = CL_add(r1,r3); + y[1] = CL_add(x0,r1); + r1 = CL_add(y[1],(CL_shl(CL_scale_t(r1,C55),1))); + r3 = CL_sub(r1,t); + r1 = CL_add(r1,t); + t = CL_scale_t((CL_add(r4,r2)),C51); + r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1)); + r2 = CL_add(t,CL_scale_t(r2,C53)); + + + y[3] = CL_msu_j(r1,r2); + y[9] = CL_mac_j(r1,r2); + y[5] = CL_mac_j(r3,r4); + y[7] = CL_msu_j(r3,r4); + } + + /* FOR i=0 */ + { + inp_data[0] = CL_add(y[0],y[1]); + inp_data[5] = CL_sub(y[0],y[1]); + } + /* FOR i=2 */ + { + inp_data[2] = CL_add(y[2],y[3]); + inp_data[7] = CL_sub(y[2],y[3]); + } + /* 
FOR i=4 */ + { + inp_data[4] = CL_add(y[4],y[5]); + inp_data[9] = CL_sub(y[4],y[5]); + } + /* FOR i=6 */ + { + inp_data[6] = CL_add(y[6],y[7]); + inp_data[1] = CL_sub(y[6],y[7]); + } + /* FOR i=8 */ + { + inp_data[8] = CL_add(y[8],y[9]); + inp_data[3] = CL_sub(y[8],y[9]); + } + +#if (WMOPS) + multiCounter[currCounter].CL_move += 10; +#endif + +} + +/** + * \brief Function performs a complex 20-point FFT + * The FFT is performed inplace. The result of the FFT + * is scaled by SCALEFACTOR20 bits. + * + * WOPS with 32x16 bit multiplications: 432 cycles + * + * \param [i/o] re real input / output + * \param [i/o] im imag input / output + * \param [i ] s stride real and imag input / output + * + * \return void + */ +static void fft20_with_cmplx_data(cmplx *inp_data) +{ + cmplx r1,r2,r3,r4; + cmplx x0,x1,x2,x3,x4; + cmplx t,t0,t1,t2,t3; + cmplx y[20]; + cmplx *y0, *y1,*y2,*y3,*y4; + + y0 = y; + y1 = &y[4]; + y2 = &y[16]; + y3 = &y[8]; + y4 = &y[12]; + + { + x0 = CL_shr(inp_data[0],SCALEFACTOR20); + x1 = CL_shr(inp_data[16],SCALEFACTOR20); + x2 = CL_shr(inp_data[12],SCALEFACTOR20); + x3 = CL_shr(inp_data[8],SCALEFACTOR20); + x4 = CL_shr(inp_data[4],SCALEFACTOR20); + + r4 = CL_sub(x1,x4); + r2 = CL_sub(x2,x3); + r1 = CL_add(x1,x4); + r3 = CL_add(x2,x3); + t = CL_scale_t(CL_sub(r1,r3),C54); + r1 = CL_add(r1,r3); + y0[0] = CL_add(x0,r1); + r1 = CL_add(y0[0],(CL_shl(CL_scale_t(r1,C55),1))); + r3 = CL_sub(r1,t); + r1 = CL_add(r1,t); + t = CL_scale_t((CL_add(r4,r2)),C51); + r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1)); + r2 = CL_add(t,CL_scale_t(r2,C53)); + + + y1[0] = CL_msu_j(r1,r2); + y2[0] = CL_mac_j(r1,r2); + y3[0] = CL_mac_j(r3,r4); + y4[0] = CL_msu_j(r3,r4); + } + { + x0 = CL_shr(inp_data[5],SCALEFACTOR20); + x1 = CL_shr(inp_data[1],SCALEFACTOR20); + x2 = CL_shr(inp_data[17],SCALEFACTOR20); + x3 = CL_shr(inp_data[13],SCALEFACTOR20); + x4 = CL_shr(inp_data[9],SCALEFACTOR20); + + r4 = CL_sub(x1,x4); + r2 = CL_sub(x2,x3); + r1 = CL_add(x1,x4); + r3 = CL_add(x2,x3); + t = 
CL_scale_t(CL_sub(r1,r3),C54); + r1 = CL_add(r1,r3); + y0[1] = CL_add(x0,r1); + r1 = CL_add(y0[1],(CL_shl(CL_scale_t(r1,C55),1))); + r3 = CL_sub(r1,t); + r1 = CL_add(r1,t); + t = CL_scale_t((CL_add(r4,r2)),C51); + r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1)); + r2 = CL_add(t,CL_scale_t(r2,C53)); + + + y1[1] = CL_msu_j(r1,r2); + y2[1] = CL_mac_j(r1,r2); + y3[1] = CL_mac_j(r3,r4); + y4[1] = CL_msu_j(r3,r4); + } + { + x0 = CL_shr(inp_data[10],SCALEFACTOR20); + x1 = CL_shr(inp_data[6],SCALEFACTOR20); + x2 = CL_shr(inp_data[2],SCALEFACTOR20); + x3 = CL_shr(inp_data[18],SCALEFACTOR20); + x4 = CL_shr(inp_data[14],SCALEFACTOR20); + + r4 = CL_sub(x1,x4); + r2 = CL_sub(x2,x3); + r1 = CL_add(x1,x4); + r3 = CL_add(x2,x3); + t = CL_scale_t(CL_sub(r1,r3),C54); + r1 = CL_add(r1,r3); + y0[2] = CL_add(x0,r1); + r1 = CL_add(y0[2],(CL_shl(CL_scale_t(r1,C55),1))); + r3 = CL_sub(r1,t); + r1 = CL_add(r1,t); + t = CL_scale_t((CL_add(r4,r2)),C51); + r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1)); + r2 = CL_add(t,CL_scale_t(r2,C53)); + + + y1[2] = CL_msu_j(r1,r2); + y2[2] = CL_mac_j(r1,r2); + y3[2] = CL_mac_j(r3,r4); + y4[2] = CL_msu_j(r3,r4); + } + { + x0 = CL_shr(inp_data[15],SCALEFACTOR20); + x1 = CL_shr(inp_data[11],SCALEFACTOR20); + x2 = CL_shr(inp_data[7],SCALEFACTOR20); + x3 = CL_shr(inp_data[3],SCALEFACTOR20); + x4 = CL_shr(inp_data[19],SCALEFACTOR20); + + r4 = CL_sub(x1,x4); + r2 = CL_sub(x2,x3); + r1 = CL_add(x1,x4); + r3 = CL_add(x2,x3); + t = CL_scale_t(CL_sub(r1,r3),C54); + r1 = CL_add(r1,r3); + y0[3] = CL_add(x0,r1); + r1 = CL_add(y0[3],(CL_shl(CL_scale_t(r1,C55),1))); + r3 = CL_sub(r1,t); + r1 = CL_add(r1,t); + t = CL_scale_t((CL_add(r4,r2)),C51); + r4 = CL_add(t,CL_shl(CL_scale_t(r4, C52),1)); + r2 = CL_add(t,CL_scale_t(r2,C53)); + + + y1[3] = CL_msu_j(r1,r2); + y2[3] = CL_mac_j(r1,r2); + y3[3] = CL_mac_j(r3,r4); + y4[3] = CL_msu_j(r3,r4); + } + + { + cmplx * ptr_y = y; + { + cmplx Cy0, Cy1, Cy2, Cy3; + + Cy0 = *ptr_y++; + Cy1 = *ptr_y++; + Cy2 = *ptr_y++; + Cy3 = *ptr_y++; + 
+ /* Pre-additions */ + t0 = CL_add(Cy0,Cy2); + t1 = CL_sub(Cy0,Cy2); + t2 = CL_add(Cy1,Cy3); + t3 = CL_sub(Cy1,Cy3); + + + inp_data[0] = CL_add(t0,t2); + inp_data[5] = CL_msu_j(t1,t3); + inp_data[10] = CL_sub(t0,t2); + inp_data[15] = CL_mac_j(t1,t3); + } + + { + cmplx Cy0, Cy1, Cy2, Cy3; + + Cy0 = *ptr_y++; + Cy1 = *ptr_y++; + Cy2 = *ptr_y++; + Cy3 = *ptr_y++; + + /* Pre-additions */ + t0 = CL_add(Cy0,Cy2); + t1 = CL_sub(Cy0,Cy2); + t2 = CL_add(Cy1,Cy3); + t3 = CL_sub(Cy1,Cy3); + + + inp_data[4] = CL_add(t0,t2); + inp_data[9] = CL_msu_j(t1,t3); + inp_data[14] = CL_sub(t0,t2); + inp_data[19] = CL_mac_j(t1,t3); + } + + { + cmplx Cy0, Cy1, Cy2, Cy3; + + Cy0 = *ptr_y++; + Cy1 = *ptr_y++; + Cy2 = *ptr_y++; + Cy3 = *ptr_y++; + + /* Pre-additions */ + t0 = CL_add(Cy0,Cy2); + t1 = CL_sub(Cy0,Cy2); + t2 = CL_add(Cy1,Cy3); + t3 = CL_sub(Cy1,Cy3); + + + inp_data[8] = CL_add(t0,t2); + inp_data[13] = CL_msu_j(t1,t3); + inp_data[18] = CL_sub(t0,t2); + inp_data[3] = CL_mac_j(t1,t3); + } + + { + cmplx Cy0, Cy1, Cy2, Cy3; + + Cy0 = *ptr_y++; + Cy1 = *ptr_y++; + Cy2 = *ptr_y++; + Cy3 = *ptr_y++; + + /* Pre-additions */ + t0 = CL_add(Cy0,Cy2); + t1 = CL_sub(Cy0,Cy2); + t2 = CL_add(Cy1,Cy3); + t3 = CL_sub(Cy1,Cy3); + + inp_data[12] = CL_add(t0,t2); + inp_data[17] = CL_msu_j(t1,t3); + inp_data[2] = CL_sub(t0,t2); + inp_data[7] = CL_mac_j(t1,t3); + } + + { + cmplx Cy0, Cy1, Cy2, Cy3; + + Cy0 = *ptr_y++; + Cy1 = *ptr_y++; + Cy2 = *ptr_y++; + Cy3 = *ptr_y++; + + /* Pre-additions */ + t0 = CL_add(Cy0,Cy2); + t1 = CL_sub(Cy0,Cy2); + t2 = CL_add(Cy1,Cy3); + t3 = CL_sub(Cy1,Cy3); + + + inp_data[16] = CL_add(t0,t2); + inp_data[1] = CL_msu_j(t1,t3); + inp_data[6] = CL_sub(t0,t2); + inp_data[11] = CL_mac_j(t1,t3); + } + } +#if (WMOPS) + multiCounter[currCounter].CL_move += 20; +#endif + +} + + +/** + * \brief Function performs a complex 30-point FFT + * The FFT is performed inplace. The result of the FFT + * is scaled by SCALEFACTOR30 bits. 
+ * + * WOPS with 32x16 bit multiplications: 828 cycles + * + * \param [i/o] re real input / output + * \param [i/o] im imag input / output + * \param [i ] s stride real and imag input / output + * + * \return void + */ +static void fft30_with_cmplx_data(cmplx * inp) +{ + cmplx *l = &inp[0]; + cmplx *h = &inp[15]; + + cmplx z[30], y[15], x[15], rs1, rs2, rs3, rs4, t; + + /* 1. FFT15 stage */ + + x[0] = CL_shr(inp[0],SCALEFACTOR30_1); + x[1] = CL_shr(inp[18],SCALEFACTOR30_1); + x[2] = CL_shr(inp[6],SCALEFACTOR30_1); + x[3] = CL_shr(inp[24],SCALEFACTOR30_1); + x[4] = CL_shr(inp[12],SCALEFACTOR30_1); + + x[5] = CL_shr(inp[20],SCALEFACTOR30_1); + x[6] = CL_shr(inp[8],SCALEFACTOR30_1); + x[7] = CL_shr(inp[26],SCALEFACTOR30_1); + x[8] = CL_shr(inp[14],SCALEFACTOR30_1); + x[9] = CL_shr(inp[2],SCALEFACTOR30_1); + + x[10] = CL_shr(inp[10],SCALEFACTOR30_1); + x[11] = CL_shr(inp[28],SCALEFACTOR30_1); + x[12] = CL_shr(inp[16],SCALEFACTOR30_1); + x[13] = CL_shr(inp[4],SCALEFACTOR30_1); + x[14] = CL_shr(inp[22],SCALEFACTOR30_1); + + + /* 1. FFT5 stage */ + rs1 = CL_add(x[1],x[4]); + rs4 = CL_sub(x[1],x[4]); + rs3 = CL_add(x[2],x[3]); + rs2 = CL_sub(x[2],x[3]); + t = CL_scale_t(CL_sub(rs1,rs3),C54); + rs1 = CL_add(rs1,rs3); + y[0] = CL_add(x[0],rs1); + rs1 = CL_add(y[0],(CL_shl(CL_scale_t(rs1,C55),1))); + rs3 = CL_sub(rs1,t); + rs1 = CL_add(rs1,t); + t = CL_scale_t(CL_add(rs4,rs2),C51); + rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1)); + rs2 = CL_add(t,CL_scale_t(rs2,C53)); + + /* combination */ + y[1] = CL_msu_j(rs1,rs2); + y[4] = CL_mac_j(rs1,rs2); + y[2] = CL_mac_j(rs3,rs4); + y[3] = CL_msu_j(rs3,rs4); + + + /* 2. 
FFT5 stage */ + rs1 = CL_add(x[6],x[9]); + rs4 = CL_sub(x[6],x[9]); + rs3 = CL_add(x[7],x[8]); + rs2 = CL_sub(x[7],x[8]); + t = CL_scale_t(CL_sub(rs1,rs3),C54); + rs1 = CL_add(rs1,rs3); + y[5] = CL_add(x[5],rs1); + rs1 = CL_add(y[5],(CL_shl(CL_scale_t(rs1,C55),1))); + rs3 = CL_sub(rs1,t); + rs1 = CL_add(rs1,t); + t = CL_scale_t(CL_add(rs4,rs2),C51); + rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1)); + rs2 = CL_add(t,CL_scale_t(rs2,C53)); + + /* combination */ + y[6] = CL_msu_j(rs1,rs2); + y[9] = CL_mac_j(rs1,rs2); + y[7] = CL_mac_j(rs3,rs4); + y[8] = CL_msu_j(rs3,rs4); + + + /* 3. FFT5 stage */ + rs1 = CL_add(x[11],x[14]); + rs4 = CL_sub(x[11],x[14]); + rs3 = CL_add(x[12],x[13]); + rs2 = CL_sub(x[12],x[13]); + t = CL_scale_t(CL_sub(rs1,rs3),C54); + rs1 = CL_add(rs1,rs3); + y[10] = CL_add(x[10],rs1); + rs1 = CL_add(y[10],(CL_shl(CL_scale_t(rs1,C55),1))); + rs3 = CL_sub(rs1,t); + rs1 = CL_add(rs1,t); + t = CL_scale_t(CL_add(rs4,rs2),C51); + rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1)); + rs2 = CL_add(t,CL_scale_t(rs2,C53)); + + /* combination */ + y[11] = CL_msu_j(rs1,rs2); + y[14] = CL_mac_j(rs1,rs2); + y[12] = CL_mac_j(rs3,rs4); + y[13] = CL_msu_j(rs3,rs4); + /*for (i=10; i<15; i++) + { + printf("%d,\t %d,\t",y[i].re, y[i].im); + } + printf("\n\n");*/ + + + /* 1. FFT3 stage */ + /* real part */ + rs1 = CL_add(y[5],y[10]); + rs2 = CL_scale_t(CL_sub(y[5],y[10]),C31); + z[0] = CL_add(y[0],rs1); + rs1 = CL_sub(y[0],CL_shr(rs1,1)); + + z[10] = CL_mac_j(rs1,rs2); + z[5] = CL_msu_j(rs1,rs2); + + /* 2. FFT3 stage */ + rs1 = CL_add(y[6],y[11]); + rs2 = CL_scale_t(CL_sub(y[6],y[11]),C31); + z[6] = CL_add(y[1],rs1); + rs1 = CL_sub(y[1],CL_shr(rs1,1)); + + z[1] = CL_mac_j(rs1,rs2); + z[11] = CL_msu_j(rs1,rs2); + + + /* 3. FFT3 stage */ + rs1 = CL_add(y[7],y[12]); + rs2 = CL_scale_t(CL_sub(y[7],y[12]),C31); + z[12] = CL_add(y[2],rs1); + rs1 = CL_sub(y[2],CL_shr(rs1,1)); + + z[7] = CL_mac_j(rs1,rs2); + z[2] = CL_msu_j(rs1,rs2); + + + /* 4. 
FFT3 stage */ + rs1 = CL_add(y[8],y[13]); + rs2 = CL_scale_t(CL_sub(y[8],y[13]),C31); + z[3] = CL_add(y[3],rs1); + rs1 = CL_sub(y[3],CL_shr(rs1,1)); + + z[13] = CL_mac_j(rs1,rs2); + z[8] = CL_msu_j(rs1,rs2); + + + /* 5. FFT3 stage */ + rs1 = CL_add(y[9],y[14]); + rs2 = CL_scale_t(CL_sub(y[9],y[14]),C31); + z[9] = CL_add(y[4],rs1); + rs1 = CL_sub(y[4],CL_shr(rs1,1)); + + z[4] = CL_mac_j(rs1,rs2); + z[14] = CL_msu_j(rs1,rs2); + + /*for (i=0; i<15; i++) + printf("%d,\t %d,\t",z[i].re, z[i].im); + printf("\n\n");*/ + + + /* 2. FFT15 stage */ + + x[0] = CL_shr(inp[15],SCALEFACTOR30_1); + x[1] = CL_shr(inp[3],SCALEFACTOR30_1); + x[2] = CL_shr(inp[21],SCALEFACTOR30_1); + x[3] = CL_shr(inp[9],SCALEFACTOR30_1); + x[4] = CL_shr(inp[27],SCALEFACTOR30_1); + + x[5] = CL_shr(inp[5],SCALEFACTOR30_1); + x[6] = CL_shr(inp[23],SCALEFACTOR30_1); + x[7] = CL_shr(inp[11],SCALEFACTOR30_1); + x[8] = CL_shr(inp[29],SCALEFACTOR30_1); + x[9] = CL_shr(inp[17],SCALEFACTOR30_1); + + x[10] = CL_shr(inp[25],SCALEFACTOR30_1); + x[11] = CL_shr(inp[13],SCALEFACTOR30_1); + x[12] = CL_shr(inp[1],SCALEFACTOR30_1); + x[13] = CL_shr(inp[19],SCALEFACTOR30_1); + x[14] = CL_shr(inp[7],SCALEFACTOR30_1); + + /* 1. FFT5 stage */ + rs1 = CL_add(x[1],x[4]); + rs4 = CL_sub(x[1],x[4]); + rs3 = CL_add(x[2],x[3]); + rs2 = CL_sub(x[2],x[3]); + t = CL_scale_t(CL_sub(rs1,rs3),C54); + rs1 = CL_add(rs1,rs3); + y[0] = CL_add(x[0],rs1); + rs1 = CL_add(y[0],(CL_shl(CL_scale_t(rs1,C55),1))); + rs3 = CL_sub(rs1,t); + rs1 = CL_add(rs1,t); + t = CL_scale_t(CL_add(rs4,rs2),C51); + rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1)); + rs2 = CL_add(t,CL_scale_t(rs2,C53)); + + /* combination */ + y[1] = CL_msu_j(rs1,rs2); + y[4] = CL_mac_j(rs1,rs2); + y[2] = CL_mac_j(rs3,rs4); + y[3] = CL_msu_j(rs3,rs4); + + + /* 2. 
FFT5 stage */ + rs1 = CL_add(x[6],x[9]); + rs4 = CL_sub(x[6],x[9]); + rs3 = CL_add(x[7],x[8]); + rs2 = CL_sub(x[7],x[8]); + t = CL_scale_t(CL_sub(rs1,rs3),C54); + rs1 = CL_add(rs1,rs3); + y[5] = CL_add(x[5],rs1); + rs1 = CL_add(y[5],(CL_shl(CL_scale_t(rs1,C55),1))); + rs3 = CL_sub(rs1,t); + rs1 = CL_add(rs1,t); + t = CL_scale_t(CL_add(rs4,rs2),C51); + rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1)); + rs2 = CL_add(t,CL_scale_t(rs2,C53)); + + /* combination */ + y[6] = CL_msu_j(rs1,rs2); + y[9] = CL_mac_j(rs1,rs2); + y[7] = CL_mac_j(rs3,rs4); + y[8] = CL_msu_j(rs3,rs4); + + + /* 3. FFT5 stage */ + rs1 = CL_add(x[11],x[14]); + rs4 = CL_sub(x[11],x[14]); + rs3 = CL_add(x[12],x[13]); + rs2 = CL_sub(x[12],x[13]); + t = CL_scale_t(CL_sub(rs1,rs3),C54); + rs1 = CL_add(rs1,rs3); + y[10] = CL_add(x[10],rs1); + rs1 = CL_add(y[10],(CL_shl(CL_scale_t(rs1,C55),1))); + rs3 = CL_sub(rs1,t); + rs1 = CL_add(rs1,t); + t = CL_scale_t(CL_add(rs4,rs2),C51); + rs4 = CL_add(t,CL_shl(CL_scale_t(rs4, C52),1)); + rs2 = CL_add(t,CL_scale_t(rs2,C53)); + + /* combination */ + y[11] = CL_msu_j(rs1,rs2); + y[14] = CL_mac_j(rs1,rs2); + y[12] = CL_mac_j(rs3,rs4); + y[13] = CL_msu_j(rs3,rs4); + /*for (i=10; i<15; i++) + { + printf("%d,\t %d,\t",y[i].re, y[i].im); + } + printf("\n\n");*/ + + + /* 1. FFT3 stage */ + /* real part */ + rs1 = CL_add(y[5],y[10]); + rs2 = CL_scale_t(CL_sub(y[5],y[10]),C31); + z[15] = CL_add(y[0],rs1); + rs1 = CL_sub(y[0],CL_shr(rs1,1)); + + z[25] = CL_mac_j(rs1,rs2); + z[20] = CL_msu_j(rs1,rs2); + + /* 2. FFT3 stage */ + rs1 = CL_add(y[6],y[11]); + rs2 = CL_scale_t(CL_sub(y[6],y[11]),C31); + z[21] = CL_add(y[1],rs1); + rs1 = CL_sub(y[1],CL_shr(rs1,1)); + + z[16] = CL_mac_j(rs1,rs2); + z[26] = CL_msu_j(rs1,rs2); + + + /* 3. FFT3 stage */ + rs1 = CL_add(y[7],y[12]); + rs2 = CL_scale_t(CL_sub(y[7],y[12]),C31); + z[27] = CL_add(y[2],rs1); + rs1 = CL_sub(y[2],CL_shr(rs1,1)); + + z[22] = CL_mac_j(rs1,rs2); + z[17] = CL_msu_j(rs1,rs2); + + + /* 4. 
FFT3 stage */ + rs1 = CL_add(y[8],y[13]); + rs2 = CL_scale_t(CL_sub(y[8],y[13]),C31); + z[18] = CL_add(y[3],rs1); + rs1 = CL_sub(y[3],CL_shr(rs1,1)); + + z[28] = CL_mac_j(rs1,rs2); + z[23] = CL_msu_j(rs1,rs2); + + + /* 5. FFT3 stage */ + rs1 = CL_add(y[9],y[14]); + rs2 = CL_scale_t(CL_sub(y[9],y[14]),C31); + z[24] = CL_add(y[4],rs1); + rs1 = CL_sub(y[4],CL_shr(rs1,1)); + + z[19] = CL_mac_j(rs1,rs2); + z[29] = CL_msu_j(rs1,rs2); + + /*for (i=0; i<30; i++) + printf("%d,\t %d,\t",z[i].re, z[i].im); + printf("\n\n");*/ + + + /* 1. FFT2 stage */ + rs1 = CL_shr(z[0], SCALEFACTOR30_2); + rs2 = CL_shr(z[15],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 2. FFT2 stage */ + rs1 = CL_shr(z[8], SCALEFACTOR30_2); + rs2 = CL_shr(z[23],SCALEFACTOR30_2); + *h = CL_add(rs1,rs2); + *l = CL_sub(rs1,rs2); + l+=1; h+=1; + + + /* 3. FFT2 stage */ + rs1 = CL_shr(z[1], SCALEFACTOR30_2); + rs2 = CL_shr(z[16],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + + + /* 4. FFT2 stage */ + rs1 = CL_shr(z[9], SCALEFACTOR30_2); + rs2 = CL_shr(z[24],SCALEFACTOR30_2); + *h = CL_add(rs1,rs2); + *l = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 5. FFT2 stage */ + rs1 = CL_shr(z[2], SCALEFACTOR30_2); + rs2 = CL_shr(z[17],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 6. FFT2 stage */ + rs1 = CL_shr(z[10], SCALEFACTOR30_2); + rs2 = CL_shr(z[25],SCALEFACTOR30_2); + *h = CL_add(rs1,rs2); + *l = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 7. FFT2 stage */ + rs1 = CL_shr(z[3], SCALEFACTOR30_2); + rs2 = CL_shr(z[18],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 8. FFT2 stage */ + rs1 = CL_shr(z[11], SCALEFACTOR30_2); + rs2 = CL_shr(z[26],SCALEFACTOR30_2); + *h = CL_add(rs1,rs2); + *l = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 9. 
FFT2 stage */ + rs1 = CL_shr(z[4], SCALEFACTOR30_2); + rs2 = CL_shr(z[19],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 10. FFT2 stage */ + rs1 = CL_shr(z[12], SCALEFACTOR30_2); + rs2 = CL_shr(z[27],SCALEFACTOR30_2); + *h = CL_add(rs1,rs2); + *l = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 11. FFT2 stage */ + rs1 = CL_shr(z[5], SCALEFACTOR30_2); + rs2 = CL_shr(z[20],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 12. FFT2 stage */ + rs1 = CL_shr(z[13], SCALEFACTOR30_2); + rs2 = CL_shr(z[28],SCALEFACTOR30_2); + *h = CL_add(rs1,rs2); + *l = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 13. FFT2 stage */ + rs1 = CL_shr(z[6], SCALEFACTOR30_2); + rs2 = CL_shr(z[21],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 14. FFT2 stage */ + rs1 = CL_shr(z[14], SCALEFACTOR30_2); + rs2 = CL_shr(z[29],SCALEFACTOR30_2); + *h = CL_add(rs1,rs2); + *l = CL_sub(rs1,rs2); + l+=1; h+=1; + + /* 15. FFT2 stage */ + rs1 = CL_shr(z[7], SCALEFACTOR30_2); + rs2 = CL_shr(z[22],SCALEFACTOR30_2); + *l = CL_add(rs1,rs2); + *h = CL_sub(rs1,rs2); + l+=1; h+=1; + +#if (WMOPS) + multiCounter[currCounter].CL_move += 30; +#endif + +} + +/*-------------------------------------------------------------------* + * fft_cldfb() + * + * Interface functions FFT subroutines + *--------------------------------------------------------------------*/ +void fft_cldfb_fx( + Word32 *data, /* i/o: input/output vector */ + const Word16 size /* size of fft operation */ +) +{ + + SWITCH ( size ) + { + case 5: + fft5_with_cmplx_data( (cmplx *)data ); + BREAK; + case 8: + fft8_with_cmplx_data( (cmplx *)data ); + BREAK; + case 10: + fft10_with_cmplx_data( (cmplx *)data ); + BREAK; + case 16: + fft16_with_cmplx_data( (cmplx *)data, 0); + BREAK; + case 20: + fft20_with_cmplx_data( (cmplx *)data ); + BREAK; + case 30: + fft30_with_cmplx_data( (cmplx *)data ); + BREAK; + + default: + assert( 0 ); + BREAK; + } + + return; +} 
diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index efde7ba8d..32551b685 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -46,10 +46,12 @@ #include "cnst.h" //#include "prot.h" #include "prot_fx1.h" +#include "prot_fx2.h" //#include "cnst_fx.h" #include "rom_com.h" #include "rom_com_fx.h" #include "wmc_auto.h" +#include "complex_basop.h" #ifdef _MSC_VER #pragma warning( disable : 4310 ) @@ -59,32 +61,6 @@ * Local constants *-----------------------------------------------------------------*/ -#if 0 - -#define FFT_15PONIT_WNK1 0.55901699f /* EDCT & EMDCT constants */ -#define FFT_15PONIT_WNK2 0.95105652f /* EDCT & EMDCT constants */ -#define FFT_15PONIT_WNK3 0.58778525f /* EDCT & EMDCT constants */ -#define FFT_15PONIT_WNK4 0.86602540f /* EDCT & EMDCT constants */ -#define FFT_15PONIT_WNK5 0.25000000f /* EDCT & EMDCT constants */ - -/* FFT constants */ -#define FFT_C31 -0.8660254037f -#define FFT_C51 0.9510565195f -#define FFT_C52 -1.5388417989f -#define FFT_C53 -0.3632712597f -#define FFT_C54 0.5590169895f -#define FFT_C55 -1.2500000000f -#define FFT_C61 0.8660254036f -#define FFT_C81 0.7071067811f -#define FFT_C82 -0.7071067811f -#define FFT_C161 0.7071067811f -#define FFT_C162 -0.7071067811f -#define FFT_C163 0.9238795325f -#define FFT_C164 -0.9238795325f -#define FFT_C165 0.3826834323f -#define FFT_C166 -0.3826834323f - -#else #define Mpy_32_xx Mpy_32_16_1 @@ -153,8 +129,6 @@ #define SCALEFACTOR480 (11) #define SCALEFACTOR600 (10) -#endif - /*-----------------------------------------------------------------* * Local function prototypes @@ -1694,12 +1668,12 @@ static void cftfsub( Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; - IF ( n > 8 ) + IF ( GT_16(n, 8) ) { cft1st( n, a, w ); l = 8; - WHILE ( shl(l, 2 ) < n ) + WHILE ( LT_16(shl(l, 2 ), n) ) { cftmdl( n, l, a, w ); l = shl(l, 2); @@ -2037,12 +2011,12 @@ static void cftbsub( Word32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; - IF ( n > 8 ) + IF ( GT_16(n, 8) ) { cft1st( n, a, w ); l = 8; 
- WHILE ( shl(l, 2) < n ) + WHILE ( LT_16(shl(l, 2), n) ) { cftmdl( n, l, a, w ); l = shl(l, 2); @@ -2135,7 +2109,7 @@ static void rftbsub( Word32 xr, xi, yr, yi; a[1] = L_negate(a[1]); - m = n >> 1; + m = shr(n, 1); ks = 2 * nc / m; kk = 0; FOR ( j = 2; j < m; j += 2 ) @@ -2149,7 +2123,7 @@ static void rftbsub( yr = L_add(Mpy_32_16_1(xr, wkr), Mpy_32_16_1(xi, wki)); yi = L_sub(Mpy_32_16_1(xi, wkr), Mpy_32_16_1(xr, wki)); a[j] = L_sub(a[j], yr); - a[j + 1] = L_add(yi, a[j + 1]); + a[j + 1] = L_sub(yi, a[j + 1]); a[k] = L_add(a[k], yr); a[k + 1] = L_sub(yi, a[k + 1]); } @@ -2169,12 +2143,12 @@ static void dctsub( Word16 wkr, wki; Word32 xr; - m = n >> 1; + m = shr(n, 1); ks = nc / n; kk = 0; FOR ( j = 1; j < m; j++ ) { - k = n - j; + k = sub(n, j); kk += ks; wkr = sub(c[kk], c[nc - kk]); wki = add(c[kk], c[nc - kk]); @@ -2208,18 +2182,18 @@ void edct2_fx_ivas( Copy32(in, a, n); nw = ip[0]; - IF ( n > ( nw << 2 ) ) + IF ( GT_16(n, shl( nw, 2 ) ) ) { - nw = n >> 2; + nw = shr(n, 2); } nc = ip[1]; - IF ( n > nc ) + IF ( GT_16(n, nc) ) { nc = n; } - IF ( isgn < 0 ) + IF ( LT_16(isgn, 0) ) { xr = a[n - 1]; FOR ( j = n - 2; j >= 2; j -= 2 ) @@ -2230,7 +2204,7 @@ void edct2_fx_ivas( a[1] = L_sub(a[0], xr); a[0] = L_add(a[0], xr); - IF ( n > 4 ) + IF ( GT_16(n, 4) ) { rftbsub( n, a, nc, w + nw ); bitrv2_SR( n, ip + 2, a ); @@ -2242,16 +2216,16 @@ void edct2_fx_ivas( } } - IF ( isgn >= 0 ) + IF ( GE_16(isgn, 0) ) { a[0] = L_shr(a[0], 1); } dctsub( n, a, nc, w + nw ); - IF ( isgn >= 0 ) + IF ( GE_16(isgn, 0) ) { - IF ( n > 4 ) + IF ( GT_16(n, 4) ) { bitrv2_SR( n, ip + 2, a ); cftfsub( n, a, w ); @@ -2328,6 +2302,571 @@ void DoRTFTn_fx_ivas( return; } +void fft3_fx_ivas( + const Word32 X[], + Word32 Y[], + const Word16 n ) +{ + Word32 Z[PH_ECU_SPEC_SIZE]; + Word32 *Z0, *Z1, *Z2; + Word32 *z0, *z1, *z2; + const Word32 *x; + const Word16 *t_sin = sincos_t_rad3_fx; + Word16 m, step, order; + Word16 i, j; + Word16 c1_ind, s1_ind, c2_ind, s2_ind; + Word16 c1_step, s1_step, c2_step, 
s2_step; + Word32 *RY, *IY, *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2; + + /* Determine the order of the transform, the length of decimated */ + /* transforms m, and the step for the sine and cosine tables. */ + switch ( n ) + { + case 1536: + order = 9; + m = 512; + step = 1; + break; + case 384: + order = 7; + m = 128; + step = 4; + break; + default: + order = 9; + m = 512; + step = 1; + } + + /* Compose decimated sequences X[3i], X[3i+1],X[3i+2] */ + /* compute their FFT of length m. */ + Z0 = &Z[0]; + z0 = &Z0[0]; + Z1 = &Z0[m]; + z1 = &Z1[0]; /* Z1 = &Z[ m]; */ + Z2 = &Z1[m]; + z2 = &Z2[0]; /* Z2 = &Z[2m]; */ + x = &X[0]; + FOR ( i = 0; i < n / 3; i++ ) + { + *z0++ = *x++; /* Z0[i] = X[3i]; */ + *z1++ = *x++; /* Z1[i] = X[3i+1]; */ + *z2++ = *x++; /* Z2[i] = X[3i+2]; */ + } + + fft_rel_fx32( &Z0[0], m, order ); + fft_rel_fx32( &Z1[0], m, order ); + fft_rel_fx32( &Z2[0], m, order ); + + /* Butterflies of order 3. */ + /* pointer initialization */ + RY = &Y[0]; + IY = &Y[n]; + RZ0 = &Z0[0]; + IZ0 = &Z0[m]; + RZ1 = &Z1[0]; + IZ1 = &Z1[m]; + RZ2 = &Z2[0]; + IZ2 = &Z2[m]; + + c1_step = negate(step); + s1_step = step; + c2_step = negate(shl(step, 1)); + s2_step = shl(step, 1); + c1_ind = add(T_SIN_PI_2, c1_step); + s1_ind = s1_step; + c2_ind = add(T_SIN_PI_2, c2_step); + s2_ind = s2_step; + + /* special case: i = 0 */ + RY[0] = L_add(RZ0[0], L_add(RZ1[0], RZ2[0])); + + /* first 3/12 */ + for (i = 1; i < 3 * m / 8; i++, c1_ind = add(c1_ind, c1_step), s1_ind = add(s1_ind, s1_step), c2_ind = add(c2_ind,c2_step), s2_ind = add(s2_ind, s2_step)) + { + RY[i] = L_add(RZ0[i], L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + L_add(Mpy_32_16_1(RZ2[i], t_sin[c2_ind]), + Mpy_32_16_1(IZ2[-i], t_sin[s2_ind]))))); + IY[-i] = L_sub(IZ0[-i], L_add(L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(IZ1[-i], t_sin[c1_ind])), + L_sub(Mpy_32_16_1(RZ2[i], t_sin[s2_ind]), + Mpy_32_16_1(IZ2[-i], t_sin[c2_ind])))); + } + + /* next 1/12 */ + for ( ; i < 4 
* m / 8; i++, c1_ind = add(c1_ind, c1_step), s1_ind = add(s1_ind, s1_step), c2_ind = sub(c2_ind, c2_step), s2_ind = sub(s2_ind, s2_step) ) + { + RY[i] = L_add(RZ0[i], L_sub(L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + Mpy_32_16_1(IZ1[-i], t_sin[s1_ind])), + L_sub(Mpy_32_16_1(RZ2[i], t_sin[c2_ind]), + Mpy_32_16_1(IZ2[-i], t_sin[s2_ind])))); + IY[-i] = L_sub(IZ0[-i], L_sub(Mpy_32_16_1(RZ1[ i], t_sin[s1_ind]), + L_sub(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]), + L_add(Mpy_32_16_1(RZ2[ i], t_sin[s2_ind]), + Mpy_32_16_1(IZ2[-i], t_sin[c2_ind]))))); + } + + /* special case: i = m/2 i.e. 1/3 */ + RY[i] = L_add(RZ0[i], + L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + Mpy_32_16_1(RZ2[i], t_sin[c2_ind]))); + IY[-i] = L_negate(L_add(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(RZ2[i], t_sin[s2_ind]))); + i++; + + c1_ind = add(c1_ind, c1_step); + s1_ind = add(s1_ind, s1_step); + c2_ind = sub(c2_ind, c2_step); + s2_ind = sub(s2_ind, s2_step); + + /* next 2/12 */ + for ( j = i - 2; i < 6 * m / 8; i++, j--, c1_ind = add(c1_ind, c1_step), s1_ind = add(s1_ind, s1_step), c2_ind = sub(c2_ind, c2_step), s2_ind = sub(s2_ind, s2_step) ) + { + RY[i] = L_add(RZ0[j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]), + L_add(Mpy_32_16_1(IZ1[-j], t_sin[s1_ind]), + L_add(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[s2_ind]))))); + + IY[-i] = L_negate(L_add(IZ0[-j], L_add(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]), + L_add(Mpy_32_16_1(IZ1[-j], t_sin[c1_ind]), + L_sub(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[c2_ind])))))); + } + + /*--------------------------half--------------------------*/ + /* next 2/12 */ + for ( ; i < 8 * m / 8; i++, j--, c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = add(c2_ind, c2_step), s2_ind = add(s2_ind, s2_step) ) + { + RY[i] = L_sub(RZ0[j], L_add(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]), + L_add(Mpy_32_16_1(IZ1[-j], t_sin[s1_ind]), + L_sub(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[s2_ind]))))); + IY[-i] 
= L_negate(L_add(IZ0[-j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]), + L_add(Mpy_32_16_1(IZ1[-j], t_sin[c1_ind]), + L_add(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[c2_ind])))))); + } + + /* special case: i = m, i.e 2/3 */ + RY[i] = L_sub(RZ0[j], L_add(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]), + Mpy_32_16_1(RZ2[j], t_sin[c2_ind]))); + IY[-i++] = L_sub(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]), + Mpy_32_16_1(RZ1[j], t_sin[s1_ind])); + c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = add(c2_ind, c2_step), s2_ind = add(s2_ind, s2_step); + + /* next 1/12 */ + for ( j = 1; i < 9 * m / 8; i++, j++, c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = add(c2_ind, c2_step), s2_ind = add(s2_ind, s2_step) ) + { + RY[i] = L_sub(RZ0[j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]), + L_sub(Mpy_32_16_1(IZ1[-j], t_sin[s1_ind]), + L_add(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[s2_ind]))))); + IY[-i] = L_sub(IZ0[-j], L_add(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]), + L_sub(Mpy_32_16_1(IZ1[-j], t_sin[c1_ind]), + L_sub(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[c2_ind]))))); + } + + /* last 3/12 */ + for ( ; i < 12 * m / 8; i++, j++, c1_ind = sub(c1_ind, c1_step), s1_ind = sub(s1_ind, s1_step), c2_ind = sub(c2_ind, c2_step), s2_ind = sub(s2_ind, s2_step) ) + { + RY[i] = L_sub(RZ0[j], L_sub(L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]), + Mpy_32_16_1(IZ1[-j], t_sin[s1_ind])), + L_sub(Mpy_32_16_1(RZ2[j], t_sin[c2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[s2_ind])))); + IY[-i] = L_sub(IZ0[-j], L_sub(L_add(Mpy_32_16_1(RZ1[j], t_sin[s1_ind]), + Mpy_32_16_1(IZ1[-j], t_sin[c1_ind])), + L_add(Mpy_32_16_1(RZ2[j], t_sin[s2_ind]), + Mpy_32_16_1(IZ2[-j], t_sin[c2_ind])))); + } + + /* special case: i = 3*m/2 */ + RY[i] = L_sub(RZ0[j], L_sub(Mpy_32_16_1(RZ1[j], t_sin[c1_ind]), + Mpy_32_16_1(RZ2[j], t_sin[c2_ind]))); + + return; +} + +void ifft3_fx_ivas( + const Word32 Z[], + Word32 X[], + const Word16 n ) +{ + Word32 
Y[PH_ECU_SPEC_SIZE]; + const Word16 *t_sin = sincos_t_rad3_fx; + Word16 m, step, step2, order; + Word16 i; + Word16 c0_ind, s0_ind, c1_ind, s1_ind, c2_ind, s2_ind; + Word16 scale; + const Word32 *RZ0, *IZ0, *RZ1, *IZ1, *RZ2, *IZ2; + Word32 *RY0, *IY0, *RY1, *IY1, *RY2, *IY2, *y0, *y1, *y2; + + /* Determine the order of the transform, the length of decimated */ + /* transforms m, and the step for the sine and cosine tables. */ + switch ( n ) + { + case 1536: + order = 9; + m = 512; + step = 1; + break; + case 384: + order = 7; + m = 128; + step = 4; + break; + default: + order = 9; + m = 512; + step = 1; + } + + /* pointer initialization */ + RY0 = &Y[0]; + IY0 = &RY0[m]; + RY1 = &RY0[m]; + IY1 = &RY1[m]; + RY2 = &RY1[m]; + IY2 = &RY2[m]; + + RZ0 = &Z[0]; + RZ1 = RZ0 + m; + RZ2 = RZ0 + n / 2 - m / 2; + IZ0 = &Z[n]; + IZ1 = IZ0 - m; + IZ2 = IZ0 - n / 2 + m / 2; + + /* Inverse butterflies of order 3. */ + + /* Construction of Y0 */ + RY0[0] = L_add(RZ0[0], L_add(RZ1[0], RZ2[0])); + FOR ( i = 1; i < m / 2; i++ ) + { + RY0[i] = L_add(RZ0[i], L_add(RZ1[i], RZ2[-i])); + IY0[-i] = L_add(IZ0[-i], L_sub(IZ1[-i], IZ2[i])); + } + + /* m/2 */ + RY0[i] = L_add(RZ0[i], L_add(RZ1[i], RZ2[-i])); + + /* Construction of Y1 */ + c0_ind = T_SIN_PI_2; + s0_ind = 0; + c1_ind = T_SIN_PI_2 * 1 / 3; + s1_ind = T_SIN_PI_2 * 2 / 3; + c2_ind = T_SIN_PI_2 * 1 / 3; + s2_ind = T_SIN_PI_2 * 2 / 3; + + RY1[0] = L_sub(Mpy_32_16_1(RZ0[0], t_sin[c0_ind]), + L_add(Mpy_32_16_1(RZ1[0], t_sin[c1_ind]), + L_add(Mpy_32_16_1(RZ2[0], t_sin[c2_ind]), + L_add(Mpy_32_16_1(IZ1[0], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[0], t_sin[s2_ind]))))); + + c0_ind = sub(c0_ind, step); + s0_ind = add(s0_ind, step); + c1_ind = add(c1_ind, step); + s1_ind = sub(s1_ind, step); + c2_ind = sub(c2_ind, step); + s2_ind = add(s2_ind, step); + for ( i = 1; i < m / 4; i++, c0_ind = sub(c0_ind, step), s0_ind = add(s0_ind, step), c1_ind = add(c1_ind, step), s1_ind = sub(s1_ind, step), c2_ind = sub(c2_ind, step), s2_ind = add(s2_ind, step) ) 
+ { + RY1[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]), + L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]), + L_add(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))))); + IY1[-i] = L_add(L_sub(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]), + Mpy_32_16_1(IZ1[-i], t_sin[c1_ind])), + L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]), + L_add(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]), + L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[s2_ind]))))); + } + + for ( ; i < m / 2; i++, c0_ind = sub(c0_ind, step), s0_ind = add(s0_ind, step), c1_ind = add(c1_ind, step), s1_ind = sub(s1_ind, step), c2_ind = add(c2_ind, step), s2_ind = sub(s2_ind, step) ) + { + RY1[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]), + L_add(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[c2_ind])), + L_add(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))))); + IY1[-i] = L_sub(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]), + L_sub(L_add(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[c2_ind])), + L_add(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]), + L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[s2_ind]))))); + } + + /* m/2 */ + RY1[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]), + L_add(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[c2_ind])), + L_add(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))))); + + /* Construction of Y2 */ + c0_ind = T_SIN_PI_2; + s0_ind = 0; + c1_ind = T_SIN_PI_2 * 1 / 3; + s1_ind = T_SIN_PI_2 * 2 / 3; + c2_ind = T_SIN_PI_2 * 1 / 3; + s2_ind = T_SIN_PI_2 * 2 / 3; + step2 = 2 * step; + RY2[0] = L_sub(Mpy_32_16_1(RZ0[0], t_sin[c0_ind]), + L_sub(L_add(Mpy_32_16_1(RZ1[0], t_sin[c1_ind]), + Mpy_32_16_1(RZ2[0], t_sin[c2_ind])), + L_add(Mpy_32_16_1(IZ1[0], t_sin[s1_ind]), 
+ Mpy_32_16_1(IZ2[0], t_sin[s2_ind])))); + + c0_ind = sub(c0_ind, step2); + s0_ind = add(s0_ind, step2); + c1_ind = sub(c1_ind, step2); + s1_ind = add(s1_ind, step2); + c2_ind = add(c2_ind, step2); + s2_ind = sub(s2_ind, step2); + for ( i = 1; i < m / 8; i++, c0_ind = sub(c0_ind, step2), s0_ind = add(s0_ind, step2), c1_ind = sub(c1_ind, step2), s1_ind = add(s1_ind, step2), c2_ind = add(c2_ind, step2), s2_ind = sub(s2_ind, step2) ) + { + RY2[i] = L_sub(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]), + L_add(L_add(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[c2_ind])), + L_sub(Mpy_32_16_1(IZ0[-i], t_sin[s0_ind]), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))))); + IY2[-i] = L_add(L_sub(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]), + Mpy_32_16_1(IZ1[-i], t_sin[c1_ind])), + L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]), + L_sub(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]), + L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[s2_ind]))))); + } + + for ( ; i < m / 4; i++, c0_ind = sub(c0_ind, step2), s0_ind = add(s0_ind, step2), c1_ind = add(c1_ind, step2), s1_ind = sub(s1_ind, step2), c2_ind = add(c2_ind, step2), s2_ind = sub(s2_ind, step2) ) + { + RY2[i] = L_add(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]), + L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]), + Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind]))))); + IY2[-i] = L_add(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]), + L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]), + L_sub(Mpy_32_16_1(RZ0[i], t_sin[s0_ind]), + L_sub(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[s2_ind])))))); + } + + for ( ; i < 3 * m / 8; i++, c0_ind = sub(c0_ind, step2), s0_ind = add(s0_ind, step2), c1_ind = add(c1_ind, step2), s1_ind = sub(s1_ind, step2), c2_ind = sub(c2_ind, step2), s2_ind = add(s2_ind, step2) ) + { + RY2[i] = 
L_sub(L_add(Mpy_32_16_1(RZ0[i], t_sin[c0_ind]), + Mpy_32_16_1(RZ1[i], t_sin[c1_ind])), + L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]), + Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])), + L_sub(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))); + IY2[-i] = L_sub(L_add(Mpy_32_16_1(IZ0[-i], t_sin[c0_ind]), + L_add(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]), + L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]), + Mpy_32_16_1(RZ0[i], t_sin[s0_ind])))), + L_add(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[s2_ind]))); + } + + for ( ; i < m / 2; i++, c0_ind = add(c0_ind, step2), s0_ind = sub(s0_ind, step2), c1_ind = add(c1_ind, step2), s1_ind = sub(s1_ind, step2), c2_ind = sub(c2_ind, step2), s2_ind = add(s2_ind, step2) ) + { + RY2[i] = L_sub(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + Mpy_32_16_1(RZ0[i], t_sin[c0_ind])), + L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]), + Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])), + L_sub(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))); + IY2[-i] = L_add(L_sub(Mpy_32_16_1(IZ1[-i], t_sin[c1_ind]), + Mpy_32_16_1(IZ0[-i], t_sin[c0_ind])), + L_sub(L_add(Mpy_32_16_1(IZ2[i], t_sin[c2_ind]), + Mpy_32_16_1(RZ0[i], t_sin[s0_ind])), + L_add(Mpy_32_16_1(RZ1[i], t_sin[s1_ind]), + Mpy_32_16_1(RZ2[-i], t_sin[s2_ind])))); + } + + /* m/2 */ + RY2[i] = L_sub(L_sub(Mpy_32_16_1(RZ1[i], t_sin[c1_ind]), + Mpy_32_16_1(RZ0[i], t_sin[c0_ind])), + L_sub(L_add(Mpy_32_16_1(RZ2[-i], t_sin[c2_ind]), + Mpy_32_16_1(IZ0[-i], t_sin[s0_ind])), + L_sub(Mpy_32_16_1(IZ1[-i], t_sin[s1_ind]), + Mpy_32_16_1(IZ2[i], t_sin[s2_ind])))); + + /* Compute the inverse FFT for all 3 blocks. 
*/ + ifft_rel_fx32( RY0, m, order ); + ifft_rel_fx32( RY1, m, order ); + ifft_rel_fx32( RY2, m, order ); + + y0 = RY0; + y1 = RY1; + y2 = RY2; + + /* Interlacing and scaling, scale = 1/3 */ + scale = (Word16)(0x2AAB); + FOR ( i = 0; i < n; ) + { + X[i++] = Mpy_32_16_1(( *y0++ ), scale); + X[i++] = Mpy_32_16_1(( *y1++ ), scale); + X[i++] = Mpy_32_16_1(( *y2++ ), scale); + } + + return; +} + + +static void rfft_post( + const Word16 *sine_table, + Word32 *buf, + const Word16 len ) +{ + Word32 tmp1, tmp2, tmp3, tmp4; + Word16 s, c; + Word16 i = 0; + + tmp1 = L_add(buf[0], buf[1]); + buf[1] = L_sub(buf[0], buf[1]); + buf[0] = tmp1; + + FOR ( i = 1; i <= ( len + 2 ) / 4; i++ ) + { + s = sine_table[i]; /* sin(pi*i/(len/2)) */ + c = sine_table[i + len / 4]; /* cos(pi*i/(len/2)) */ + + tmp1 = L_sub(buf[2 * i], buf[len - 2 * i]); + tmp2 = L_add(buf[2 * i + 1], buf[len - 2 * i + 1]); + tmp3 = L_sub(Mpy_32_16_1(tmp1, s), Mpy_32_16_1(tmp2, c)); /* real part of j*W(k,N)*[T(k) - T'(N-k)] */ + tmp4 = L_add(Mpy_32_16_1(tmp1, c), Mpy_32_16_1(tmp2, s)); /* imag part of j*W(k,N)*[T(k) - T'(N-k)] */ + tmp1 = L_add(buf[2 * i], buf[len - 2 * i]); + tmp2 = L_sub(buf[2 * i + 1], buf[len - 2 * i + 1]); + + buf[2 * i] = L_shr(L_sub(tmp1, tmp3), 1); + buf[2 * i + 1] = L_shr(L_sub(tmp2, tmp4), 1); + buf[len - 2 * i] = L_shr(L_add(tmp1, tmp3), 1); + buf[len - 2 * i + 1] = L_negate(L_shr(L_add(tmp2, tmp4), 1)); + } +} + +static void rfft_pre( + const Word16 *sine_table, + Word32 *buf, + const Word16 len ) +{ + const Word16 scale = (Word16)(1.0f / len * 32768.0); + Word32 tmp1, tmp2, tmp3, tmp4; + Word16 s, c; + Word16 i = 0; + + tmp1 = L_add(buf[0], buf[1]); + buf[1] = Mpy_32_16_1(L_sub(buf[0], buf[1]), scale); + buf[0] = Mpy_32_16_1(tmp1, scale); + + FOR ( i = 1; i <= ( len + 2 ) / 4; i++ ) + { + s = sine_table[i]; /* sin(pi*i/(len/2)) */ + c = sine_table[i + len / 4]; /* cos(pi*i/(len/2)) */ + + tmp1 = L_sub(buf[2 * i], buf[len - 2 * i]); + tmp2 = L_add(buf[2 * i + 1], buf[len - 2 * i + 1]); + 
tmp3 = L_add(Mpy_32_16_1(tmp1, s), Mpy_32_16_1(tmp2, c)); /* real part of j*W(k,N)*[T(k) - T'(N-k)] */ + tmp4 = L_sub(Mpy_32_16_1(tmp2, s), Mpy_32_16_1(tmp1, c)); /* imag part of j*W(k,N)*[T(k) - T'(N-k)] */ + tmp1 = L_add(buf[2 * i], buf[len - 2 * i]); + tmp2 = L_sub(buf[2 * i + 1], buf[len - 2 * i + 1]); + + buf[2 * i] = Mpy_32_16_1(L_add(tmp1, tmp3), scale); + buf[2 * i + 1] = L_negate(Mpy_32_16_1(L_add(tmp2, tmp4), scale)); + buf[len - 2 * i] = Mpy_32_16_1(L_sub(tmp1, tmp3), scale); + buf[len - 2 * i + 1] = Mpy_32_16_1(L_sub(tmp2, tmp4), scale); + } + + return; +} + +Word16 RFFTN_fx( + Word32 *data, + const Word16 *sine_table, + const Word16 len, + const Word16 sign ) +{ + assert( len <= 640 && len > 0 ); + + IF ( EQ_16(len, 640) ) + { + Word32 x[320], y[320]; + Word16 i; + + IF ( NE_16(sign, -1) ) + { + rfft_pre( sine_table, data, len ); + } + + FOR ( i = 0; i < 320; i++ ) + { + x[i] = data[2 * i]; + y[i] = data[2 * i + 1]; + } + DoRTFT320_fx( x, y ); + FOR ( i = 0; i < 320; i++ ) + { + data[2 * i] = x[i]; + data[2 * i + 1] = y[i]; + } + + IF ( EQ_16(sign, -1) ) + { + rfft_post( sine_table, data, len ); + } + } + ELSE + { + IF ( EQ_16(len, 512) ) + { + Word16 i; + const Word16 log2 = 9; + Word32 reordered_data[512]; + + IF ( EQ_16(sign, -1) ) + { + fft_rel_fx32( data, len, log2 ); + reordered_data[0] = data[0]; + reordered_data[1] = data[len / 2]; + FOR ( i = 1; i < len / 2; i++ ) + { + reordered_data[2 * i] = data[i]; + reordered_data[2 * i + 1] = data[len - i]; + } + } + ELSE + { + reordered_data[0] = data[0]; + reordered_data[len / 2] = data[1]; + FOR ( i = 1; i < len / 2; i++ ) + { + reordered_data[i] = data[2 * i]; + reordered_data[len - i] = data[2 * i + 1]; + } + ifft_rel_fx32( reordered_data, len, log2 ); + } + Copy32( reordered_data, data, len ); + } + ELSE + { + assert( !"Not supported FFT length!" 
); + } + } + + return 0; +} + static void butterfly( const Word32 a, const Word32 b, @@ -2458,30 +2997,16 @@ static const Word16 C53 = 0x678D; /* 0.809016994374947f cos( PI/5); */ static const Word16 C54 = 0x4B3D; /* 0.587785252292473f sin( PI/5); */ static void fft5( - Word32 *pInOut ) + cmplx *pInOut ) { - Word32 re1, im1; - Word32 re2, im2; - Word32 re3, im3; - Word32 re4, im4; - Word32 re5, im5; + cmplx x[5]; + cmplx t[4]; - Word32 tmp1, tmp2; - Word32 tmp3, tmp4; - Word32 tmp5, tmp6; - Word32 tmp7, tmp8; - - - re1 = pInOut[0]; - im1 = pInOut[1]; - re2 = pInOut[2]; - im2 = pInOut[3]; - re3 = pInOut[4]; - im3 = pInOut[5]; - re4 = pInOut[6]; - im4 = pInOut[7]; - re5 = pInOut[8]; - im5 = pInOut[9]; + x[0] = pInOut[0]; + x[1] = pInOut[1]; + x[2] = pInOut[2]; + x[3] = pInOut[3]; + x[4] = pInOut[4]; /* 1.0000 1.0000 1.0000 1.0000 1.0000 @@ -2491,27 +3016,18 @@ static void fft5( 1.0000 -0.8090 + 0.5878i 0.3090 - 0.9511i 0.3090 + 0.9511i -0.8090 - 0.5878i 1.0000 0.3090 + 0.9511i -0.8090 + 0.5878i -0.8090 - 0.5878i 0.3090 - 0.9511i */ - tmp1 = L_add( re2, re5 ); - tmp2 = L_sub( re2, re5 ); - tmp3 = L_add( im2, im5 ); - tmp4 = L_sub( im2, im5 ); - tmp5 = L_add( re3, re4 ); - tmp6 = L_sub( re3, re4 ); - tmp7 = L_add( im3, im4 ); - tmp8 = L_sub( im3, im4 ); - - - pInOut[0] = L_add( re1, L_add( tmp1, tmp5 ) ); - pInOut[1] = L_add( im1, L_add( tmp3, tmp7 ) ); - - pInOut[2] = re1 + Mpy_32_16_1( tmp1, C51 ) - Mpy_32_16_1( tmp5, C53 ) + Mpy_32_16_1( tmp4, C52 ) + Mpy_32_16_1( tmp8, C54 ); - pInOut[8] = re1 + Mpy_32_16_1( tmp1, C51 ) - Mpy_32_16_1( tmp5, C53 ) - Mpy_32_16_1( tmp4, C52 ) - Mpy_32_16_1( tmp8, C54 ); - pInOut[3] = im1 - Mpy_32_16_1( tmp2, C52 ) - Mpy_32_16_1( tmp6, C54 ) + Mpy_32_16_1( tmp3, C51 ) - Mpy_32_16_1( tmp7, C53 ); - pInOut[9] = im1 + Mpy_32_16_1( tmp2, C52 ) + Mpy_32_16_1( tmp6, C54 ) + Mpy_32_16_1( tmp3, C51 ) - Mpy_32_16_1( tmp7, C53 ); - pInOut[4] = re1 - Mpy_32_16_1( tmp1, C53 ) + Mpy_32_16_1( tmp5, C51 ) + Mpy_32_16_1( tmp4, C54 ) - Mpy_32_16_1( 
tmp8, C52 ); - pInOut[6] = re1 - Mpy_32_16_1( tmp1, C53 ) + Mpy_32_16_1( tmp5, C51 ) - Mpy_32_16_1( tmp4, C54 ) + Mpy_32_16_1( tmp8, C52 ); - pInOut[5] = im1 - Mpy_32_16_1( tmp2, C54 ) + Mpy_32_16_1( tmp6, C52 ) - Mpy_32_16_1( tmp3, C53 ) + Mpy_32_16_1( tmp7, C51 ); - pInOut[7] = im1 + Mpy_32_16_1( tmp2, C54 ) - Mpy_32_16_1( tmp6, C52 ) - Mpy_32_16_1( tmp3, C53 ) + Mpy_32_16_1( tmp7, C51 ); + t[0] = CL_add( x[1], x[4] ); + t[1] = CL_sub( x[1], x[4] ); + t[2] = CL_add( x[2], x[3] ); + t[3] = CL_sub( x[2], x[3] ); + + + pInOut[0] = CL_add( x[0], CL_add( t[0], t[2] ) ); + + pInOut[1] = CL_add( CL_add( x[0], CL_sub( CL_scale( t[0], C51 ), CL_scale( t[2], C53 ) ) ), CL_add( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C52 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C54 ) ) ) ); + pInOut[4] = CL_add( x[0], CL_sub( CL_scale( t[0], C51 ), CL_add( CL_scale( t[2], C53 ), CL_add( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C52 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C54 ) ) ) ) ) ); + pInOut[2] = CL_add( CL_sub( x[0], CL_scale( t[0], C53 ) ), CL_add( CL_scale( t[2], C51 ), CL_sub( CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C54 ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C52 ) ) ) ) ); + pInOut[3] = CL_add( CL_sub( x[0], CL_scale( t[0], C53 ) ), CL_add( CL_sub( CL_scale( t[2], C51 ), CL_conjugate( CL_scale( CL_swap_real_imag( t[1] ), C54 ) ) ), CL_conjugate( CL_scale( CL_swap_real_imag( t[3] ), C52 ) ) ) ); return; } @@ -2582,6 +3098,7 @@ static void nextFFT( Word32 *x, const Word16 length ) { + cmplx val[5]; SWITCH ( length ) { case 2: @@ -2594,7 +3111,17 @@ static void nextFFT( fft4( x ); BREAK; case 5: - fft5( x ); + FOR ( Word32 i = 0; i < 5; i++ ) + { + val[i].re = x[2 * i]; + val[i].im = x[2 * i + 1]; + } + fft5( val ); + FOR ( Word32 i = 0; i < 5; i++ ) + { + x[2 * i] = val[i].re; + x[2 * i + 1] = val[i].im; + } BREAK; case 8: fft8_2( x ); @@ -2670,6 +3197,7 @@ static void cooleyTukeyFFT( Word16 n1, n2; Word16 
cnt = 0; Word32 *src, *dest; + cmplx val[5]; SWITCH ( length ) { @@ -2685,7 +3213,17 @@ static void cooleyTukeyFFT( fft4( x ); BREAK; case 5: - fft5( x ); + FOR ( i = 0; i < 5; i++ ) + { + val[i].re = x[2 * i]; + val[i].im = x[2 * i + 1]; + } + fft5( val ); + FOR ( i = 0; i < 5; i++ ) + { + x[2 * i] = val[i].re; + x[2 * i + 1] = val[i].im; + } BREAK; case 8: fft8_2( x ); @@ -2693,7 +3231,7 @@ static void cooleyTukeyFFT( default: { factor = findFactor( length ); - IF ( factor > 0 && ( length / factor > 1 ) ) + IF ( GT_16(factor, 0) && GT_16( length / factor, 1 ) ) { n1 = factor; n2 = length / factor; @@ -2773,7 +3311,7 @@ static void pfaDFT( Word16 i, ii; Word16 cnt; - IF ( numFactors > 1 ) + IF ( GT_16(numFactors, 1) ) { Word32 *tmp = scratch1; Word16 n1_inv = 1, n2_inv = 1; @@ -2800,7 +3338,7 @@ static void pfaDFT( tmp[cnt++] = x[2 * idx + 1]; idx += incr; - IF ( idx > length ) + IF ( GT_16(idx, length) ) { idx -= length; } @@ -2835,7 +3373,7 @@ static void pfaDFT( tmp[2 * idx] = x[cnt++]; tmp[2 * idx + 1] = x[cnt++]; idx += n2; - IF ( idx > length ) + IF ( GT_16(idx, length) ) { idx -= length; } @@ -3030,2327 +3568,1228 @@ void DoFFT_fx( *-----------------------------------------------------------------*/ static void fft_len5( - Word32 *re, - Word32 *im, - const Word16 s ) + cmplx *x ) { - Word32 x0, x1, x2, x3, x4; - Word32 r1, r2, r3, r4; - Word32 s1, s2, s3, s4; - Word32 t; - - x0 = re[s * 0]; - x1 = re[s * 1]; - x2 = re[s * 2]; - x3 = re[s * 3]; - x4 = re[s * 4]; - - r1 = L_add( x1, x4 ); - r4 = L_sub( x1, x4 ); - r3 = L_add( x2, x3 ); - r2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - re[0] = L_add( x0, r1 ); - - r1 = L_add( re[0], L_shl(Mpy_32_16_1( r1, FFT_C55 ), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1(L_add( r4, r2 ), FFT_C51 ); - - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - x0 = im[s * 0]; - x1 = im[s * 1]; - x2 = 
im[s * 2]; - x3 = im[s * 3]; - x4 = im[s * 4]; - - s1 = L_add( x1, x4 ); - s4 = L_sub( x1, x4 ); - s3 = L_add( x2, x3 ); - s2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - im[0] = L_add( x0, s1 ); - - s1 = L_add( im[0], L_shl(Mpy_32_16_1( s1, FFT_C55 ), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1(L_add( s4, s2 ), FFT_C51 ); - - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - re[s * 1] = L_add( r1, s2 ); - re[s * 4] = L_sub( r1, s2 ); - re[s * 2] = L_sub( r3, s4 ); - re[s * 3] = L_add( r3, s4 ); - - im[s * 1] = L_sub( s1, r2 ); - im[s * 4] = L_add( s1, r2 ); - im[s * 2] = L_add( s3, r4 ); - im[s * 3] = L_sub( s3, r4 ); + cmplx y1, y2, y3, y4; + cmplx t; + + y1 = CL_add( x[1], x[4] ); + y4 = CL_sub( x[1], x[4] ); + y3 = CL_add( x[2], x[3] ); + y2 = CL_sub( x[2], x[3] ); + t = CL_scale( CL_sub( y1, y3 ), FFT_C54 ); + y1 = CL_add( y1, y3 ); + x[0] = CL_add( x[0], y1 ); + + y1 = CL_add( x[0], CL_shl( CL_scale( y1, FFT_C55 ), 1 ) ); + y3 = CL_sub( y1, t ); + y1 = CL_add( y1, t ); + t = CL_scale( CL_add( y4, y2 ), FFT_C51 ); + + y4 = CL_add( t, CL_shl( CL_scale( y4, FFT_C52 ), 1 ) ); + y2 = CL_add( t, CL_scale( y2, FFT_C53 ) ); + + x[1] = CL_msu_j( y1, y2 ); + x[4] = CL_mac_j( y1, y2 ); + x[2] = CL_mac_j( y3, y4 ); + x[3] = CL_msu_j( y3, y4 ); return; } static void fft_len8( - Word32 *re, - Word32 *im, - const Word16 s ) + cmplx *x ) { - Word32 x00, x01, x02, x03, x04, x05, x06, x07; - Word32 x08, x09, x10, x11, x12, x13, x14, x15; - Word32 t00, t01, t02, t03, t04, t05, t06, t07; - Word32 t08, t09, t10, t11, t12, t13, t14, t15; - Word32 s00, s01, s02, s03, s04, s05, s06, s07; - Word32 s08, s09, s10, s11, s12, s13, s14, s15; - - x00 = re[s * 0]; - x01 = im[s * 0]; - x02 = re[s * 1]; - x03 = im[s * 1]; - x04 = re[s * 2]; - x05 = im[s * 2]; - x06 = re[s * 3]; - x07 = im[s * 3]; - x08 = re[s * 4]; - x09 = im[s * 4]; - x10 = re[s * 5]; - x11 = im[s 
* 5]; - x12 = re[s * 6]; - x13 = im[s * 6]; - x14 = re[s * 7]; - x15 = im[s * 7]; - - t00 = L_add( x00, x08 ); - t02 = L_sub( x00, x08 ); - t01 = L_add( x01, x09 ); - t03 = L_sub( x01, x09 ); - t04 = L_add( x02, x10 ); - t06 = L_sub( x02, x10 ); - t05 = L_add( x03, x11 ); - t07 = L_sub( x03, x11 ); - t08 = L_add( x04, x12 ); - t10 = L_sub( x04, x12 ); - t09 = L_add( x05, x13 ); - t11 = L_sub( x05, x13 ); - t12 = L_add( x06, x14 ); - t14 = L_sub( x06, x14 ); - t13 = L_add( x07, x15 ); - t15 = L_sub( x07, x15 ); - - s00 = L_add( t00, t08 ); - s04 = L_sub( t00, t08 ); - s01 = L_add( t01, t09 ); - s05 = L_sub( t01, t09 ); - s08 = L_sub( t02, t11 ); - s10 = L_add( t02, t11 ); - s09 = L_add( t03, t10 ); - s11 = L_sub( t03, t10 ); - s02 = L_add( t04, t12 ); - s07 = L_sub( t04, t12 ); - s03 = L_add( t05, t13 ); - s06 = L_sub( t13, t05 ); - - t01 = L_add( t06, t14 ); - t02 = L_sub( t06, t14 ); - t00 = L_add( t07, t15 ); - t03 = L_sub( t07, t15 ); - - s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 ); - s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 ); - s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 ); - s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 ); - - re[s * 0] = L_add( s00, s02 ); - re[s * 4] = L_sub( s00, s02 ); - im[s * 0] = L_add( s01, s03 ); - im[s * 4] = L_sub( s01, s03 ); - re[s * 2] = L_sub( s04, s06 ); - re[s * 6] = L_add( s04, s06 ); - im[s * 2] = L_sub( s05, s07 ); - im[s * 6] = L_add( s05, s07 ); - re[s * 3] = L_add( s08, s14 ); - re[s * 7] = L_sub( s08, s14 ); - im[s * 3] = L_add( s09, s15 ); - im[s * 7] = L_sub( s09, s15 ); - re[s * 1] = L_add( s10, s12 ); - re[s * 5] = L_sub( s10, s12 ); - im[s * 1] = L_add( s11, s13 ); - im[s * 5] = L_sub( s11, s13 ); + cmplx t[8], s[8]; + + t[0] = CL_add( x[0], x[4] ); + t[1] = CL_sub( x[0], x[4] ); + t[2] = CL_add( x[1], x[5] ); + t[3] = CL_sub( x[1], x[5] ); + t[4] = CL_add( x[2], x[6] ); + t[5] = CL_sub( x[2], x[6] ); + t[6] = CL_add( x[3], x[7] ); + t[7] = CL_sub( x[3], x[7] ); + + s[0] = CL_add( t[0], t[4] ); + s[2] = 
CL_sub( t[0], t[4] ); + s[4] = CL_mac_j( t[1], t[5] ); + s[5] = CL_msu_j( t[1], t[5] ); + s[1] = CL_add( t[2], t[6] ); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + t[1] = CL_sub( t[3], t[7] ); + + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + s[7] = CL_conjugate( s[7] ); + + x[0] = CL_add( s[0], s[1] ); + x[4] = CL_sub( s[0], s[1] ); + x[2] = CL_sub( s[2], s[3] ); + x[6] = CL_add( s[2], s[3] ); + x[3] = CL_add( s[4], s[7] ); + x[7] = CL_sub( s[4], s[7] ); + x[1] = CL_add( s[5], s[6] ); + x[5] = CL_sub( s[5], s[6] ); return; } static void fft_len10( - Word32 *re, - Word32 *im, - const Word16 s ) + cmplx *x ) { - Word32 t; - Word32 x0, x1, x2, x3, x4; - Word32 r1, r2, r3, r4; - Word32 s1, s2, s3, s4; - Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09; - Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; - - x0 = re[s * 0]; - x1 = re[s * 2]; - x2 = re[s * 4]; - x3 = re[s * 6]; - x4 = re[s * 8]; - - r1 = L_add( x3, x2 ); - r4 = L_sub( x3, x2 ); - r3 = L_add( x1, x4 ); - r2 = L_sub( x1, x4 ); - t = Mpy_32_16_1(L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y00 = L_add( x0, r1 ); - r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1)); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ) , FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - x0 = im[s * 0]; - x1 = im[s * 2]; - x2 = im[s * 4]; - x3 = im[s * 6]; - x4 = im[s * 8]; - - s1 = L_add( x3, x2 ); - s4 = L_sub( x3, x2 ); - s3 = L_add( x1, x4 ); - s2 = L_sub( x1, x4 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y01 = L_add( x0, s1 ); - s1 = L_add( y01, L_shl(Mpy_32_16_1( s1, FFT_C55 ), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = 
L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y04 = L_add( r1, s2 ); - y16 = L_sub( r1, s2 ); - y08 = L_sub( r3, s4 ); - y12 = L_add( r3, s4 ); - - y05 = L_sub( s1, r2 ); - y17 = L_add( s1, r2 ); - y09 = L_add( s3, r4 ); - y13 = L_sub( s3, r4 ); - - x0 = re[s * 5]; - x1 = re[s * 1]; - x2 = re[s * 3]; - x3 = re[s * 7]; - x4 = re[s * 9]; - - r1 = L_add( x1, x4 ); - r4 = L_sub( x1, x4 ); - r3 = L_add( x3, x2 ); - r2 = L_sub( x3, x2 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y02 = L_add( x0, r1 ); - r1 = L_add( y02, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1(L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - x0 = im[s * 5]; - x1 = im[s * 1]; - x2 = im[s * 3]; - x3 = im[s * 7]; - x4 = im[s * 9]; - - s1 = L_add( x1, x4 ); - s4 = L_sub( x1, x4 ); - s3 = L_add( x3, x2 ); - s2 = L_sub( x3, x2 ); - t = Mpy_32_16_1(L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y03 = L_add( x0, s1 ); - s1 = L_add( y03, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y06 = L_add( r1, s2 ); - y18 = L_sub( r1, s2 ); - y10 = L_sub( r3, s4 ); - y14 = L_add( r3, s4 ); - - y07 = L_sub( s1, r2 ); - y19 = L_add( s1, r2 ); - y11 = L_add( s3, r4 ); - y15 = L_sub( s3, r4 ); - - re[s * 0] = L_add( y00, y02 ); - im[s * 0] = L_add( y01, y03 ); - re[s * 5] = L_sub( y00, y02 ); - im[s * 5] = L_sub( y01, y03 ); - - re[s * 2] = L_add( y04, y06 ); - im[s * 2] = L_add( y05, y07 ); - re[s * 7] = L_sub( y04, y06 ); - im[s * 7] = L_sub( y05, y07 ); - - re[s * 4] = L_add( y08, y10 ); - im[s * 4] = L_add( y09, y11 ); - re[s * 9] = L_sub( y08, y10 ); - im[s * 9] = L_sub( y09, y11 ); - - re[s * 6] 
= L_add( y12, y14 ); - im[s * 6] = L_add( y13, y15 ); - re[s * 1] = L_sub( y12, y14 ); - im[s * 1] = L_sub( y13, y15 ); - - re[s * 8] = L_add( y16, y18 ); - im[s * 8] = L_add( y17, y19 ); - re[s * 3] = L_sub( y16, y18 ); - im[s * 3] = L_sub( y17, y19 ); + cmplx t; + cmplx s[4]; + cmplx y[10]; + + s[0] = CL_add( x[6], x[4] ); + s[3] = CL_sub( x[6], x[4] ); + s[2] = CL_add( x[2], x[8] ); + s[1] = CL_sub( x[2], x[8] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[0] = CL_add( x[0], s[0] ); + s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[2] = CL_msu_j( s[0], s[1] ); + y[8] = CL_mac_j( s[0], s[1] ); + y[4] = CL_mac_j( s[2], s[3] ); + y[6] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( x[1], x[9] ); + s[3] = CL_sub( x[1], x[9] ); + s[2] = CL_add( x[7], x[3] ); + s[1] = CL_sub( x[7], x[3] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[1] = CL_add( x[5], s[0] ); + s[0] = CL_add( y[1], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[3] = CL_msu_j( s[0], s[1] ); + y[9] = CL_mac_j( s[0], s[1] ); + y[5] = CL_mac_j( s[2], s[3] ); + y[7] = CL_msu_j( s[2], s[3] ); + + + x[0] = CL_add( y[0], y[1] ); + x[5] = CL_sub( y[0], y[1] ); + + x[2] = CL_add( y[2], y[3] ); + x[7] = CL_sub( y[2], y[3] ); + + x[4] = CL_add( y[4], y[5] ); + x[9] = CL_sub( y[4], y[5] ); + + x[6] = CL_add( y[6], y[7] ); + x[1] = CL_sub( y[6], y[7] ); + + x[8] = CL_add( y[8], y[9] ); + x[3] = CL_sub( y[8], y[9] ); return; } static void fft_len15( - Word32 *re, - Word32 *im, - const Word16 s ) 
+ cmplx *x ) { - Word32 t; - Word32 r1, r2, r3, r4; - Word32 s1, s2, s3, s4; - Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09; - Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19; - Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29; - Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09; - Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; - Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29; - - x00 = re[s * 0]; - x01 = im[s * 0]; - x02 = re[s * 3]; - x03 = im[s * 3]; - x04 = re[s * 6]; - x05 = im[s * 6]; - x06 = re[s * 9]; - x07 = im[s * 9]; - x08 = re[s * 12]; - x09 = im[s * 12]; - - x10 = re[s * 5]; - x11 = im[s * 5]; - x12 = re[s * 8]; - x13 = im[s * 8]; - x14 = re[s * 11]; - x15 = im[s * 11]; - x16 = re[s * 14]; - x17 = im[s * 14]; - x18 = re[s * 2]; - x19 = im[s * 2]; - - x20 = re[s * 10]; - x21 = im[s * 10]; - x22 = re[s * 13]; - x23 = im[s * 13]; - x24 = re[s * 1]; - x25 = im[s * 1]; - x26 = re[s * 4]; - x27 = im[s * 4]; - x28 = re[s * 7]; - x29 = im[s * 7]; - - r1 = L_add( x02, x08 ); - r4 = L_sub( x02, x08 ); - r3 = L_add( x04, x06 ); - r2 = L_sub( x04, x06 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y00 = L_add( x00, r1 ); - r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x03, x09 ); - s4 = L_sub( x03, x09 ); - s3 = L_add( x05, x07 ); - s2 = L_sub( x05, x07 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y01 = L_add( x01, s1 ); - s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y02 = L_add( r1, s2 ); - y08 = L_sub( r1, s2 ); - y04 = L_sub( 
r3, s4 ); - y06 = L_add( r3, s4 ); - - y03 = L_sub( s1, r2 ); - y09 = L_add( s1, r2 ); - y05 = L_add( s3, r4 ); - y07 = L_sub( s3, r4 ); - - r1 = L_add( x12, x18 ); - r4 = L_sub( x12, x18 ); - r3 = L_add( x14, x16 ); - r2 = L_sub( x14, x16 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y10 = L_add( x10, r1 ); - r1 = L_add( y10, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x13, x19 ); - s4 = L_sub( x13, x19 ); - s3 = L_add( x15, x17 ); - s2 = L_sub( x15, x17 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y11 = L_add( x11, s1 ); - s1 = L_add( y11, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y12 = L_add( r1, s2 ); - y18 = L_sub( r1, s2 ); - y14 = L_sub( r3, s4 ); - y16 = L_add( r3, s4 ); - - y13 = L_sub( s1, r2 ); - y19 = L_add( s1, r2 ); - y15 = L_add( s3, r4 ); - y17 = L_sub( s3, r4 ); - - r1 = L_add( x22, x28 ); - r4 = L_sub( x22, x28 ); - r3 = L_add( x24, x26 ); - r2 = L_sub( x24, x26 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y20 = L_add( x20, r1 ); - r1 = L_add( y20, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x23, x29 ); - s4 = L_sub( x23, x29 ); - s3 = L_add( x25, x27 ); - s2 = L_sub( x25, x27 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y21 = L_add( x21, s1 ); - s1 = L_add( y21, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 
= L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y22 = L_add( r1, s2 ); - y28 = L_sub( r1, s2 ); - y24 = L_sub( r3, s4 ); - y26 = L_add( r3, s4 ); - - y23 = L_sub( s1, r2 ); - y29 = L_add( s1, r2 ); - y25 = L_add( s3, r4 ); - y27 = L_sub( s3, r4 ); - - r1 = L_add( y10, y20 ); - r2 = Mpy_32_16_1(L_sub( y10, y20 ), FFT_C31 ); - re[s * 0] = L_add( y00, r1 ); - r1 = L_sub( y00, L_shr(r1, 1)); - - s1 = L_add( y11, y21 ); - s2 = Mpy_32_16_1( L_sub( y11, y21 ), FFT_C31 ); - im[s * 0] = L_add( y01, s1 ); - s1 = L_sub( y01, L_shr(s1, 1) ); - - re[s * 10] = L_sub( r1, s2 ); - re[s * 5] = L_add( r1, s2 ); - im[s * 10] = L_add( s1, r2 ); - im[s * 5] = L_sub( s1, r2 ); - - r1 = L_add( y12, y22 ); - r2 = Mpy_32_16_1(L_sub( y12, y22 ), FFT_C31 ); - re[s * 6] = L_add( y02, r1 ); - r1 = L_sub( y02, L_shr(r1, 1) ); - - s1 = L_add( y13, y23 ); - s2 = Mpy_32_16_1( L_sub( y13, y23 ), FFT_C31 ); - im[s * 6] = L_add( y03, s1 ); - s1 = L_sub( y03, L_shr(s1, 1) ); - - re[s * 1] = L_sub( r1, s2 ); - re[s * 11] = L_add( r1, s2 ); - im[s * 1] = L_add( s1, r2 ); - im[s * 11] = L_sub( s1, r2 ); - - r1 = L_add( y14, y24 ); - r2 = Mpy_32_16_1(L_sub( y14, y24 ), FFT_C31 ); - re[s * 12] = L_add( y04, r1 ); - r1 = L_sub( y04, L_shr(r1, 1) ); - - s1 = L_add( y15, y25 ); - s2 = Mpy_32_16_1( L_sub( y15, y25 ), FFT_C31 ); - im[s * 12] = L_add( y05, s1 ); - s1 = L_sub( y05, L_shr(s1, 1) ); - - re[s * 7] = L_sub( r1, s2 ); - re[s * 2] = L_add( r1, s2 ); - im[s * 7] = L_add( s1, r2 ); - im[s * 2] = L_sub( s1, r2 ); - - r1 = L_add( y16, y26 ); - r2 = Mpy_32_16_1( L_sub( y16, y26 ), FFT_C31 ); - re[s * 3] = L_add( y06, r1 ); - r1 = L_sub( y06, L_shr(r1, 1) ); - - s1 = L_add( y17, y27 ); - s2 = Mpy_32_16_1(L_sub( y17, y27 ), FFT_C31 ); - im[s * 3] = L_add( y07, s1 ); - s1 = L_sub( y07, L_shr(s1, 1) ); - - re[s * 13] = L_sub( r1, s2 ); - re[s * 8] = L_add( r1, s2 ); - im[s * 13] = L_add( 
s1, r2 ); - im[s * 8] = L_sub( s1, r2 ); - - r1 = L_add( y18, y28 ); - r2 = Mpy_32_16_1( L_sub( y18, y28 ), FFT_C31 ); - re[s * 9] = L_add( y08, r1 ); - r1 = L_sub( y08, L_shr(r1, 1) ); - - s1 = L_add( y19, y29 ); - s2 = Mpy_32_16_1( L_sub( y19, y29 ), FFT_C31 ); - im[s * 9] = L_add( y09, s1 ); - s1 = L_sub( y09, L_shr(s1, 1)); - - re[s * 4] = L_sub( r1, s2 ); - re[s * 14] = L_add( r1, s2 ); - im[s * 4] = L_add( s1, r2 ); - im[s * 14] = L_sub( s1, r2 ); + cmplx t; + cmplx s[5]; + cmplx y[15]; + + s[0] = CL_add( x[3], x[12] ); + s[3] = CL_sub( x[3], x[12] ); + s[2] = CL_add( x[6], x[9] ); + s[1] = CL_sub( x[6], x[9] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[0] = CL_add( x[0], s[0] ); + s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[1] = CL_msu_j( s[0], s[1] ); + y[4] = CL_mac_j( s[0], s[1] ); + y[2] = CL_mac_j( s[2], s[3] ); + y[3] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( x[8], x[2] ); + s[3] = CL_sub( x[8], x[2] ); + s[2] = CL_add( x[11], x[14] ); + s[1] = CL_sub( x[11], x[14] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[5] = CL_add( x[5], s[0] ); + s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[6] = CL_msu_j( s[0], s[1] ); + y[9] = CL_mac_j( s[0], s[1] ); + y[7] = CL_mac_j( s[2], s[3] ); + y[8] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( x[13], x[7] ); + s[3] = CL_sub( x[13], x[7] ); + s[2] = CL_add( x[1], x[4] ); + s[1] = CL_sub( x[1], x[4] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = 
CL_add( s[0], s[2] ); + y[10] = CL_add( x[10], s[0] ); + s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[11] = CL_msu_j( s[0], s[1] ); + y[14] = CL_mac_j( s[0], s[1] ); + y[12] = CL_mac_j( s[2], s[3] ); + y[13] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( y[5], y[10] ); + s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 ); + x[0] = CL_add( y[0], s[0] ); + s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) ); + + x[10] = CL_mac_j( s[0], s[1] ); + x[5] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[6], y[11] ); + s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 ); + x[6] = CL_add( y[1], s[0] ); + s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) ); + + x[1] = CL_mac_j( s[0], s[1] ); + x[11] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[7], y[12] ); + s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 ); + x[12] = CL_add( y[2], s[0] ); + s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) ); + + x[7] = CL_mac_j( s[0], s[1] ); + x[2] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[8], y[13] ); + s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 ); + x[3] = CL_add( y[3], s[0] ); + s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) ); + + x[13] = CL_mac_j( s[0], s[1] ); + x[8] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[9], y[14] ); + s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 ); + x[9] = CL_add( y[4], s[0] ); + s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) ); + + x[4] = CL_mac_j( s[0], s[1] ); + x[14] = CL_msu_j( s[0], s[1] ); return; } static void fft_len16( - Word32 *re, - Word32 *im, - const Word16 s ) + cmplx *x ) { - Word32 x0, x1, x2, x3, x4, x5, x6, x7; - Word32 t0, t1, t2, t3, t4, t5, t6, t7; - Word32 y00, y01, y02, y03, y04, y05, y06, y07; - Word32 y08, y09, y10, y11, y12, y13, y14, y15; - Word32 y16, y17, y18, y19, y20, y21, y22, y23; - Word32 y24, y25, y26, y27, y28, y29, y30, 
y31; - - x0 = L_shr( re[s * 0], SCALEFACTOR16 ); - x1 = L_shr( im[s * 0], SCALEFACTOR16 ); - x2 = L_shr( re[s * 4], SCALEFACTOR16 ); - x3 = L_shr( im[s * 4], SCALEFACTOR16 ); - x4 = L_shr( re[s * 8], SCALEFACTOR16 ); - x5 = L_shr( im[s * 8], SCALEFACTOR16 ); - x6 = L_shr( re[s * 12], SCALEFACTOR16 ); - x7 = L_shr( im[s * 12], SCALEFACTOR16 ); - - t0 = L_add( x0, x4 ); - t2 = L_sub( x0, x4 ); - t1 = L_add( x1, x5 ); - t3 = L_sub( x1, x5 ); - t4 = L_add( x2, x6 ); - t7 = L_sub( x2, x6 ); - t5 = L_add( x7, x3 ); - t6 = L_sub( x7, x3 ); - - y00 = L_add( t0, t4 ); - y01 = L_add( t1, t5 ); - y02 = L_sub( t2, t6 ); - y03 = L_sub( t3, t7 ); - y04 = L_sub( t0, t4 ); - y05 = L_sub( t1, t5 ); - y06 = L_add( t2, t6 ); - y07 = L_add( t3, t7 ); - - x0 = L_shr( re[s * 1], SCALEFACTOR16 ); - x1 = L_shr( im[s * 1], SCALEFACTOR16 ); - x2 = L_shr( re[s * 5], SCALEFACTOR16 ); - x3 = L_shr( im[s * 5], SCALEFACTOR16 ); - x4 = L_shr( re[s * 9], SCALEFACTOR16 ); - x5 = L_shr( im[s * 9], SCALEFACTOR16 ); - x6 = L_shr( re[s * 13], SCALEFACTOR16 ); - x7 = L_shr( im[s * 13], SCALEFACTOR16 ); - - t0 = L_add( x0, x4 ); - t2 = L_sub( x0, x4 ); - t1 = L_add( x1, x5 ); - t3 = L_sub( x1, x5 ); - t4 = L_add( x2, x6 ); - t7 = L_sub( x2, x6 ); - t5 = L_add( x7, x3 ); - t6 = L_sub( x7, x3 ); - - y08 = L_add( t0, t4 ); - y09 = L_add( t1, t5 ); - y10 = L_sub( t2, t6 ); - y11 = L_sub( t3, t7 ); - y12 = L_sub( t0, t4 ); - y13 = L_sub( t1, t5 ); - y14 = L_add( t2, t6 ); - y15 = L_add( t3, t7 ); - - x0 = L_shr( re[s * 2], SCALEFACTOR16 ); - x1 = L_shr( im[s * 2], SCALEFACTOR16 ); - x2 = L_shr( re[s * 6], SCALEFACTOR16 ); - x3 = L_shr( im[s * 6], SCALEFACTOR16 ); - x4 = L_shr( re[s * 10], SCALEFACTOR16 ); - x5 = L_shr( im[s * 10], SCALEFACTOR16 ); - x6 = L_shr( re[s * 14], SCALEFACTOR16 ); - x7 = L_shr( im[s * 14], SCALEFACTOR16 ); - - t0 = L_add( x0, x4 ); - t2 = L_sub( x0, x4 ); - t1 = L_add( x1, x5 ); - t3 = L_sub( x1, x5 ); - t4 = L_add( x2, x6 ); - t7 = L_sub( x2, x6 ); - t5 = L_add( x7, x3 ); - t6 = 
L_sub( x7, x3 ); - - y16 = L_add( t0, t4 ); - y17 = L_add( t1, t5 ); - y18 = L_sub( t2, t6 ); - y19 = L_sub( t3, t7 ); - y20 = L_sub( t1, t5 ); - y21 = L_sub( t4, t0 ); - y22 = L_add( t2, t6 ); - y23 = L_add( t3, t7 ); - - x0 = L_shr( re[s * 3], SCALEFACTOR16 ); - x1 = L_shr( im[s * 3], SCALEFACTOR16 ); - x2 = L_shr( re[s * 7], SCALEFACTOR16 ); - x3 = L_shr( im[s * 7], SCALEFACTOR16 ); - x4 = L_shr( re[s * 11], SCALEFACTOR16 ); - x5 = L_shr( im[s * 11], SCALEFACTOR16 ); - x6 = L_shr( re[s * 15], SCALEFACTOR16 ); - x7 = L_shr( im[s * 15], SCALEFACTOR16 ); - - t0 = L_add( x0, x4 ); - t2 = L_sub( x0, x4 ); - t1 = L_add( x1, x5 ); - t3 = L_sub( x1, x5 ); - t4 = L_add( x2, x6 ); - t7 = L_sub( x2, x6 ); - t5 = L_add( x7, x3 ); - t6 = L_sub( x7, x3 ); - - y24 = L_add( t0, t4 ); - y25 = L_add( t1, t5 ); - y26 = L_sub( t2, t6 ); - y27 = L_sub( t3, t7 ); - y28 = L_sub( t0, t4 ); - y29 = L_sub( t1, t5 ); - y30 = L_add( t2, t6 ); - y31 = L_add( t3, t7 ); - - x0 = Mpy_32_16_1( y22, FFT_C162 ); - x1 = Mpy_32_16_1( y23, FFT_C162 ); - y22 = L_sub( x0, x1 ); - y23 = L_add( x0, x1 ); - - x0 = Mpy_32_16_1( y28, FFT_C162 ); - x1 = Mpy_32_16_1( y29, FFT_C162 ); - y28 = L_sub( x0, x1 ); - y29 = L_add( x0, x1 ); - - x0 = Mpy_32_16_1( y12, FFT_C161 ); - x1 = Mpy_32_16_1( y13, FFT_C161 ); - y12 = L_add( x0, x1 ); - y13 = L_sub( x1, x0 ); - - x0 = Mpy_32_16_1( y18, FFT_C161 ); - x1 = Mpy_32_16_1( y19, FFT_C161 ); - y18 = L_add( x0, x1 ); - y19 = L_sub( x1, x0 ); - - x0 = Mpy_32_16_1( y10, FFT_C163 ); - x1 = Mpy_32_16_1( y11, FFT_C166 ); - x2 = Mpy_32_16_1( y10, FFT_C166 ); - x3 = Mpy_32_16_1( y11, FFT_C163 ); - y10 = L_sub( x0, x1 ); - y11 = L_add( x2, x3 ); - - x0 = Mpy_32_16_1( y14, FFT_C165 ); - x1 = Mpy_32_16_1( y15, FFT_C164 ); - x2 = Mpy_32_16_1( y14, FFT_C164 ); - x3 = Mpy_32_16_1( y15, FFT_C165 ); - y14 = L_sub( x0, x1 ); - y15 = L_add( x2, x3 ); - - x0 = Mpy_32_16_1( y26, FFT_C165 ); - x1 = Mpy_32_16_1( y27, FFT_C164 ); - x2 = Mpy_32_16_1( y26, FFT_C164 ); - x3 = Mpy_32_16_1( y27, 
FFT_C165 ); - y26 = L_sub( x0, x1 ); - y27 = L_add( x2, x3 ); - - x0 = Mpy_32_16_1( y30, FFT_C164 ); - x1 = Mpy_32_16_1( y31, FFT_C165 ); - x2 = Mpy_32_16_1( y30, FFT_C165 ); - x3 = Mpy_32_16_1( y31, FFT_C164 ); - y30 = L_sub( x0, x1 ); - y31 = L_add( x2, x3 ); - - t0 = L_add( y00, y16 ); - t2 = L_sub( y00, y16 ); - t1 = L_add( y01, y17 ); - t3 = L_sub( y01, y17 ); - t4 = L_add( y08, y24 ); - t7 = L_sub( y08, y24 ); - t5 = L_add( y25, y09 ); - t6 = L_sub( y25, y09 ); - - re[s * 0] = L_add( t0, t4 ); - im[s * 0] = L_add( t1, t5 ); - re[s * 4] = L_sub( t2, t6 ); - im[s * 4] = L_sub( t3, t7 ); - re[s * 8] = L_sub( t0, t4 ); - im[s * 8] = L_sub( t1, t5 ); - re[s * 12] = L_add( t2, t6 ); - im[s * 12] = L_add( t3, t7 ); - - t0 = L_add( y02, y18 ); - t2 = L_sub( y02, y18 ); - t1 = L_add( y03, y19 ); - t3 = L_sub( y03, y19 ); - t4 = L_add( y10, y26 ); - t7 = L_sub( y10, y26 ); - t5 = L_add( y27, y11 ); - t6 = L_sub( y27, y11 ); - - re[s * 1] = L_add( t0, t4 ); - im[s * 1] = L_add( t1, t5 ); - re[s * 5] = L_sub( t2, t6 ); - im[s * 5] = L_sub( t3, t7 ); - re[s * 9] = L_sub( t0, t4 ); - im[s * 9] = L_sub( t1, t5 ); - re[s * 13] = L_add( t2, t6 ); - im[s * 13] = L_add( t3, t7 ); - - t0 = L_add( y04, y20 ); - t2 = L_sub( y04, y20 ); - t1 = L_add( y05, y21 ); - t3 = L_sub( y05, y21 ); - t4 = L_add( y12, y28 ); - t7 = L_sub( y12, y28 ); - t5 = L_add( y29, y13 ); - t6 = L_sub( y29, y13 ); - - re[s * 2] = L_add( t0, t4 ); - im[s * 2] = L_add( t1, t5 ); - re[s * 6] = L_sub( t2, t6 ); - im[s * 6] = L_sub( t3, t7 ); - re[s * 10] = L_sub( t0, t4 ); - im[s * 10] = L_sub( t1, t5 ); - re[s * 14] = L_add( t2, t6 ); - im[s * 14] = L_add( t3, t7 ); - - t0 = L_add( y06, y22 ); - t2 = L_sub( y06, y22 ); - t1 = L_add( y07, y23 ); - t3 = L_sub( y07, y23 ); - t4 = L_add( y14, y30 ); - t7 = L_sub( y14, y30 ); - t5 = L_add( y31, y15 ); - t6 = L_sub( y31, y15 ); - - re[s * 3] = L_add( t0, t4 ); - im[s * 3] = L_add( t1, t5 ); - re[s * 7] = L_sub( t2, t6 ); - im[s * 7] = L_sub( t3, t7 ); - re[s * 11] 
= L_sub( t0, t4 ); - im[s * 11] = L_sub( t1, t5 ); - re[s * 15] = L_add( t2, t6 ); - im[s * 15] = L_add( t3, t7 ); + cmplx s[4]; + cmplx t[4]; + cmplx y[16]; + + s[0] = CL_shr( x[0], SCALEFACTOR16 ); + s[1] = CL_shr( x[4], SCALEFACTOR16 ); + s[2] = CL_shr( x[8], SCALEFACTOR16 ); + s[3] = CL_shr( x[12], SCALEFACTOR16 ); + + t[0] = CL_add( s[0], s[2] ); + t[1] = CL_sub( s[0], s[2] ); + t[2] = CL_add( s[1], s[3] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + + y[0] = CL_add( t[0], t[2] ); + y[1] = CL_sub( t[1], t[3] ); + y[2] = CL_sub( t[0], t[2] ); + y[3] = CL_add( t[1], t[3] ); + + s[0] = CL_shr( x[1], SCALEFACTOR16 ); + s[1] = CL_shr( x[5], SCALEFACTOR16 ); + s[2] = CL_shr( x[9], SCALEFACTOR16 ); + s[3] = CL_shr( x[13], SCALEFACTOR16 ); + + t[0] = CL_add( s[0], s[2] ); + t[1] = CL_sub( s[0], s[2] ); + t[2] = CL_add( s[1], s[3] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + + y[4] = CL_add( t[0], t[2] ); + y[5] = CL_sub( t[1], t[3] ); + y[6] = CL_sub( t[0], t[2] ); + y[7] = CL_add( t[1], t[3] ); + + s[0] = CL_shr( x[2], SCALEFACTOR16 ); + s[1] = CL_shr( x[6], SCALEFACTOR16 ); + s[2] = CL_shr( x[10], SCALEFACTOR16 ); + s[3] = CL_shr( x[14], SCALEFACTOR16 ); + + t[0] = CL_add( s[0], s[2] ); + t[1] = CL_sub( s[0], s[2] ); + t[2] = CL_add( s[1], s[3] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + + y[8] = CL_add( t[0], t[2] ); + y[9] = CL_sub( t[1], t[3] ); + y[10] = CL_swap_real_imag( CL_sub( t[0], t[2] ) ); + y[10] = CL_conjugate( y[10] ); + y[11] = CL_add( t[1], t[3] ); + + s[0] = CL_shr( x[3], SCALEFACTOR16 ); + s[1] = CL_shr( x[7], SCALEFACTOR16 ); + s[2] = CL_shr( x[11], SCALEFACTOR16 ); + s[3] = CL_shr( x[15], SCALEFACTOR16 ); + + t[0] = CL_add( s[0], s[2] ); + t[1] = CL_sub( s[0], s[2] ); + t[2] = CL_add( s[1], s[3] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( s[1] ), CL_conjugate( s[3] ) ) ); + + y[12] = CL_add( t[0], t[2] ); + 
y[13] = CL_sub( t[1], t[3] ); + y[14] = CL_sub( t[0], t[2] ); + y[15] = CL_add( t[1], t[3] ); + + s[0] = CL_scale( y[11], FFT_C162 ); + y[11] = CL_mac_j( s[0], s[0] ); + + s[0] = CL_scale( y[14], FFT_C162 ); + y[14] = CL_mac_j( s[0], s[0] ); + + s[0] = CL_scale( y[6], FFT_C161 ); + y[6] = CL_mac_j( s[0], s[0] ); + y[6] = CL_swap_real_imag( y[6] ); + y[6] = CL_conjugate( y[6] ); + + s[0] = CL_scale( y[9], FFT_C161 ); + y[9] = CL_mac_j( s[0], s[0] ); + y[9] = CL_swap_real_imag( y[9] ); + y[9] = CL_conjugate( y[9] ); + + s[0] = CL_scale( y[5], FFT_C163 ); + s[1] = CL_scale( y[5], FFT_C166 ); + y[5] = CL_mac_j( s[0], s[1] ); + + s[0] = CL_scale( y[7], FFT_C165 ); + s[1] = CL_scale( y[7], FFT_C164 ); + y[7] = CL_mac_j( s[0], s[1] ); + + s[0] = CL_scale( y[13], FFT_C165 ); + s[1] = CL_scale( y[13], FFT_C164 ); + y[13] = CL_mac_j( s[0], s[1] ); + + s[0] = CL_scale( y[15], FFT_C164 ); + s[1] = CL_scale( y[15], FFT_C165 ); + y[15] = CL_mac_j( s[0], s[1] ); + + t[0] = CL_add( y[0], y[8] ); + t[1] = CL_sub( y[0], y[8] ); + t[2] = CL_add( y[4], y[12] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[4] ), CL_conjugate( y[12] ) ) ); + + x[0] = CL_add( t[0], t[2] ); + x[4] = CL_sub( t[1], t[3] ); + x[8] = CL_sub( t[0], t[2] ); + x[12] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[1], y[9] ); + t[1] = CL_sub( y[1], y[9] ); + t[2] = CL_add( y[5], y[13] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[5] ), CL_conjugate( y[13] ) ) ); + + x[1] = CL_add( t[0], t[2] ); + x[5] = CL_sub( t[1], t[3] ); + x[9] = CL_sub( t[0], t[2] ); + x[13] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[2], y[10] ); + t[1] = CL_sub( y[2], y[10] ); + t[2] = CL_add( y[6], y[14] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[6] ), CL_conjugate( y[14] ) ) ); + + x[2] = CL_add( t[0], t[2] ); + x[6] = CL_sub( t[1], t[3] ); + x[10] = CL_sub( t[0], t[2] ); + x[14] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[3], y[11] ); + t[1] = CL_sub( y[3], y[11] ); + t[2] = CL_add( y[7], y[15] ); + t[3] = 
CL_swap_real_imag( CL_sub( CL_conjugate( y[7] ), CL_conjugate( y[15] ) ) ); + + x[3] = CL_add( t[0], t[2] ); + x[7] = CL_sub( t[1], t[3] ); + x[11] = CL_sub( t[0], t[2] ); + x[15] = CL_add( t[1], t[3] ); return; } static void fft_len20_fx( - Word32 *re, - Word32 *im, - const Word16 s ) + cmplx *x ) { - Word32 r1, r2, r3, r4; - Word32 s1, s2, s3, s4; - Word32 x0, x1, x2, x3, x4; - Word32 t, t0, t1, t2, t3, t4, t5, t6, t7; - Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09; - Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; - Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29; - Word32 y30, y31, y32, y33, y34, y35, y36, y37, y38, y39; - - x0 = L_shr( re[s * 0], SCALEFACTOR20 ); - x1 = L_shr( re[s * 16], SCALEFACTOR20 ); - x2 = L_shr( re[s * 12], SCALEFACTOR20 ); - x3 = L_shr( re[s * 8], SCALEFACTOR20 ); - x4 = L_shr( re[s * 4], SCALEFACTOR20 ); - - r1 = L_add( x1, x4 ); - r4 = L_sub( x1, x4 ); - r3 = L_add( x2, x3 ); - r2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y00 = L_add( x0, r1 ); - r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - x0 = L_shr(im[s * 0], SCALEFACTOR20 ); - x1 = L_shr(im[s * 16], SCALEFACTOR20); - x2 = L_shr(im[s * 12], SCALEFACTOR20); - x3 = L_shr(im[s * 8], SCALEFACTOR20); - x4 = L_shr(im[s * 4], SCALEFACTOR20); - - s1 = L_add( x1, x4 ); - s4 = L_sub( x1, x4 ); - s3 = L_add( x2, x3 ); - s2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y01 = L_add( x0, s1 ); - s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y08 = L_add( r1, s2 
); - y32 = L_sub( r1, s2 ); - y16 = L_sub( r3, s4 ); - y24 = L_add( r3, s4 ); - - y09 = L_sub( s1, r2 ); - y33 = L_add( s1, r2 ); - y17 = L_add( s3, r4 ); - y25 = L_sub( s3, r4 ); - - x0 = L_shr( re[s * 5], SCALEFACTOR20 ); - x1 = L_shr( re[s * 1], SCALEFACTOR20 ); - x2 = L_shr( re[s * 17], SCALEFACTOR20 ); - x3 = L_shr( re[s * 13], SCALEFACTOR20 ); - x4 = L_shr( re[s * 9], SCALEFACTOR20 ); - - r1 = L_add( x1, x4 ); - r4 = L_sub( x1, x4 ); - r3 = L_add( x2, x3 ); - r2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y02 = L_add( x0, r1 ); - r1 = L_add( y02, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - x0 = L_shr( im[s * 5], SCALEFACTOR20 ); - x1 = L_shr( im[s * 1], SCALEFACTOR20 ); - x2 = L_shr( im[s * 17], SCALEFACTOR20 ); - x3 = L_shr( im[s * 13], SCALEFACTOR20 ); - x4 = L_shr( im[s * 9], SCALEFACTOR20 ); - - s1 = L_add( x1, x4 ); - s4 = L_sub( x1, x4 ); - s3 = L_add( x2, x3 ); - s2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y03 = L_add( x0, s1 ); - s1 = L_add( y03, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y10 = L_add( r1, s2 ); - y34 = L_sub( r1, s2 ); - y18 = L_sub( r3, s4 ); - y26 = L_add( r3, s4 ); - - y11 = L_sub( s1, r2 ); - y35 = L_add( s1, r2 ); - y19 = L_add( s3, r4 ); - y27 = L_sub( s3, r4 ); - - x0 = L_shr( re[s * 10], SCALEFACTOR20 ); - x1 = L_shr( re[s * 6], SCALEFACTOR20 ); - x2 = L_shr( re[s * 2], SCALEFACTOR20 ); - x3 = L_shr( re[s * 18], SCALEFACTOR20 ); - x4 = L_shr( re[s * 14], SCALEFACTOR20 ); - - r1 = L_add( x1, x4 ); - r4 = L_sub( x1, x4 ); - r3 = L_add( x2, x3 ); - 
r2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y04 = L_add( x0, r1 ); - r1 = L_add( y04, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - x0 = L_shr( im[s * 10], SCALEFACTOR20 ); - x1 = L_shr( im[s * 6], SCALEFACTOR20 ); - x2 = L_shr( im[s * 2], SCALEFACTOR20 ); - x3 = L_shr( im[s * 18], SCALEFACTOR20 ); - x4 = L_shr( im[s * 14], SCALEFACTOR20 ); - - s1 = L_add( x1, x4 ); - s4 = L_sub( x1, x4 ); - s3 = L_add( x2, x3 ); - s2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y05 = L_add( x0, s1 ); - s1 = L_add( y05, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y12 = L_add( r1, s2 ); - y36 = L_sub( r1, s2 ); - y20 = L_sub( r3, s4 ); - y28 = L_add( r3, s4 ); - - y13 = L_sub( s1, r2 ); - y37 = L_add( s1, r2 ); - y21 = L_add( s3, r4 ); - y29 = L_sub( s3, r4 ); - - x0 = L_shr( re[s * 15], SCALEFACTOR20 ); - x1 = L_shr( re[s * 11], SCALEFACTOR20 ); - x2 = L_shr( re[s * 7], SCALEFACTOR20 ); - x3 = L_shr( re[s * 3], SCALEFACTOR20 ); - x4 = L_shr( re[s * 19], SCALEFACTOR20 ); - - r1 = L_add( x1, x4 ); - r4 = L_sub( x1, x4 ); - r3 = L_add( x2, x3 ); - r2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y06 = L_add( x0, r1 ); - r1 = L_add( y06, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - x0 = L_shr( im[s * 15], SCALEFACTOR20 ); - x1 = L_shr( im[s * 11], SCALEFACTOR20 ); - x2 = 
L_shr( im[s * 7], SCALEFACTOR20 ); - x3 = L_shr( im[s * 3], SCALEFACTOR20 ); - x4 = L_shr( im[s * 19], SCALEFACTOR20 ); - - s1 = L_add( x1, x4 ); - s4 = L_sub( x1, x4 ); - s3 = L_add( x2, x3 ); - s2 = L_sub( x2, x3 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y07 = L_add( x0, s1 ); - s1 = L_add( y07, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y14 = L_add( r1, s2 ); - y38 = L_sub( r1, s2 ); - y22 = L_sub( r3, s4 ); - y30 = L_add( r3, s4 ); - - y15 = L_sub( s1, r2 ); - y39 = L_add( s1, r2 ); - y23 = L_add( s3, r4 ); - y31 = L_sub( s3, r4 ); - - t0 = L_add( y00, y04 ); - t2 = L_sub( y00, y04 ); - t1 = L_add( y01, y05 ); - t3 = L_sub( y01, y05 ); - t4 = L_add( y02, y06 ); - t7 = L_sub( y02, y06 ); - t5 = L_add( y07, y03 ); - t6 = L_sub( y07, y03 ); - - re[s * 0] = L_add( t0, t4 ); - im[s * 0] = L_add( t1, t5 ); - re[s * 5] = L_sub( t2, t6 ); - im[s * 5] = L_sub( t3, t7 ); - re[s * 10] = L_sub( t0, t4 ); - im[s * 10] = L_sub( t1, t5 ); - re[s * 15] = L_add( t2, t6 ); - im[s * 15] = L_add( t3, t7 ); - - t0 = L_add( y08, y12 ); - t2 = L_sub( y08, y12 ); - t1 = L_add( y09, y13 ); - t3 = L_sub( y09, y13 ); - t4 = L_add( y10, y14 ); - t7 = L_sub( y10, y14 ); - t5 = L_add( y15, y11 ); - t6 = L_sub( y15, y11 ); - - re[s * 4] = L_add( t0, t4 ); - im[s * 4] = L_add( t1, t5 ); - re[s * 9] = L_sub( t2, t6 ); - im[s * 9] = L_sub( t3, t7 ); - re[s * 14] = L_sub( t0, t4 ); - im[s * 14] = L_sub( t1, t5 ); - re[s * 19] = L_add( t2, t6 ); - im[s * 19] = L_add( t3, t7 ); - - t0 = L_add( y16, y20 ); - t2 = L_sub( y16, y20 ); - t1 = L_add( y17, y21 ); - t3 = L_sub( y17, y21 ); - t4 = L_add( y18, y22 ); - t7 = L_sub( y18, y22 ); - t5 = L_add( y23, y19 ); - t6 = L_sub( y23, y19 ); - - re[s * 8] = L_add( t0, t4 ); - im[s * 8] = L_add( t1, t5 ); - re[s * 13] = L_sub( t2, 
t6 ); - im[s * 13] = L_sub( t3, t7 ); - re[s * 18] = L_sub( t0, t4 ); - im[s * 18] = L_sub( t1, t5 ); - re[s * 3] = L_add( t2, t6 ); - im[s * 3] = L_add( t3, t7 ); - - t0 = L_add( y24, y28 ); - t2 = L_sub( y24, y28 ); - t1 = L_add( y25, y29 ); - t3 = L_sub( y25, y29 ); - t4 = L_add( y26, y30 ); - t7 = L_sub( y26, y30 ); - t5 = L_add( y31, y27 ); - t6 = L_sub( y31, y27 ); - - re[s * 12] = L_add( t0, t4 ); - im[s * 12] = L_add( t1, t5 ); - re[s * 17] = L_sub( t2, t6 ); - im[s * 17] = L_sub( t3, t7 ); - re[s * 2] = L_sub( t0, t4 ); - im[s * 2] = L_sub( t1, t5 ); - re[s * 7] = L_add( t2, t6 ); - im[s * 7] = L_add( t3, t7 ); - - t0 = L_add( y32, y36 ); - t2 = L_sub( y32, y36 ); - t1 = L_add( y33, y37 ); - t3 = L_sub( y33, y37 ); - t4 = L_add( y34, y38 ); - t7 = L_sub( y34, y38 ); - t5 = L_add( y39, y35 ); - t6 = L_sub( y39, y35 ); - - re[s * 16] = L_add( t0, t4 ); - im[s * 16] = L_add( t1, t5 ); - re[s * 1] = L_sub( t2, t6 ); - im[s * 1] = L_sub( t3, t7 ); - re[s * 6] = L_sub( t0, t4 ); - im[s * 6] = L_sub( t1, t5 ); - re[s * 11] = L_add( t2, t6 ); - im[s * 11] = L_add( t3, t7 ); + cmplx s[4]; + cmplx xx[5]; + cmplx t; + cmplx tt[4]; + cmplx y[20]; + + xx[0] = CL_shr( x[0], SCALEFACTOR20 ); + xx[1] = CL_shr( x[16], SCALEFACTOR20 ); + xx[2] = CL_shr( x[12], SCALEFACTOR20 ); + xx[3] = CL_shr( x[8], SCALEFACTOR20 ); + xx[4] = CL_shr( x[4], SCALEFACTOR20 ); + + s[0] = CL_add( xx[1], xx[4] ); + s[3] = CL_sub( xx[1], xx[4] ); + s[2] = CL_add( xx[2], xx[3] ); + s[1] = CL_sub( xx[2], xx[3] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[0] = CL_add( xx[0], s[0] ); + s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[4] = CL_msu_j( s[0], s[1] ); + y[16] = CL_mac_j( s[0], s[1] ); + y[8] = CL_mac_j( s[2], s[3] ); 
+ y[12] = CL_msu_j( s[2], s[3] ); + + xx[0] = CL_shr( x[5], SCALEFACTOR20 ); + xx[1] = CL_shr( x[1], SCALEFACTOR20 ); + xx[2] = CL_shr( x[17], SCALEFACTOR20 ); + xx[3] = CL_shr( x[13], SCALEFACTOR20 ); + xx[4] = CL_shr( x[9], SCALEFACTOR20 ); + + s[0] = CL_add( xx[1], xx[4] ); + s[3] = CL_sub( xx[1], xx[4] ); + s[2] = CL_add( xx[2], xx[3] ); + s[1] = CL_sub( xx[2], xx[3] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[1] = CL_add( xx[0], s[0] ); + s[0] = CL_add( y[1], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[5] = CL_msu_j( s[0], s[1] ); + y[17] = CL_mac_j( s[0], s[1] ); + y[9] = CL_mac_j( s[2], s[3] ); + y[13] = CL_msu_j( s[2], s[3] ); + + xx[0] = CL_shr( x[10], SCALEFACTOR20 ); + xx[1] = CL_shr( x[6], SCALEFACTOR20 ); + xx[2] = CL_shr( x[2], SCALEFACTOR20 ); + xx[3] = CL_shr( x[18], SCALEFACTOR20 ); + xx[4] = CL_shr( x[14], SCALEFACTOR20 ); + + s[0] = CL_add( xx[1], xx[4] ); + s[3] = CL_sub( xx[1], xx[4] ); + s[2] = CL_add( xx[2], xx[3] ); + s[1] = CL_sub( xx[2], xx[3] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[2] = CL_add( xx[0], s[0] ); + s[0] = CL_add( y[2], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[6] = CL_msu_j( s[0], s[1] ); + y[18] = CL_mac_j( s[0], s[1] ); + y[10] = CL_mac_j( s[2], s[3] ); + y[14] = CL_msu_j( s[2], s[3] ); + + xx[0] = CL_shr( x[15], SCALEFACTOR20 ); + xx[1] = CL_shr( x[11], SCALEFACTOR20 ); + xx[2] = CL_shr( x[7], SCALEFACTOR20 ); + xx[3] = CL_shr( x[3], SCALEFACTOR20 ); + xx[4] = CL_shr( x[19], SCALEFACTOR20 ); + + s[0] 
= CL_add( xx[1], xx[4] ); + s[3] = CL_sub( xx[1], xx[4] ); + s[2] = CL_add( xx[2], xx[3] ); + s[1] = CL_sub( xx[2], xx[3] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[3] = CL_add( xx[0], s[0] ); + s[0] = CL_add( y[3], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[7] = CL_msu_j( s[0], s[1] ); + y[19] = CL_mac_j( s[0], s[1] ); + y[11] = CL_mac_j( s[2], s[3] ); + y[15] = CL_msu_j( s[2], s[3] ); + + tt[0] = CL_add( y[0], y[2] ); + tt[1] = CL_sub( y[0], y[2] ); + tt[2] = CL_add( y[1], y[3] ); + tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[1], y[3] ) ) ); + + x[0] = CL_add( tt[0], tt[2] ); + x[5] = CL_sub( tt[1], tt[3] ); + x[10] = CL_sub( tt[0], tt[2] ); + x[15] = CL_add( tt[1], tt[3] ); + + tt[0] = CL_add( y[4], y[6] ); + tt[1] = CL_sub( y[4], y[6] ); + tt[2] = CL_add( y[5], y[7] ); + tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[5], y[7] ) ) ); + + x[4] = CL_add( tt[0], tt[2] ); + x[9] = CL_sub( tt[1], tt[3] ); + x[14] = CL_sub( tt[0], tt[2] ); + x[19] = CL_add( tt[1], tt[3] ); + + tt[0] = CL_add( y[8], y[10] ); + tt[1] = CL_sub( y[8], y[10] ); + tt[2] = CL_add( y[9], y[11] ); + tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[9], y[11] ) ) ); + + x[8] = CL_add( tt[0], tt[2] ); + x[13] = CL_sub( tt[1], tt[3] ); + x[18] = CL_sub( tt[0], tt[2] ); + x[3] = CL_add( tt[1], tt[3] ); + + tt[0] = CL_add( y[12], y[14] ); + tt[1] = CL_sub( y[12], y[14] ); + tt[2] = CL_add( y[13], y[15] ); + tt[3] = CL_swap_real_imag( CL_conjugate( CL_sub( y[13], y[15] ) ) ); + + x[12] = CL_add( tt[0], tt[2] ); + x[17] = CL_sub( tt[1], tt[3] ); + x[2] = CL_sub( tt[0], tt[2] ); + x[7] = CL_add( tt[1], tt[3] ); + + tt[0] = CL_add( y[16], y[18] ); + tt[1] = CL_sub( y[16], y[18] ); + tt[2] = CL_add( y[17], y[19] ); + tt[3] = 
CL_swap_real_imag( CL_conjugate( CL_sub( y[17], y[19] ) ) ); + + x[16] = CL_add( tt[0], tt[2] ); + x[1] = CL_sub( tt[1], tt[3] ); + x[6] = CL_sub( tt[0], tt[2] ); + x[11] = CL_add( tt[1], tt[3] ); return; } static void fft_len30( - Word32 *re, - Word32 *im, - const Word16 s ) + cmplx *x ) { - Word32 t; - Word32 r1, r2, r3, r4; - Word32 s1, s2, s3, s4; - Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09; - Word32 x10, x11, x12, x13, x14, x15, x16, x17, x18, x19; - Word32 x20, x21, x22, x23, x24, x25, x26, x27, x28, x29; - - Word32 y00, y01, y02, y03, y04, y05, y06, y07, y08, y09; - Word32 y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; - Word32 y20, y21, y22, y23, y24, y25, y26, y27, y28, y29; - - Word32 z00, z01, z02, z03, z04, z05, z06, z07, z08, z09; - Word32 z10, z11, z12, z13, z14, z15, z16, z17, z18, z19; - Word32 z20, z21, z22, z23, z24, z25, z26, z27, z28, z29; - Word32 z30, z31, z32, z33, z34, z35, z36, z37, z38, z39; - Word32 z40, z41, z42, z43, z44, z45, z46, z47, z48, z49; - Word32 z50, z51, z52, z53, z54, z55, z56, z57, z58, z59; - - Word32 *rel, *reh, *iml, *imh; - - rel = &re[s * 0]; - reh = &re[s * 15]; - iml = &im[s * 0]; - imh = &im[s * 15]; - - x00 = re[s * 0]; - x01 = im[s * 0]; - x02 = re[s * 18]; - x03 = im[s * 18]; - x04 = re[s * 6]; - x05 = im[s * 6]; - x06 = re[s * 24]; - x07 = im[s * 24]; - x08 = re[s * 12]; - x09 = im[s * 12]; - - x10 = re[s * 20]; - x11 = im[s * 20]; - x12 = re[s * 8]; - x13 = im[s * 8]; - x14 = re[s * 26]; - x15 = im[s * 26]; - x16 = re[s * 14]; - x17 = im[s * 14]; - x18 = re[s * 2]; - x19 = im[s * 2]; - - x20 = re[s * 10]; - x21 = im[s * 10]; - x22 = re[s * 28]; - x23 = im[s * 28]; - x24 = re[s * 16]; - x25 = im[s * 16]; - x26 = re[s * 4]; - x27 = im[s * 4]; - x28 = re[s * 22]; - x29 = im[s * 22]; - - r1 = L_add( x02, x08 ); - r4 = L_sub( x02, x08 ); - r3 = L_add( x04, x06 ); - r2 = L_sub( x04, x06 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y00 = L_add( x00, r1 ); - r1 = L_add( 
y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x03, x09 ); - s4 = L_sub( x03, x09 ); - s3 = L_add( x05, x07 ); - s2 = L_sub( x05, x07 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y01 = L_add( x01, s1 ); - s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y02 = L_add( r1, s2 ); - y08 = L_sub( r1, s2 ); - y04 = L_sub( r3, s4 ); - y06 = L_add( r3, s4 ); - - y03 = L_sub( s1, r2 ); - y09 = L_add( s1, r2 ); - y05 = L_add( s3, r4 ); - y07 = L_sub( s3, r4 ); - - r1 = L_add( x12, x18 ); - r4 = L_sub( x12, x18 ); - r3 = L_add( x14, x16 ); - r2 = L_sub( x14, x16 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y10 = L_add( x10, r1 ); - r1 = L_add( y10, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ) , FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x13, x19 ); - s4 = L_sub( x13, x19 ); - s3 = L_add( x15, x17 ); - s2 = L_sub( x15, x17 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y11 = L_add( x11, s1 ); - s1 = L_add( y11, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y12 = L_add( r1, s2 ); - y18 = L_sub( r1, s2 ); - y14 = L_sub( r3, s4 ); - y16 = L_add( r3, s4 ); - - y13 = L_sub( s1, r2 ); - y19 = L_add( s1, r2 ); - y15 = L_add( s3, r4 ); - 
y17 = L_sub( s3, r4 ); - - r1 = L_add( x22, x28 ); - r4 = L_sub( x22, x28 ); - r3 = L_add( x24, x26 ); - r2 = L_sub( x24, x26 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y20 = L_add( x20, r1 ); - r1 = L_add( y20, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x23, x29 ); - s4 = L_sub( x23, x29 ); - s3 = L_add( x25, x27 ); - s2 = L_sub( x25, x27 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y21 = L_add( x21, s1 ); - s1 = L_add( y21, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y22 = L_add( r1, s2 ); - y28 = L_sub( r1, s2 ); - y24 = L_sub( r3, s4 ); - y26 = L_add( r3, s4 ); - - y23 = L_sub( s1, r2 ); - y29 = L_add( s1, r2 ); - y25 = L_add( s3, r4 ); - y27 = L_sub( s3, r4 ); - - r1 = L_add( y10, y20 ); - r2 = Mpy_32_16_1( L_sub( y10, y20 ), FFT_C31 ); - z00 = L_add( y00, r1 ); - r1 = L_sub( y00, L_shr(r1, 1) ); - - s1 = L_add( y11, y21 ); - s2 = Mpy_32_16_1( L_sub( y11, y21 ), FFT_C31 ); - z01 = L_add( y01, s1 ); - s1 = L_sub( y01, L_shr(s1, 1) ); - - z20 = L_sub( r1, s2 ); - z10 = L_add( r1, s2 ); - z21 = L_add( s1, r2 ); - z11 = L_sub( s1, r2 ); - - r1 = L_add( y12, y22 ); - r2 = Mpy_32_16_1( L_sub( y12, y22 ), FFT_C31 ); - z12 = L_add( y02, r1 ); - r1 = L_sub( y02, L_shr(r1, 1) ); - - s1 = L_add( y13, y23 ); - s2 = Mpy_32_16_1( L_sub( y13, y23 ), FFT_C31 ); - z13 = L_add( y03, s1 ); - s1 = L_sub( y03, L_shr(s1, 1)); - - z02 = L_sub( r1, s2 ); - z22 = L_add( r1, s2 ); - z03 = L_add( s1, r2 ); - z23 = L_sub( s1, r2 ); - - r1 = L_add( y14, y24 ); - r2 = Mpy_32_16_1( L_sub( y14, y24 ), FFT_C31 ); - z24 = L_add( y04, 
r1 ); - r1 = L_sub( y04, L_shr(r1, 1) ); - - s1 = L_add( y15, y25 ); - s2 = Mpy_32_16_1( L_sub( y15, y25 ), FFT_C31 ); - z25 = L_add( y05, s1 ); - s1 = L_sub( y05, L_shr(s1, 1) ); - - z14 = L_sub( r1, s2 ); - z04 = L_add( r1, s2 ); - z15 = L_add( s1, r2 ); - z05 = L_sub( s1, r2 ); - - r1 = L_add( y16, y26 ); - r2 = Mpy_32_16_1( L_sub( y16, y26 ), FFT_C31 ); - z06 = L_add( y06, r1 ); - r1 = L_sub( y06, L_shr(r1, 1) ); - - s1 = L_add( y17, y27 ); - s2 = Mpy_32_16_1( L_sub( y17, y27 ), FFT_C31 ); - z07 = L_add( y07, s1 ); - s1 = L_sub( y07, L_shr(s1, 1) ); - - z26 = L_sub( r1, s2 ); - z16 = L_add( r1, s2 ); - z27 = L_add( s1, r2 ); - z17 = L_sub( s1, r2 ); - - r1 = L_add( y18, y28 ); - r2 = Mpy_32_16_1( L_sub( y18, y28 ), FFT_C31 ); - z18 = L_add( y08, r1 ); - r1 = L_sub( y08, L_shr(r1, 1) ); - - s1 = L_add( y19, y29 ); - s2 = Mpy_32_16_1( L_sub( y19, y29 ), FFT_C31 ); - z19 = L_add( y09, s1 ); - s1 = L_sub(y09, L_shr(s1, 1)); - - z08 = L_sub( r1, s2 ); - z28 = L_add( r1, s2 ); - z09 = L_add( s1, r2 ); - z29 = L_sub( s1, r2 ); - - x00 = re[s * 15]; - x01 = im[s * 15]; - x02 = re[s * 3]; - x03 = im[s * 3]; - x04 = re[s * 21]; - x05 = im[s * 21]; - x06 = re[s * 9]; - x07 = im[s * 9]; - x08 = re[s * 27]; - x09 = im[s * 27]; - - x10 = re[s * 5]; - x11 = im[s * 5]; - x12 = re[s * 23]; - x13 = im[s * 23]; - x14 = re[s * 11]; - x15 = im[s * 11]; - x16 = re[s * 29]; - x17 = im[s * 29]; - x18 = re[s * 17]; - x19 = im[s * 17]; - - x20 = re[s * 25]; - x21 = im[s * 25]; - x22 = re[s * 13]; - x23 = im[s * 13]; - x24 = re[s * 1]; - x25 = im[s * 1]; - x26 = re[s * 19]; - x27 = im[s * 19]; - x28 = re[s * 7]; - x29 = im[s * 7]; - - r1 = L_add( x02, x08 ); - r4 = L_sub( x02, x08 ); - r3 = L_add( x04, x06 ); - r2 = L_sub( x04, x06 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y00 = L_add( x00, r1 ); - r1 = L_add( y00, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = 
L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x03, x09 ); - s4 = L_sub( x03, x09 ); - s3 = L_add( x05, x07 ); - s2 = L_sub( x05, x07 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y01 = L_add( x01, s1 ); - s1 = L_add( y01, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y02 = L_add( r1, s2 ); - y08 = L_sub( r1, s2 ); - y04 = L_sub( r3, s4 ); - y06 = L_add( r3, s4 ); - - y03 = L_sub( s1, r2 ); - y09 = L_add( s1, r2 ); - y05 = L_add( s3, r4 ); - y07 = L_sub( s3, r4 ); - - r1 = L_add( x12, x18 ); - r4 = L_sub( x12, x18 ); - r3 = L_add( x14, x16 ); - r2 = L_sub( x14, x16 ); - t = Mpy_32_16_1( L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y10 = L_add( x10, r1 ); - r1 = L_add( y10, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x13, x19 ); - s4 = L_sub( x13, x19 ); - s3 = L_add( x15, x17 ); - s2 = L_sub( x15, x17 ); - t = Mpy_32_16_1(L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y11 = L_add( x11, s1 ); - s1 = L_add( y11, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y12 = L_add( r1, s2 ); - y18 = L_sub( r1, s2 ); - y14 = L_sub( r3, s4 ); - y16 = L_add( r3, s4 ); - - y13 = L_sub( s1, r2 ); - y19 = L_add( s1, r2 ); - y15 = L_add( s3, r4 ); - y17 = L_sub( s3, r4 ); - - r1 = L_add( x22, x28 ); - r4 = L_sub( x22, x28 ); - r3 = L_add( x24, x26 ); - r2 = L_sub( x24, x26 ); - t = Mpy_32_16_1( 
L_sub( r1, r3 ), FFT_C54 ); - r1 = L_add( r1, r3 ); - y20 = L_add( x20, r1 ); - r1 = L_add( y20, L_shl(Mpy_32_16_1(r1, FFT_C55), 1) ); - r3 = L_sub( r1, t ); - r1 = L_add( r1, t ); - t = Mpy_32_16_1( L_add( r4, r2 ), FFT_C51 ); - r4 = L_add( t, L_shl(Mpy_32_16_1( r4, FFT_C52 ), 1) ); - r2 = L_add( t, Mpy_32_16_1( r2, FFT_C53 ) ); - - s1 = L_add( x23, x29 ); - s4 = L_sub( x23, x29 ); - s3 = L_add( x25, x27 ); - s2 = L_sub( x25, x27 ); - t = Mpy_32_16_1( L_sub( s1, s3 ), FFT_C54 ); - s1 = L_add( s1, s3 ); - y21 = L_add( x21, s1 ); - s1 = L_add( y21, L_shl(Mpy_32_16_1(s1, FFT_C55), 1) ); - s3 = L_sub( s1, t ); - s1 = L_add( s1, t ); - t = Mpy_32_16_1( L_add( s4, s2 ), FFT_C51 ); - s4 = L_add( t, L_shl(Mpy_32_16_1( s4, FFT_C52 ), 1) ); - s2 = L_add( t, Mpy_32_16_1( s2, FFT_C53 ) ); - - y22 = L_add( r1, s2 ); - y28 = L_sub( r1, s2 ); - y24 = L_sub( r3, s4 ); - y26 = L_add( r3, s4 ); - - y23 = L_sub( s1, r2 ); - y29 = L_add( s1, r2 ); - y25 = L_add( s3, r4 ); - y27 = L_sub( s3, r4 ); - - r1 = L_add( y10, y20 ); - r2 = Mpy_32_16_1( L_sub( y10, y20 ), FFT_C31 ); - z30 = L_add( y00, r1 ); - r1 = L_sub( y00, L_shr(r1, 1) ); - - s1 = L_add( y11, y21 ); - s2 = Mpy_32_16_1( L_sub( y11, y21 ), FFT_C31 ); - z31 = L_add( y01, s1 ); - s1 = L_sub( y01, L_shr(s1, 1) ); - - z50 = L_sub( r1, s2 ); - z40 = L_add( r1, s2 ); - z51 = L_add( s1, r2 ); - z41 = L_sub( s1, r2 ); - - r1 = L_add( y12, y22 ); - r2 = Mpy_32_16_1( L_sub( y12, y22 ), FFT_C31 ); - z42 = L_add( y02, r1 ); - r1 = L_sub( y02, L_shr(r1, 1) ); - - s1 = L_add( y13, y23 ); - s2 = Mpy_32_16_1( L_sub( y13, y23 ), FFT_C31 ); - z43 = L_add( y03, s1 ); - s1 = L_sub( y03, L_shr(s1, 1) ); - - z32 = L_sub( r1, s2 ); - z52 = L_add( r1, s2 ); - z33 = L_add( s1, r2 ); - z53 = L_sub( s1, r2 ); - - r1 = L_add( y14, y24 ); - r2 = Mpy_32_16_1( L_sub( y14, y24 ), FFT_C31 ); - z54 = L_add( y04, r1 ); - r1 = L_sub( y04, L_shr(r1, 1) ); - - s1 = L_add( y15, y25 ); - s2 = Mpy_32_16_1( L_sub( y15, y25 ), FFT_C31 ); - z55 = L_add( y05, s1 ); - 
s1 = L_sub( y05, L_shr(s1, 1) ); - - z44 = L_sub( r1, s2 ); - z34 = L_add( r1, s2 ); - z45 = L_add( s1, r2 ); - z35 = L_sub( s1, r2 ); - - r1 = L_add( y16, y26 ); - r2 = Mpy_32_16_1( L_sub( y16, y26 ), FFT_C31 ); - z36 = L_add( y06, r1 ); - r1 = L_sub( y06, L_shr(r1, 1) ); - - s1 = L_add( y17, y27 ); - s2 = Mpy_32_16_1( L_sub( y17, y27 ), FFT_C31 ); - z37 = L_add( y07, s1 ); - s1 = L_sub( y07, L_shr(s1, 1) ); - - z56 = L_sub( r1, s2 ); - z46 = L_add( r1, s2 ); - z57 = L_add( s1, r2 ); - z47 = L_sub( s1, r2 ); - - r1 = L_add( y18, y28 ); - r2 = Mpy_32_16_1( L_sub( y18, y28 ), FFT_C31 ); - z48 = L_add( y08, r1 ); - r1 = L_sub( y08, L_shr(r1, 1) ); - - s1 = L_add( y19, y29 ); - s2 = Mpy_32_16_1( L_sub( y19, y29 ), FFT_C31 ); - z49 = L_add( y09, s1 ); - s1 = L_sub( y09, L_shr(s1, 1) ); - - z38 = L_sub( r1, s2 ); - z58 = L_add( r1, s2 ); - z39 = L_add( s1, r2 ); - z59 = L_sub( s1, r2 ); - - r1 = z00; - r2 = z30; - r3 = z01; - r4 = z31; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z16; - r2 = z46; - r3 = z17; - r4 = z47; - *reh = L_add( r1, r2 ); - *rel = L_sub( r1, r2 ); - *imh = L_add( r3, r4 ); - *iml = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z02; - r2 = z32; - r3 = z03; - r4 = z33; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z18; - r2 = z48; - r3 = z19; - r4 = z49; - *reh = L_add( r1, r2 ); - *rel = L_sub( r1, r2 ); - *imh = L_add( r3, r4 ); - *iml = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z04; - r2 = z34; - r3 = z05; - r4 = z35; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z20; - r2 = z50; - r3 = z21; - r4 = z51; - *reh = L_add( r1, r2 ); - *rel = L_sub( r1, r2 ); - *imh = 
L_add( r3, r4 ); - *iml = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z06; - r2 = z36; - r3 = z07; - r4 = z37; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z22; - r2 = z52; - r3 = z23; - r4 = z53; - *reh = L_add( r1, r2 ); - *rel = L_sub( r1, r2 ); - *imh = L_add( r3, r4 ); - *iml = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z08; - r2 = z38; - r3 = z09; - r4 = z39; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z24; - r2 = z54; - r3 = z25; - r4 = z55; - *reh = L_add( r1, r2 ); - *rel = L_sub( r1, r2 ); - *imh = L_add( r3, r4 ); - *iml = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z10; - r2 = z40; - r3 = z11; - r4 = z41; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z26; - r2 = z56; - r3 = z27; - r4 = z57; - *reh = L_add( r1, r2 ); - *rel = L_sub( r1, r2 ); - *imh = L_add( r3, r4 ); - *iml = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z12; - r2 = z42; - r3 = z13; - r4 = z43; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z28; - r2 = z58; - r3 = z29; - r4 = z59; - *reh = L_add( r1, r2 ); - *rel = L_sub( r1, r2 ); - *imh = L_add( r3, r4 ); - *iml = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; - - r1 = z14; - r2 = z44; - r3 = z15; - r4 = z45; - *rel = L_add( r1, r2 ); - *reh = L_sub( r1, r2 ); - *iml = L_add( r3, r4 ); - *imh = L_sub( r3, r4 ); - rel += s, reh += s, iml += s; - imh += s; + cmplx t; + cmplx s[4]; + cmplx xx[15]; + cmplx y[15]; + cmplx z[30]; + cmplx *l, *h; + + l = &x[0]; + h = &x[15]; + + xx[0] = 
x[0]; + xx[1] = x[18]; + xx[2] = x[6]; + xx[3] = x[24]; + xx[4] = x[12]; + + xx[5] = x[20]; + xx[6] = x[8]; + xx[7] = x[26]; + xx[8] = x[14]; + xx[9] = x[2]; + + xx[10] = x[10]; + xx[11] = x[28]; + xx[12] = x[16]; + xx[13] = x[4]; + xx[14] = x[22]; + + s[0] = CL_add( xx[1], xx[4] ); + s[3] = CL_sub( xx[1], xx[4] ); + s[2] = CL_add( xx[2], xx[3] ); + s[1] = CL_sub( xx[2], xx[3] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[0] = CL_add( xx[0], s[0] ); + s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[1] = CL_msu_j( s[0], s[1] ); + y[4] = CL_mac_j( s[0], s[1] ); + y[2] = CL_mac_j( s[2], s[3] ); + y[3] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( xx[6], xx[9] ); + s[3] = CL_sub( xx[6], xx[9] ); + s[2] = CL_add( xx[7], xx[8] ); + s[1] = CL_sub( xx[7], xx[8] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[5] = CL_add( xx[5], s[0] ); + s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[6] = CL_msu_j( s[0], s[1] ); + y[9] = CL_mac_j( s[0], s[1] ); + y[7] = CL_mac_j( s[2], s[3] ); + y[8] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( xx[11], xx[14] ); + s[3] = CL_sub( xx[11], xx[14] ); + s[2] = CL_add( xx[12], xx[13] ); + s[1] = CL_sub( xx[12], xx[13] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[10] = CL_add( xx[10], s[0] ); + s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + 
s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[11] = CL_msu_j( s[0], s[1] ); + y[14] = CL_mac_j( s[0], s[1] ); + y[12] = CL_mac_j( s[2], s[3] ); + y[13] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( y[5], y[10] ); + s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 ); + z[0] = CL_add( y[0], s[0] ); + s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) ); + + z[10] = CL_mac_j( s[0], s[1] ); + z[5] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[6], y[11] ); + s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 ); + z[6] = CL_add( y[1], s[0] ); + s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) ); + + z[1] = CL_mac_j( s[0], s[1] ); + z[11] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[7], y[12] ); + s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 ); + z[12] = CL_add( y[2], s[0] ); + s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) ); + + z[7] = CL_mac_j( s[0], s[1] ); + z[2] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[8], y[13] ); + s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 ); + z[3] = CL_add( y[3], s[0] ); + s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) ); + + z[13] = CL_mac_j( s[0], s[1] ); + z[8] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[9], y[14] ); + s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 ); + z[9] = CL_add( y[4], s[0] ); + s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) ); + + z[4] = CL_mac_j( s[0], s[1] ); + z[14] = CL_msu_j( s[0], s[1] ); + + xx[0] = x[15]; + xx[1] = x[3]; + xx[2] = x[21]; + xx[3] = x[9]; + xx[4] = x[27]; + + xx[5] = x[5]; + xx[6] = x[23]; + xx[7] = x[11]; + xx[8] = x[29]; + xx[9] = x[17]; + + xx[10] = x[25]; + xx[11] = x[13]; + xx[12] = x[1]; + xx[13] = x[19]; + xx[14] = x[7]; + + s[0] = CL_add( xx[1], xx[4] ); + s[3] = CL_sub( xx[1], xx[4] ); + s[2] = CL_add( xx[2], xx[3] ); + s[1] = CL_sub( xx[2], xx[3] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[0] = CL_add( xx[0], s[0] ); + s[0] = CL_add( y[0], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t 
); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[1] = CL_msu_j( s[0], s[1] ); + y[4] = CL_mac_j( s[0], s[1] ); + y[2] = CL_mac_j( s[2], s[3] ); + y[3] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( xx[6], xx[9] ); + s[3] = CL_sub( xx[6], xx[9] ); + s[2] = CL_add( xx[7], xx[8] ); + s[1] = CL_sub( xx[7], xx[8] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[5] = CL_add( xx[5], s[0] ); + s[0] = CL_add( y[5], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[6] = CL_msu_j( s[0], s[1] ); + y[9] = CL_mac_j( s[0], s[1] ); + y[7] = CL_mac_j( s[2], s[3] ); + y[8] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( xx[11], xx[14] ); + s[3] = CL_sub( xx[11], xx[14] ); + s[2] = CL_add( xx[12], xx[13] ); + s[1] = CL_sub( xx[12], xx[13] ); + t = CL_scale( CL_sub( s[0], s[2] ), FFT_C54 ); + s[0] = CL_add( s[0], s[2] ); + y[10] = CL_add( xx[10], s[0] ); + s[0] = CL_add( y[10], CL_shl( CL_scale( s[0], FFT_C55 ), 1 ) ); + s[2] = CL_sub( s[0], t ); + s[0] = CL_add( s[0], t ); + t = CL_scale( CL_add( s[3], s[1] ), FFT_C51 ); + s[3] = CL_add( t, CL_shl( CL_scale( s[3], FFT_C52 ), 1 ) ); + s[1] = CL_add( t, CL_scale( s[1], FFT_C53 ) ); + + y[11] = CL_msu_j( s[0], s[1] ); + y[14] = CL_mac_j( s[0], s[1] ); + y[12] = CL_mac_j( s[2], s[3] ); + y[13] = CL_msu_j( s[2], s[3] ); + + s[0] = CL_add( y[5], y[10] ); + s[1] = CL_scale( CL_sub( y[5], y[10] ), FFT_C31 ); + z[15] = CL_add( y[0], s[0] ); + s[0] = CL_sub( y[0], CL_shr( s[0], 1 ) ); + + z[25] = CL_mac_j( s[0], s[1] ); + z[20] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[6], y[11] ); + s[1] = CL_scale( CL_sub( y[6], y[11] ), FFT_C31 ); + z[21] = CL_add( 
y[1], s[0] ); + s[0] = CL_sub( y[1], CL_shr( s[0], 1 ) ); + + z[16] = CL_mac_j( s[0], s[1] ); + z[26] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[7], y[12] ); + s[1] = CL_scale( CL_sub( y[7], y[12] ), FFT_C31 ); + z[27] = CL_add( y[2], s[0] ); + s[0] = CL_sub( y[2], CL_shr( s[0], 1 ) ); + + z[22] = CL_mac_j( s[0], s[1] ); + z[17] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[8], y[13] ); + s[1] = CL_scale( CL_sub( y[8], y[13] ), FFT_C31 ); + z[18] = CL_add( y[3], s[0] ); + s[0] = CL_sub( y[3], CL_shr( s[0], 1 ) ); + + z[28] = CL_mac_j( s[0], s[1] ); + z[23] = CL_msu_j( s[0], s[1] ); + + s[0] = CL_add( y[9], y[14] ); + s[1] = CL_scale( CL_sub( y[9], y[14] ), FFT_C31 ); + z[24] = CL_add( y[4], s[0] ); + s[0] = CL_sub( y[4], CL_shr( s[0], 1 ) ); + + z[19] = CL_mac_j( s[0], s[1] ); + z[29] = CL_msu_j( s[0], s[1] ); + + s[0] = z[0]; + s[1] = z[15]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[8]; + s[1] = z[23]; + *h = CL_add( s[0], s[1] ); + *l = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[1]; + s[1] = z[16]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[9]; + s[1] = z[24]; + *h = CL_add( s[0], s[1] ); + *l = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[2]; + s[1] = z[17]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[10]; + s[1] = z[25]; + *h = CL_add( s[0], s[1] ); + *l = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[3]; + s[1] = z[18]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[11]; + s[1] = z[26]; + *h = CL_add( s[0], s[1] ); + *l = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[4]; + s[1] = z[19]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[12]; + s[1] = z[27]; + *h = CL_add( s[0], s[1] ); + *l = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[5]; + s[1] = z[20]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( 
s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[13]; + s[1] = z[28]; + *h = CL_add( s[0], s[1] ); + *l = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[6]; + s[1] = z[21]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[14]; + s[1] = z[29]; + *h = CL_add( s[0], s[1] ); + *l = CL_sub( s[0], s[1] ); + l += 1, h += 1; + + s[0] = z[7]; + s[1] = z[22]; + *l = CL_add( s[0], s[1] ); + *h = CL_sub( s[0], s[1] ); + l += 1, h += 1; return; } static void fft_len32( - Word32 *re, - Word32 *im, - const Word16 s ) + cmplx *x ) { - Word32 as, bs; - Word32 x00, x01, x02, x03, x04, x05, x06, x07; - Word32 x08, x09, x10, x11, x12, x13, x14, x15; - Word32 t00, t01, t02, t03, t04, t05, t06, t07; - Word32 t08, t09, t10, t11, t12, t13, t14, t15; - Word32 s00, s01, s02, s03, s04, s05, s06, s07; - Word32 s08, s09, s10, s11, s12, s13, s14, s15; - - Word32 y00, y01, y02, y03, y04, y05, y06, y07; - Word32 y08, y09, y10, y11, y12, y13, y14, y15; - Word32 y16, y17, y18, y19, y20, y21, y22, y23; - Word32 y24, y25, y26, y27, y28, y29, y30, y31; - Word32 y32, y33, y34, y35, y36, y37, y38, y39; - Word32 y40, y41, y42, y43, y44, y45, y46, y47; - Word32 y48, y49, y50, y51, y52, y53, y54, y55; - Word32 y56, y57, y58, y59, y60, y61, y62, y63; - - x00 = re[s * 0]; - x01 = im[s * 0]; - x02 = re[s * 4]; - x03 = im[s * 4]; - x04 = re[s * 8]; - x05 = im[s * 8]; - x06 = re[s * 12]; - x07 = im[s * 12]; - x08 = re[s * 16]; - x09 = im[s * 16]; - x10 = re[s * 20]; - x11 = im[s * 20]; - x12 = re[s * 24]; - x13 = im[s * 24]; - x14 = re[s * 28]; - x15 = im[s * 28]; - - t00 = L_add( x00, x08 ); - t02 = L_sub( x00, x08 ); - t01 = L_add( x01, x09 ); - t03 = L_sub( x01, x09 ); - t04 = L_add( x02, x10 ); - t06 = L_sub( x02, x10 ); - t05 = L_add( x03, x11 ); - t07 = L_sub( x03, x11 ); - t08 = L_add( x04, x12 ); - t10 = L_sub( x04, x12 ); - t09 = L_add( x05, x13 ); - t11 = L_sub( x05, x13 ); - t12 = L_add( x06, x14 ); - t14 = L_sub( x06, x14 ); - t13 = L_add( x07, x15 ); - t15 
= L_sub( x07, x15 ); - - s00 = L_add( t00, t08 ); - s04 = L_sub( t00, t08 ); - s01 = L_add( t01, t09 ); - s05 = L_sub( t01, t09 ); - s08 = L_sub( t02, t11 ); - s10 = L_add( t02, t11 ); - s09 = L_add( t03, t10 ); - s11 = L_sub( t03, t10 ); - s02 = L_add( t04, t12 ); - s07 = L_sub( t04, t12 ); - s03 = L_add( t05, t13 ); - s06 = L_sub( t13, t05 ); - t01 = L_add( t06, t14 ); - t02 = L_sub( t06, t14 ); - t00 = L_add( t07, t15 ); - t03 = L_sub( t07, t15 ); + cmplx t[8], s[8], xx[8]; + cmplx y[32]; + cmplx ab; + + xx[0] = x[0]; + xx[1] = x[4]; + xx[2] = x[8]; + xx[3] = x[12]; + xx[4] = x[16]; + xx[5] = x[20]; + xx[6] = x[24]; + xx[7] = x[28]; + + t[0] = CL_add( xx[0], xx[4] ); + t[1] = CL_sub( xx[0], xx[4] ); + t[2] = CL_add( xx[1], xx[5] ); + t[3] = CL_sub( xx[1], xx[5] ); + t[4] = CL_add( xx[2], xx[6] ); + t[5] = CL_sub( xx[2], xx[6] ); + t[6] = CL_add( xx[3], xx[7] ); + t[7] = CL_sub( xx[3], xx[7] ); + + s[0] = CL_add( t[0], t[4] ); + s[2] = CL_sub( t[0], t[4] ); + s[4] = CL_mac_j( t[1], t[5] ); + s[5] = CL_msu_j( t[1], t[5] ); + s[1] = CL_add( t[2], t[6] ); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + t[1] = CL_sub( t[3], t[7] ); { - s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 ); - s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 ); - s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 ); - s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 ); + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + s[7] = CL_conjugate( s[7] ); }; - y00 = L_add( s00, s02 ); - y08 = L_sub( s00, s02 ); - y01 = L_add( s01, s03 ); - y09 = L_sub( s01, s03 ); - y04 = L_sub( s04, s06 ); - y12 = L_add( s04, s06 ); - y05 = L_sub( s05, s07 ); - y13 = L_add( s05, s07 ); - y06 = L_add( s08, s14 ); - y14 = L_sub( s08, s14 ); - y07 = L_add( s09, s15 ); - y15 = L_sub( s09, s15 ); - y02 = L_add( s10, s12 ); - y10 = L_sub( s10, s12 ); - y03 = 
L_add( s11, s13 ); - y11 = L_sub( s11, s13 ); - - x00 = re[s * 1]; - x01 = im[s * 1]; - x02 = re[s * 5]; - x03 = im[s * 5]; - x04 = re[s * 9]; - x05 = im[s * 9]; - x06 = re[s * 13]; - x07 = im[s * 13]; - x08 = re[s * 17]; - x09 = im[s * 17]; - x10 = re[s * 21]; - x11 = im[s * 21]; - x12 = re[s * 25]; - x13 = im[s * 25]; - x14 = re[s * 29]; - x15 = im[s * 29]; - - t00 = L_add( x00, x08 ); - t02 = L_sub( x00, x08 ); - t01 = L_add( x01, x09 ); - t03 = L_sub( x01, x09 ); - t04 = L_add( x02, x10 ); - t06 = L_sub( x02, x10 ); - t05 = L_add( x03, x11 ); - t07 = L_sub( x03, x11 ); - t08 = L_add( x04, x12 ); - t10 = L_sub( x04, x12 ); - t09 = L_add( x05, x13 ); - t11 = L_sub( x05, x13 ); - t12 = L_add( x06, x14 ); - t14 = L_sub( x06, x14 ); - t13 = L_add( x07, x15 ); - t15 = L_sub( x07, x15 ); - - s00 = L_add( t00, t08 ); - s04 = L_sub( t00, t08 ); - s01 = L_add( t01, t09 ); - s05 = L_sub( t01, t09 ); - s08 = L_sub( t02, t11 ); - s10 = L_add( t02, t11 ); - s09 = L_add( t03, t10 ); - s11 = L_sub( t03, t10 ); - s02 = L_add( t04, t12 ); - s07 = L_sub( t04, t12 ); - s03 = L_add( t05, t13 ); - s06 = L_sub( t13, t05 ); - t01 = L_add( t06, t14 ); - t02 = L_sub( t06, t14 ); - t00 = L_add( t07, t15 ); - t03 = L_sub( t07, t15 ); + y[0] = CL_add( s[0], s[1] ); + y[4] = CL_sub( s[0], s[1] ); + y[2] = CL_sub( s[2], s[3] ); + y[6] = CL_add( s[2], s[3] ); + y[3] = CL_add( s[4], s[7] ); + y[7] = CL_sub( s[4], s[7] ); + y[1] = CL_add( s[5], s[6] ); + y[5] = CL_sub( s[5], s[6] ); + + xx[0] = x[1]; + xx[1] = x[5]; + xx[2] = x[9]; + xx[3] = x[13]; + xx[4] = x[17]; + xx[5] = x[21]; + xx[6] = x[25]; + xx[7] = x[29]; + + t[0] = CL_add( xx[0], xx[4] ); + t[1] = CL_sub( xx[0], xx[4] ); + t[2] = CL_add( xx[1], xx[5] ); + t[3] = CL_sub( xx[1], xx[5] ); + t[4] = CL_add( xx[2], xx[6] ); + t[5] = CL_sub( xx[2], xx[6] ); + t[6] = CL_add( xx[3], xx[7] ); + t[7] = CL_sub( xx[3], xx[7] ); + + s[0] = CL_add( t[0], t[4] ); + s[2] = CL_sub( t[0], t[4] ); + s[4] = CL_mac_j( t[1], t[5] ); + s[5] = CL_msu_j( 
t[1], t[5] ); + s[1] = CL_add( t[2], t[6] ); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + t[1] = CL_sub( t[3], t[7] ); { - s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 ); - s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 ); - s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 ); - s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 ); + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + s[7] = CL_conjugate( s[7] ); }; - y16 = L_add( s00, s02 ); - y24 = L_sub( s00, s02 ); - y17 = L_add( s01, s03 ); - y25 = L_sub( s01, s03 ); - y20 = L_sub( s04, s06 ); - y28 = L_add( s04, s06 ); - y21 = L_sub( s05, s07 ); - y29 = L_add( s05, s07 ); - y22 = L_add( s08, s14 ); - y30 = L_sub( s08, s14 ); - y23 = L_add( s09, s15 ); - y31 = L_sub( s09, s15 ); - y18 = L_add( s10, s12 ); - y26 = L_sub( s10, s12 ); - y19 = L_add( s11, s13 ); - y27 = L_sub( s11, s13 ); - - x00 = re[s * 2]; - x01 = im[s * 2]; - x02 = re[s * 6]; - x03 = im[s * 6]; - x04 = re[s * 10]; - x05 = im[s * 10]; - x06 = re[s * 14]; - x07 = im[s * 14]; - x08 = re[s * 18]; - x09 = im[s * 18]; - x10 = re[s * 22]; - x11 = im[s * 22]; - x12 = re[s * 26]; - x13 = im[s * 26]; - x14 = re[s * 30]; - x15 = im[s * 30]; - - t00 = L_add( x00, x08 ); - t02 = L_sub( x00, x08 ); - t01 = L_add( x01, x09 ); - t03 = L_sub( x01, x09 ); - t04 = L_add( x02, x10 ); - t06 = L_sub( x02, x10 ); - t05 = L_add( x03, x11 ); - t07 = L_sub( x03, x11 ); - t08 = L_add( x04, x12 ); - t10 = L_sub( x04, x12 ); - t09 = L_add( x05, x13 ); - t11 = L_sub( x05, x13 ); - t12 = L_add( x06, x14 ); - t14 = L_sub( x06, x14 ); - t13 = L_add( x07, x15 ); - t15 = L_sub( x07, x15 ); - - s00 = L_add( t00, t08 ); - s04 = L_sub( t00, t08 ); - s01 = L_add( t01, t09 ); - s05 = L_sub( t01, t09 ); - s08 = L_sub( t02, t11 ); - s10 = L_add( t02, t11 ); - s09 = L_add( t03, t10 ); - s11 = L_sub( t03, t10 ); - s02 = 
L_add( t04, t12 ); - s07 = L_sub( t04, t12 ); - s03 = L_add( t05, t13 ); - s06 = L_sub( t13, t05 ); - t01 = L_add( t06, t14 ); - t02 = L_sub( t06, t14 ); - t00 = L_add( t07, t15 ); - t03 = L_sub( t07, t15 ); + y[8] = CL_add( s[0], s[1] ); + y[12] = CL_sub( s[0], s[1] ); + y[10] = CL_sub( s[2], s[3] ); + y[14] = CL_add( s[2], s[3] ); + y[11] = CL_add( s[4], s[7] ); + y[15] = CL_sub( s[4], s[7] ); + y[9] = CL_add( s[5], s[6] ); + y[13] = CL_sub( s[5], s[6] ); + + xx[0] = x[2]; + xx[1] = x[6]; + xx[2] = x[10]; + xx[3] = x[14]; + xx[4] = x[18]; + xx[5] = x[22]; + xx[6] = x[26]; + xx[7] = x[30]; + + t[0] = CL_add( xx[0], xx[4] ); + t[1] = CL_sub( xx[0], xx[4] ); + t[2] = CL_add( xx[1], xx[5] ); + t[3] = CL_sub( xx[1], xx[5] ); + t[4] = CL_add( xx[2], xx[6] ); + t[5] = CL_sub( xx[2], xx[6] ); + t[6] = CL_add( xx[3], xx[7] ); + t[7] = CL_sub( xx[3], xx[7] ); + + s[0] = CL_add( t[0], t[4] ); + s[2] = CL_sub( t[0], t[4] ); + s[4] = CL_mac_j( t[1], t[5] ); + s[5] = CL_msu_j( t[1], t[5] ); + s[1] = CL_add( t[2], t[6] ); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + t[1] = CL_sub( t[3], t[7] ); { - s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 ); - s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 ); - s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 ); - s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 ); + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + s[7] = CL_conjugate( s[7] ); }; - y32 = L_add( s00, s02 ); - y40 = L_sub( s00, s02 ); - y33 = L_add( s01, s03 ); - y41 = L_sub( s01, s03 ); - y36 = L_sub( s04, s06 ); - y44 = L_add( s04, s06 ); - y37 = L_sub( s05, s07 ); - y45 = L_add( s05, s07 ); - y38 = L_add( s08, s14 ); - y46 = L_sub( s08, s14 ); - y39 = L_add( s09, s15 ); - y47 = L_sub( s09, s15 ); - y34 = L_add( s10, s12 ); - y42 = L_sub( s10, s12 ); - y35 = L_add( s11, s13 ); - y43 = L_sub( s11, s13 ); 
- - x00 = re[s * 3]; - x01 = im[s * 3]; - x02 = re[s * 7]; - x03 = im[s * 7]; - x04 = re[s * 11]; - x05 = im[s * 11]; - x06 = re[s * 15]; - x07 = im[s * 15]; - x08 = re[s * 19]; - x09 = im[s * 19]; - x10 = re[s * 23]; - x11 = im[s * 23]; - x12 = re[s * 27]; - x13 = im[s * 27]; - x14 = re[s * 31]; - x15 = im[s * 31]; - - t00 = L_add( x00, x08 ); - t02 = L_sub( x00, x08 ); - t01 = L_add( x01, x09 ); - t03 = L_sub( x01, x09 ); - t04 = L_add( x02, x10 ); - t06 = L_sub( x02, x10 ); - t05 = L_add( x03, x11 ); - t07 = L_sub( x03, x11 ); - t08 = L_add( x04, x12 ); - t10 = L_sub( x04, x12 ); - t09 = L_add( x05, x13 ); - t11 = L_sub( x05, x13 ); - t12 = L_add( x06, x14 ); - t14 = L_sub( x06, x14 ); - t13 = L_add( x07, x15 ); - t15 = L_sub( x07, x15 ); - - s00 = L_add( t00, t08 ); - s04 = L_sub( t00, t08 ); - s01 = L_add( t01, t09 ); - s05 = L_sub( t01, t09 ); - s08 = L_sub( t02, t11 ); - s10 = L_add( t02, t11 ); - s09 = L_add( t03, t10 ); - s11 = L_sub( t03, t10 ); - s02 = L_add( t04, t12 ); - s07 = L_sub( t04, t12 ); - s03 = L_add( t05, t13 ); - s06 = L_sub( t13, t05 ); - t01 = L_add( t06, t14 ); - t02 = L_sub( t06, t14 ); - t00 = L_add( t07, t15 ); - t03 = L_sub( t07, t15 ); + y[16] = CL_add( s[0], s[1] ); + y[20] = CL_sub( s[0], s[1] ); + y[18] = CL_sub( s[2], s[3] ); + y[22] = CL_add( s[2], s[3] ); + y[19] = CL_add( s[4], s[7] ); + y[23] = CL_sub( s[4], s[7] ); + y[17] = CL_add( s[5], s[6] ); + y[21] = CL_sub( s[5], s[6] ); + + xx[0] = x[3]; + xx[1] = x[7]; + xx[2] = x[11]; + xx[3] = x[15]; + xx[4] = x[19]; + xx[5] = x[23]; + xx[6] = x[27]; + xx[7] = x[31]; + + t[0] = CL_add( xx[0], xx[4] ); + t[1] = CL_sub( xx[0], xx[4] ); + t[2] = CL_add( xx[1], xx[5] ); + t[3] = CL_sub( xx[1], xx[5] ); + t[4] = CL_add( xx[2], xx[6] ); + t[5] = CL_sub( xx[2], xx[6] ); + t[6] = CL_add( xx[3], xx[7] ); + t[7] = CL_sub( xx[3], xx[7] ); + + s[0] = CL_add( t[0], t[4] ); + s[2] = CL_sub( t[0], t[4] ); + s[4] = CL_mac_j( t[1], t[5] ); + s[5] = CL_msu_j( t[1], t[5] ); + s[1] = CL_add( t[2], 
t[6] ); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + t[1] = CL_sub( t[3], t[7] ); { - s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 ); - s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 ); - s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 ); - s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 ); + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + s[7] = CL_conjugate( s[7] ); }; - y48 = L_add( s00, s02 ); - y56 = L_sub( s00, s02 ); - y49 = L_add( s01, s03 ); - y57 = L_sub( s01, s03 ); - y52 = L_sub( s04, s06 ); - y60 = L_add( s04, s06 ); - y53 = L_sub( s05, s07 ); - y61 = L_add( s05, s07 ); - y54 = L_add( s08, s14 ); - y62 = L_sub( s08, s14 ); - y55 = L_add( s09, s15 ); - y63 = L_sub( s09, s15 ); - y50 = L_add( s10, s12 ); - y58 = L_sub( s10, s12 ); - y51 = L_add( s11, s13 ); - y59 = L_sub( s11, s13 ); - + y[24] = CL_add( s[0], s[1] ); + y[28] = CL_sub( s[0], s[1] ); + y[26] = CL_sub( s[2], s[3] ); + y[30] = CL_add( s[2], s[3] ); + y[27] = CL_add( s[4], s[7] ); + y[31] = CL_sub( s[4], s[7] ); + y[25] = CL_add( s[5], s[6] ); + y[29] = CL_sub( s[5], s[6] ); { - as = y18; - bs = y19; - y18 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 0 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 0 + 1] ) ); - y19 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 0 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 0 + 0] ) ); + ab = y[9]; + y[9] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[0] ), CL_scale( ab, FFT_RotVector_32_fx[1] ) ); }; { - as = y20; - bs = y21; - y20 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 1 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 1 + 1] ) ); - y21 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 1 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 1 + 0] ) ); + ab = y[10]; + y[10] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[2] ), CL_scale( ab, FFT_RotVector_32_fx[3] ) 
); }; { - as = y22; - bs = y23; - y22 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 2 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 2 + 1] ) ); - y23 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 2 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 2 + 0] ) ); + ab = y[11]; + y[11] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[4] ), CL_scale( ab, FFT_RotVector_32_fx[5] ) ); }; { - as = y24; - bs = y25; - y24 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 3 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 3 + 1] ) ); - y25 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 3 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 3 + 0] ) ); + ab = y[12]; + y[12] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[6] ), CL_scale( ab, FFT_RotVector_32_fx[7] ) ); }; { - as = y26; - bs = y27; - y26 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 4 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 4 + 1] ) ); - y27 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 4 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 4 + 0] ) ); + ab = y[13]; + y[13] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[8] ), CL_scale( ab, FFT_RotVector_32_fx[9] ) ); }; { - as = y28; - bs = y29; - y28 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 5 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 5 + 1] ) ); - y29 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 5 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 5 + 0] ) ); + ab = y[14]; + y[14] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[10] ), CL_scale( ab, FFT_RotVector_32_fx[11] ) ); }; { - as = y30; - bs = y31; - y30 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 6 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 6 + 1] ) ); - y31 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 6 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 6 + 0] ) ); + ab = y[15]; + y[15] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[12] ), CL_scale( ab, FFT_RotVector_32_fx[13] ) ); }; { - as = y34; - bs = y35; - y34 = L_sub( Mpy_32_16_1( as, 
FFT_RotVector_32_fx[2 * 7 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 7 + 1] ) ); - y35 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 7 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 7 + 0] ) ); + ab = y[17]; + y[17] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[14] ), CL_scale( ab, FFT_RotVector_32_fx[15] ) ); }; { - as = y36; - bs = y37; - y36 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 8 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 8 + 1] ) ); - y37 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 8 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 8 + 0] ) ); + ab = y[18]; + y[18] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[16] ), CL_scale( ab, FFT_RotVector_32_fx[17] ) ); }; { - as = y38; - bs = y39; - y38 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 9 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 9 + 1] ) ); - y39 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 9 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 9 + 0] ) ); + ab = y[19]; + y[19] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[18] ), CL_scale( ab, FFT_RotVector_32_fx[19] ) ); }; { - as = y42; - bs = y43; - y42 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 10 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 10 + 1] ) ); - y43 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 10 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 10 + 0] ) ); + ab = y[21]; + y[21] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[20] ), CL_scale( ab, FFT_RotVector_32_fx[21] ) ); }; { - as = y44; - bs = y45; - y44 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 11 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 11 + 1] ) ); - y45 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 11 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 11 + 0] ) ); + ab = y[22]; + y[22] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[22] ), CL_scale( ab, FFT_RotVector_32_fx[23] ) ); }; { - as = y46; - bs = y47; - y46 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 12 + 0] ), Mpy_32_16_1( bs, 
FFT_RotVector_32_fx[2 * 12 + 1] ) ); - y47 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 12 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 12 + 0] ) ); + ab = y[23]; + y[23] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[24] ), CL_scale( ab, FFT_RotVector_32_fx[25] ) ); }; { - as = y50; - bs = y51; - y50 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 13 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 13 + 1] ) ); - y51 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 13 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 13 + 0] ) ); + ab = y[25]; + y[25] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[26] ), CL_scale( ab, FFT_RotVector_32_fx[27] ) ); }; { - as = y52; - bs = y53; - y52 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 14 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 14 + 1] ) ); - y53 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 14 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 14 + 0] ) ); + ab = y[26]; + y[26] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[28] ), CL_scale( ab, FFT_RotVector_32_fx[29] ) ); }; { - as = y54; - bs = y55; - y54 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 15 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 15 + 1] ) ); - y55 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 15 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 15 + 0] ) ); + ab = y[27]; + y[27] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[30] ), CL_scale( ab, FFT_RotVector_32_fx[31] ) ); }; { - as = y56; - bs = y57; - y56 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 16 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 16 + 1] ) ); - y57 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 16 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 16 + 0] ) ); + ab = y[28]; + y[28] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[32] ), CL_scale( ab, FFT_RotVector_32_fx[33] ) ); }; { - as = y58; - bs = y59; - y58 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 17 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 17 + 1] ) ); - y59 = L_add( 
Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 17 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 17 + 0] ) ); + ab = y[29]; + y[29] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[34] ), CL_scale( ab, FFT_RotVector_32_fx[35] ) ); }; { - as = y60; - bs = y61; - y60 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 18 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 18 + 1] ) ); - y61 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 18 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 18 + 0] ) ); + ab = y[30]; + y[30] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[36] ), CL_scale( ab, FFT_RotVector_32_fx[37] ) ); }; { - as = y62; - bs = y63; - y62 = L_sub( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 19 + 0] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 19 + 1] ) ); - y63 = L_add( Mpy_32_16_1( as, FFT_RotVector_32_fx[2 * 19 + 1] ), Mpy_32_16_1( bs, FFT_RotVector_32_fx[2 * 19 + 0] ) ); + ab = y[31]; + y[31] = CL_mac_j( CL_scale( ab, FFT_RotVector_32_fx[38] ), CL_scale( ab, FFT_RotVector_32_fx[39] ) ); }; - t00 = L_add( y00, y32 ); - t02 = L_sub( y00, y32 ); - t01 = L_add( y01, y33 ); - t03 = L_sub( y01, y33 ); - t04 = L_add( y16, y48 ); - t07 = L_sub( y16, y48 ); - t05 = L_add( y49, y17 ); - t06 = L_sub( y49, y17 ); - - re[s * 0] = L_add( t00, t04 ); - im[s * 0] = L_add( t01, t05 ); - re[s * 8] = L_sub( t02, t06 ); - im[s * 8] = L_sub( t03, t07 ); - re[s * 16] = L_sub( t00, t04 ); - im[s * 16] = L_sub( t01, t05 ); - re[s * 24] = L_add( t02, t06 ); - im[s * 24] = L_add( t03, t07 ); - - t00 = L_add( y02, y34 ); - t02 = L_sub( y02, y34 ); - t01 = L_add( y03, y35 ); - t03 = L_sub( y03, y35 ); - t04 = L_add( y18, y50 ); - t07 = L_sub( y18, y50 ); - t05 = L_add( y51, y19 ); - t06 = L_sub( y51, y19 ); - - re[s * 1] = L_add( t00, t04 ); - im[s * 1] = L_add( t01, t05 ); - re[s * 9] = L_sub( t02, t06 ); - im[s * 9] = L_sub( t03, t07 ); - re[s * 17] = L_sub( t00, t04 ); - im[s * 17] = L_sub( t01, t05 ); - re[s * 25] = L_add( t02, t06 ); - im[s * 25] = L_add( t03, t07 ); - - t00 = L_add( 
y04, y36 ); - t02 = L_sub( y04, y36 ); - t01 = L_add( y05, y37 ); - t03 = L_sub( y05, y37 ); - t04 = L_add( y20, y52 ); - t07 = L_sub( y20, y52 ); - t05 = L_add( y53, y21 ); - t06 = L_sub( y53, y21 ); - - re[s * 2] = L_add( t00, t04 ); - im[s * 2] = L_add( t01, t05 ); - re[s * 10] = L_sub( t02, t06 ); - im[s * 10] = L_sub( t03, t07 ); - re[s * 18] = L_sub( t00, t04 ); - im[s * 18] = L_sub( t01, t05 ); - re[s * 26] = L_add( t02, t06 ); - im[s * 26] = L_add( t03, t07 ); - - t00 = L_add( y06, y38 ); - t02 = L_sub( y06, y38 ); - t01 = L_add( y07, y39 ); - t03 = L_sub( y07, y39 ); - t04 = L_add( y22, y54 ); - t07 = L_sub( y22, y54 ); - t05 = L_add( y55, y23 ); - t06 = L_sub( y55, y23 ); - - re[s * 3] = L_add( t00, t04 ); - im[s * 3] = L_add( t01, t05 ); - re[s * 11] = L_sub( t02, t06 ); - im[s * 11] = L_sub( t03, t07 ); - re[s * 19] = L_sub( t00, t04 ); - im[s * 19] = L_sub( t01, t05 ); - re[s * 27] = L_add( t02, t06 ); - im[s * 27] = L_add( t03, t07 ); - - t00 = L_add( y08, y41 ); - t02 = L_sub( y08, y41 ); - t01 = L_sub( y09, y40 ); - t03 = L_add( y09, y40 ); - t04 = L_add( y24, y56 ); - t07 = L_sub( y24, y56 ); - t05 = L_add( y57, y25 ); - t06 = L_sub( y57, y25 ); - - re[s * 4] = L_add( t00, t04 ); - im[s * 4] = L_add( t01, t05 ); - re[s * 12] = L_sub( t02, t06 ); - im[s * 12] = L_sub( t03, t07 ); - re[s * 20] = L_sub( t00, t04 ); - im[s * 20] = L_sub( t01, t05 ); - re[s * 28] = L_add( t02, t06 ); - im[s * 28] = L_add( t03, t07 ); - - t00 = L_add( y10, y42 ); - t02 = L_sub( y10, y42 ); - t01 = L_add( y11, y43 ); - t03 = L_sub( y11, y43 ); - t04 = L_add( y26, y58 ); - t07 = L_sub( y26, y58 ); - t05 = L_add( y59, y27 ); - t06 = L_sub( y59, y27 ); - - re[s * 5] = L_add( t00, t04 ); - im[s * 5] = L_add( t01, t05 ); - re[s * 13] = L_sub( t02, t06 ); - im[s * 13] = L_sub( t03, t07 ); - re[s * 21] = L_sub( t00, t04 ); - im[s * 21] = L_sub( t01, t05 ); - re[s * 29] = L_add( t02, t06 ); - im[s * 29] = L_add( t03, t07 ); - - t00 = L_add( y12, y44 ); - t02 = L_sub( y12, y44 ); 
- t01 = L_add( y13, y45 ); - t03 = L_sub( y13, y45 ); - t04 = L_add( y28, y60 ); - t07 = L_sub( y28, y60 ); - t05 = L_add( y61, y29 ); - t06 = L_sub( y61, y29 ); - - re[s * 6] = L_add( t00, t04 ); - im[s * 6] = L_add( t01, t05 ); - re[s * 14] = L_sub( t02, t06 ); - im[s * 14] = L_sub( t03, t07 ); - re[s * 22] = L_sub( t00, t04 ); - im[s * 22] = L_sub( t01, t05 ); - re[s * 30] = L_add( t02, t06 ); - im[s * 30] = L_add( t03, t07 ); - - t00 = L_add( y14, y46 ); - t02 = L_sub( y14, y46 ); - t01 = L_add( y15, y47 ); - t03 = L_sub( y15, y47 ); - t04 = L_add( y30, y62 ); - t07 = L_sub( y30, y62 ); - t05 = L_add( y63, y31 ); - t06 = L_sub( y63, y31 ); - - re[s * 7] = L_add( t00, t04 ); - im[s * 7] = L_add( t01, t05 ); - re[s * 15] = L_sub( t02, t06 ); - im[s * 15] = L_sub( t03, t07 ); - re[s * 23] = L_sub( t00, t04 ); - im[s * 23] = L_sub( t01, t05 ); - re[s * 31] = L_add( t02, t06 ); - im[s * 31] = L_add( t03, t07 ); + t[0] = CL_add( y[0], y[16] ); + t[1] = CL_sub( y[0], y[16] ); + t[2] = CL_add( y[8], y[24] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[8] ), CL_conjugate( y[24] ) ) ); + + x[0] = CL_add( t[0], t[2] ); + x[8] = CL_sub( t[1], t[3] ); + x[16] = CL_sub( t[0], t[2] ); + x[24] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[1], y[17] ); + t[1] = CL_sub( y[1], y[17] ); + t[2] = CL_add( y[9], y[25] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[9] ), CL_conjugate( y[25] ) ) ); + + x[1] = CL_add( t[0], t[2] ); + x[9] = CL_sub( t[1], t[3] ); + x[17] = CL_sub( t[0], t[2] ); + x[25] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[2], y[18] ); + t[1] = CL_sub( y[2], y[18] ); + t[2] = CL_add( y[10], y[26] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[10] ), CL_conjugate( y[26] ) ) ); + + x[2] = CL_add( t[0], t[2] ); + x[10] = CL_sub( t[1], t[3] ); + x[18] = CL_sub( t[0], t[2] ); + x[26] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[3], y[19] ); + t[1] = CL_sub( y[3], y[19] ); + t[2] = CL_add( y[11], y[27] ); + t[3] = CL_swap_real_imag( CL_sub( 
CL_conjugate( y[11] ), CL_conjugate( y[27] ) ) ); + + x[3] = CL_add( t[0], t[2] ); + x[11] = CL_sub( t[1], t[3] ); + x[19] = CL_sub( t[0], t[2] ); + x[27] = CL_add( t[1], t[3] ); + + t[0] = CL_msu_j( y[4], y[20] ); + t[1] = CL_mac_j( y[4], y[20] ); + t[2] = CL_add( y[12], y[28] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[12] ), CL_conjugate( y[28] ) ) ); + + x[4] = CL_add( t[0], t[2] ); + x[12] = CL_sub( t[1], t[3] ); + x[20] = CL_sub( t[0], t[2] ); + x[28] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[5], y[21] ); + t[1] = CL_sub( y[5], y[21] ); + t[2] = CL_add( y[13], y[29] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[13] ), CL_conjugate( y[29] ) ) ); + + x[5] = CL_add( t[0], t[2] ); + x[13] = CL_sub( t[1], t[3] ); + x[21] = CL_sub( t[0], t[2] ); + x[29] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[6], y[22] ); + t[1] = CL_sub( y[6], y[22] ); + t[2] = CL_add( y[14], y[30] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[14] ), CL_conjugate( y[30] ) ) ); + + x[6] = CL_add( t[0], t[2] ); + x[14] = CL_sub( t[1], t[3] ); + x[22] = CL_sub( t[0], t[2] ); + x[30] = CL_add( t[1], t[3] ); + + t[0] = CL_add( y[7], y[23] ); + t[1] = CL_sub( y[7], y[23] ); + t[2] = CL_add( y[15], y[31] ); + t[3] = CL_swap_real_imag( CL_sub( CL_conjugate( y[15] ), CL_conjugate( y[31] ) ) ); + + x[7] = CL_add( t[0], t[2] ); + x[15] = CL_sub( t[1], t[3] ); + x[23] = CL_sub( t[0], t[2] ); + x[31] = CL_add( t[1], t[3] ); return; } static void fft_lenN( - Word32 *re, - Word32 *im, + cmplx *x, const Word16 *W, const Word16 len, const Word16 dim1, @@ -5360,226 +4799,178 @@ static void fft_lenN( const Word16 Woff ) { Word16 i, j; - Word32 x[L_FRAME_MAX * 2]; + cmplx xx[L_FRAME_MAX]; FOR ( i = 0; i < dim2; i++ ) { FOR ( j = 0; j < dim1; j++ ) { - x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2]; - x[2 * i * dim1 + 2 * j + 1] = im[sx * i + sx * j * dim2]; + xx[i * dim1 + j].re = x[sx * i + sx * j * dim2].re; + xx[i * dim1 + j].im = x[sx * i + sx * j * dim2].im; } } - 
SWITCH ( dim1 ) + SWITCH( dim1 ) { case 5: FOR ( i = 0; i < dim2; i++ ) { - fft_len5( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len5( &xx[i * dim1] ); } BREAK; case 8: FOR ( i = 0; i < dim2; i++ ) { - fft_len8( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len8( &xx[i * dim1] ); } BREAK; case 10: FOR ( i = 0; i < dim2; i++ ) { - fft_len10( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len10( &xx[i * dim1] ); } BREAK; case 15: FOR ( i = 0; i < dim2; i++ ) { - fft_len15( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len15( &xx[i * dim1] ); } BREAK; case 16: FOR ( i = 0; i < dim2; i++ ) { - fft_len16( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len16( &xx[i * dim1] ); } BREAK; case 20: FOR ( i = 0; i < dim2; i++ ) { - fft_len20_fx( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len20_fx( &xx[i * dim1] ); } BREAK; case 30: FOR ( i = 0; i < dim2; i++ ) { - fft_len30( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len30( &xx[i * dim1] ); } BREAK; case 32: FOR ( i = 0; i < dim2; i++ ) { - fft_len32( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); + fft_len32( &xx[i * dim1] ); } BREAK; } - - SWITCH ( dim2 ) + SWITCH( dim2 ) { case 8: { - Word32 x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15; - Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15; - Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15; + cmplx t[8]; + cmplx s[8]; + cmplx y[8]; - IF (EQ_16(dim1, 30) || EQ_16(dim1, 20) || EQ_16(dim1, 15) || EQ_16(dim1, 10) || EQ_16(dim1, 5)) + IF( EQ_16( dim1, 30 ) || EQ_16( dim1, 20 ) || EQ_16( dim1, 15 ) || EQ_16( dim1, 10 ) || EQ_16( dim1, 5 ) ) { FOR ( i = 0; i < dim1; i++ ) { { - x00 = x[2 * i + 2 * 0 * dim1]; - x01 = x[2 * i + 2 * 0 * dim1 + 1]; + y[0] = xx[i + 0 * dim1]; }; - IF (EQ_16(i, 0)) + IF( EQ_16( i, 0 ) ) { { - x02 = x[2 * i + 2 * 1 * dim1]; - x03 = x[2 * i + 2 * 1 * dim1 + 1]; + y[1] = xx[i + 1 * dim1]; }; { - x04 = x[2 * i + 2 
* 2 * dim1]; - x05 = x[2 * i + 2 * 2 * dim1 + 1]; + y[2] = xx[i + 2 * dim1]; }; { - x06 = x[2 * i + 2 * 3 * dim1]; - x07 = x[2 * i + 2 * 3 * dim1 + 1]; + y[3] = xx[i + 3 * dim1]; }; { - x08 = x[2 * i + 2 * 4 * dim1]; - x09 = x[2 * i + 2 * 4 * dim1 + 1]; + y[4] = xx[i + 4 * dim1]; }; { - x10 = x[2 * i + 2 * 5 * dim1]; - x11 = x[2 * i + 2 * 5 * dim1 + 1]; + y[5] = xx[i + 5 * dim1]; }; { - x12 = x[2 * i + 2 * 6 * dim1]; - x13 = x[2 * i + 2 * 6 * dim1 + 1]; + y[6] = xx[i + 6 * dim1]; }; { - x14 = x[2 * i + 2 * 7 * dim1]; - x15 = x[2 * i + 2 * 7 * dim1 + 1]; + y[7] = xx[i + 7 * dim1]; }; } ELSE { { - x02 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 * 2 + 1 - Woff])); - x03 = L_add( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 * 2 - Woff] )); + y[1] = CL_mac_j( CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 - Woff] ), + CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 * 2 + 1 - Woff] ) ); }; { - x04 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 * 2 + 1 - Woff])); - x05 = L_add( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 * 2 - Woff] )); + y[2] = CL_mac_j( CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 - Woff] ), + CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 * 2 + 1 - Woff] ) ); }; { - x06 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 * 2 + 1 - Woff] )); - x07 = L_add( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 
* 2 - Woff] )); + y[3] = CL_mac_j( CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 - Woff] ), + CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 * 2 + 1 - Woff] ) ); }; { - x08 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 * 2 + 1 - Woff] )); - x09 = L_add( Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 * 2 - Woff] )); + y[4] = CL_mac_j( CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 - Woff] ), + CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 * 2 + 1 - Woff] ) ); }; { - x10 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 - Woff]), - Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 * 2 + 1 - Woff] )); - x11 = L_add( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 * 2 - Woff] )); + y[5] = CL_mac_j( CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 - Woff] ), + CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 * 2 + 1 - Woff] ) ); }; { - x12 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 * 2 + 1 - Woff] )); - x13 = L_add( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 * 2 - Woff] )); + y[6] = CL_mac_j( CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 - Woff] ), + CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 * 2 + 1 - Woff] ) ); }; { - x14 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 * 2 + 1 - Woff] )); - x15 = L_add( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], 
W[sc * i + sc * 7 * dim1 * 2 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 * 2 - Woff] )); + y[7] = CL_mac_j( CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 - Woff] ), + CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 * 2 + 1 - Woff] ) ); }; } - t00 = L_add( x00, x08 ); - t02 = L_sub( x00, x08 ); - t01 = L_add( x01, x09 ); - t03 = L_sub( x01, x09 ); - t04 = L_add( x02, x10 ); - t06 = L_sub( x02, x10 ); - t05 = L_add( x03, x11 ); - t07 = L_sub( x03, x11 ); - t08 = L_add( x04, x12 ); - t10 = L_sub( x04, x12 ); - t09 = L_add( x05, x13 ); - t11 = L_sub( x05, x13 ); - t12 = L_add( x06, x14 ); - t14 = L_sub( x06, x14 ); - t13 = L_add( x07, x15 ); - t15 = L_sub( x07, x15 ); - - s00 = L_add( t00, t08 ); - s04 = L_sub( t00, t08 ); - s01 = L_add( t01, t09 ); - s05 = L_sub( t01, t09 ); - s08 = L_sub( t02, t11 ); - s10 = L_add( t02, t11 ); - s09 = L_add( t03, t10 ); - s11 = L_sub( t03, t10 ); - s02 = L_add( t04, t12 ); - s07 = L_sub( t04, t12 ); - s03 = L_add( t05, t13 ); - s06 = L_sub( t13, t05 ); - - t01 = L_add( t06, t14 ); - t02 = L_sub( t06, t14 ); - t00 = L_add( t07, t15 ); - t03 = L_sub( t07, t15 ); - - s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 ); - s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 ); - s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 ); - s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 ); - - re[sx * i + sx * 0 * dim1] = L_add( s00, s02 ); - im[sx * i + sx * 0 * dim1] = L_add( s01, s03 ); - re[sx * i + sx * 1 * dim1] = L_add( s10, s12 ); - im[sx * i + sx * 1 * dim1] = L_add( s11, s13 ); - re[sx * i + sx * 2 * dim1] = L_sub( s04, s06 ); - im[sx * i + sx * 2 * dim1] = L_sub( s05, s07 ); - re[sx * i + sx * 3 * dim1] = L_add( s08, s14 ); - im[sx * i + sx * 3 * dim1] = L_add( s09, s15 ); - re[sx * i + sx * 4 * dim1] = L_sub( s00, s02 ); - im[sx * i + sx * 4 * dim1] = L_sub( s01, s03 ); - re[sx * i + sx * 5 * dim1] = L_sub( s10, s12 ); - im[sx * i + sx * 5 * dim1] = L_sub( s11, s13 ); - re[sx * i + sx * 6 * 
dim1] = L_add( s04, s06 ); - im[sx * i + sx * 6 * dim1] = L_add( s05, s07 ); - re[sx * i + sx * 7 * dim1] = L_sub( s08, s14 ); - im[sx * i + sx * 7 * dim1] = L_sub( s09, s15 ); + t[0] = CL_add( y[0], y[4] ); + t[1] = CL_sub( y[0], y[4] ); + t[2] = CL_add( y[1], y[5] ); + t[3] = CL_sub( y[1], y[5] ); + t[4] = CL_add( y[2], y[6] ); + t[5] = CL_sub( y[2], y[6] ); + t[6] = CL_add( y[3], y[7] ); + t[7] = CL_sub( y[3], y[7] ); + + s[0] = CL_add( t[0], t[4] ); + s[2] = CL_sub( t[0], t[4] ); + s[4] = CL_mac_j( t[1], t[5] ); + s[5] = CL_msu_j( t[1], t[5] ); + s[1] = CL_add( t[2], t[6] ); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + t[1] = CL_sub( t[3], t[7] ); + + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + s[7] = CL_conjugate( s[7] ); + + x[sx * i + sx * 0 * dim1] = CL_add( s[0], s[1] ); + x[sx * i + sx * 1 * dim1] = CL_add( s[5], s[6] ); + x[sx * i + sx * 2 * dim1] = CL_sub( s[2], s[3] ); + x[sx * i + sx * 3 * dim1] = CL_add( s[4], s[7] ); + x[sx * i + sx * 4 * dim1] = CL_sub( s[0], s[1] ); + x[sx * i + sx * 5 * dim1] = CL_sub( s[5], s[6] ); + x[sx * i + sx * 6 * dim1] = CL_add( s[2], s[3] ); + x[sx * i + sx * 7 * dim1] = CL_sub( s[4], s[7] ); } } ELSE @@ -5587,142 +4978,95 @@ static void fft_lenN( FOR ( i = 0; i < dim1; i++ ) { { - x00 = x[2 * i + 2 * 0 * dim1]; - x01 = x[2 * i + 2 * 0 * dim1 + 1]; + y[0] = xx[i + 0 * dim1]; }; - IF (EQ_16(i, 0)) + IF( EQ_16( i, 0 ) ) { { - x02 = x[2 * i + 2 * 1 * dim1]; - x03 = x[2 * i + 2 * 1 * dim1 + 1]; + y[1] = xx[i + 1 * dim1]; }; { - x04 = x[2 * i + 2 * 2 * dim1]; - x05 = x[2 * i + 2 * 2 * dim1 + 1]; + y[2] = xx[i + 2 * dim1]; }; { - x06 = x[2 * i + 2 * 3 * dim1]; - x07 = x[2 * i + 2 * 3 * dim1 + 1]; + y[3] = xx[i + 3 * dim1]; }; { - x08 = x[2 * i + 2 * 4 * dim1]; - x09 = x[2 * i + 2 * 4 * dim1 + 1]; + y[4] = xx[i + 4 * dim1]; }; { - x10 = x[2 * i 
+ 2 * 5 * dim1]; - x11 = x[2 * i + 2 * 5 * dim1 + 1]; + y[5] = xx[i + 5 * dim1]; }; { - x12 = x[2 * i + 2 * 6 * dim1]; - x13 = x[2 * i + 2 * 6 * dim1 + 1]; + y[6] = xx[i + 6 * dim1]; }; { - x14 = x[2 * i + 2 * 7 * dim1]; - x15 = x[2 * i + 2 * 7 * dim1 + 1]; + y[7] = xx[i + 7 * dim1]; }; } ELSE { { - x02 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 + 1 - Woff])); - x03 = L_add( Mpy_32_16_1(x[2 * i + 2 * 1 * dim1], W[sc * i + sc * 1 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 - Woff] )); + y[1] = CL_mac_j( CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 - Woff] ), + CL_scale( xx[i + 1 * dim1], W[sc * i + sc * 1 * dim1 + 1 - Woff] ) ); }; { - x04 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 + 1 - Woff])); - x05 = L_add( Mpy_32_16_1(x[2 * i + 2 * 2 * dim1], W[sc * i + sc * 2 * dim1 + 1 - Woff] ), - Mpy_32_16_1( x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 - Woff])); + y[2] = CL_mac_j( CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 - Woff] ), + CL_scale( xx[i + 2 * dim1], W[sc * i + sc * 2 * dim1 + 1 - Woff] ) ); }; { - x06 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 + 1 - Woff] )); - x07 = L_add( Mpy_32_16_1(x[2 * i + 2 * 3 * dim1], W[sc * i + sc * 3 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 - Woff] )); + y[3] = CL_mac_j( CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 - Woff] ), + CL_scale( xx[i + 3 * dim1], W[sc * i + sc * 3 * dim1 + 1 - Woff] ) ); }; { - x08 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 + 1 - Woff])); - x09 = L_add( 
Mpy_32_16_1(x[2 * i + 2 * 4 * dim1], W[sc * i + sc * 4 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 - Woff] )); + y[4] = CL_mac_j( CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 - Woff] ), + CL_scale( xx[i + 4 * dim1], W[sc * i + sc * 4 * dim1 + 1 - Woff] ) ); }; { - x10 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 + 1 - Woff] )); - x11 = L_add( Mpy_32_16_1(x[2 * i + 2 * 5 * dim1], W[sc * i + sc * 5 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 - Woff] )); + y[5] = CL_mac_j( CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 - Woff] ), + CL_scale( xx[i + 5 * dim1], W[sc * i + sc * 5 * dim1 + 1 - Woff] ) ); }; { - x12 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 + 1 - Woff] )); - x13 = L_add( Mpy_32_16_1(x[2 * i + 2 * 6 * dim1], W[sc * i + sc * 6 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 - Woff] )); + y[6] = CL_mac_j( CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 - Woff] ), + CL_scale( xx[i + 6 * dim1], W[sc * i + sc * 6 * dim1 + 1 - Woff] ) ); }; { - x14 = L_sub( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], W[sc * i + sc * 7 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 + 1 - Woff] )); - x15 = L_add( Mpy_32_16_1(x[2 * i + 2 * 7 * dim1], W[sc * i + sc * 7 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 - Woff] )); + y[7] = CL_mac_j( CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 - Woff] ), + CL_scale( xx[i + 7 * dim1], W[sc * i + sc * 7 * dim1 + 1 - Woff] ) ); }; } - t00 = L_add( x00, x08 ); - t02 = L_sub( x00, x08 ); - t01 = L_add( x01, x09 ); - t03 = L_sub( x01, x09 ); - t04 = L_add( x02, x10 ); - t06 = L_sub( x02, x10 ); - t05 = 
L_add( x03, x11 ); - t07 = L_sub( x03, x11 ); - t08 = L_add( x04, x12 ); - t10 = L_sub( x04, x12 ); - t09 = L_add( x05, x13 ); - t11 = L_sub( x05, x13 ); - t12 = L_add( x06, x14 ); - t14 = L_sub( x06, x14 ); - t13 = L_add( x07, x15 ); - t15 = L_sub( x07, x15 ); - - s00 = L_add( t00, t08 ); - s04 = L_sub( t00, t08 ); - s01 = L_add( t01, t09 ); - s05 = L_sub( t01, t09 ); - s08 = L_sub( t02, t11 ); - s10 = L_add( t02, t11 ); - s09 = L_add( t03, t10 ); - s11 = L_sub( t03, t10 ); - s02 = L_add( t04, t12 ); - s07 = L_sub( t04, t12 ); - s03 = L_add( t05, t13 ); - s06 = L_sub( t13, t05 ); - - t01 = L_add( t06, t14 ); - t02 = L_sub( t06, t14 ); - t00 = L_add( t07, t15 ); - t03 = L_sub( t07, t15 ); - - s12 = Mpy_32_16_1( L_add( t00, t02 ), FFT_C81 ); - s14 = Mpy_32_16_1( L_sub( t00, t02 ), FFT_C81 ); - s13 = Mpy_32_16_1( L_sub( t03, t01 ), FFT_C81 ); - s15 = Mpy_32_16_1( L_add( t01, t03 ), FFT_C82 ); - - re[sx * i + sx * 0 * dim1] = L_add( s00, s02 ); - im[sx * i + sx * 0 * dim1] = L_add( s01, s03 ); - re[sx * i + sx * 1 * dim1] = L_add( s10, s12 ); - im[sx * i + sx * 1 * dim1] = L_add( s11, s13 ); - re[sx * i + sx * 2 * dim1] = L_sub( s04, s06 ); - im[sx * i + sx * 2 * dim1] = L_sub( s05, s07 ); - re[sx * i + sx * 3 * dim1] = L_add( s08, s14 ); - im[sx * i + sx * 3 * dim1] = L_add( s09, s15 ); - re[sx * i + sx * 4 * dim1] = L_sub( s00, s02 ); - im[sx * i + sx * 4 * dim1] = L_sub( s01, s03 ); - re[sx * i + sx * 5 * dim1] = L_sub( s10, s12 ); - im[sx * i + sx * 5 * dim1] = L_sub( s11, s13 ); - re[sx * i + sx * 6 * dim1] = L_add( s04, s06 ); - im[sx * i + sx * 6 * dim1] = L_add( s05, s07 ); - re[sx * i + sx * 7 * dim1] = L_sub( s08, s14 ); - im[sx * i + sx * 7 * dim1] = L_sub( s09, s15 ); + t[0] = CL_add( y[0], y[4] ); + t[1] = CL_sub( y[0], y[4] ); + t[2] = CL_add( y[1], y[5] ); + t[3] = CL_sub( y[1], y[5] ); + t[4] = CL_add( y[2], y[6] ); + t[5] = CL_sub( y[2], y[6] ); + t[6] = CL_add( y[3], y[7] ); + t[7] = CL_sub( y[3], y[7] ); + + s[0] = CL_add( t[0], t[4] ); + s[2] = 
CL_sub( t[0], t[4] ); + s[4] = CL_mac_j( t[1], t[5] ); + s[5] = CL_msu_j( t[1], t[5] ); + s[1] = CL_add( t[2], t[6] ); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + t[1] = CL_sub( t[3], t[7] ); + + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); + s[7] = CL_conjugate( s[7] ); + + x[sx * i + sx * 0 * dim1] = CL_add( s[0], s[1] ); + x[sx * i + sx * 1 * dim1] = CL_add( s[5], s[6] ); + x[sx * i + sx * 2 * dim1] = CL_sub( s[2], s[3] ); + x[sx * i + sx * 3 * dim1] = CL_add( s[4], s[7] ); + x[sx * i + sx * 4 * dim1] = CL_sub( s[0], s[1] ); + x[sx * i + sx * 5 * dim1] = CL_sub( s[5], s[6] ); + x[sx * i + sx * 6 * dim1] = CL_add( s[2], s[3] ); + x[sx * i + sx * 7 * dim1] = CL_sub( s[4], s[7] ); } } BREAK; @@ -5730,42 +5074,36 @@ static void fft_lenN( case 10: { - Word32 y[2 * 10]; + cmplx y[10]; FOR ( j = 0; j < dim2; j++ ) { { - y[2 * j] = x[2 * 0 + 2 * j * dim1]; - y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1]; + y[j] = xx[0 + j * dim1]; }; } - fft_len10( &y[0], &y[1], 2 ); + fft_len10( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * 0 + sx * j * dim1] = y[2 * j]; - im[sx * 0 + sx * j * dim1] = y[2 * j + 1]; + x[sx * 0 + sx * j * dim1] = y[j]; } FOR ( i = 1; i < dim1; i++ ) { { - y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1]; - y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1]; + y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1]; } FOR ( j = 1; j < dim2; j++ ) { { - y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 + 1 - Woff] )); - y[2 * ( j + 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 - Woff] )); + y[( j + 0 )] = CL_mac_j( 
CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ), + CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ) ); } } - fft_len10( &y[0], &y[1], 2 ); + fft_len10( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * i + sx * j * dim1] = y[2 * j]; - im[sx * i + sx * j * dim1] = y[2 * j + 1]; + x[sx * i + sx * j * dim1] = y[j]; } } BREAK; @@ -5773,42 +5111,36 @@ static void fft_lenN( case 16: { - Word32 y[2 * 16]; + cmplx y[16]; FOR ( j = 0; j < dim2; j++ ) { { - y[2 * j] = x[2 * 0 + 2 * j * dim1]; - y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1]; + y[j] = xx[0 + j * dim1]; }; } - fft_len16( &y[0], &y[1], 2 ); + fft_len16( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * 0 + sx * j * dim1] = y[2 * j]; - im[sx * 0 + sx * j * dim1] = y[2 * j + 1]; + x[sx * 0 + sx * j * dim1] = y[j]; } FOR ( i = 1; i < dim1; i++ ) { { - y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1]; - y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1]; + y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1]; } FOR ( j = 1; j < dim2; j++ ) { { - y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 + 1 - Woff] )); - y[2 * ( j + 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + sc * j * dim1 - Woff] )); + y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 - Woff] ), + CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + sc * j * dim1 + 1 - Woff] ) ); } } - fft_len16( &y[0], &y[1], 2 ); + fft_len16( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * i + sx * j * dim1] = y[2 * j]; - im[sx * i + sx * j * dim1] = y[2 * j + 1]; + x[sx * i + sx * j * dim1] = y[j]; } } BREAK; @@ -5816,54 +5148,44 @@ static void fft_lenN( case 20: { - Word32 y[2 * 20]; + cmplx y[20]; FOR ( j = 0; j < dim2; j++ ) { { - y[2 * j] = x[2 
* 0 + 2 * j * dim1]; - y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1]; + y[j] = xx[0 + j * dim1]; }; } - fft_len20_fx( &y[0], &y[1], 2 ); + fft_len20_fx( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * 0 + sx * j * dim1] = y[2 * j]; - im[sx * 0 + sx * j * dim1] = y[2 * j + 1]; + x[sx * 0 + sx * j * dim1] = y[j]; } FOR ( i = 1; i < dim1; i++ ) { { - y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1]; - y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1]; + y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1]; } { - y[2 * ( 0 + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 + 1 - Woff])); - y[2 * ( 0 + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 - Woff])); + y[( 0 + 1 )] = CL_mac_j( CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ), + CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ) ); } FOR ( j = 2; j < dim2; j = j + 2 ) { { - y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 + 1 - Woff] )); - y[2 * ( j + 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 - Woff] )); + y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ), + CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ) ); } { - y[2 * ( j + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 + 1 - Woff] )); - y[2 * ( j + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * 
dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 - Woff] )); + y[( j + 1 )] = CL_mac_j( CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ), + CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ) ); } } - fft_len20_fx( &y[0], &y[1], 2 ); + fft_len20_fx( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * i + sx * j * dim1] = y[2 * j]; - im[sx * i + sx * j * dim1] = y[2 * j + 1]; + x[sx * i + sx * j * dim1] = y[j]; } } BREAK; @@ -5871,64 +5193,52 @@ static void fft_lenN( case 32: { - Word32 y[2 * 32]; + cmplx y[32]; FOR ( j = 0; j < dim2; j++ ) { { - y[2 * j] = x[2 * 0 + 2 * j * dim1]; - y[2 * j + 1] = x[2 * 0 + 2 * j * dim1 + 1]; + y[j] = xx[0 + j * dim1]; }; } - fft_len32( &y[0], &y[1], 2 ); + fft_len32( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * 0 + sx * j * dim1] = y[2 * j]; - im[sx * 0 + sx * j * dim1] = y[2 * j + 1]; + x[sx * 0 + sx * j * dim1] = y[j]; } FOR ( i = 1; i < dim1; i++ ) { { - y[2 * ( 0 + 0 )] = x[2 * i + 2 * ( 0 + 0 ) * dim1]; - y[2 * ( 0 + 0 ) + 1] = x[2 * i + 2 * ( 0 + 0 ) * dim1 + 1]; + y[( 0 + 0 )] = xx[i + ( 0 + 0 ) * dim1]; } { - y[2 * ( 0 + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 + 1 - Woff] )); - y[2 * ( 0 + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( 0 + 1 ) * dim1 + 1], W[len + sc * i + 0 * dim1 - Woff] )); + y[( 0 + 1 )] = CL_mac_j( CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 - Woff] ), + CL_scale( xx[i + ( 0 + 1 ) * dim1], W[len + sc * i + 0 * dim1 + 1 - Woff] ) ); } FOR ( j = 2; j < dim2; j = j + 2 ) { { - y[2 * ( j + 0 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 + 1 - Woff] )); - y[2 * ( j 
+ 0 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 0 ) * dim1 + 1], W[sc * i + j * dim1 - Woff] )); + y[( j + 0 )] = CL_mac_j( CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 - Woff] ), + CL_scale( xx[i + ( j + 0 ) * dim1], W[sc * i + j * dim1 + 1 - Woff] ) ); } { - y[2 * ( j + 1 )] = L_sub( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 + 1 - Woff] )); - y[2 * ( j + 1 ) + 1] = L_add( Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ), - Mpy_32_16_1(x[2 * i + 2 * ( j + 1 ) * dim1 + 1], W[len + sc * i + j * dim1 - Woff] )); + y[( j + 1 )] = CL_mac_j( CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 - Woff] ), + CL_scale( xx[i + ( j + 1 ) * dim1], W[len + sc * i + j * dim1 + 1 - Woff] ) ); } } - fft_len32( &y[0], &y[1], 2 ); + fft_len32( &y[0] ); FOR ( j = 0; j < dim2; j++ ) { - re[sx * i + sx * j * dim1] = y[2 * j]; - im[sx * i + sx * j * dim1] = y[2 * j + 1]; + x[sx * i + sx * j * dim1] = y[j]; } } BREAK; } } - return; } - /*-----------------------------------------------------------------* * fft_fx() * @@ -5936,72 +5246,89 @@ static void fft_lenN( *-----------------------------------------------------------------*/ void fft_fx( - Word32 *re, /* i/o: real part */ - Word32 *im, /* i/o: imag part */ + Word32 *re, /* i/o: real part */ + Word32 *im, /* i/o: imag part */ const Word16 length, /* i : length of fft */ const Word16 s /* i : sign */ ) { - SWITCH ( length ) + cmplx x[960]; + + FOR ( Word32 j = 0; j < length; j++ ) + { + x[j].re = re[s * j]; + x[j].im = im[s * j]; + } + + SWITCH( length ) { case 20: - fft_len20_fx( re, im, s ); + fft_len20_fx( x ); BREAK; case 40: - fft_lenN( re, im, FFT_RotVector_640_fx, 640, 5, 8, s, 8, 40 ); + fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, s, 8, 40 ); BREAK; case 64: - fft_lenN( 
re, im, FFT_RotVector_256_fx, 256, 8, 8, s, 8, 64 ); + fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, s, 8, 64 ); BREAK; case 80: - fft_lenN( re, im, FFT_RotVector_640_fx, 640, 10, 8, s, 4, 40 ); + fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, s, 4, 40 ); BREAK; case 100: - fft_lenN( re, im, FFT_RotVector_400_fx, 400, 10, 10, s, 4, 40 ); + fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, s, 4, 40 ); BREAK; case 120: - fft_lenN( re, im, FFT_RotVector_960_fx, 960, 15, 8, s, 4, 60 ); + fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, s, 4, 60 ); BREAK; case 128: - fft_lenN( re, im, FFT_RotVector_256_fx, 256, 16, 8, s, 4, 64 ); + fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, s, 4, 64 ); BREAK; case 160: - fft_lenN( re, im, FFT_RotVector_640_fx, 640, 20, 8, s, 2, 40 ); + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, s, 2, 40 ); BREAK; case 200: - fft_lenN( re, im, FFT_RotVector_400_fx, 400, 20, 10, s, 2, 40 ); + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, s, 2, 40 ); BREAK; case 240: - fft_lenN( re, im, FFT_RotVector_960_fx, 960, 30, 8, s, 2, 60 ); + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, s, 2, 60 ); BREAK; case 256: - fft_lenN( re, im, FFT_RotVector_256_fx, 256, 32, 8, s, 2, 64 ); + fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, s, 2, 64 ); BREAK; case 320: - fft_lenN( re, im, FFT_RotVector_640_fx, 640, 20, 16, s, 2, 40 ); + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, s, 2, 40 ); BREAK; case 400: - fft_lenN( re, im, FFT_RotVector_400_fx, 400, 20, 20, s, 2, 40 ); + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, s, 2, 40 ); BREAK; case 480: - fft_lenN( re, im, FFT_RotVector_960_fx, 960, 30, 16, s, 2, 60 ); + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, s, 2, 60 ); BREAK; case 600: - fft_lenN( re, im, FFT_RotVector_600_fx, 600, 30, 20, s, 2, 60 ); + fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, s, 2, 60 ); BREAK; case 640: - fft_lenN( re, im, FFT_RotVector_640_fx, 640, 20, 32, s, 2, 40 ); + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, s, 2, 40 ); 
BREAK; case 960: - fft_lenN( re, im, FFT_RotVector_960_fx, 960, 30, 32, s, 2, 60 ); + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, s, 2, 60 ); BREAK; default: assert( !"fft length is not supported!" ); } + FOR ( Word32 j = 0; j < length; j++ ) + { + re[s * j] = x[j].re; + im[s * j] = x[j].im; + } + return; } +#if 0 +/* Functions are already in fixed point and available in fft.c file */ + #define WMC_TOOL_SKIP #define SHC( x ) ( (Word16) x ) @@ -6170,9 +5497,9 @@ static void BASOP_fftN2( Word32 t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15; Word32 s00, s01, s02, s03, s04, s05, s06, s07, s08, s09, s10, s11, s12, s13, s14, s15; - FOR( i = 0; i < dim2; i++ ) + FOR ( i = 0; i < dim2; i++ ) { - FOR( j = 0; j < dim1; j++ ) + FOR ( j = 0; j < dim1; j++ ) { x[2 * i * dim1 + 2 * j] = re[sx * i + sx * j * dim2]; move32(); @@ -6182,13 +5509,13 @@ static void BASOP_fftN2( } /* dim1 == 8 */ - FOR( i = 0; i < dim2; i++ ) + FOR ( i = 0; i < dim2; i++ ) { BASOP_fft8( &x[i * 2 * dim1], &x[i * 2 * dim1 + 1], 2 ); } /* dim2 == 8 */ - FOR( i = 0; i < dim1; i++ ) + FOR ( i = 0; i < dim1; i++ ) { cplxMpy4_8_1( x00, x01, x[2 * i + 2 * 0 * dim1], x[2 * i + 2 * 0 * dim1 + 1] ); @@ -6314,5 +5641,5 @@ void BASOP_cfft_fx( return; } - #undef WMC_TOOL_SKIP +#endif diff --git a/lib_com/fft_rel.c b/lib_com/fft_rel.c index 810c3a7e2..3de721907 100644 --- a/lib_com/fft_rel.c +++ b/lib_com/fft_rel.c @@ -479,4 +479,412 @@ void fft_rel_fx( } return; -} \ No newline at end of file +} + +#if 0 +void fft_rel_fx32( + Word32 x[], /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +) +{ + Word16 i, j, k, n1, n2, n4; + Word16 step; + Word32 xt, t1, t2; + Word32 *x0, *x1, *x2; + const Word16 *s, *c; + Word32 *xi2, *xi3, *xi4, *xi1; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; +#endif + + + /*-----------------------------------------------------------------* + * Digit reverse counter + 
*-----------------------------------------------------------------*/ + + j = 0; + move16(); + x0 = &x[0]; + move32(); + FOR(i = 0; i < n - 1; i++) + { + IF(LT_16(i, j)) + { + xt = x[j]; + move32(); + x[j] = *x0; + move32(); + *x0 = xt; + move32(); + } + x0++; + k = shr(n, 1); + WHILE(LE_16(k, j)) + { + j = sub(j, k); + k = shr(k, 1); + } + j = add(j, k); + } + + /*-----------------------------------------------------------------* + * Length two butterflies + *-----------------------------------------------------------------*/ + + x0 = &x[0]; + move32(); + x1 = &x[1]; + move32(); + FOR(i = 0; i < n / 2; i++) + { + xt = *x0; + move32(); +#ifdef BASOP_NOGLOB + *x0 = L_add_o(xt, *x1, &Overflow); +#else + *x0 = L_add(xt, *x1); +#endif + move32(); +#ifdef BASOP_NOGLOB + *x1 = L_sub_o(xt, *x1, &Overflow); +#else + *x1 = L_sub(xt, *x1); +#endif + move32(); + x0++; + x0++; + x1++; + x1++; + } + + /*-----------------------------------------------------------------* + * Other butterflies + * + * The implementation described in [1] has been changed by using + * table lookup for evaluating sine and cosine functions. The + * variable ind and its increment step are needed to access table + * entries. Note that this implementation assumes n4 to be so + * small that ind will never exceed the table. Thus the input + * argument n and the constant N_MAX_SAS must be set properly. 
+ *-----------------------------------------------------------------*/ + + n2 = 1; + move16(); + /* step = N_MAX_SAS/4; */ + FOR(k = 2; k <= m; k++) + { + n4 = n2; + move16(); + n2 = shl(n4, 1); + n1 = shl(n2, 1); + + step = N_MAX_SAS / n1; + + x0 = x; + x1 = x + n2; + x2 = x + add(n2, n4); + FOR(i = 0; i < n; i += n1) + { + xt = *x0; + move32(); /* xt = x[i]; */ +#ifdef BASOP_NOGLOB + *x0 = L_add_o(xt, *x1, &Overflow); +#else /* BASOP_NOGLOB */ + *x0 = L_add(xt, *x1); +#endif /* BASOP_NOGLOB */ + move32(); /* x[i] = xt + x[i+n2]; */ +#ifdef BASOP_NOGLOB + *x1 = L_sub_o(xt, *x1, &Overflow); +#else /* BASOP_NOGLOB */ + *x1 = L_sub(xt, *x1); +#endif /* BASOP_NOGLOB */ + move32(); /* x[i+n2] = xt - x[i+n2]; */ + *x2 = L_negate(*x2); + move32(); /* x[i+n2+n4] = -x[i+n2+n4]; */ + + + s = sincos_t_fx + step; + c = s + 64; + xi1 = x + add(i, 1); + xi3 = xi1 + n2; + xi2 = xi3 - 2; + xi4 = xi1 + sub(n1, 2); + + FOR(j = 1; j < n4; j++) + { +#ifdef BASOP_NOGLOB + t1 = L_add_o(Mpy_32_16_1(*xi3, *c), Mpy_32_16_1(*xi4, *s), &Overflow); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); */ + t2 = L_sub_o(Mpy_32_16_1(*xi3, *s), Mpy_32_16_1(*xi4, *c), &Overflow); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */ + *xi4 = L_sub_o(*xi2, t2, &Overflow); +#else /* BASOP_NOGLOB */ + t1 = L_add(Mpy_32_16_1(*xi3, *c), Mpy_32_16_1(*xi4, *s)); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); */ + t2 = L_sub(Mpy_32_16_1(*xi3, *s), Mpy_32_16_1(*xi4, *c)); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */ + *xi4 = L_sub(*xi2, t2); +#endif /* BASOP_NOGLOB */ + move16(); +#ifdef BASOP_NOGLOB + *xi3 = L_negate(L_add_o(*xi2, t2, &Overflow)); +#else /* BASOP_NOGLOB */ + *xi3 = L_negate(L_add(*xi2, t2)); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *xi2 = L_sub_o(*xi1, t1, &Overflow); +#else /* BASOP_NOGLOB */ + *xi2 = L_sub(*xi1, t1); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *xi1 = L_add_o(*xi1, t1, &Overflow); +#else /* BASOP_NOGLOB */ + *xi1 = L_add(*xi1, t1); +#endif /* 
BASOP_NOGLOB */ + move32(); + + xi4--; + xi2--; + xi3++; + xi1++; + c += step; + s += step; /* autoincrement by ar0 */ + } + + x0 += n1; + x1 += n1; + x2 += n1; + } + /* step = shr(step, 1); */ + } + + return; +} +#else +void fft_rel_fx32( + Word32 x[], /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +) +{ + Word16 i, j, k, n1, n2, n4; + Word16 step; + Word32 xt, t1, t2; + Word32 *x0, *x1, *x2; + Word32 *xi2, *xi3, *xi4, *xi1; + const Word16 *s, *c; + const Word16 *idx; + + /* !!!! NMAX = 256 is hardcoded here (similar optimizations should be done for NMAX > 256) !!! */ + + Word32 *x2even, *x2odd; + Word32 temp[512]; + + IF ( EQ_16(n, 128) || EQ_16(n, 256) || EQ_16(n, 512) ) + { + idx = fft256_read_indexes; + + /* Combined Digit reverse counter & Length two butterflies */ + IF ( EQ_16(n, 128) ) + { + x2 = temp; + FOR ( i = 0; i < 64; i++ ) + { + j = *idx++; + k = *idx++; + + *x2++ = L_add(x[shr(j, 1)], x[shr(k, 1)]); + *x2++ = L_sub(x[shr(j, 1)], x[shr(k, 1)]); + } + } + ELSE IF ( EQ_16(n, 256) ) + { + x2 = temp; + FOR ( i = 0; i < 128; i++ ) + { + j = *idx++; + k = *idx++; + + *x2++ = L_add(x[j], x[k]); + *x2++ = L_sub(x[j], x[k]); + } + } + ELSE IF ( EQ_16(n, 512) ) + { + x2even = temp; + x2odd = temp + 256; + + FOR ( i = 0; i < 128; i++ ) + { + j = shl(*idx, 1); idx++; + k = shl(*idx, 1); idx++; + + *x2even++ = L_add(x[j], x[k]); + *x2even++ = L_sub(x[j], x[k]); + j++; k++; + *x2odd++ = L_add(x[j], x[k]); + *x2odd++ = L_sub(x[j], x[k]); + } + } + + /*-----------------------------------------------------------------* + * 1st Stage Loop has been Unrolled because n4 is '1' and that + * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop + * and the associated pointers initialization. 
+ * Also, it allows to Put the Data from 'temp' back into 'x' due + * to the previous Combined Digit Reverse and Length two butterflies + *-----------------------------------------------------------------*/ + + /*for_ (k = 2; k < 3; k++)*/ + { + x0 = temp; + x1 = x0 + 2; + x2 = x; + + FOR ( i = 0; i < n; i += 4 ) + { + *x2++ = L_add(*x0++, *x1); /* x[i] = xt + x[i+n2]; */ + *x2++ = *x0; + x0--; + *x2++ = L_sub(*x0, *x1); /* x[i+n2] = xt - x[i+n2]; */ + x1++; + *x2++ = L_negate(*x1); /* x[i+n2+n4] = -x[i+n2+n4]; */ + + x0 += 4; + x1 += 3; /* x1 has already advanced */ + } + } + } + ELSE + { + /*-----------------------------------------------------------------* + * Digit reverse counter + *-----------------------------------------------------------------*/ + + j = 0; + x0 = &x[0]; + FOR ( i = 0; i < n - 1; i++ ) + { + IF ( LT_16(i, j) ) + { + xt = x[j]; + x[j] = *x0; + *x0 = xt; + } + x0++; + k = n / 2; + WHILE ( LE_16(k, j) ) + { + j -= k; + k = shr(k, 1); + } + j += k; + } + + /*-----------------------------------------------------------------* + * Length two butterflies + *-----------------------------------------------------------------*/ + + x0 = &x[0]; + x1 = &x[1]; + FOR ( i = 0; i < n / 2; i++ ) + { + *x1 = L_sub(*x0, *x1); + *x0 = L_sub(L_shl(*x0, 1), *x1); + + x0++; + x0++; + x1++; + x1++; + } + + /*-----------------------------------------------------------------* + * 1st Stage Loop has been Unrolled because n4 is '1' and that + * allows the elimination of the 'for_ (j = 1; j < n4; j++)' loop + * and the associated pointers initialization. 
+ *-----------------------------------------------------------------*/ + + /* for_ (k = 2; k < 3; k++) */ + { + x0 = x; + x1 = x0 + 2; + + FOR ( i = 0; i < n; i += 4 ) + { + *x1 = L_sub(*x0, *x1); /* x[i+n2] = xt - x[i+n2]; */ + *x0 = L_sub(L_shl(*x0, 1), *x1++); /* x[i] = xt + x[i+n2]; */ + *x1 = L_negate(*x1); /* x[i+n2+n4] = -x[i+n2+n4]; */ + + x0 += 4; + x1 += 3; /* x1 has already advanced */ + } + } + } + + /*-----------------------------------------------------------------* + * Other butterflies + * + * The implementation described in [1] has been changed by using + * table lookup for evaluating sine and cosine functions. The + * variable ind and its increment step are needed to access table + * entries. Note that this implementation assumes n4 to be so + * small that ind will never exceed the table. Thus the input + * argument n and the constant N_MAX_FFT must be set properly. + *-----------------------------------------------------------------*/ + + n4 = 1; + n2 = 2; + n1 = 4; + + step = N_MAX_DIV4; + + FOR ( k = 3; k <= m; k++ ) + { + step = shr(step, 1); + n4 = shl(n4, 1); + n2 = shl(n2, 1); + n1 = shl(n1, 1); + + x0 = x; + x1 = x0 + n2; + x2 = x1 + n4; + + FOR ( i = 0; i < n; i += n1 ) + { + *x1 = L_sub(*x0, *x1); /* x[i+n2] = xt - x[i+n2]; */ + *x0 = L_sub(L_shl(*x0, 1), *x1); /* x[i] = xt + x[i+n2]; */ + *x2 = L_negate(*x2); /* x[i+n2+n4] = -x[i+n2+n4]; */ + + s = sincos_t_ext_fx; + c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 */ + xi1 = x0; + xi3 = xi1 + n2; + xi2 = xi3; + x0 += n1; + xi4 = x0; + + FOR ( j = 1; j < n4; j++ ) + { + xi3++; + xi1++; + xi4--; + xi2--; + c += step; + s += step; /* autoincrement by ar0 */ + + t1 = L_add(Mpy_32_16_1(*xi3, *c), Mpy_32_16_1(*xi4, *s)); /* t1 = *xi3**(pt_c+ind) + *xi4**(pt_s+ind); */ + t2 = L_sub(Mpy_32_16_1(*xi3, *s), Mpy_32_16_1(*xi4, *c)); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */ + + *xi4 = L_sub(*xi2, t2); + *xi2 = L_sub(*xi1, t1); + *xi1 = L_sub(L_shl(*xi1, 1), *xi2); + *xi3 = 
L_negate(L_add(L_shl(t2, 1), *xi4)); + } + + x1 += n1; + x2 += n1; + } + } + + return; +} +#endif \ No newline at end of file diff --git a/lib_com/ifft_rel.c b/lib_com/ifft_rel.c index 1a08533e7..6da82af50 100644 --- a/lib_com/ifft_rel.c +++ b/lib_com/ifft_rel.c @@ -650,3 +650,609 @@ void ifft_rel_fx( return; } + +#if 0 +void ifft_rel_fx32( + Word32 io[], /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +) +{ + Word16 i, j, k; + Word16 step; + Word16 n2, n4, n8, i0; + Word16 is, id; + Word32 *x, *xi0, *xi1, *xi2, *xi3, *xi4, *xup1, *xdn6, *xup3, *xdn8; + Word32 xt; + Word32 r1; + Word32 t1, t2, t3, t4, t5; + const Word16 *s, *c, *s3, *c3; + + Word16 cc1, cc3, ss1, ss3; + Word16 tmp; +#ifdef BASOP_NOGLOB_DECLARE_LOCAL + Flag Overflow = 0; +#endif + + + /*-----------------------------------------------------------------* + * ifft + *-----------------------------------------------------------------*/ + + x = &io[-1]; + move32(); + n2 = shl(n, 1); + FOR(k = 1; k < m; k++) + { + is = 0; + move16(); + id = n2; + move16(); + n2 = shr(n2, 1); + move16(); + n4 = shr(n2, 2); + move16(); + n8 = shr(n4, 1); + move16(); + tmp = sub(n, 1); + WHILE(LT_16(is, tmp)) + { + xi1 = x + is + 1; + move32(); + xi2 = xi1 + n4; + move32(); + xi3 = xi2 + n4; + move32(); + xi4 = xi3 + n4; + move32(); + + FOR(i = is; i < n; i += id) + { +#ifdef BASOP_NOGLOB + t1 = L_sub_o(*xi1, *xi3, &Overflow); + *xi1 = L_add_o(*xi1, *xi3, &Overflow); +#else /* BASOP_NOGLOB */ + t1 = L_sub(*xi1, *xi3); + *xi1 = L_add(*xi1, *xi3); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *xi2 = L_shl_o(*xi2, 1, &Overflow); +#else /* BASOP_NOGLOB */ + *xi2 = L_shl(*xi2, 1); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *xi3 = L_sub_o(t1, L_shl_o(*xi4, 1, &Overflow), &Overflow); +#else /* BASOP_NOGLOB */ + *xi3 = L_sub(t1, L_shl(*xi4, 1)); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *xi4 = L_add_o(t1, 
L_shl_o(*xi4, 1, &Overflow), &Overflow); +#else /* BASOP_NOGLOB */ + *xi4 = L_add(t1, L_shl(*xi4, 1)); +#endif /* BASOP_NOGLOB */ + move32(); + + IF(NE_16(n4, 1)) + { +#ifdef BASOP_NOGLOB + t1 = Mpy_32_16_1(L_sub_o(*(xi2 + n8), *(xi1 + n8), &Overflow), INV_SQR2_FX); + t2 = Mpy_32_16_1(L_add_o(*(xi4 + n8), *(xi3 + n8), &Overflow), INV_SQR2_FX); +#else /* BASOP_NOGLOB */ + t1 = Mpy_32_16_1(L_sub(*(xi2 + n8), *(xi1 + n8)), INV_SQR2_FX); + t2 = Mpy_32_16_1(L_add(*(xi4 + n8), *(xi3 + n8)), INV_SQR2_FX); +#endif /* BASOP_NOGLOB */ + +#ifdef BASOP_NOGLOB + *(xi1 + n8) = L_add_o(*(xi1 + n8), *(xi2 + n8), &Overflow); +#else /* BASOP_NOGLOB */ + *(xi1 + n8) = add(*(xi1 + n8), *(xi2 + n8)); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *(xi2 + n8) = L_sub_o(*(xi4 + n8), *(xi3 + n8), &Overflow); +#else /* BASOP_NOGLOB */ + *(xi2 + n8) = L_sub(*(xi4 + n8), *(xi3 + n8)); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *(xi3 + n8) = L_negate(L_shl_o(L_add_o(t2, t1, &Overflow), 1, &Overflow)); +#else /* BASOP_NOGLOB */ + *(xi3 + n8) = L_negate(shl(add(t2, t1), 1)); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *(xi4 + n8) = L_shl_o(L_sub_o(t1, t2, &Overflow), 1, &Overflow); +#else /* BASOP_NOGLOB */ + *(xi4 + n8) = L_shl(L_sub(t1, t2), 1); +#endif /* BASOP_NOGLOB */ + move32(); + } + xi1 += id; + move32(); + xi2 += id; + move32(); + xi3 += id; + move32(); + xi4 += id; + move32(); + } + is = sub(shl(id, 1), n2); + id = shl(id, 2); + } + /*Can be acheived with a shr */ + step = N_MAX_SAS / n2; + move16(); + + s = sincos_t_fx + step; + move16(); + c = s + 64; + move16(); + s3 = sincos_t_fx + i_mult2(step, 3); + move16(); + c3 = s3 + 64; + move16(); + FOR(j = 2; j <= n8; j++) + { + cc1 = *c; + move16(); + ss1 = *s; + move16(); + cc3 = *c3; + move16(); + ss3 = *s3; + move16(); + + is = 0; + move16(); + id = shl(n2, 1); + + c += step; + move16(); + s += step; + move16(); + + c3 += 3 * step; + move16(); + s3 += 3 * step; + move16(); + 
WHILE(LT_16(is, sub(n, 1))) + { + xup1 = x + j + is; + move32(); + xup3 = xup1 + shl(n4, 1); + move32(); + xdn6 = xup3 - shl(j, 1) + 2; + move32(); + + xdn8 = xdn6 + shl(n4, 1); + move32(); + + FOR(i = is; i < n; i += id) + { +#ifdef BASOP_NOGLOB + t1 = L_sub_o(*xup1, *xdn6, &Overflow); + *xup1 = L_add_o(*xup1, *xdn6, &Overflow); +#else /* BASOP_NOGLOB */ + t1 = L_sub(*xup1, *xdn6); + *xup1 = L_add(*xup1, *xdn6); +#endif /* BASOP_NOGLOB */ + move32(); + xup1 += n4; + move32(); + xdn6 -= n4; + move32(); + +#ifdef BASOP_NOGLOB + t2 = L_sub_o(*xdn6, *xup1, &Overflow); + *xdn6 = L_add_o(*xup1, *xdn6, &Overflow); +#else /* BASOP_NOGLOB */ + t2 = L_sub(*xdn6, *xup1); + *xdn6 = L_add(*xup1, *xdn6); +#endif /* BASOP_NOGLOB */ + move32(); + + xdn6 += n4; + move32(); +#ifdef BASOP_NOGLOB + t3 = L_add_o(*xdn8, *xup3, &Overflow); + *xdn6 = L_sub_o(*xdn8, *xup3, &Overflow); +#else /* BASOP_NOGLOB */ + t3 = L_add(*xdn8, *xup3); + *xdn6 = L_sub(*xdn8, *xup3); +#endif /* BASOP_NOGLOB */ + move32(); + + xup3 += n4; + move32(); + xdn8 -= n4; + move32(); + +#ifdef BASOP_NOGLOB + t4 = L_add_o(*xup3, *xdn8, &Overflow); + *xup1 = L_sub_o(*xup3, *xdn8, &Overflow); +#else /* BASOP_NOGLOB */ + t4 = L_add(*xup3, *xdn8); + *xup1 = L_sub(*xup3, *xdn8); +#endif /* BASOP_NOGLOB */ + move32(); + +#ifdef BASOP_NOGLOB + t5 = L_sub_o(t1, t4, &Overflow); + t1 = L_add_o(t1, t4, &Overflow); + t4 = L_sub_o(t2, t3, &Overflow); + t2 = L_add_o(t2, t3, &Overflow); + *xup3 = L_sub_o(Mpy_32_16_1(t1, cc3), Mpy_32_16_1(t2, ss3), &Overflow); +#else /* BASOP_NOGLOB */ + t5 = L_sub(t1, t4); + t1 = L_add(t1, t4); + t4 = L_sub(t2, t3); + t2 = L_add(t2, t3); + *xup3 = L_sub(Mpy_32_16_1(t1, cc3), Mpy_32_16_1(t2, ss3)); +#endif /* BASOP_NOGLOB */ + move32(); + xup3 -= n4; + move32(); +#ifdef BASOP_NOGLOB + *xup3 = L_add_o(Mpy_32_16_1(t5, cc1), Mpy_32_16_1(t4, ss1), &Overflow); +#else /* BASOP_NOGLOB */ + *xup3 = L_add(Mpy_32_16_1(t5, cc1), Mpy_32_16_1(t4, ss1)); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef 
BASOP_NOGLOB + *xdn8 = L_sub_o(Mpy_32_16_1(t5, ss1), Mpy_32_16_1(t4, cc1), &Overflow); +#else /* BASOP_NOGLOB */ + *xdn8 = L_sub(Mpy_32_16_1(t5, ss1), Mpy_32_16_1(t4, cc1)); +#endif /* BASOP_NOGLOB */ + move32(); + + xdn8 += n4; + move32(); +#ifdef BASOP_NOGLOB + *xdn8 = L_add_o(Mpy_32_16_1(t2, cc3), Mpy_32_16_1(t1, ss3), &Overflow); +#else /* BASOP_NOGLOB */ + *xdn8 = L_add(Mpy_32_16_1(t2, cc3), Mpy_32_16_1(t1, ss3)); +#endif /* BASOP_NOGLOB */ + move32(); + + xup1 -= n4; + move32(); + xup1 += id; + move32(); + xup3 += id; + move32(); + xdn6 += id; + move32(); + xdn8 += id; + move32(); + } + is = sub(shl(id, 1), n2); + id = shl(id, 2); + } + } + } + + /*-----------------------------------------------------------------* + * Length two butterflies + *-----------------------------------------------------------------*/ + + is = 1; + move16(); + id = 4; + move16(); + WHILE(is < n) + { + xi0 = x + is; + move32(); + xi1 = xi0 + 1; + move32(); + + FOR(i0 = is; i0 <= n; i0 += id) + { + r1 = *xi0; + move32(); +#ifdef BASOP_NOGLOB + *xi0 = L_add_o(r1, *xi1, &Overflow); +#else /* BASOP_NOGLOB */ + *xi0 = add(r1, *xi1); +#endif /* BASOP_NOGLOB */ + move32(); +#ifdef BASOP_NOGLOB + *xi1 = L_sub_o(r1, *xi1, &Overflow); +#else /* BASOP_NOGLOB */ + *xi1 = sub(r1, *xi1); +#endif + move32(); + xi0 += id; + move32(); + xi1 += id; + move32(); + } + is = sub(shl(id, 1), 1); + id = shl(id, 2); + } + + /*-----------------------------------------------------------------* + * Digit reverse counter + *-----------------------------------------------------------------*/ + + j = 1; + move16(); + FOR(i = 1; i < n; i++) + { + IF(LT_16(i, j)) + { + xt = x[j]; + move32(); + x[j] = x[i]; + move32(); + x[i] = xt; + move32(); + } + k = shr(n, 1); + WHILE(LT_16(k, j)) + { + j = sub(j, k); + k = shr(k, 1); + } + j = add(j, k); + } + + /*-----------------------------------------------------------------* + * Normalization + *-----------------------------------------------------------------*/ + + tmp = 
div_s(1, n); /*Q15 */ + FOR(i = 1; i <= n; i++) + { + x[i] = Mpy_32_16_1(x[i], tmp); + move32(); + } + + return; +} +#else +#define INV_SQRT_2_16 (Word16)(0x5A82) +void ifft_rel_fx32( + Word32 io[], /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +) +{ + Word16 i, j, k; + Word16 step; + Word16 n2, n4, n8, i0; + Word16 is, id; + Word32 *x, *xi0, *xi1, *xi2, *xi3, *xi4, *xup1, *xdn6, *xup3, *xdn8; + Word32 xt; + Word32 r1; + Word32 t1, t2, t3, t4, t5; + Word16 cc1, cc3, ss1, ss3; + const Word16 *s, *s3, *c, *c3; + const Word16 *idx; + Word32 temp[512]; + Word16 n_inv = 128; + + SWITCH (n) + { + case 128: + n_inv = 256; + BREAK; + case 256: + n_inv = 128; + BREAK; + case 512: + n_inv = 64; + BREAK; + default: + assert(0); + BREAK; + } + + /*-----------------------------------------------------------------* + * IFFT + *-----------------------------------------------------------------*/ + + x = &io[-1]; + n2 = shl(n, 1); + FOR ( k = 1; k < m; k++ ) + { + is = 0; + id = n2; + n2 = shr(n2, 1); + n4 = shr(n2, 2); + n8 = shr(n4, 1); + WHILE ( LT_16(is, n - 1) ) + { + xi1 = x + is + 1; + xi2 = xi1 + n4; + xi3 = xi2 + n4; + xi4 = xi3 + n4; + + FOR ( i = is; i < n; i += id ) + { + t1 = L_sub(*xi1, *xi3); + *xi1 = L_add(*xi1, *xi3); + *xi2 = L_shl(*xi2, 1); + *xi3 = L_sub(t1, L_shl(*xi4, 1)); + *xi4 = L_add(t1, L_shl(*xi4, 1)); + IF ( n4 != 1 ) + { + t1 = Mpy_32_16_1(L_sub( *( xi2 + n8 ), *( xi1 + n8 ) ), INV_SQRT_2_16); + t2 = Mpy_32_16_1(L_add( *( xi4 + n8 ), *( xi3 + n8 ) ), INV_SQRT_2_16); + + *( xi1 + n8 ) = L_add(*( xi1 + n8 ), *( xi2 + n8 )); + *( xi2 + n8 ) = L_sub(*( xi4 + n8 ), *( xi3 + n8 )); + *( xi3 + n8 ) = L_shl(L_negate(L_add(t2, t1)), 1 ); + *( xi4 + n8 ) = L_shl(L_sub(t1, t2), 1 ); + } + xi1 += id; + xi2 += id; + xi3 += id; + xi4 += id; + } + is = 2 * id - n2; + id = 4 * id; + } + step = N_MAX_FFT / n2; + + s = sincos_t_ext_fx + step; + c = s + N_MAX_FFT / 4; + s3 = sincos_t_ext_fx + 3 * 
step; + c3 = s3 + N_MAX_FFT / 4; + FOR ( j = 2; j <= n8; j++ ) + { + cc1 = *c; + ss1 = *s; + cc3 = *c3; + ss3 = *s3; + + is = 0; + id = 2 * n2; + + c += step; + s += step; + + c3 += 3 * step; + s3 += 3 * step; + WHILE ( LT_16(is, n - 1) ) + { + xup1 = x + j + is; + xup3 = xup1 + 2 * n4; + xdn6 = xup3 - 2 * j + 2; + xdn8 = xdn6 + 2 * n4; + + FOR ( i = is; i < n; i += id ) + { + t1 = L_sub(*xup1, *xdn6); + *xup1 = L_add(*xup1, *xdn6); + xup1 += n4; + xdn6 -= n4; + + t2 = L_sub(*xdn6, *xup1); + *xdn6 = L_add(*xup1, *xdn6); + + xdn6 += n4; + t3 = L_add(*xdn8, *xup3); + *xdn6 = L_sub(*xdn8, *xup3); + + xup3 += n4; + xdn8 -= n4; + + t4 = L_add(*xup3, *xdn8); + *xup1 = L_sub(*xup3, *xdn8); + + t5 = L_sub(t1, t4); + t1 = L_add(t1, t4); + t4 = L_sub(t2, t3); + t2 = L_add(t2, t3); + *xup3 = L_sub(Mpy_32_16_1(t1, cc3), Mpy_32_16_1(t2, ss3)); + xup3 -= n4; + *xup3 = L_add(Mpy_32_16_1(t5, cc1), Mpy_32_16_1(t4, ss1)); + *xdn8 = L_sub(Mpy_32_16_1(t5, ss1), Mpy_32_16_1(t4, cc1)); + + xdn8 += n4; + *xdn8 = L_add(Mpy_32_16_1(t2, cc3), Mpy_32_16_1(t1, ss3)); + + xup1 -= n4; + xup1 += id; + xup3 += id; + xdn6 += id; + xdn8 += id; + } + is = shl(id, 1) - n2; + id = shl(id, 2); + } + } + } + + /*-----------------------------------------------------------------* + * Length two butterflies + *-----------------------------------------------------------------*/ + + is = 1; + id = 4; + WHILE ( LT_16(is, n) ) + { + xi0 = x + is; + xi1 = xi0 + 1; + + FOR ( i0 = is; i0 <= n; i0 += id ) + { + r1 = *xi0; + *xi0 = L_add(r1, *xi1); + *xi1 = L_sub(r1, *xi1); + xi0 += id; + xi1 += id; + } + is = shl(id, 1) - 1; + id = shl(id, 2); + } + + /*-----------------------------------------------------------------* + * Digit reverse counter + *-----------------------------------------------------------------*/ + + idx = fft256_read_indexes; + xi0 = &temp[0] - 1; + IF ( EQ_16(n, 128) ) + { + FOR ( i = 0; i < n; i++ ) + { + j = *idx++; + temp[i] = x[1 + shr( j, 1 )]; + } + } + ELSE IF ( EQ_16(n, 256) ) + { + FOR 
( i = 0; i < n; i++ ) + { + j = *idx++; + temp[i] = x[1 + j]; + } + } + ELSE IF ( EQ_16(n, 512) ) + { + FOR ( i = 0; i < 256; i++ ) + { + j = *idx++; + temp[i] = x[1 + 2 * j]; + temp[i + 256] = x[2 + 2 * j]; + } + } + ELSE + { + xi0 = x; + j = 1; + FOR ( i = 1; i < n; i++ ) + { + IF ( LT_16(i, j) ) + { + xt = x[j]; + x[j] = x[i]; + x[i] = xt; + } + k = shr(n, 1); + WHILE ( LT_16(k, j) ) + { + j = sub(j, k); + k = shr(k, 1); + } + j = add(j, k); + } + } + + /*-----------------------------------------------------------------* + * Normalization + *-----------------------------------------------------------------*/ + + FOR ( i = 1; i <= n; i++ ) + { + x[i] = Mpy_32_16_1(xi0[i], n_inv); + } + + return; +} +#endif \ No newline at end of file diff --git a/lib_com/ivas_mdct_imdct_fx.c b/lib_com/ivas_mdct_imdct_fx.c new file mode 100644 index 000000000..82e22a6b8 --- /dev/null +++ b/lib_com/ivas_mdct_imdct_fx.c @@ -0,0 +1,399 @@ +/****************************************************************************************************** + + (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. + + Any dispute, controversy or claim arising under or in relation to providing this software shall be + submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in + accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and + the United Nations Convention on Contracts on the International Sales of Goods. 
+ +*******************************************************************************************************/ + +#include <assert.h> +#include "options.h" +#include "prot.h" +#include "prot_fx1.h" +#include "ivas_rom_com_fx.h" +#ifdef DEBUGGING +#include "debug.h" +#endif +#include "ivas_stat_com.h" +#include "wmc_auto.h" + +void ivas_get_twid_factors_fx1( + const Word16 length, + const Word16 **pTwid_re, + const Word16 **pTwid_im ); + +void DoFFT_fx( + Word32 *re2, + Word32 *im2, + const Word16 length ); + +Word32 ivas_get_mdct_scaling_gain_fx( + const Word16 dct_len_by_2 ); + + +/*------------------------------------------------------------------------------------------* + * Local constants + *------------------------------------------------------------------------------------------*/ + +#define IVAS_MDCT_SCALING_GAIN_48k_Q31 0x00001086 /* 1.9699011974118126e-06f */ +#define IVAS_MDCT_SCALING_GAIN_48k_Q46 0x08432A51 +#define IVAS_MDCT_SCALING_GAIN_32k_Q31 0x000018C9 /* 2.9548517961177197e-06f */ +#define IVAS_MDCT_SCALING_GAIN_32k_Q46 0x0C64BF7A +#define IVAS_MDCT_SCALING_GAIN_16k_Q31 0X00003193 /* 5.909703592235439e-06f */ +#define IVAS_MDCT_SCALING_GAIN_16k_Q46 0x18C97EF4 + + +#define IVAS_ONE_BY_IMDCT_SCALING_GAIN_Q16 0x08432A51 /* 1 / 2115.165304808f */ + + +/*-----------------------------------------------------------------------------------------* + * Function ivas_tda_fx() + * + * Time domain aliasing + *-----------------------------------------------------------------------------------------*/ + +void ivas_tda_fx( + const Word32 *pIn, /* i : time domain buffer of size 2*length */ + Word32 *pOut, /* o : time domain buffer of size length */ + const Word16 length /* i : length of time alised signal buffer */ +) +{ + Word16 i; + Word16 len_by_2 = shr(length, 1); + + FOR ( i = 0; i < len_by_2; i++ ) + { + pOut[i] = L_sub(pIn[len_by_2 + i], pIn[len_by_2 - i - 1]); + pOut[len_by_2 + i] = L_add(pIn[length * 2 - i - 1], pIn[length + i]); + } + + return; +} + 
+/*-----------------------------------------------------------------------------------------* + * Function ivas_dct_windowing_fx() + * + * Windowing block, input is passed through Fielder window + *-----------------------------------------------------------------------------------------*/ + +void ivas_dct_windowing_fx( + const Word16 fade_len, + const Word16 full_len, + const Word16 dct_len, + const Word16 zero_pad_len, + const Word32 *pWindow_coeffs, + const Word16 frame_len, + Word32 *pOut_buf, + Word32 *pBuffer_prev, + Word32 *pTemp_lfe ) +{ + Word16 i; + Word16 rem_len = 0; + + Copy32( pBuffer_prev, pOut_buf + zero_pad_len, fade_len ); + + Copy32( pTemp_lfe, ( pOut_buf + fade_len + zero_pad_len ), dct_len ); + + set_l(pOut_buf, zero_pad_len, 0); + + Copy32( ( pOut_buf + full_len - fade_len ), pBuffer_prev, fade_len ); + + FOR ( i = 0; i < fade_len; i++ ) + { + pOut_buf[zero_pad_len + i] = Mult_32_32(pOut_buf[zero_pad_len + i], pWindow_coeffs[i]); + } + + rem_len = full_len - ( zero_pad_len * 3 + fade_len ); + + FOR ( i = 0; i < rem_len; i++ ) + { + pOut_buf[zero_pad_len * 3 + fade_len + i] = Mult_32_32(pOut_buf[zero_pad_len * 3 + fade_len + i], pWindow_coeffs[fade_len - i - 1]); + } + + set_l(&pOut_buf[full_len], frame_len - full_len, 0); + + return; +} + + +/*-----------------------------------------------------------------------------------------* + * Function ivas_mdct_fx() + * + * MDCT implementation + *-----------------------------------------------------------------------------------------*/ + +void ivas_mdct_fx( + const Word32 *pIn, + Word32 *pOut, + const Word16 length, + Word16 *q_out) +{ + const Word16 *pTwid_re, *pTwid_im; + Word16 i, len_by_2; + Word32 re[IVAS_480_PT_LEN], im[IVAS_480_PT_LEN]; + Word32 ivas_mdct_scaling_gain; + + len_by_2 = shr(length, 1); + ivas_mdct_scaling_gain = ivas_get_mdct_scaling_gain_fx( len_by_2 ); + + ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im ); + + FOR ( i = 0; i < len_by_2; i++ ) + { + pOut[i] = 
L_sub(pIn[len_by_2 + i], pIn[len_by_2 - i - 1]); + pOut[len_by_2 + i] = L_add(pIn[length * 2 - i - 1], pIn[length + i]); + } + + FOR ( i = 0; i < len_by_2; i++ ) + { + re[i] = L_negate(L_add(Mpy_32_16_1( pOut[2 * i], pTwid_re[i]), Mpy_32_16_1(pOut[length - 1 - 2 * i], pTwid_im[i]))); + im[i] = L_sub(Mpy_32_16_1(pOut[length - 1 - 2 * i], pTwid_re[i]), Mpy_32_16_1(pOut[2 * i], pTwid_im[i])); + } + + DoFFT_fx( &re[0], &im[0], len_by_2 ); + + FOR ( i = 0; i < len_by_2; i++ ) + { + re[i] = Mult_32_32(re[i], ivas_mdct_scaling_gain); + im[i] = Mult_32_32(im[i], ivas_mdct_scaling_gain); + } + *q_out = *q_out + Q15; + FOR ( i = 0; i < len_by_2; i++ ) + { + Word32 tmp; + tmp = L_sub(Mpy_32_16_1(re[i], pTwid_re[i]), Mpy_32_16_1(im[i], pTwid_im[i])); + im[i] = L_add(Mpy_32_16_1(im[i], pTwid_re[i]), Mpy_32_16_1(re[i], pTwid_im[i])); + re[i] = tmp; + } + + FOR ( i = 0; i < len_by_2; i++ ) + { + pOut[length - 2 * i - 1] = re[i]; + pOut[2 * i] = im[i]; + } + + return; +} + + +/*-----------------------------------------------------------------------------------------* + * Function ivas_ifft_cplx() + * + * Complex IFFT implementation using DoFFT + *-----------------------------------------------------------------------------------------*/ + +static void ivas_ifft_cplx( + Word32 *re, + Word32 *im, + const Word16 length ) +{ + Word16 i; + Word32 ivas_imdct_one_by_powergain = IVAS_ONE_BY_IMDCT_SCALING_GAIN_Q16; + /*re-arrange inputs to use fft as ifft */ + re[0] = Mult_32_32(re[0], ivas_imdct_one_by_powergain); + im[0] = Mult_32_32(im[0], ivas_imdct_one_by_powergain); + + FOR ( i = 1; i <= shr(length, 1); i++ ) + { + Word32 tmp = Mult_32_32(re[length - i], ivas_imdct_one_by_powergain); + re[length - i] = Mult_32_32(re[i], ivas_imdct_one_by_powergain); + re[i] = tmp; + + tmp = Mult_32_32(im[length - i], ivas_imdct_one_by_powergain); + im[length - i] = Mult_32_32(im[i], ivas_imdct_one_by_powergain); + im[i] = tmp; + } + + DoFFT_fx( re, im, (Word16) length ); + + return; +} + + 
+/*-----------------------------------------------------------------------------------------* + * Function ivas_itda() + * + * Inverse time domain alias implementation + *-----------------------------------------------------------------------------------------*/ + +void ivas_itda_fx( + const Word32 *re, /* i : time alised signal after IDCT */ + Word32 *pOut, /* o : time domain buffer of size 2*length */ + const Word16 length /* i : length of time alised signal buffer */ +) +{ + Word16 i; + Word16 len_by_2 = shr(length, 1); + + FOR ( i = 0; i < len_by_2; i++ ) + { + pOut[i] = -re[len_by_2 - i - 1]; + pOut[len_by_2 + i] = re[i]; + pOut[length + i] = re[len_by_2 + i]; + pOut[3 * len_by_2 + i] = re[length - i - 1]; + } + + return; +} + + +/*-----------------------------------------------------------------------------------------* + * Function ivas_imdct_fx() + * + * IMDCT implementation + *-----------------------------------------------------------------------------------------*/ + +void ivas_imdct_fx( + const Word32 *pIn, + Word32 *pOut, + const Word16 length, + Word16 *q_out) +{ + const Word16 *pTwid_re, *pTwid_im; + Word16 len_by_2 = shr(length, 1); + Word16 i; + Word32 re[IVAS_480_PT_LEN]; + Word32 im[IVAS_480_PT_LEN]; + + ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im ); + + FOR ( i = 0; i < len_by_2; i++ ) + { + re[i] = L_add(Mpy_32_16_1(pIn[length - 2 * i - 1], pTwid_re[i]), Mpy_32_16_1(pIn[2 * i], pTwid_im[i])); /*stl_arr_index*/ + im[i] = L_sub( Mpy_32_16_1( pIn[2 * i], pTwid_re[i] ), Mpy_32_16_1( pIn[length - 2 * i - 1], pTwid_im[i] ) ); /*stl_arr_index*/ + } + + ivas_ifft_cplx( &re[0], &im[0], len_by_2 ); + IF (GT_16(len_by_2, 0)) + { + *q_out = *q_out - Q15; + } + + FOR ( i = 0; i < len_by_2; i++ ) + { + Word32 tmp; + tmp = L_add(Mpy_32_16_1(re[i], pTwid_re[i]), Mpy_32_16_1(im[i], pTwid_im[i])); + im[i] = L_sub(Mpy_32_16_1(im[i], pTwid_re[i]), Mpy_32_16_1(re[i], pTwid_im[i])); + re[i] = tmp; + } + + FOR ( i = ( len_by_2 - 1 ); i >= 0; i-- ) + { + 
re[2 * i + 1] = im[( len_by_2 - 1 ) - i]; + re[2 * i] = -re[i]; + } + + FOR ( i = 0; i < len_by_2; i++ ) + { + pOut[i] = -re[len_by_2 - i - 1]; + pOut[len_by_2 + i] = re[i]; + pOut[length + i] = re[len_by_2 + i]; + pOut[3 * len_by_2 + i] = re[length - i - 1]; + } + + return; +} + + +/*-----------------------------------------------------------------------------------------* + * Function ivas_get_twid_factors_fx1() + * + * Sets/Maps the fft twiddle tables based on fft length + *-----------------------------------------------------------------------------------------*/ + +void ivas_get_twid_factors_fx1( + const Word16 length, + const Word16 **pTwid_re, + const Word16 **pTwid_im ) +{ + IF ( EQ_16(length, 480) ) + { + *pTwid_re = (Word16 *)&ivas_cos_twiddle_480_fx[0]; + *pTwid_im = (Word16 *)&ivas_sin_twiddle_480_fx[0]; + } + ELSE IF ( EQ_16(length, 320) ) + { + *pTwid_re = (Word16 *)&ivas_cos_twiddle_320_fx[0]; + *pTwid_im = (Word16 *)&ivas_sin_twiddle_320_fx[0]; + } + ELSE IF ( EQ_16(length, 160) ) + { + *pTwid_re = (Word16 *)&ivas_cos_twiddle_160_fx[0]; + *pTwid_im = (Word16 *)&ivas_sin_twiddle_160_fx[0]; + } + ELSE IF ( EQ_16(length, 80 ) ) + { + *pTwid_re = (Word16 *)&ivas_cos_twiddle_80_fx[0]; + *pTwid_im = (Word16 *)&ivas_sin_twiddle_80_fx[0]; + } + ELSE + { + assert( !"Not supported FFT length!" 
); + } + + return; +} + + +/*-----------------------------------------------------------------------------------------* + * Function ivas_get_mdct_scaling_gain_fx() + * + * Get scaling gain for MDCT functions + *-----------------------------------------------------------------------------------------*/ + +Word32 ivas_get_mdct_scaling_gain_fx( + const Word16 dct_len_by_2 ) +{ + Word32 gain = 0; + + SWITCH ( dct_len_by_2 ) + { + case L_FRAME48k >> 2: + { + gain = IVAS_MDCT_SCALING_GAIN_48k_Q46; + BREAK; + } + case L_FRAME32k >> 2: + { + gain = IVAS_MDCT_SCALING_GAIN_32k_Q46; + BREAK; + } + case L_FRAME16k >> 2: + { + gain = IVAS_MDCT_SCALING_GAIN_16k_Q46; + BREAK; + } + default: + { + assert( !"Unsupported frame length!" ); + BREAK; + } + } + + return gain; +} diff --git a/lib_com/ivas_rom_com_fx.c b/lib_com/ivas_rom_com_fx.c new file mode 100644 index 000000000..d5da101f3 --- /dev/null +++ b/lib_com/ivas_rom_com_fx.c @@ -0,0 +1,199 @@ +/****************************************************************************************************** + + (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. + + Any dispute, controversy or claim arising under or in relation to providing this software shall be + submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in + accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and + the United Nations Convention on Contracts on the International Sales of Goods. 
+ +*******************************************************************************************************/ + +#include +#include "options.h" +#include +#include "cnst.h" +#include "ivas_cnst.h" +#include "ivas_rom_com_fx.h" +#include "wmc_auto.h" + +#define SHC( x ) ( (Word16) x ) + +/* clang-format off */ +const Word16 ivas_sin_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ] = { + SHC( 0xffe6 ), SHC( 0xff0f ), SHC( 0xfe39 ), SHC( 0xfd62 ), SHC( 0xfc8c ), SHC( 0xfbb6 ), SHC( 0xfadf ), SHC( 0xfa09 ), + SHC( 0xf933 ), SHC( 0xf85d ), SHC( 0xf787 ), SHC( 0xf6b1 ), SHC( 0xf5db ), SHC( 0xf505 ), SHC( 0xf42f ), SHC( 0xf35a ), + SHC( 0xf285 ), SHC( 0xf1af ), SHC( 0xf0da ), SHC( 0xf006 ), SHC( 0xef31 ), SHC( 0xee5c ), SHC( 0xed88 ), SHC( 0xecb4 ), + SHC( 0xebe0 ), SHC( 0xeb0c ), SHC( 0xea39 ), SHC( 0xe966 ), SHC( 0xe893 ), SHC( 0xe7c0 ), SHC( 0xe6ed ), SHC( 0xe61b ), + SHC( 0xe549 ), SHC( 0xe478 ), SHC( 0xe3a7 ), SHC( 0xe2d6 ), SHC( 0xe205 ), SHC( 0xe135 ), SHC( 0xe065 ), SHC( 0xdf95 ), + SHC( 0xdec6 ), SHC( 0xddf7 ), SHC( 0xdd28 ), SHC( 0xdc5a ), SHC( 0xdb8c ), SHC( 0xdabf ), SHC( 0xd9f2 ), SHC( 0xd925 ), + SHC( 0xd859 ), SHC( 0xd78d ), SHC( 0xd6c2 ), SHC( 0xd5f7 ), SHC( 0xd52d ), SHC( 0xd463 ), SHC( 0xd39a ), SHC( 0xd2d1 ), + SHC( 0xd208 ), SHC( 0xd141 ), SHC( 0xd079 ), SHC( 0xcfb2 ), SHC( 0xceec ), SHC( 0xce26 ), SHC( 0xcd61 ), SHC( 0xcc9c ), + SHC( 0xcbd8 ), SHC( 0xcb15 ), SHC( 0xca52 ), SHC( 0xc98f ), SHC( 0xc8cd ), SHC( 0xc80c ), SHC( 0xc74c ), SHC( 0xc68c ), + SHC( 0xc5cc ), SHC( 0xc50e ), SHC( 0xc450 ), SHC( 0xc392 ), SHC( 0xc2d5 ), SHC( 0xc219 ), SHC( 0xc15e ), SHC( 0xc0a3 ), + SHC( 0xbfe9 ), SHC( 0xbf30 ), SHC( 0xbe77 ), SHC( 0xbdc0 ), SHC( 0xbd08 ), SHC( 0xbc52 ), SHC( 0xbb9c ), SHC( 0xbae7 ), + SHC( 0xba33 ), SHC( 0xb980 ), SHC( 0xb8cd ), SHC( 0xb81b ), SHC( 0xb76a ), SHC( 0xb6ba ), SHC( 0xb60b ), SHC( 0xb55c ), + SHC( 0xb4ae ), SHC( 0xb401 ), SHC( 0xb355 ), SHC( 0xb2aa ), SHC( 0xb1ff ), SHC( 0xb156 ), SHC( 0xb0ad ), SHC( 0xb005 ), + SHC( 0xaf5e ), SHC( 0xaeb8 ), SHC( 
0xae13 ), SHC( 0xad6e ), SHC( 0xaccb ), SHC( 0xac28 ), SHC( 0xab87 ), SHC( 0xaae6 ), + SHC( 0xaa47 ), SHC( 0xa9a8 ), SHC( 0xa90a ), SHC( 0xa86d ), SHC( 0xa7d1 ), SHC( 0xa736 ), SHC( 0xa69c ), SHC( 0xa603 ), + SHC( 0xa56b ), SHC( 0xa4d4 ), SHC( 0xa43e ), SHC( 0xa3a9 ), SHC( 0xa315 ), SHC( 0xa282 ), SHC( 0xa1f0 ), SHC( 0xa15f ), + SHC( 0xa0cf ), SHC( 0xa040 ), SHC( 0x9fb3 ), SHC( 0x9f26 ), SHC( 0x9e9a ), SHC( 0x9e0f ), SHC( 0x9d86 ), SHC( 0x9cfd ), + SHC( 0x9c76 ), SHC( 0x9bf0 ), SHC( 0x9b6b ), SHC( 0x9ae7 ), SHC( 0x9a64 ), SHC( 0x99e2 ), SHC( 0x9961 ), SHC( 0x98e1 ), + SHC( 0x9863 ), SHC( 0x97e6 ), SHC( 0x9769 ), SHC( 0x96ee ), SHC( 0x9674 ), SHC( 0x95fc ), SHC( 0x9584 ), SHC( 0x950e ), + SHC( 0x9498 ), SHC( 0x9424 ), SHC( 0x93b1 ), SHC( 0x9340 ), SHC( 0x92cf ), SHC( 0x9260 ), SHC( 0x91f2 ), SHC( 0x9185 ), + SHC( 0x9119 ), SHC( 0x90af ), SHC( 0x9045 ), SHC( 0x8fdd ), SHC( 0x8f77 ), SHC( 0x8f11 ), SHC( 0x8ead ), SHC( 0x8e4a ), + SHC( 0x8de8 ), SHC( 0x8d87 ), SHC( 0x8d28 ), SHC( 0x8cca ), SHC( 0x8c6d ), SHC( 0x8c12 ), SHC( 0x8bb7 ), SHC( 0x8b5e ), + SHC( 0x8b07 ), SHC( 0x8ab0 ), SHC( 0x8a5b ), SHC( 0x8a07 ), SHC( 0x89b5 ), SHC( 0x8963 ), SHC( 0x8913 ), SHC( 0x88c5 ), + SHC( 0x8877 ), SHC( 0x882b ), SHC( 0x87e1 ), SHC( 0x8797 ), SHC( 0x874f ), SHC( 0x8708 ), SHC( 0x86c3 ), SHC( 0x867f ), + SHC( 0x863c ), SHC( 0x85fb ), SHC( 0x85ba ), SHC( 0x857c ), SHC( 0x853e ), SHC( 0x8502 ), SHC( 0x84c7 ), SHC( 0x848e ), + SHC( 0x8456 ), SHC( 0x841f ), SHC( 0x83ea ), SHC( 0x83b6 ), SHC( 0x8384 ), SHC( 0x8352 ), SHC( 0x8323 ), SHC( 0x82f4 ), + SHC( 0x82c7 ), SHC( 0x829b ), SHC( 0x8271 ), SHC( 0x8248 ), SHC( 0x8220 ), SHC( 0x81fa ), SHC( 0x81d5 ), SHC( 0x81b2 ), + SHC( 0x8190 ), SHC( 0x816f ), SHC( 0x8150 ), SHC( 0x8132 ), SHC( 0x8115 ), SHC( 0x80fa ), SHC( 0x80e0 ), SHC( 0x80c8 ), + SHC( 0x80b1 ), SHC( 0x809c ), SHC( 0x8088 ), SHC( 0x8075 ), SHC( 0x8063 ), SHC( 0x8053 ), SHC( 0x8045 ), SHC( 0x8038 ), + SHC( 0x802c ), SHC( 0x8022 ), SHC( 0x8019 ), SHC( 0x8011 ), SHC( 0x800b ), SHC( 
0x8006 ), SHC( 0x8003 ), SHC( 0x8001 ), +}; + +const Word16 ivas_cos_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ] = { + SHC( 0x7fff ), SHC( 0x7fff ), SHC( 0x7ffc ), SHC( 0x7ff9 ), SHC( 0x7ff4 ), SHC( 0x7fed ), SHC( 0x7fe5 ), SHC( 0x7fdc ), + SHC( 0x7fd1 ), SHC( 0x7fc5 ), SHC( 0x7fb8 ), SHC( 0x7fa9 ), SHC( 0x7f98 ), SHC( 0x7f87 ), SHC( 0x7f74 ), SHC( 0x7f5f ), + SHC( 0x7f49 ), SHC( 0x7f32 ), SHC( 0x7f19 ), SHC( 0x7eff ), SHC( 0x7ee4 ), SHC( 0x7ec7 ), SHC( 0x7ea9 ), SHC( 0x7e89 ), + SHC( 0x7e68 ), SHC( 0x7e45 ), SHC( 0x7e22 ), SHC( 0x7dfc ), SHC( 0x7dd6 ), SHC( 0x7dae ), SHC( 0x7d85 ), SHC( 0x7d5a ), + SHC( 0x7d2e ), SHC( 0x7d00 ), SHC( 0x7cd2 ), SHC( 0x7ca1 ), SHC( 0x7c70 ), SHC( 0x7c3d ), SHC( 0x7c09 ), SHC( 0x7bd3 ), + SHC( 0x7b9c ), SHC( 0x7b64 ), SHC( 0x7b2a ), SHC( 0x7aef ), SHC( 0x7ab2 ), SHC( 0x7a75 ), SHC( 0x7a36 ), SHC( 0x79f5 ), + SHC( 0x79b3 ), SHC( 0x7970 ), SHC( 0x792c ), SHC( 0x78e6 ), SHC( 0x789f ), SHC( 0x7857 ), SHC( 0x780d ), SHC( 0x77c2 ), + SHC( 0x7775 ), SHC( 0x7728 ), SHC( 0x76d9 ), SHC( 0x7689 ), SHC( 0x7637 ), SHC( 0x75e4 ), SHC( 0x7590 ), SHC( 0x753a ), + SHC( 0x74e4 ), SHC( 0x748c ), SHC( 0x7432 ), SHC( 0x73d8 ), SHC( 0x737c ), SHC( 0x731f ), SHC( 0x72c0 ), SHC( 0x7261 ), + SHC( 0x7200 ), SHC( 0x719e ), SHC( 0x713a ), SHC( 0x70d6 ), SHC( 0x7070 ), SHC( 0x7009 ), SHC( 0x6fa0 ), SHC( 0x6f37 ), + SHC( 0x6ecc ), SHC( 0x6e60 ), SHC( 0x6df3 ), SHC( 0x6d84 ), SHC( 0x6d15 ), SHC( 0x6ca4 ), SHC( 0x6c32 ), SHC( 0x6bbf ), + SHC( 0x6b4a ), SHC( 0x6ad5 ), SHC( 0x6a5e ), SHC( 0x69e6 ), SHC( 0x696d ), SHC( 0x68f3 ), SHC( 0x6878 ), SHC( 0x67fb ), + SHC( 0x677e ), SHC( 0x66ff ), SHC( 0x667f ), SHC( 0x65fe ), SHC( 0x657c ), SHC( 0x64f9 ), SHC( 0x6474 ), SHC( 0x63ef ), + SHC( 0x6368 ), SHC( 0x62e0 ), SHC( 0x6258 ), SHC( 0x61ce ), SHC( 0x6143 ), SHC( 0x60b7 ), SHC( 0x602a ), SHC( 0x5f9c ), + SHC( 0x5f0d ), SHC( 0x5e7d ), SHC( 0x5dec ), SHC( 0x5d59 ), SHC( 0x5cc6 ), SHC( 0x5c32 ), SHC( 0x5b9d ), SHC( 0x5b06 ), + SHC( 0x5a6f ), SHC( 0x59d7 ), SHC( 0x593d ), SHC( 
0x58a3 ), SHC( 0x5808 ), SHC( 0x576c ), SHC( 0x56cf ), SHC( 0x5631 ), + SHC( 0x5592 ), SHC( 0x54f2 ), SHC( 0x5451 ), SHC( 0x53af ), SHC( 0x530c ), SHC( 0x5269 ), SHC( 0x51c4 ), SHC( 0x511f ), + SHC( 0x5078 ), SHC( 0x4fd1 ), SHC( 0x4f29 ), SHC( 0x4e80 ), SHC( 0x4dd6 ), SHC( 0x4d2b ), SHC( 0x4c80 ), SHC( 0x4bd4 ), + SHC( 0x4b26 ), SHC( 0x4a78 ), SHC( 0x49c9 ), SHC( 0x491a ), SHC( 0x4869 ), SHC( 0x47b8 ), SHC( 0x4706 ), SHC( 0x4653 ), + SHC( 0x45a0 ), SHC( 0x44eb ), SHC( 0x4436 ), SHC( 0x4380 ), SHC( 0x42ca ), SHC( 0x4213 ), SHC( 0x415b ), SHC( 0x40a2 ), + SHC( 0x3fe8 ), SHC( 0x3f2e ), SHC( 0x3e73 ), SHC( 0x3db8 ), SHC( 0x3cfb ), SHC( 0x3c3f ), SHC( 0x3b81 ), SHC( 0x3ac3 ), + SHC( 0x3a04 ), SHC( 0x3944 ), SHC( 0x3884 ), SHC( 0x37c4 ), SHC( 0x3702 ), SHC( 0x3640 ), SHC( 0x357e ), SHC( 0x34bb ), + SHC( 0x33f7 ), SHC( 0x3333 ), SHC( 0x326e ), SHC( 0x31a8 ), SHC( 0x30e2 ), SHC( 0x301c ), SHC( 0x2f55 ), SHC( 0x2e8e ), + SHC( 0x2dc5 ), SHC( 0x2cfd ), SHC( 0x2c34 ), SHC( 0x2b6a ), SHC( 0x2aa0 ), SHC( 0x29d6 ), SHC( 0x290b ), SHC( 0x2840 ), + SHC( 0x2774 ), SHC( 0x26a8 ), SHC( 0x25db ), SHC( 0x250e ), SHC( 0x2440 ), SHC( 0x2373 ), SHC( 0x22a4 ), SHC( 0x21d6 ), + SHC( 0x2107 ), SHC( 0x2037 ), SHC( 0x1f67 ), SHC( 0x1e97 ), SHC( 0x1dc7 ), SHC( 0x1cf6 ), SHC( 0x1c25 ), SHC( 0x1b54 ), + SHC( 0x1a82 ), SHC( 0x19b0 ), SHC( 0x18de ), SHC( 0x180b ), SHC( 0x1739 ), SHC( 0x1666 ), SHC( 0x1592 ), SHC( 0x14bf ), + SHC( 0x13eb ), SHC( 0x1317 ), SHC( 0x1243 ), SHC( 0x116f ), SHC( 0x109a ), SHC( 0x0fc5 ), SHC( 0x0ef0 ), SHC( 0x0e1b ), + SHC( 0x0d46 ), SHC( 0x0c71 ), SHC( 0x0b9b ), SHC( 0x0ac5 ), SHC( 0x09f0 ), SHC( 0x091a ), SHC( 0x0844 ), SHC( 0x076e ), + SHC( 0x0698 ), SHC( 0x05c1 ), SHC( 0x04eb ), SHC( 0x0415 ), SHC( 0x033e ), SHC( 0x0268 ), SHC( 0x0192 ), SHC( 0x00bb ), +}; + +const Word16 ivas_sin_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ] = { + SHC( 0xffd8 ), SHC( 0xfe97 ), SHC( 0xfd55 ), SHC( 0xfc13 ), SHC( 0xfad2 ), SHC( 0xf990 ), SHC( 0xf84f ), SHC( 0xf70e ), + SHC( 0xf5cd ), SHC( 
0xf48d ), SHC( 0xf34d ), SHC( 0xf20d ), SHC( 0xf0cd ), SHC( 0xef8e ), SHC( 0xee4f ), SHC( 0xed11 ), + SHC( 0xebd3 ), SHC( 0xea95 ), SHC( 0xe958 ), SHC( 0xe81c ), SHC( 0xe6e0 ), SHC( 0xe5a5 ), SHC( 0xe46b ), SHC( 0xe331 ), + SHC( 0xe1f8 ), SHC( 0xe0c0 ), SHC( 0xdf88 ), SHC( 0xde51 ), SHC( 0xdd1b ), SHC( 0xdbe6 ), SHC( 0xdab2 ), SHC( 0xd97f ), + SHC( 0xd84c ), SHC( 0xd71b ), SHC( 0xd5eb ), SHC( 0xd4bb ), SHC( 0xd38d ), SHC( 0xd260 ), SHC( 0xd134 ), SHC( 0xd009 ), + SHC( 0xcee0 ), SHC( 0xcdb7 ), SHC( 0xcc90 ), SHC( 0xcb6a ), SHC( 0xca45 ), SHC( 0xc922 ), SHC( 0xc800 ), SHC( 0xc6df ), + SHC( 0xc5c0 ), SHC( 0xc4a3 ), SHC( 0xc386 ), SHC( 0xc26c ), SHC( 0xc152 ), SHC( 0xc03b ), SHC( 0xbf24 ), SHC( 0xbe10 ), + SHC( 0xbcfd ), SHC( 0xbbec ), SHC( 0xbadc ), SHC( 0xb9ce ), SHC( 0xb8c2 ), SHC( 0xb7b8 ), SHC( 0xb6af ), SHC( 0xb5a8 ), + SHC( 0xb4a3 ), SHC( 0xb3a0 ), SHC( 0xb29f ), SHC( 0xb1a0 ), SHC( 0xb0a2 ), SHC( 0xafa7 ), SHC( 0xaeae ), SHC( 0xadb6 ), + SHC( 0xacc1 ), SHC( 0xabcd ), SHC( 0xaadc ), SHC( 0xa9ed ), SHC( 0xa900 ), SHC( 0xa815 ), SHC( 0xa72c ), SHC( 0xa646 ), + SHC( 0xa562 ), SHC( 0xa480 ), SHC( 0xa3a0 ), SHC( 0xa2c2 ), SHC( 0xa1e7 ), SHC( 0xa10e ), SHC( 0xa037 ), SHC( 0x9f63 ), + SHC( 0x9e91 ), SHC( 0x9dc2 ), SHC( 0x9cf5 ), SHC( 0x9c2a ), SHC( 0x9b62 ), SHC( 0x9a9d ), SHC( 0x99da ), SHC( 0x9919 ), + SHC( 0x985b ), SHC( 0x97a0 ), SHC( 0x96e7 ), SHC( 0x9630 ), SHC( 0x957d ), SHC( 0x94cc ), SHC( 0x941d ), SHC( 0x9371 ), + SHC( 0x92c8 ), SHC( 0x9222 ), SHC( 0x917e ), SHC( 0x90dd ), SHC( 0x903f ), SHC( 0x8fa3 ), SHC( 0x8f0b ), SHC( 0x8e75 ), + SHC( 0x8de2 ), SHC( 0x8d51 ), SHC( 0x8cc4 ), SHC( 0x8c39 ), SHC( 0x8bb2 ), SHC( 0x8b2d ), SHC( 0x8aab ), SHC( 0x8a2c ), + SHC( 0x89af ), SHC( 0x8936 ), SHC( 0x88c0 ), SHC( 0x884c ), SHC( 0x87dc ), SHC( 0x876e ), SHC( 0x8704 ), SHC( 0x869c ), + SHC( 0x8638 ), SHC( 0x85d6 ), SHC( 0x8578 ), SHC( 0x851c ), SHC( 0x84c4 ), SHC( 0x846e ), SHC( 0x841c ), SHC( 0x83cd ), + SHC( 0x8381 ), SHC( 0x8337 ), SHC( 0x82f1 ), SHC( 0x82ae ), SHC( 
0x826e ), SHC( 0x8231 ), SHC( 0x81f8 ), SHC( 0x81c1 ), + SHC( 0x818e ), SHC( 0x815d ), SHC( 0x8130 ), SHC( 0x8106 ), SHC( 0x80df ), SHC( 0x80bb ), SHC( 0x809a ), SHC( 0x807d ), + SHC( 0x8062 ), SHC( 0x804b ), SHC( 0x8037 ), SHC( 0x8026 ), SHC( 0x8018 ), SHC( 0x800e ), SHC( 0x8006 ), SHC( 0x8002 ), +}; + +const Word16 ivas_cos_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ] = { + SHC( 0x7fff ), SHC( 0x7ffe ), SHC( 0x7ff8 ), SHC( 0x7ff0 ), SHC( 0x7fe5 ), SHC( 0x7fd6 ), SHC( 0x7fc4 ), SHC( 0x7faf ), + SHC( 0x7f97 ), SHC( 0x7f7c ), SHC( 0x7f5e ), SHC( 0x7f3c ), SHC( 0x7f18 ), SHC( 0x7ef0 ), SHC( 0x7ec5 ), SHC( 0x7e97 ), + SHC( 0x7e66 ), SHC( 0x7e31 ), SHC( 0x7dfa ), SHC( 0x7dc0 ), SHC( 0x7d82 ), SHC( 0x7d41 ), SHC( 0x7cfe ), SHC( 0x7cb7 ), + SHC( 0x7c6d ), SHC( 0x7c20 ), SHC( 0x7bd0 ), SHC( 0x7b7c ), SHC( 0x7b26 ), SHC( 0x7acd ), SHC( 0x7a71 ), SHC( 0x7a12 ), + SHC( 0x79af ), SHC( 0x794a ), SHC( 0x78e2 ), SHC( 0x7876 ), SHC( 0x7808 ), SHC( 0x7797 ), SHC( 0x7723 ), SHC( 0x76ac ), + SHC( 0x7632 ), SHC( 0x75b5 ), SHC( 0x7535 ), SHC( 0x74b2 ), SHC( 0x742d ), SHC( 0x73a4 ), SHC( 0x7319 ), SHC( 0x728b ), + SHC( 0x71fa ), SHC( 0x7166 ), SHC( 0x70cf ), SHC( 0x7036 ), SHC( 0x6f9a ), SHC( 0x6efb ), SHC( 0x6e59 ), SHC( 0x6db5 ), + SHC( 0x6d0e ), SHC( 0x6c64 ), SHC( 0x6bb8 ), SHC( 0x6b08 ), SHC( 0x6a57 ), SHC( 0x69a2 ), SHC( 0x68eb ), SHC( 0x6832 ), + SHC( 0x6776 ), SHC( 0x66b7 ), SHC( 0x65f6 ), SHC( 0x6532 ), SHC( 0x646c ), SHC( 0x63a3 ), SHC( 0x62d8 ), SHC( 0x620a ), + SHC( 0x613a ), SHC( 0x6068 ), SHC( 0x5f93 ), SHC( 0x5ebc ), SHC( 0x5de2 ), SHC( 0x5d07 ), SHC( 0x5c29 ), SHC( 0x5b48 ), + SHC( 0x5a66 ), SHC( 0x5981 ), SHC( 0x589a ), SHC( 0x57b0 ), SHC( 0x56c5 ), SHC( 0x55d7 ), SHC( 0x54e8 ), SHC( 0x53f6 ), + SHC( 0x5302 ), SHC( 0x520c ), SHC( 0x5114 ), SHC( 0x501a ), SHC( 0x4f1e ), SHC( 0x4e21 ), SHC( 0x4d21 ), SHC( 0x4c1f ), + SHC( 0x4b1c ), SHC( 0x4a16 ), SHC( 0x490f ), SHC( 0x4806 ), SHC( 0x46fb ), SHC( 0x45ee ), SHC( 0x44e0 ), SHC( 0x43d0 ), + SHC( 0x42be ), SHC( 0x41ab ), SHC( 
0x4096 ), SHC( 0x3f80 ), SHC( 0x3e68 ), SHC( 0x3d4e ), SHC( 0x3c33 ), SHC( 0x3b16 ), + SHC( 0x39f8 ), SHC( 0x38d8 ), SHC( 0x37b8 ), SHC( 0x3695 ), SHC( 0x3572 ), SHC( 0x344d ), SHC( 0x3326 ), SHC( 0x31ff ), + SHC( 0x30d6 ), SHC( 0x2fac ), SHC( 0x2e81 ), SHC( 0x2d55 ), SHC( 0x2c27 ), SHC( 0x2af9 ), SHC( 0x29c9 ), SHC( 0x2899 ), + SHC( 0x2767 ), SHC( 0x2635 ), SHC( 0x2501 ), SHC( 0x23cd ), SHC( 0x2297 ), SHC( 0x2161 ), SHC( 0x202a ), SHC( 0x1ef2 ), + SHC( 0x1dba ), SHC( 0x1c81 ), SHC( 0x1b47 ), SHC( 0x1a0c ), SHC( 0x18d1 ), SHC( 0x1795 ), SHC( 0x1658 ), SHC( 0x151b ), + SHC( 0x13de ), SHC( 0x12a0 ), SHC( 0x1161 ), SHC( 0x1022 ), SHC( 0x0ee3 ), SHC( 0x0da3 ), SHC( 0x0c63 ), SHC( 0x0b23 ), + SHC( 0x09e2 ), SHC( 0x08a2 ), SHC( 0x0760 ), SHC( 0x061f ), SHC( 0x04de ), SHC( 0x039c ), SHC( 0x025b ), SHC( 0x0119 ), +}; + +const Word16 ivas_sin_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ] = { + SHC( 0xffb0 ), SHC( 0xfd2d ), SHC( 0xfaaa ), SHC( 0xf827 ), SHC( 0xf5a5 ), SHC( 0xf325 ), SHC( 0xf0a5 ), SHC( 0xee27 ), + SHC( 0xebab ), SHC( 0xe931 ), SHC( 0xe6b9 ), SHC( 0xe443 ), SHC( 0xe1d1 ), SHC( 0xdf61 ), SHC( 0xdcf5 ), SHC( 0xda8c ), + SHC( 0xd826 ), SHC( 0xd5c5 ), SHC( 0xd368 ), SHC( 0xd10f ), SHC( 0xceba ), SHC( 0xcc6b ), SHC( 0xca21 ), SHC( 0xc7dc ), + SHC( 0xc59d ), SHC( 0xc363 ), SHC( 0xc12f ), SHC( 0xbf02 ), SHC( 0xbcdb ), SHC( 0xbaba ), SHC( 0xb8a1 ), SHC( 0xb68e ), + SHC( 0xb483 ), SHC( 0xb27f ), SHC( 0xb083 ), SHC( 0xae8f ), SHC( 0xaca2 ), SHC( 0xaabe ), SHC( 0xa8e3 ), SHC( 0xa710 ), + SHC( 0xa545 ), SHC( 0xa384 ), SHC( 0xa1cc ), SHC( 0xa01d ), SHC( 0x9e77 ), SHC( 0x9cdc ), SHC( 0x9b4a ), SHC( 0x99c1 ), + SHC( 0x9843 ), SHC( 0x96d0 ), SHC( 0x9566 ), SHC( 0x9407 ), SHC( 0x92b3 ), SHC( 0x916a ), SHC( 0x902b ), SHC( 0x8ef8 ), + SHC( 0x8dd0 ), SHC( 0x8cb3 ), SHC( 0x8ba1 ), SHC( 0x8a9b ), SHC( 0x89a0 ), SHC( 0x88b1 ), SHC( 0x87ce ), SHC( 0x86f7 ), + SHC( 0x862c ), SHC( 0x856c ), SHC( 0x84b9 ), SHC( 0x8412 ), SHC( 0x8377 ), SHC( 0x82e9 ), SHC( 0x8267 ), SHC( 0x81f1 ), + SHC( 
0x8187 ), SHC( 0x812b ), SHC( 0x80da ), SHC( 0x8097 ), SHC( 0x805f ), SHC( 0x8035 ), SHC( 0x8017 ), SHC( 0x8005 ), +}; + +const Word16 ivas_cos_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ] = { + SHC( 0x7fff ), SHC( 0x7ff8 ), SHC( 0x7fe3 ), SHC( 0x7fc2 ), SHC( 0x7f94 ), SHC( 0x7f5a ), SHC( 0x7f13 ), SHC( 0x7ebf ), + SHC( 0x7e5f ), SHC( 0x7df3 ), SHC( 0x7d7a ), SHC( 0x7cf5 ), SHC( 0x7c63 ), SHC( 0x7bc5 ), SHC( 0x7b1b ), SHC( 0x7a65 ), + SHC( 0x79a3 ), SHC( 0x78d4 ), SHC( 0x77fa ), SHC( 0x7714 ), SHC( 0x7622 ), SHC( 0x7525 ), SHC( 0x741c ), SHC( 0x7307 ), + SHC( 0x71e7 ), SHC( 0x70bc ), SHC( 0x6f86 ), SHC( 0x6e45 ), SHC( 0x6cf9 ), SHC( 0x6ba2 ), SHC( 0x6a40 ), SHC( 0x68d4 ), + SHC( 0x675e ), SHC( 0x65dd ), SHC( 0x6453 ), SHC( 0x62be ), SHC( 0x6120 ), SHC( 0x5f78 ), SHC( 0x5dc7 ), SHC( 0x5c0d ), + SHC( 0x5a49 ), SHC( 0x587d ), SHC( 0x56a7 ), SHC( 0x54ca ), SHC( 0x52e3 ), SHC( 0x50f5 ), SHC( 0x4eff ), SHC( 0x4d01 ), + SHC( 0x4afb ), SHC( 0x48ee ), SHC( 0x46da ), SHC( 0x44be ), SHC( 0x429c ), SHC( 0x4073 ), SHC( 0x3e44 ), SHC( 0x3c0f ), + SHC( 0x39d4 ), SHC( 0x3793 ), SHC( 0x354d ), SHC( 0x3302 ), SHC( 0x30b1 ), SHC( 0x2e5c ), SHC( 0x2c02 ), SHC( 0x29a3 ), + SHC( 0x2741 ), SHC( 0x24db ), SHC( 0x2271 ), SHC( 0x2003 ), SHC( 0x1d93 ), SHC( 0x1b1f ), SHC( 0x18a9 ), SHC( 0x1631 ), + SHC( 0x13b6 ), SHC( 0x1139 ), SHC( 0x0ebb ), SHC( 0x0c3b ), SHC( 0x09ba ), SHC( 0x0738 ), SHC( 0x04b6 ), SHC( 0x0232 ), +}; + +const Word16 ivas_sin_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = { + SHC( 0xff60 ), SHC( 0xfa59 ), SHC( 0xf555 ), SHC( 0xf055 ), SHC( 0xeb5c ), SHC( 0xe66a ), SHC( 0xe183 ), SHC( 0xdca7 ), + SHC( 0xd7da ), SHC( 0xd31c ), SHC( 0xce70 ), SHC( 0xc9d8 ), SHC( 0xc555 ), SHC( 0xc0e9 ), SHC( 0xbc96 ), SHC( 0xb85e ), + SHC( 0xb442 ), SHC( 0xb044 ), SHC( 0xac65 ), SHC( 0xa8a8 ), SHC( 0xa50d ), SHC( 0xa195 ), SHC( 0x9e43 ), SHC( 0x9b18 ), + SHC( 0x9814 ), SHC( 0x953a ), SHC( 0x9289 ), SHC( 0x9004 ), SHC( 0x8dab ), SHC( 0x8b7f ), SHC( 0x8982 ), SHC( 0x87b3 ), + SHC( 0x8613 ), SHC( 0x84a3 ), 
SHC( 0x8365 ), SHC( 0x8257 ), SHC( 0x817b ), SHC( 0x80d1 ), SHC( 0x8059 ), SHC( 0x8014 ), +}; + +const Word16 ivas_cos_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = { + SHC( 0x7fff ), SHC( 0x7fe0 ), SHC( 0x7f8d ), SHC( 0x7f09 ), SHC( 0x7e53 ), SHC( 0x7d6a ), SHC( 0x7c50 ), SHC( 0x7b05 ), + SHC( 0x798a ), SHC( 0x77de ), SHC( 0x7603 ), SHC( 0x73fa ), SHC( 0x71c3 ), SHC( 0x6f5f ), SHC( 0x6cce ), SHC( 0x6a13 ), + SHC( 0x672f ), SHC( 0x6421 ), SHC( 0x60ec ), SHC( 0x5d90 ), SHC( 0x5a10 ), SHC( 0x566c ), SHC( 0x52a6 ), SHC( 0x4ebf ), + SHC( 0x4aba ), SHC( 0x4696 ), SHC( 0x4257 ), SHC( 0x3dfe ), SHC( 0x398c ), SHC( 0x3504 ), SHC( 0x3067 ), SHC( 0x2bb6 ), + SHC( 0x26f4 ), SHC( 0x2223 ), SHC( 0x1d45 ), SHC( 0x185a ), SHC( 0x1367 ), SHC( 0x0e6b ), SHC( 0x096a ), SHC( 0x0465 ), +}; + + +/* clang-format on */ diff --git a/lib_com/ivas_rom_com_fx.h b/lib_com/ivas_rom_com_fx.h new file mode 100644 index 000000000..b696c2c57 --- /dev/null +++ b/lib_com/ivas_rom_com_fx.h @@ -0,0 +1,52 @@ +/****************************************************************************************************** + + (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. 
LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. + + Any dispute, controversy or claim arising under or in relation to providing this software shall be + submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in + accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and + the United Nations Convention on Contracts on the International Sales of Goods. 
+ +*******************************************************************************************************/ + +#ifndef IVAS_ROM_COM_FX_H +#define IVAS_ROM_COM_FX_H + +#include +#include "options.h" +#include "cnst.h" +#include "ivas_cnst.h" +#include "stat_com.h" +#include "ivas_stat_com.h" + +extern const Word16 ivas_sin_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ]; +extern const Word16 ivas_cos_twiddle_480_fx[ IVAS_480_PT_LEN >> 1 ]; +extern const Word16 ivas_sin_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ]; +extern const Word16 ivas_cos_twiddle_320_fx[ IVAS_320_PT_LEN >> 1 ]; +extern const Word16 ivas_sin_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ]; +extern const Word16 ivas_cos_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ]; +extern const Word16 ivas_sin_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ]; +extern const Word16 ivas_cos_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ]; + +#endif \ No newline at end of file diff --git a/lib_com/prot_fx2.h b/lib_com/prot_fx2.h index 37708c0f6..041bc3068 100644 --- a/lib_com/prot_fx2.h +++ b/lib_com/prot_fx2.h @@ -1195,12 +1195,22 @@ void fft_rel_fx( const Word16 n, /* i : vector length */ const Word16 m /* i : log2 of vector length */ ); +void fft_rel_fx32( + Word32 x[], /* i/o: i /output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +); //ifft_rel.c void ifft_rel_fx( Word16 io[], /* i/o: i /output vector */ const Word16 n, /* i : vector length */ const Word16 m /* i : log2 of vector length */ ); +void ifft_rel_fx32( + Word32 io[], /* i/o: i /output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +); //gs_gains_fx.c Word16 gsc_gaindec_fx( /* o : average frequency gain */ Decoder_State *st_fx, /* i/o: decoder state structure */ @@ -3700,6 +3710,15 @@ void DoRTFT128_16fx( void fft3_fx(const Word16[], Word16[], const Word16); void ifft3_fx(const Word16[], Word16[], const Word16); +void fft3_fx_ivas( + const Word32 X[], + Word32 Y[], + const Word16 n ); +void 
ifft3_fx_ivas( + const Word32 X[], + Word32 Y[], + const Word16 n ); + //fft_rel_fx.c void r_fft_fx_lc( const Word16 *phs_tbl, /* i : Table of phase */ diff --git a/lib_com/rom_com.c b/lib_com/rom_com.c index c5fa590b4..2787dffcd 100644 --- a/lib_com/rom_com.c +++ b/lib_com/rom_com.c @@ -37747,6 +37747,137 @@ const Word16 cos_diff_table[512] = }; +#if 0 +const Word16 sincos_t_ext_fx[ 641 ] = { + SHC( 0x0000 ), SHC( 0x00c9 ), SHC( 0x0192 ), SHC( 0x025b ), SHC( 0x0324 ), SHC( 0x03ed ), SHC( 0x04b6 ), SHC( 0x057f ), + SHC( 0x0647 ), SHC( 0x0710 ), SHC( 0x07d9 ), SHC( 0x08a2 ), SHC( 0x096a ), SHC( 0x0a33 ), SHC( 0x0afb ), SHC( 0x0bc3 ), + SHC( 0x0c8b ), SHC( 0x0d53 ), SHC( 0x0e1b ), SHC( 0x0ee3 ), SHC( 0x0fab ), SHC( 0x1072 ), SHC( 0x1139 ), SHC( 0x1201 ), + SHC( 0x12c8 ), SHC( 0x138e ), SHC( 0x1455 ), SHC( 0x151b ), SHC( 0x15e2 ), SHC( 0x16a8 ), SHC( 0x176d ), SHC( 0x1833 ), + SHC( 0x18f8 ), SHC( 0x19bd ), SHC( 0x1a82 ), SHC( 0x1b47 ), SHC( 0x1c0b ), SHC( 0x1ccf ), SHC( 0x1d93 ), SHC( 0x1e56 ), + SHC( 0x1f19 ), SHC( 0x1fdc ), SHC( 0x209f ), SHC( 0x2161 ), SHC( 0x2223 ), SHC( 0x22e5 ), SHC( 0x23a6 ), SHC( 0x2467 ), + SHC( 0x2528 ), SHC( 0x25e8 ), SHC( 0x26a8 ), SHC( 0x2767 ), SHC( 0x2826 ), SHC( 0x28e5 ), SHC( 0x29a3 ), SHC( 0x2a61 ), + SHC( 0x2b1f ), SHC( 0x2bdc ), SHC( 0x2c98 ), SHC( 0x2d55 ), SHC( 0x2e11 ), SHC( 0x2ecc ), SHC( 0x2f87 ), SHC( 0x3041 ), + SHC( 0x30fb ), SHC( 0x31b5 ), SHC( 0x326e ), SHC( 0x3326 ), SHC( 0x33de ), SHC( 0x3496 ), SHC( 0x354d ), SHC( 0x3604 ), + SHC( 0x36ba ), SHC( 0x376f ), SHC( 0x3824 ), SHC( 0x38d8 ), SHC( 0x398c ), SHC( 0x3a40 ), SHC( 0x3af2 ), SHC( 0x3ba5 ), + SHC( 0x3c56 ), SHC( 0x3d07 ), SHC( 0x3db8 ), SHC( 0x3e68 ), SHC( 0x3f17 ), SHC( 0x3fc5 ), SHC( 0x4073 ), SHC( 0x4121 ), + SHC( 0x41ce ), SHC( 0x427a ), SHC( 0x4325 ), SHC( 0x43d0 ), SHC( 0x447a ), SHC( 0x4524 ), SHC( 0x45cd ), SHC( 0x4675 ), + SHC( 0x471c ), SHC( 0x47c3 ), SHC( 0x4869 ), SHC( 0x490f ), SHC( 0x49b4 ), SHC( 0x4a58 ), SHC( 0x4afb ), SHC( 0x4b9e ), + SHC( 0x4c3f ), 
SHC( 0x4ce1 ), SHC( 0x4d81 ), SHC( 0x4e21 ), SHC( 0x4ebf ), SHC( 0x4f5e ), SHC( 0x4ffb ), SHC( 0x5097 ), + SHC( 0x5133 ), SHC( 0x51ce ), SHC( 0x5269 ), SHC( 0x5302 ), SHC( 0x539b ), SHC( 0x5433 ), SHC( 0x54ca ), SHC( 0x5560 ), + SHC( 0x55f5 ), SHC( 0x568a ), SHC( 0x571d ), SHC( 0x57b0 ), SHC( 0x5842 ), SHC( 0x58d4 ), SHC( 0x5964 ), SHC( 0x59f3 ), + SHC( 0x5a82 ), SHC( 0x5b10 ), SHC( 0x5b9d ), SHC( 0x5c29 ), SHC( 0x5cb4 ), SHC( 0x5d3e ), SHC( 0x5dc7 ), SHC( 0x5e50 ), + SHC( 0x5ed7 ), SHC( 0x5f5e ), SHC( 0x5fe3 ), SHC( 0x6068 ), SHC( 0x60ec ), SHC( 0x616f ), SHC( 0x61f1 ), SHC( 0x6271 ), + SHC( 0x62f2 ), SHC( 0x6371 ), SHC( 0x63ef ), SHC( 0x646c ), SHC( 0x64e8 ), SHC( 0x6563 ), SHC( 0x65dd ), SHC( 0x6657 ), + SHC( 0x66cf ), SHC( 0x6746 ), SHC( 0x67bd ), SHC( 0x6832 ), SHC( 0x68a6 ), SHC( 0x6919 ), SHC( 0x698c ), SHC( 0x69fd ), + SHC( 0x6a6d ), SHC( 0x6adc ), SHC( 0x6b4a ), SHC( 0x6bb8 ), SHC( 0x6c24 ), SHC( 0x6c8f ), SHC( 0x6cf9 ), SHC( 0x6d62 ), + SHC( 0x6dca ), SHC( 0x6e30 ), SHC( 0x6e96 ), SHC( 0x6efb ), SHC( 0x6f5f ), SHC( 0x6fc1 ), SHC( 0x7023 ), SHC( 0x7083 ), + SHC( 0x70e2 ), SHC( 0x7141 ), SHC( 0x719e ), SHC( 0x71fa ), SHC( 0x7255 ), SHC( 0x72af ), SHC( 0x7307 ), SHC( 0x735f ), + SHC( 0x73b5 ), SHC( 0x740b ), SHC( 0x745f ), SHC( 0x74b2 ), SHC( 0x7504 ), SHC( 0x7555 ), SHC( 0x75a5 ), SHC( 0x75f4 ), + SHC( 0x7641 ), SHC( 0x768e ), SHC( 0x76d9 ), SHC( 0x7723 ), SHC( 0x776c ), SHC( 0x77b4 ), SHC( 0x77fa ), SHC( 0x7840 ), + SHC( 0x7884 ), SHC( 0x78c7 ), SHC( 0x7909 ), SHC( 0x794a ), SHC( 0x798a ), SHC( 0x79c8 ), SHC( 0x7a05 ), SHC( 0x7a42 ), + SHC( 0x7a7d ), SHC( 0x7ab6 ), SHC( 0x7aef ), SHC( 0x7b26 ), SHC( 0x7b5d ), SHC( 0x7b92 ), SHC( 0x7bc5 ), SHC( 0x7bf8 ), + SHC( 0x7c29 ), SHC( 0x7c5a ), SHC( 0x7c89 ), SHC( 0x7cb7 ), SHC( 0x7ce3 ), SHC( 0x7d0f ), SHC( 0x7d39 ), SHC( 0x7d62 ), + SHC( 0x7d8a ), SHC( 0x7db0 ), SHC( 0x7dd6 ), SHC( 0x7dfa ), SHC( 0x7e1d ), SHC( 0x7e3f ), SHC( 0x7e5f ), SHC( 0x7e7f ), + SHC( 0x7e9d ), SHC( 0x7eba ), SHC( 0x7ed5 ), SHC( 0x7ef0 ), 
SHC( 0x7f09 ), SHC( 0x7f21 ), SHC( 0x7f38 ), SHC( 0x7f4d ), + SHC( 0x7f62 ), SHC( 0x7f75 ), SHC( 0x7f87 ), SHC( 0x7f97 ), SHC( 0x7fa7 ), SHC( 0x7fb5 ), SHC( 0x7fc2 ), SHC( 0x7fce ), + SHC( 0x7fd8 ), SHC( 0x7fe1 ), SHC( 0x7fe9 ), SHC( 0x7ff0 ), SHC( 0x7ff6 ), SHC( 0x7ffa ), SHC( 0x7ffd ), SHC( 0x7fff ), + SHC( 0x7fff ), SHC( 0x7fff ), SHC( 0x7ffd ), SHC( 0x7ffa ), SHC( 0x7ff6 ), SHC( 0x7ff0 ), SHC( 0x7fe9 ), SHC( 0x7fe1 ), + SHC( 0x7fd8 ), SHC( 0x7fce ), SHC( 0x7fc2 ), SHC( 0x7fb5 ), SHC( 0x7fa7 ), SHC( 0x7f97 ), SHC( 0x7f87 ), SHC( 0x7f75 ), + SHC( 0x7f62 ), SHC( 0x7f4d ), SHC( 0x7f38 ), SHC( 0x7f21 ), SHC( 0x7f09 ), SHC( 0x7ef0 ), SHC( 0x7ed5 ), SHC( 0x7eba ), + SHC( 0x7e9d ), SHC( 0x7e7f ), SHC( 0x7e5f ), SHC( 0x7e3f ), SHC( 0x7e1d ), SHC( 0x7dfa ), SHC( 0x7dd6 ), SHC( 0x7db0 ), + SHC( 0x7d8a ), SHC( 0x7d62 ), SHC( 0x7d39 ), SHC( 0x7d0f ), SHC( 0x7ce3 ), SHC( 0x7cb7 ), SHC( 0x7c89 ), SHC( 0x7c5a ), + SHC( 0x7c29 ), SHC( 0x7bf8 ), SHC( 0x7bc5 ), SHC( 0x7b92 ), SHC( 0x7b5d ), SHC( 0x7b26 ), SHC( 0x7aef ), SHC( 0x7ab6 ), + SHC( 0x7a7d ), SHC( 0x7a42 ), SHC( 0x7a05 ), SHC( 0x79c8 ), SHC( 0x798a ), SHC( 0x794a ), SHC( 0x7909 ), SHC( 0x78c7 ), + SHC( 0x7884 ), SHC( 0x7840 ), SHC( 0x77fa ), SHC( 0x77b4 ), SHC( 0x776c ), SHC( 0x7723 ), SHC( 0x76d9 ), SHC( 0x768e ), + SHC( 0x7641 ), SHC( 0x75f4 ), SHC( 0x75a5 ), SHC( 0x7555 ), SHC( 0x7504 ), SHC( 0x74b2 ), SHC( 0x745f ), SHC( 0x740b ), + SHC( 0x73b5 ), SHC( 0x735f ), SHC( 0x7307 ), SHC( 0x72af ), SHC( 0x7255 ), SHC( 0x71fa ), SHC( 0x719e ), SHC( 0x7141 ), + SHC( 0x70e2 ), SHC( 0x7083 ), SHC( 0x7023 ), SHC( 0x6fc1 ), SHC( 0x6f5f ), SHC( 0x6efb ), SHC( 0x6e96 ), SHC( 0x6e30 ), + SHC( 0x6dca ), SHC( 0x6d62 ), SHC( 0x6cf9 ), SHC( 0x6c8f ), SHC( 0x6c24 ), SHC( 0x6bb8 ), SHC( 0x6b4a ), SHC( 0x6adc ), + SHC( 0x6a6d ), SHC( 0x69fd ), SHC( 0x698c ), SHC( 0x6919 ), SHC( 0x68a6 ), SHC( 0x6832 ), SHC( 0x67bd ), SHC( 0x6746 ), + SHC( 0x66cf ), SHC( 0x6657 ), SHC( 0x65dd ), SHC( 0x6563 ), SHC( 0x64e8 ), SHC( 0x646c ), SHC( 0x63ef ), 
SHC( 0x6371 ), + SHC( 0x62f2 ), SHC( 0x6271 ), SHC( 0x61f1 ), SHC( 0x616f ), SHC( 0x60ec ), SHC( 0x6068 ), SHC( 0x5fe3 ), SHC( 0x5f5e ), + SHC( 0x5ed7 ), SHC( 0x5e50 ), SHC( 0x5dc7 ), SHC( 0x5d3e ), SHC( 0x5cb4 ), SHC( 0x5c29 ), SHC( 0x5b9d ), SHC( 0x5b10 ), + SHC( 0x5a82 ), SHC( 0x59f3 ), SHC( 0x5964 ), SHC( 0x58d4 ), SHC( 0x5842 ), SHC( 0x57b0 ), SHC( 0x571d ), SHC( 0x568a ), + SHC( 0x55f5 ), SHC( 0x5560 ), SHC( 0x54ca ), SHC( 0x5433 ), SHC( 0x539b ), SHC( 0x5302 ), SHC( 0x5269 ), SHC( 0x51ce ), + SHC( 0x5133 ), SHC( 0x5097 ), SHC( 0x4ffb ), SHC( 0x4f5e ), SHC( 0x4ebf ), SHC( 0x4e21 ), SHC( 0x4d81 ), SHC( 0x4ce1 ), + SHC( 0x4c3f ), SHC( 0x4b9e ), SHC( 0x4afb ), SHC( 0x4a58 ), SHC( 0x49b4 ), SHC( 0x490f ), SHC( 0x4869 ), SHC( 0x47c3 ), + SHC( 0x471c ), SHC( 0x4675 ), SHC( 0x45cd ), SHC( 0x4524 ), SHC( 0x447a ), SHC( 0x43d0 ), SHC( 0x4325 ), SHC( 0x427a ), + SHC( 0x41ce ), SHC( 0x4121 ), SHC( 0x4073 ), SHC( 0x3fc5 ), SHC( 0x3f17 ), SHC( 0x3e68 ), SHC( 0x3db8 ), SHC( 0x3d07 ), + SHC( 0x3c56 ), SHC( 0x3ba5 ), SHC( 0x3af2 ), SHC( 0x3a40 ), SHC( 0x398c ), SHC( 0x38d8 ), SHC( 0x3824 ), SHC( 0x376f ), + SHC( 0x36ba ), SHC( 0x3604 ), SHC( 0x354d ), SHC( 0x3496 ), SHC( 0x33de ), SHC( 0x3326 ), SHC( 0x326e ), SHC( 0x31b5 ), + SHC( 0x30fb ), SHC( 0x3041 ), SHC( 0x2f87 ), SHC( 0x2ecc ), SHC( 0x2e11 ), SHC( 0x2d55 ), SHC( 0x2c98 ), SHC( 0x2bdc ), + SHC( 0x2b1f ), SHC( 0x2a61 ), SHC( 0x29a3 ), SHC( 0x28e5 ), SHC( 0x2826 ), SHC( 0x2767 ), SHC( 0x26a8 ), SHC( 0x25e8 ), + SHC( 0x2528 ), SHC( 0x2467 ), SHC( 0x23a6 ), SHC( 0x22e5 ), SHC( 0x2223 ), SHC( 0x2161 ), SHC( 0x209f ), SHC( 0x1fdc ), + SHC( 0x1f19 ), SHC( 0x1e56 ), SHC( 0x1d93 ), SHC( 0x1ccf ), SHC( 0x1c0b ), SHC( 0x1b47 ), SHC( 0x1a82 ), SHC( 0x19bd ), + SHC( 0x18f8 ), SHC( 0x1833 ), SHC( 0x176d ), SHC( 0x16a8 ), SHC( 0x15e2 ), SHC( 0x151b ), SHC( 0x1455 ), SHC( 0x138e ), + SHC( 0x12c8 ), SHC( 0x1201 ), SHC( 0x1139 ), SHC( 0x1072 ), SHC( 0x0fab ), SHC( 0x0ee3 ), SHC( 0x0e1b ), SHC( 0x0d53 ), + SHC( 0x0c8b ), SHC( 0x0bc3 ), 
SHC( 0x0afb ), SHC( 0x0a33 ), SHC( 0x096a ), SHC( 0x08a2 ), SHC( 0x07d9 ), SHC( 0x0710 ), + SHC( 0x0647 ), SHC( 0x057f ), SHC( 0x04b6 ), SHC( 0x03ed ), SHC( 0x0324 ), SHC( 0x025b ), SHC( 0x0192 ), SHC( 0x00c9 ), + SHC( 0x0000 ), SHC( 0xff37 ), SHC( 0xfe6e ), SHC( 0xfda5 ), SHC( 0xfcdc ), SHC( 0xfc13 ), SHC( 0xfb4a ), SHC( 0xfa81 ), + SHC( 0xf9b9 ), SHC( 0xf8f0 ), SHC( 0xf827 ), SHC( 0xf75e ), SHC( 0xf696 ), SHC( 0xf5cd ), SHC( 0xf505 ), SHC( 0xf43d ), + SHC( 0xf375 ), SHC( 0xf2ad ), SHC( 0xf1e5 ), SHC( 0xf11d ), SHC( 0xf055 ), SHC( 0xef8e ), SHC( 0xeec7 ), SHC( 0xedff ), + SHC( 0xed38 ), SHC( 0xec72 ), SHC( 0xebab ), SHC( 0xeae5 ), SHC( 0xea1e ), SHC( 0xe958 ), SHC( 0xe893 ), SHC( 0xe7cd ), + SHC( 0xe708 ), SHC( 0xe643 ), SHC( 0xe57e ), SHC( 0xe4b9 ), SHC( 0xe3f5 ), SHC( 0xe331 ), SHC( 0xe26d ), SHC( 0xe1aa ), + SHC( 0xe0e7 ), SHC( 0xe024 ), SHC( 0xdf61 ), SHC( 0xde9f ), SHC( 0xdddd ), SHC( 0xdd1b ), SHC( 0xdc5a ), SHC( 0xdb99 ), + SHC( 0xdad8 ), SHC( 0xda18 ), SHC( 0xd958 ), SHC( 0xd899 ), SHC( 0xd7da ), SHC( 0xd71b ), SHC( 0xd65d ), SHC( 0xd59f ), + SHC( 0xd4e1 ), SHC( 0xd424 ), SHC( 0xd368 ), SHC( 0xd2ab ), SHC( 0xd1ef ), SHC( 0xd134 ), SHC( 0xd079 ), SHC( 0xcfbf ), + SHC( 0xcf05 ), SHC( 0xce4b ), SHC( 0xcd92 ), SHC( 0xccda ), SHC( 0xcc22 ), SHC( 0xcb6a ), SHC( 0xcab3 ), SHC( 0xc9fc ), + SHC( 0xc946 ), SHC( 0xc891 ), SHC( 0xc7dc ), SHC( 0xc728 ), SHC( 0xc674 ), SHC( 0xc5c0 ), SHC( 0xc50e ), SHC( 0xc45b ), + SHC( 0xc3aa ), SHC( 0xc2f9 ), SHC( 0xc248 ), SHC( 0xc198 ), SHC( 0xc0e9 ), SHC( 0xc03b ), SHC( 0xbf8d ), SHC( 0xbedf ), + SHC( 0xbe32 ), SHC( 0xbd86 ), SHC( 0xbcdb ), SHC( 0xbc30 ), SHC( 0xbb86 ), SHC( 0xbadc ), SHC( 0xba33 ), SHC( 0xb98b ), + SHC( 0xb8e4 ), SHC( 0xb83d ), SHC( 0xb797 ), SHC( 0xb6f1 ), SHC( 0xb64c ), SHC( 0xb5a8 ), SHC( 0xb505 ), SHC( 0xb462 ), + SHC( 0xb3c1 ), SHC( 0xb31f ), SHC( 0xb27f ), SHC( 0xb1df ), SHC( 0xb141 ), SHC( 0xb0a2 ), SHC( 0xb005 ), SHC( 0xaf69 ), + SHC( 0xaecd ), SHC( 0xae32 ), SHC( 0xad97 ), SHC( 0xacfe ), SHC( 0xac65 ), 
SHC( 0xabcd ), SHC( 0xab36 ), SHC( 0xaaa0 ), + SHC( 0xaa0b ), SHC( 0xa976 ), SHC( 0xa8e3 ), SHC( 0xa850 ), SHC( 0xa7be ), SHC( 0xa72c ), SHC( 0xa69c ), SHC( 0xa60d ), + SHC( 0xa57e ), +}; +#endif + +const Word16 fftSineTab640_fx[ 321 ] = { + SHC( 0x0000 ), SHC( 0x0141 ), SHC( 0x0283 ), SHC( 0x03c4 ), SHC( 0x0506 ), SHC( 0x0647 ), SHC( 0x0789 ), SHC( 0x08ca ), + SHC( 0x0a0a ), SHC( 0x0b4b ), SHC( 0x0c8b ), SHC( 0x0dcb ), SHC( 0x0f0b ), SHC( 0x104a ), SHC( 0x1189 ), SHC( 0x12c8 ), + SHC( 0x1406 ), SHC( 0x1543 ), SHC( 0x1680 ), SHC( 0x17bc ), SHC( 0x18f8 ), SHC( 0x1a33 ), SHC( 0x1b6e ), SHC( 0x1ca8 ), + SHC( 0x1de1 ), SHC( 0x1f19 ), SHC( 0x2051 ), SHC( 0x2188 ), SHC( 0x22be ), SHC( 0x23f3 ), SHC( 0x2528 ), SHC( 0x265b ), + SHC( 0x278d ), SHC( 0x28bf ), SHC( 0x29ef ), SHC( 0x2b1f ), SHC( 0x2c4d ), SHC( 0x2d7a ), SHC( 0x2ea6 ), SHC( 0x2fd1 ), + SHC( 0x30fb ), SHC( 0x3224 ), SHC( 0x334b ), SHC( 0x3471 ), SHC( 0x3596 ), SHC( 0x36ba ), SHC( 0x37dc ), SHC( 0x38fd ), + SHC( 0x3a1c ), SHC( 0x3b3a ), SHC( 0x3c56 ), SHC( 0x3d71 ), SHC( 0x3e8b ), SHC( 0x3fa3 ), SHC( 0x40b9 ), SHC( 0x41ce ), + SHC( 0x42e1 ), SHC( 0x43f2 ), SHC( 0x4502 ), SHC( 0x4610 ), SHC( 0x471c ), SHC( 0x4827 ), SHC( 0x4930 ), SHC( 0x4a37 ), + SHC( 0x4b3c ), SHC( 0x4c3f ), SHC( 0x4d41 ), SHC( 0x4e40 ), SHC( 0x4f3e ), SHC( 0x503a ), SHC( 0x5133 ), SHC( 0x522b ), + SHC( 0x5321 ), SHC( 0x5414 ), SHC( 0x5506 ), SHC( 0x55f5 ), SHC( 0x56e2 ), SHC( 0x57ce ), SHC( 0x58b7 ), SHC( 0x599d ), + SHC( 0x5a82 ), SHC( 0x5b64 ), SHC( 0x5c44 ), SHC( 0x5d22 ), SHC( 0x5dfe ), SHC( 0x5ed7 ), SHC( 0x5fae ), SHC( 0x6082 ), + SHC( 0x6154 ), SHC( 0x6224 ), SHC( 0x62f2 ), SHC( 0x63bc ), SHC( 0x6485 ), SHC( 0x654b ), SHC( 0x660e ), SHC( 0x66cf ), + SHC( 0x678d ), SHC( 0x6849 ), SHC( 0x6902 ), SHC( 0x69b9 ), SHC( 0x6a6d ), SHC( 0x6b1f ), SHC( 0x6bcd ), SHC( 0x6c79 ), + SHC( 0x6d23 ), SHC( 0x6dca ), SHC( 0x6e6e ), SHC( 0x6f0f ), SHC( 0x6fad ), SHC( 0x7049 ), SHC( 0x70e2 ), SHC( 0x7179 ), + SHC( 0x720c ), SHC( 0x729d ), SHC( 0x732a ), SHC( 
0x73b5 ), SHC( 0x743e ), SHC( 0x74c3 ), SHC( 0x7545 ), SHC( 0x75c5 ), + SHC( 0x7641 ), SHC( 0x76bb ), SHC( 0x7732 ), SHC( 0x77a5 ), SHC( 0x7816 ), SHC( 0x7884 ), SHC( 0x78ef ), SHC( 0x7957 ), + SHC( 0x79bc ), SHC( 0x7a1e ), SHC( 0x7a7d ), SHC( 0x7ad8 ), SHC( 0x7b31 ), SHC( 0x7b87 ), SHC( 0x7bda ), SHC( 0x7c29 ), + SHC( 0x7c76 ), SHC( 0x7cc0 ), SHC( 0x7d06 ), SHC( 0x7d4a ), SHC( 0x7d8a ), SHC( 0x7dc7 ), SHC( 0x7e01 ), SHC( 0x7e38 ), + SHC( 0x7e6c ), SHC( 0x7e9d ), SHC( 0x7eca ), SHC( 0x7ef5 ), SHC( 0x7f1c ), SHC( 0x7f41 ), SHC( 0x7f62 ), SHC( 0x7f80 ), + SHC( 0x7f9a ), SHC( 0x7fb2 ), SHC( 0x7fc7 ), SHC( 0x7fd8 ), SHC( 0x7fe6 ), SHC( 0x7ff1 ), SHC( 0x7ff9 ), SHC( 0x7ffe ), + SHC( 0x7fff ), SHC( 0x7ffe ), SHC( 0x7ff9 ), SHC( 0x7ff1 ), SHC( 0x7fe6 ), SHC( 0x7fd8 ), SHC( 0x7fc7 ), SHC( 0x7fb2 ), + SHC( 0x7f9a ), SHC( 0x7f80 ), SHC( 0x7f62 ), SHC( 0x7f41 ), SHC( 0x7f1c ), SHC( 0x7ef5 ), SHC( 0x7eca ), SHC( 0x7e9d ), + SHC( 0x7e6c ), SHC( 0x7e38 ), SHC( 0x7e01 ), SHC( 0x7dc7 ), SHC( 0x7d8a ), SHC( 0x7d4a ), SHC( 0x7d06 ), SHC( 0x7cc0 ), + SHC( 0x7c76 ), SHC( 0x7c29 ), SHC( 0x7bda ), SHC( 0x7b87 ), SHC( 0x7b31 ), SHC( 0x7ad8 ), SHC( 0x7a7d ), SHC( 0x7a1e ), + SHC( 0x79bc ), SHC( 0x7957 ), SHC( 0x78ef ), SHC( 0x7884 ), SHC( 0x7816 ), SHC( 0x77a5 ), SHC( 0x7732 ), SHC( 0x76bb ), + SHC( 0x7641 ), SHC( 0x75c5 ), SHC( 0x7545 ), SHC( 0x74c3 ), SHC( 0x743e ), SHC( 0x73b5 ), SHC( 0x732a ), SHC( 0x729d ), + SHC( 0x720c ), SHC( 0x7179 ), SHC( 0x70e2 ), SHC( 0x7049 ), SHC( 0x6fad ), SHC( 0x6f0f ), SHC( 0x6e6e ), SHC( 0x6dca ), + SHC( 0x6d23 ), SHC( 0x6c79 ), SHC( 0x6bcd ), SHC( 0x6b1f ), SHC( 0x6a6d ), SHC( 0x69b9 ), SHC( 0x6902 ), SHC( 0x6849 ), + SHC( 0x678d ), SHC( 0x66cf ), SHC( 0x660e ), SHC( 0x654b ), SHC( 0x6485 ), SHC( 0x63bc ), SHC( 0x62f2 ), SHC( 0x6224 ), + SHC( 0x6154 ), SHC( 0x6082 ), SHC( 0x5fae ), SHC( 0x5ed7 ), SHC( 0x5dfe ), SHC( 0x5d22 ), SHC( 0x5c44 ), SHC( 0x5b64 ), + SHC( 0x5a82 ), SHC( 0x599d ), SHC( 0x58b7 ), SHC( 0x57ce ), SHC( 0x56e2 ), SHC( 0x55f5 ), SHC( 
0x5506 ), SHC( 0x5414 ), + SHC( 0x5321 ), SHC( 0x522b ), SHC( 0x5133 ), SHC( 0x503a ), SHC( 0x4f3e ), SHC( 0x4e40 ), SHC( 0x4d41 ), SHC( 0x4c3f ), + SHC( 0x4b3c ), SHC( 0x4a37 ), SHC( 0x4930 ), SHC( 0x4827 ), SHC( 0x471c ), SHC( 0x4610 ), SHC( 0x4502 ), SHC( 0x43f2 ), + SHC( 0x42e1 ), SHC( 0x41ce ), SHC( 0x40b9 ), SHC( 0x3fa3 ), SHC( 0x3e8b ), SHC( 0x3d71 ), SHC( 0x3c56 ), SHC( 0x3b3a ), + SHC( 0x3a1c ), SHC( 0x38fd ), SHC( 0x37dc ), SHC( 0x36ba ), SHC( 0x3596 ), SHC( 0x3471 ), SHC( 0x334b ), SHC( 0x3224 ), + SHC( 0x30fb ), SHC( 0x2fd1 ), SHC( 0x2ea6 ), SHC( 0x2d7a ), SHC( 0x2c4d ), SHC( 0x2b1f ), SHC( 0x29ef ), SHC( 0x28bf ), + SHC( 0x278d ), SHC( 0x265b ), SHC( 0x2528 ), SHC( 0x23f3 ), SHC( 0x22be ), SHC( 0x2188 ), SHC( 0x2051 ), SHC( 0x1f19 ), + SHC( 0x1de1 ), SHC( 0x1ca8 ), SHC( 0x1b6e ), SHC( 0x1a33 ), SHC( 0x18f8 ), SHC( 0x17bc ), SHC( 0x1680 ), SHC( 0x1543 ), + SHC( 0x1406 ), SHC( 0x12c8 ), SHC( 0x1189 ), SHC( 0x104a ), SHC( 0x0f0b ), SHC( 0x0dcb ), SHC( 0x0c8b ), SHC( 0x0b4b ), + SHC( 0x0a0a ), SHC( 0x08ca ), SHC( 0x0789 ), SHC( 0x0647 ), SHC( 0x0506 ), SHC( 0x03c4 ), SHC( 0x0283 ), SHC( 0x0141 ), + SHC( 0x0000 ), +}; + + const Word32 bwMode2fs[4] = { 8000, 16000, 32000, 48000 }; const Word16 swb_lsp_prev_interp_init[10] = { 32767, 31164, 26510, 19261, 10126, 0, -10126, -19261, -26510, -31164 }; diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 5cf66078c..a00dcd342 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1647,6 +1647,7 @@ extern const float *const cdk_37bits_flt[]; extern const float *const cdk_37bits_ivas[]; extern Word16 const * const cdk_37bits[]; extern const float fftSineTab640[321]; +extern const Word16 fftSineTab640_fx[321]; extern const float olapWinAna512[512]; extern const float olapWinAna640[640]; diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 7c749975f..63532eaab 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -873,5 +873,21 @@ const Word16 sin_twiddle_table_16_8_2[ 16 ] = { SHC( 0x0000 
), SHC( 0xcf05 ), }; +#if 0 +#define Flt2Word16(a) SHC((Word16) (a * 32768.0)) +const Word16 w_edct2_64_fx[80] = +{ + SHC(0x7FFF), Flt2Word16(0.00000000f), Flt2Word16(0.70710678f), Flt2Word16(0.70710678f), Flt2Word16(0.92387953f), Flt2Word16(0.38268343f), Flt2Word16(0.38268343f), Flt2Word16(0.92387953f), + Flt2Word16(0.98078528f), Flt2Word16(0.19509032f), Flt2Word16(0.55557023f), Flt2Word16(0.83146961f), Flt2Word16(0.83146961f), Flt2Word16(0.55557023f), Flt2Word16(0.19509032f), Flt2Word16(0.98078528f), + Flt2Word16(0.70710678f), Flt2Word16(0.49984940f), Flt2Word16(0.49939772f), Flt2Word16(0.49864522f), Flt2Word16(0.49759236f), Flt2Word16(0.49623976f), Flt2Word16(0.49458825f), Flt2Word16(0.49263882f), + Flt2Word16(0.49039264f), Flt2Word16(0.48785106f), Flt2Word16(0.48501562f), Flt2Word16(0.48188803f), Flt2Word16(0.47847016f), Flt2Word16(0.47476409f), Flt2Word16(0.47077203f), Flt2Word16(0.46649639f), + Flt2Word16(0.46193976f), Flt2Word16(0.45710487f), Flt2Word16(0.45199464f), Flt2Word16(0.44661215f), Flt2Word16(0.44096063f), Flt2Word16(0.43504349f), Flt2Word16(0.42886430f), Flt2Word16(0.42242678f), + Flt2Word16(0.41573480f), Flt2Word16(0.40879240f), Flt2Word16(0.40160376f), Flt2Word16(0.39417321f), Flt2Word16(0.38650522f), Flt2Word16(0.37860442f), Flt2Word16(0.37047556f), Flt2Word16(0.36212354f), + Flt2Word16(0.35355339f), Flt2Word16(0.34477027f), Flt2Word16(0.33577947f), Flt2Word16(0.32658642f), Flt2Word16(0.31719664f), Flt2Word16(0.30761579f), Flt2Word16(0.29784965f), Flt2Word16(0.28790409f), + Flt2Word16(0.27778511f), Flt2Word16(0.26749880f), Flt2Word16(0.25705137f), Flt2Word16(0.24644909f), Flt2Word16(0.23569836f), Flt2Word16(0.22480566f), Flt2Word16(0.21377754f), Flt2Word16(0.20262065f), + Flt2Word16(0.19134171f), Flt2Word16(0.17994751f), Flt2Word16(0.16844492f), Flt2Word16(0.15684087f), Flt2Word16(0.14514233f), Flt2Word16(0.13335637f), Flt2Word16(0.12149008f), Flt2Word16(0.10955062f), + Flt2Word16(0.09754516f), Flt2Word16(0.08548094f), Flt2Word16(0.07336523f), 
Flt2Word16(0.06120533f), Flt2Word16(0.04900857f), Flt2Word16(0.03678228f), Flt2Word16(0.02453383f), Flt2Word16(0.01227061f) +}; +#endif diff --git a/lib_util/test_fft.c b/lib_util/test_fft.c index 264ea41b4..3a8d97f8a 100644 --- a/lib_util/test_fft.c +++ b/lib_util/test_fft.c @@ -34,23 +34,34 @@ #include #include #include +#include #include #include -#include "options.h" -#include "test_fft.h" +#include "assert.h" +#include "basop32.h" typedef int Word32; typedef short Word16; typedef short int16_t; -#define ALLOWED_DEVIATION (0.005) -#define Q31 (2147483647.0f) +#define ALLOWED_DEVIATION ( 0.005 ) +#define Q31 ( 2147483647.0f ) + +#ifndef min +#define min( a, b ) ( ( ( a ) < ( b ) ) ? ( a ) : ( b ) ) +#endif + +#ifndef max +#define max( a, b ) ( ( ( a ) > ( b ) ) ? ( a ) : ( b ) ) +#endif + +extern const float fftSineTab640[321]; void DoFFT_fx( Word32 *re2, Word32 *im2, - const int16_t length ); - + const Word16 length ); + void DoFFT( float *re2, float *im2, @@ -64,152 +75,758 @@ void fft_fx( ); void fft( - float *re, /* i/o: real part */ - float *im, /* i/o: imag part */ + float *re, /* i/o: real part */ + float *im, /* i/o: imag part */ const int16_t length, /* i : length of fft */ const int16_t s /* i : sign */ ); -#define print_output(fRe, fIm, iRe, iIm) \ - printf("Real = %.2f %d Imag = %.2f %d\n", fRe, iRe, fIm, iIm) +void fft_rel( + float x[], /* i/o: input/output vector */ + const int16_t n, /* i : vector length */ + const int16_t m /* i : log2 of vector length */ +); + +void ifft_rel( + float x[], /* i/o: input/output vector */ + const int16_t n, /* i : vector length */ + const int16_t m /* i : log2 of vector length */ +); + +void fft_rel_fx( + Word16 x[], /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +); + +void fft3_fx_ivas( + const Word32 X[], + Word32 Y[], + const Word16 n ); + +void fft3( + const float X[], + float Y[], + const int16_t n ); + +void ifft3_fx_ivas( + const 
Word32 X[], + Word32 Y[], + const Word16 n ); + +void ifft3( + const float X[], /* i : input frame */ + float Y[], /* o : iDFT of input frame */ + const int16_t n /* i : block length (must be radix 3) */ +); + +extern const Word16 fftSineTab640_fx[321]; + +#if 0 +void fft_rel_fx32( + Word32 x[], /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +); +#else +void fft_rel_fx32( + Word32 *x, /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +); +void ifft_rel_fx32( + Word32 x[], /* i/o: input/output vector */ + const Word16 n, /* i : vector length */ + const Word16 m /* i : log2 of vector length */ +); +#endif + +void fft_cldfb( + float *data, /* i/o: input/output vector */ + const int16_t size /* size of fft operation */ +); + +void fft_cldfb_fx( + Word32 *data, /* i/o: input/output vector */ + const int16_t size /* size of fft operation */ +); + +int16_t RFFTN( + float *afftData, + const float *trigPtr, + const int16_t len, + const int16_t isign ); + +int16_t RFFTN_fx( + Word32 *data, + const Word16 *sine_table, + const Word16 len, + const Word16 sign ); + +void BASOP_cfft( + float *re, /* i/o: real part */ + float *im, /* i/o: imag part */ + Word16 s, /* i : stride real and imag part */ + Word16 *scale /* i : scalefactor */ +); + +void BASOP_cfft_fx( + Word32 *re, /* i/o: real part */ + Word32 *im, /* i/o: imag part */ + Word16 s, /* i : stride real and imag part */ + Word16 *scale /* i : scalefactor */ +); + +#define print_output( fRe, fIm, iRe, iIm ) \ + printf( "Real = %.2f %d Imag = %.2f %d\n", fRe, iRe, fIm, iIm ) + +#define TYPE_DoFFT ( 0 ) +#define TYPE_fft ( 1 ) +#define TYPE_fft_rel ( 2 ) +#define TYPE_fft_rel32 ( 3 ) +#define TYPE_ifft_rel32 ( 4 ) +#define TYPE_fft_cldfb ( 5 ) +#define TYPE_fft_RFFTN ( 6 ) +#define TYPE_fft_fft3 ( 7 ) +#define TYPE_fft_ifft3 ( 8 ) +#define TYPE_BASOP_cfft ( 9 ) -float 
test_fixed_fft(Word16 N, Word32 isDoFFT) +Word16 find_guarded_bits_fx( Word32 n ) { - Word32 *iRe = malloc(N * sizeof(iRe[0])); - Word32 *iIm = malloc(N * sizeof(iRe[0])); - float *fRe = malloc(N * sizeof(fRe[0])); - float *fIm = malloc(N * sizeof(fRe[0])); - float max_deviation = 0.0f; - Word32 max_val_in = 0; - Word32 max_val_out = 0; - - if ((iRe == NULL) || (iIm == NULL) || (fRe == NULL) || (fIm == NULL)) - { - printf("!!!!Malloc failed!!!!"); - exit(1); - } - - do { - srand(N); - for(int i = 0; i < N; i++) + return n <= 1 ? 0 : n <= 2 ? 1 + : n <= 4 ? 2 + : n <= 8 ? 3 + : n <= 16 ? 4 + : n <= 32 ? 5 + : n <= 64 ? 6 + : n <= 128 ? 7 + : n <= 256 ? 8 + : n <= 512 ? 9 + : n <= 1024 ? 10 + : 11; +} + +Word16 L_norm_arr( Word32 *arr, int size ) +{ + Word16 q = 31; + for ( int i = 0; i < size; i++ ) + if ( arr[i] != 0 ) + { + q = min( q, norm_l( arr[i] ) ); + } + return q; +} + +Word16 norm_arr( Word16 *arr, int size ) +{ + Word16 q = 15; + for ( int i = 0; i < size; i++ ) + if ( arr[i] != 0 ) + { + q = min( q, norm_s( arr[i] ) ); + } + return q; +} + + +void populate_input_interleave( Word32 *in32, float *fIn, Word16 N ) +{ + Word32 max_val_in = 0; + Word16 r_shift, l_shift; + do + { + srand( N ); + for ( int i = 0; i < N; i++ ) + { + in32[2 * i + 0] = rand(); + in32[2 * i + 1] = rand(); + if ( max_val_in < abs( in32[2 * i + 0] ) ) + max_val_in = abs( in32[2 * i + 0] ); + if ( max_val_in < abs( in32[2 * i + 1] ) ) + max_val_in = abs( in32[2 * i + 1] ); + fIn[2 * i + 0] = (float) in32[2 * i + 0]; + fIn[2 * i + 1] = (float) in32[2 * i + 1]; + } + } while ( max_val_in == 0 ); + r_shift = find_guarded_bits_fx( 2 * N ); + l_shift = L_norm_arr( in32, 2 * N ); + if ( l_shift > r_shift ) + { + Word16 shift_val = l_shift - r_shift; + for ( int i = 0; i < N; i++ ) + { + in32[2 * i + 0] = in32[2 * i + 0] << shift_val; + fIn[2 * i + 0] = (float) in32[2 * i + 0]; + in32[2 * i + 1] = in32[2 * i + 1] << shift_val; + fIn[2 * i + 1] = (float) in32[2 * i + 1]; + } + } + if ( l_shift 
< r_shift ) { - iRe[i] = rand(); - iIm[i] = rand(); - if (max_val_in < abs(iRe[i])) - max_val_in = abs(iRe[i]); - if (max_val_in < abs(iIm[i])) - max_val_in = abs(iIm[i]); - fRe[i] = (float)iRe[i]; - fIm[i] = (float)iIm[i]; + Word16 shift_val = r_shift - l_shift; + for ( int i = 0; i < N; i++ ) + { + in32[2 * i + 0] = in32[2 * i + 0] >> shift_val; + fIn[2 * i + 0] = (float) in32[2 * i + 0]; + in32[2 * i + 1] = in32[2 * i + 1] >> shift_val; + fIn[2 * i + 1] = (float) in32[2 * i + 1]; + } } - } while(max_val_in == 0); - - if (isDoFFT) - { - DoFFT(fRe, fIm, N); - DoFFT_fx(iRe, iIm, N); - } - else - { - fft(fRe, fIm, N, 1); - fft_fx(iRe, iIm, N, 1); - } - - for(int i = 0; i < N; i++) - { - float OutRe = fRe[i]; - float OutIm = fIm[i]; - float dev_val_real = (float)(fabs(OutRe - (float)iRe[i])); - float dev_val_imag = (float)(fabs(OutIm - (float)iIm[i])); - if (max_val_out < iRe[i]) - max_val_out = iRe[i]; - if (max_val_out < iIm[i]) - max_val_out = iIm[i]; - //print_output(OutRe, OutIm, iRe[i], iIm[i]); - if (dev_val_real > max_deviation) - max_deviation = dev_val_real; - if (dev_val_imag > max_deviation) - max_deviation = dev_val_imag; - } - free( iRe ); - free( iIm ); - free( fRe ); - free( fIm ); - return (( max_deviation / max_val_out) * 100); } -void test_DoFFT(Word16 *fft_lengths, Word16 num_lengths) +void populate_input_interleave_16( Word16 *in16, float *fIn, Word16 N ) { - Word32 num_tests = 0, num_passed = 0, num_failed = 0; - printf("\033[0;33m"); - printf("============== Starting DoFFT tests ================\n"); - for ( int i = num_lengths - 1; i >=0 ; i-- ) - { - float max_deviation = test_fixed_fft( fft_lengths[i], 1); - num_tests++; - if (max_deviation < ALLOWED_DEVIATION) + Word16 max_val_in = 0; + Word16 r_shift, l_shift; + do + { + srand( N ); + for ( int i = 0; i < N; i++ ) + { + in16[2 * i + 0] = (Word16) rand(); + in16[2 * i + 1] = (Word16) rand(); + if ( max_val_in < abs( in16[2 * i + 0] ) ) + max_val_in = (Word16) abs( in16[2 * i + 0] ); + if ( 
max_val_in < abs( in16[2 * i + 1] ) ) + max_val_in = (Word16) abs( in16[2 * i + 1] ); + fIn[2 * i + 0] = (float) in16[2 * i + 0]; + fIn[2 * i + 1] = (float) in16[2 * i + 1]; + } + } while ( max_val_in == 0 ); + + r_shift = find_guarded_bits_fx( 2 * N ); + l_shift = norm_arr( in16, 2 * N ); + if ( l_shift > r_shift ) { - printf("\033[0;32m"); - printf("[PASSED] FFT test of length %d\n", fft_lengths[i]); - num_passed++; + Word16 shift_val = l_shift - r_shift; + for ( int i = 0; i < N; i++ ) + { + in16[2 * i + 0] = in16[2 * i + 0] << shift_val; + fIn[2 * i + 0] = (float) in16[2 * i + 0]; + in16[2 * i + 1] = in16[2 * i + 1] << shift_val; + fIn[2 * i + 1] = (float) in16[2 * i + 1]; + } } - else + if ( l_shift < r_shift ) { - printf("\033[0;31m"); - printf("[FAILED] FFT test of length %d Max_deviation = %.6f\n", fft_lengths[i], max_deviation); - num_failed++; + Word16 shift_val = r_shift - l_shift; + for ( int i = 0; i < N; i++ ) + { + in16[2 * i + 0] = in16[2 * i + 0] >> shift_val; + fIn[2 * i + 0] = (float) in16[2 * i + 0]; + in16[2 * i + 1] = in16[2 * i + 1] >> shift_val; + fIn[2 * i + 1] = (float) in16[2 * i + 1]; + } } - } - printf("\033[0;33m"); - printf("============== Completed DoFFT tests ================\n\n"); - printf("Summary of FFT unit tests:\n"); - printf("--------------------------\n"); - printf("Total tests: %d\n", num_tests); - printf("Passed: %d\n", num_passed); - printf("\033[0;31m"); - printf("Failed: %d\n\n", num_failed); - printf("\033[0m"); } +static void populate_input_deinterleave( Word32 *iRe, Word32 *iIm, float *fRe, float *fIm, Word16 N ) +{ + Word32 max_val_in = 0; + Word16 r_shift, l_shift; + do + { + srand( N ); + for ( int i = 0; i < N; i++ ) + { + iRe[i] = rand(); + iIm[i] = rand(); + if ( max_val_in < abs( iRe[i] ) ) + max_val_in = abs( iRe[i] ); + if ( max_val_in < abs( iIm[i] ) ) + max_val_in = abs( iIm[i] ); + fRe[i] = (float) iRe[i]; + fIm[i] = (float) iIm[i]; + } + } while ( max_val_in == 0 ); + r_shift = find_guarded_bits_fx( 2 * 
N ); + l_shift = L_norm_arr( iRe, N ); + l_shift = min( L_norm_arr( iIm, N ), l_shift ); + if ( l_shift > r_shift ) + { + Word16 shift_val = l_shift - r_shift; + for ( int i = 0; i < N; i++ ) + { + iRe[i] = iRe[i] << shift_val; + fRe[i] = (float) iRe[i]; + iIm[i] = iIm[i] << shift_val; + fIm[i] = (float) iIm[i]; + } + } + if ( l_shift < r_shift ) + { + Word16 shift_val = r_shift - l_shift; + for ( int i = 0; i < N; i++ ) + { + iRe[i] = iRe[i] >> shift_val; + fRe[i] = (float) iRe[i]; + iIm[i] = iIm[i] >> shift_val; + fIm[i] = (float) iIm[i]; + } + } +} -void test_fft(Word16 *fft_lengths, Word16 num_lengths) +static float test_fixed_fft( Word16 N, Word32 test_type ) { - Word32 num_tests = 0, num_passed = 0, num_failed = 0; - printf("\033[0;33m"); - printf("============== Starting fft tests ================\n"); - for ( int i = num_lengths - 1; i >=0 ; i-- ) - { - float max_deviation = test_fixed_fft( fft_lengths[i], 0); - num_tests++; - if (max_deviation < ALLOWED_DEVIATION) + float max_deviation = 0.0f; + Word32 max_val_out = 0; + + switch ( test_type ) { - printf("\033[0;32m"); - printf("[PASSED] FFT test of length %d\n", fft_lengths[i]); - num_passed++; + case TYPE_DoFFT: + { + Word32 *iRe = calloc( N, sizeof( iRe[0] ) ); + Word32 *iIm = calloc( N, sizeof( iIm[0] ) ); + float *fRe = calloc( N, sizeof( fRe[0] ) ); + float *fIm = calloc( N, sizeof( fIm[0] ) ); + if ( ( iRe == NULL ) || ( iIm == NULL ) || ( fRe == NULL ) || ( fIm == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_deinterleave( iRe, iIm, fRe, fIm, N ); + } + DoFFT( fRe, fIm, N ); + DoFFT_fx( iRe, iIm, N ); + + for ( int i = 0; i < N; i++ ) + { + float OutRe = fRe[i]; + float OutIm = fIm[i]; + float dev_val_real = (float) ( fabs( OutRe - (float) iRe[i] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) iIm[i] ) ); + if ( max_val_out < iRe[i] ) + max_val_out = iRe[i]; + if ( max_val_out < iIm[i] ) + max_val_out = iIm[i]; + if ( dev_val_real > max_deviation ) + max_deviation = dev_val_real; + if ( dev_val_imag > max_deviation ) + max_deviation = dev_val_imag; + } + free( iRe ); + free( iIm ); + free( fRe ); + free( fIm ); + break; + } + case TYPE_fft: + { + Word32 *iRe = calloc( N, sizeof( iRe[0] ) ); + Word32 *iIm = calloc( N, sizeof( iIm[0] ) ); + float *fRe = calloc( N, sizeof( fRe[0] ) ); + float *fIm = calloc( N, sizeof( fIm[0] ) ); + if ( ( iRe == NULL ) || ( iIm == NULL ) || ( fRe == NULL ) || ( fIm == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_deinterleave( iRe, iIm, fRe, fIm, N ); + } + fft( fRe, fIm, N, 1 ); + fft_fx( iRe, iIm, N, 1 ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fRe[i]; + float OutIm = fIm[i]; + float dev_val_real = (float) ( fabs( OutRe - (float) iRe[i] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) iIm[i] ) ); + if ( max_val_out < iRe[i] ) + max_val_out = iRe[i]; + if ( max_val_out < iIm[i] ) + max_val_out = iIm[i]; + if ( dev_val_real > max_deviation ) + max_deviation = dev_val_real; + if ( dev_val_imag > max_deviation ) + max_deviation = dev_val_imag; + } + free( iRe ); + free( iIm ); + free( fRe ); + free( fIm ); + break; + } + case TYPE_fft_rel: + { + Word16 *in16 = calloc( 2 * N, sizeof( in16[0] ) ); + float *fIn = calloc( 2 * N, sizeof( fIn[0] ) ); + Word16 guard_bits = 9; + if ( N == 256 ) + { + guard_bits = 8; + } + else if ( N == 128 ) + { + guard_bits = 7; + } + else if ( N != 512 ) + { + assert( !"Unsupported length for fft_rel!" ); + } + if ( ( in16 == NULL ) || ( fIn == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_interleave_16( in16, fIn, N ); + } + fft_rel( fIn, N, guard_bits ); + fft_rel_fx( in16, N, guard_bits ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fIn[2 * i + 0]; + float OutIm = fIn[2 * i + 1]; + float dev_val_real = (float) ( fabs( OutRe - (float) in16[2 * i + 0] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) in16[2 * i + 1] ) ); + if ( max_val_out < abs( in16[2 * i + 0] ) ) + max_val_out = abs( in16[2 * i + 0] ); + if ( max_val_out < abs( in16[2 * i + 1] ) ) + max_val_out = abs( in16[2 * i + 1] ); + if ( dev_val_real > max_deviation ) + max_deviation = dev_val_real; + if ( dev_val_imag > max_deviation ) + max_deviation = dev_val_imag; + } + free( in16 ); + free( fIn ); + break; + } + case TYPE_fft_rel32: + { + Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) ); + float *fIn = calloc( 2 * N, sizeof( fIn[0] ) ); + Word16 guard_bits = 9; + if ( N == 256 ) + { + guard_bits = 8; + } + else if ( N == 128 ) + { + guard_bits = 7; + } + else if ( N != 512 ) + { + assert( !"Unsupported length for fft_rel!" ); + } + if ( ( in32 == NULL ) || ( fIn == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_interleave( in32, fIn, N ); + } + fft_rel( fIn, N, guard_bits ); + fft_rel_fx32( in32, N, guard_bits ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fIn[2 * i + 0]; + float OutIm = fIn[2 * i + 1]; + float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) ); + if ( max_val_out < abs( in32[2 * i + 0] ) ) + max_val_out = abs( in32[2 * i + 0] ); + if ( max_val_out < abs( in32[2 * i + 1] ) ) + max_val_out = abs( in32[2 * i + 1] ); + if ( dev_val_real > max_deviation ) + { + max_deviation = dev_val_real; + } + if ( dev_val_imag > max_deviation ) + { + max_deviation = dev_val_imag; + } + } + free( in32 ); + free( fIn ); + break; + } + case TYPE_ifft_rel32: + { + Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) ); + float *fIn = calloc( 2 * N, sizeof( fIn[0] ) ); + Word16 guard_bits = 9; + if ( N == 256 ) + { + guard_bits = 8; + } + else if ( N == 128 ) + { + guard_bits = 7; + } + else if ( N != 512 ) + { + assert( !"Unsupported length for fft_rel!" ); + } + if ( ( in32 == NULL ) || ( fIn == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_interleave( in32, fIn, N ); + } + ifft_rel( fIn, N, guard_bits ); + ifft_rel_fx32( in32, N, guard_bits ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fIn[2 * i + 0]; + float OutIm = fIn[2 * i + 1]; + float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) ); + if ( max_val_out < abs( in32[2 * i + 0] ) ) + max_val_out = abs( in32[2 * i + 0] ); + if ( max_val_out < abs( in32[2 * i + 1] ) ) + max_val_out = abs( in32[2 * i + 1] ); + if ( dev_val_real > max_deviation ) + { + max_deviation = dev_val_real; + } + if ( dev_val_imag > max_deviation ) + { + max_deviation = dev_val_imag; + } + } + free( in32 ); + free( fIn ); + break; + } + case TYPE_fft_fft3: + { + Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) ); + Word32 *out32 = calloc( 2 * N, sizeof( out32[0] ) ); + float *fIn = calloc( 2 * N, sizeof( fIn[0] ) ); + float *fOut = calloc( 2 * N, sizeof( fOut[0] ) ); + if ( ( in32 == NULL ) || ( fIn == NULL ) || ( out32 == NULL ) || ( fOut == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_interleave( in32, fIn, N ); + } + fft3( fIn, fOut, N ); + fft3_fx_ivas( in32, out32, N ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fOut[2 * i + 0]; + float OutIm = fOut[2 * i + 1]; + float dev_val_real = (float) ( fabs( OutRe - (float) out32[2 * i + 0] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) out32[2 * i + 1] ) ); + if ( max_val_out < abs( out32[2 * i + 0] ) ) + max_val_out = abs( out32[2 * i + 0] ); + if ( max_val_out < abs( out32[2 * i + 1] ) ) + max_val_out = abs( out32[2 * i + 1] ); + if ( dev_val_real > max_deviation ) + { + max_deviation = dev_val_real; + } + if ( dev_val_imag > max_deviation ) + { + max_deviation = dev_val_imag; + } + } + free( in32 ); + free( fIn ); + free( out32 ); + free( fOut ); + break; + } + case TYPE_fft_ifft3: + { + Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) ); + Word32 *out32 = calloc( 2 * N, sizeof( out32[0] ) ); + float *fIn = calloc( 2 * N, sizeof( fIn[0] ) ); + float *fOut = calloc( 2 * N, sizeof( fOut[0] ) ); + if ( ( in32 == NULL ) || ( fIn == NULL ) || ( out32 == NULL ) || ( fOut == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_interleave( in32, fIn, N ); + } + ifft3( fIn, fOut, N ); + ifft3_fx_ivas( in32, out32, N ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fOut[2 * i + 0]; + float OutIm = fOut[2 * i + 1]; + float dev_val_real = (float) ( fabs( OutRe - (float) out32[2 * i + 0] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) out32[2 * i + 1] ) ); + if ( max_val_out < abs( out32[2 * i + 0] ) ) + max_val_out = abs( out32[2 * i + 0] ); + if ( max_val_out < abs( out32[2 * i + 1] ) ) + max_val_out = abs( out32[2 * i + 1] ); + if ( dev_val_real > max_deviation ) + { + max_deviation = dev_val_real; + } + if ( dev_val_imag > max_deviation ) + { + max_deviation = dev_val_imag; + } + } + free( in32 ); + free( fIn ); + free( out32 ); + free( fOut ); + break; + } + case TYPE_fft_cldfb: + { + Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) ); + float *fIn = calloc( 2 * N, sizeof( fIn[0] ) ); + if ( ( in32 == NULL ) || ( fIn == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" ); + exit( 1 ); + } + else + { + populate_input_interleave( in32, fIn, N ); + } + fft_cldfb( fIn, N ); + fft_cldfb_fx( in32, N ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fIn[2 * i + 0]; + float OutIm = fIn[2 * i + 1]; + float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) ); + if ( max_val_out < abs( in32[2 * i + 0] ) ) + max_val_out = abs( in32[2 * i + 0] ); + if ( max_val_out < abs( in32[2 * i + 1] ) ) + max_val_out = abs( in32[2 * i + 1] ); + if ( dev_val_real > max_deviation ) + max_deviation = dev_val_real; + if ( dev_val_imag > max_deviation ) + max_deviation = dev_val_imag; + } + free( fIn ); + free( in32 ); + break; + } + case TYPE_fft_RFFTN: + { + Word32 *in32 = calloc( 2 * N, sizeof( in32[0] ) ); + float *fIn = calloc( 2 * N, sizeof( fIn[0] ) ); + if ( ( in32 == NULL ) || ( fIn == NULL ) ) + { + printf( "!!!!Malloc failed!!!!" 
); + exit( 1 ); + } + else + { + populate_input_interleave( in32, fIn, N ); + } + RFFTN( fIn, fftSineTab640, N, 1 ); + RFFTN_fx( in32, fftSineTab640_fx, N, 1 ); + for ( int i = 0; i < N; i++ ) + { + float OutRe = fIn[2 * i + 0]; + float OutIm = fIn[2 * i + 1]; + float dev_val_real = (float) ( fabs( OutRe - (float) in32[2 * i + 0] ) ); + float dev_val_imag = (float) ( fabs( OutIm - (float) in32[2 * i + 1] ) ); + if ( max_val_out < abs( in32[2 * i + 0] ) ) + max_val_out = abs( in32[2 * i + 0] ); + if ( max_val_out < abs( in32[2 * i + 1] ) ) + max_val_out = abs( in32[2 * i + 1] ); + if ( dev_val_real > max_deviation ) + { + max_deviation = dev_val_real; + } + if ( dev_val_imag > max_deviation ) + { + max_deviation = dev_val_imag; + } + } + free( in32 ); + free( fIn ); + break; + } } - else + + + return ( ( max_deviation / max_val_out ) * 100 ); +} + +static void test_main( Word16 *fft_lengths, Word16 num_lengths, Word16 test_type ) +{ + char *test_name[] = { "DoFFT", "fft", "fft_rel", "fft_rel32", "ifft_rel32", "fft_cldfb", "fft_RFFTN", "fft_fft3", "fft_ifft3" }; + Word32 num_tests = 0, num_passed = 0, num_failed = 0; + printf( "\033[0;33m" ); + printf( "============== Starting %s tests ================\n", test_name[test_type] ); + for ( int i = num_lengths - 1; i >= 0; i-- ) { - printf("\033[0;31m"); - printf("[FAILED] FFT test of length %d Max_deviation = %.6f\n", fft_lengths[i], max_deviation); - num_failed++; + float max_deviation = test_fixed_fft( fft_lengths[i], test_type ); + num_tests++; + if ( max_deviation < ALLOWED_DEVIATION ) + { + printf( "\033[0;32m" ); + printf( "[PASSED] %s test of length %d\n", test_name[test_type], fft_lengths[i] ); + num_passed++; + } + else + { + printf( "\033[0;31m" ); + printf( "[FAILED] %s test of length %d Max_deviation = %.6f\n", test_name[test_type], fft_lengths[i], max_deviation ); + num_failed++; + } } - } - printf("\033[0;33m"); - printf("============== Completed fft tests ================\n\n"); - printf("Summary of FFT 
unit tests:\n"); - printf("--------------------------\n"); - printf("Total tests: %d\n", num_tests); - printf("Passed: %d\n", num_passed); - printf("\033[0;31m"); - printf("Failed: %d\n\n", num_failed); - printf("\033[0m"); + printf( "\033[0;33m" ); + printf( "============== Completed %s tests ================\n\n", test_name[test_type] ); + printf( "Summary of %s unit tests:\n", test_name[test_type] ); + printf( "--------------------------\n" ); + printf( "Total tests: %d\n", num_tests ); + printf( "Passed: %d\n", num_passed ); + printf( "\033[0;31m" ); + printf( "Failed: %d\n\n", num_failed ); + printf( "\033[0m" ); } -void run_fft_unit_test(void) -{ - Word16 fft_lengths[] = {600, 480, 400, 320, 256, 240, 200, 160, 128, 120, 100, 80, 64, 40, 20,}; - Word16 fft_lengths_2[] = {960, 640, 600, 480, 400, 320, 256, 240, 200, 160, 128, 120, 100, 80, 64, 40, 20}; - test_DoFFT(&fft_lengths[0], sizeof(fft_lengths) / sizeof(fft_lengths[0])); - test_fft(&fft_lengths_2[0], sizeof(fft_lengths_2) / sizeof(fft_lengths_2[0])); - return; -} \ No newline at end of file +void run_fft_unit_test( void ) +{ + Word16 DoFFT_lengths[] = { + 600, + 480, + 400, + 320, + 256, + 240, + 200, + 160, + 128, + 120, + 100, + 80, + 64, + 40, + 20, + }; + Word16 fft_lengths[] = { 960, 640, 600, 480, 400, 320, 256, 240, 200, 160, 128, 120, 100, 80, 64, 40, 20 }; + Word16 fft_rel_lengths[] = { 128, 256, 512 }; + Word16 fft_cldfb_lengths[] = { 5, 8, 10, 16, 20, 30 }; + Word16 fft_RFFTN_lengths[] = { 640, 512 }; + Word16 fft_fft3_lengths[] = { 1536, 384 }; + test_main( &DoFFT_lengths[0], sizeof( DoFFT_lengths ) / sizeof( DoFFT_lengths[0] ), TYPE_DoFFT ); + test_main( &fft_lengths[0], sizeof( fft_lengths ) / sizeof( fft_lengths[0] ), TYPE_fft ); + test_main( &fft_rel_lengths[0], sizeof( fft_rel_lengths ) / sizeof( fft_rel_lengths[0] ), TYPE_fft_rel ); + test_main( &fft_rel_lengths[0], sizeof( fft_rel_lengths ) / sizeof( fft_rel_lengths[0] ), TYPE_fft_rel32 ); + test_main( &fft_rel_lengths[0], sizeof( 
fft_rel_lengths ) / sizeof( fft_rel_lengths[0] ), TYPE_ifft_rel32 ); + test_main( &fft_cldfb_lengths[0], sizeof( fft_cldfb_lengths ) / sizeof( fft_cldfb_lengths[0] ), TYPE_fft_cldfb ); + test_main( &fft_RFFTN_lengths[0], sizeof( fft_RFFTN_lengths ) / sizeof( fft_RFFTN_lengths[0] ), TYPE_fft_RFFTN ); + test_main( &fft_fft3_lengths[0], sizeof( fft_fft3_lengths ) / sizeof( fft_fft3_lengths[0] ), TYPE_fft_fft3 ); + test_main( &fft_fft3_lengths[0], sizeof( fft_fft3_lengths ) / sizeof( fft_fft3_lengths[0] ), TYPE_fft_ifft3 ); + return; +} diff --git a/lib_util/test_mdct.c b/lib_util/test_mdct.c new file mode 100644 index 000000000..13ada376a --- /dev/null +++ b/lib_util/test_mdct.c @@ -0,0 +1,222 @@ +/****************************************************************************************************** + + (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository. All Rights Reserved. + + This software is protected by copyright law and by international treaties. + The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, + Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., + Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, + Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other + contributors to this repository retain full ownership rights in their respective contributions in + the software. 
This notice grants no license of any kind, including but not limited to patent + license, nor is any license granted by implication, estoppel or otherwise. + + Contributors are required to enter into the IVAS codec Public Collaboration agreement before making + contributions. + + This software is provided "AS IS", without any express or implied warranties. The software is in the + development stage. It is intended exclusively for experts who have experience with such software and + solely for the purpose of inspection. All implied warranties of non-infringement, merchantability + and fitness for a particular purpose are hereby disclaimed and excluded. + + Any dispute, controversy or claim arising under or in relation to providing this software shall be + submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in + accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and + the United Nations Convention on Contracts on the International Sales of Goods. 
+ +*******************************************************************************************************/ + + +#include +#include +#include +#include +#include + +typedef int Word32; +typedef short Word16; +typedef short int16_t; + +#define TYPE_MDCT (0) +#define TYPE_IMDCT (1) +#define TYPE_EDCT2 (2) +#define TYPE_NEG_EDCT2 (3) + +#define ALLOWED_DEVIATION (0.05) +#define Q31 (2147483647.0f) + +extern const int16_t ip_edct2_64[6]; +extern const float w_edct2_64[80]; +extern const Word16 w_edct2_64_fx[80]; + +void edct2( + const int16_t n, + const int16_t isgn, + float *in, + float *a, + const int16_t *ip, + const float *w ); + +void edct2_fx( + const Word16 n, + const Word16 isgn, + Word16 *in, + Word32 *a, + Word16 *q, + const Word16 *ip, + const Word16 *w); + +void ivas_mdct( + const float *pIn, + float *pOut, + const int16_t length ); + +void ivas_mdct_fx( + const Word32 *pIn, + Word32 *pOut, + const Word16 length, + Word16 *q_out); + +void ivas_imdct( + const float *pIn, + float *pOut, + const int16_t length ); + +void ivas_imdct_fx( + const Word32 *pIn, + Word32 *pOut, + const Word16 length, + Word16 *q_out); + +void populate_input_interleave(Word32 *in32, float *fIn, Word16 N); +void populate_input_interleave_16(Word16 *in16, float *fIn, Word16 N); + +static float test_ivas_mdct_imdct(Word16 N, Word32 test_type) +{ + Word32 *in = calloc(2 * N, sizeof(in[0])); + Word16 *in16 = calloc(2 * N, sizeof(in16[0])); + float *fIn = calloc(2 * N, sizeof(fIn[0])); + Word32 *out = calloc(2 * N, sizeof(out[0])); + float *fOut = calloc(2 * N, sizeof(fOut[0])); + float max_deviation = 0.0f; + Word32 max_val_out = 0; + Word16 q_out = 0; + float div_fac = 1.0f; + + if ((in == NULL) || (fIn == NULL) || (out == NULL) || (fOut == NULL)) + { + printf("!!!!Malloc failed!!!!"); + exit(1); + } + + switch(test_type) + { + case (TYPE_MDCT): + { + populate_input_interleave(in, fIn, N); + ivas_mdct(fIn, fOut, N); + ivas_mdct_fx(in, out, N, &q_out); + break; + } + case (TYPE_IMDCT): + 
{ + populate_input_interleave(in, fIn, N); + ivas_imdct(fIn, fOut, N); + ivas_imdct_fx(in, out, N, &q_out); + break; + } + case (TYPE_EDCT2): + { + populate_input_interleave_16(in16, fIn, N); + edct2(64, 1, fIn, fOut, ip_edct2_64, w_edct2_64); + edct2_fx(64, 1, in16, out, &q_out, ip_edct2_64, w_edct2_64_fx); + break; + } + case (TYPE_NEG_EDCT2): + { + populate_input_interleave_16(in16, fIn, N); + edct2(64, -1, fIn, fOut, ip_edct2_64, w_edct2_64); + edct2_fx(64, -1, in16, out, &q_out, ip_edct2_64, w_edct2_64_fx); + break; + } + } + if (q_out >= 0) + { + div_fac = (float)1.0f / (float)(1 << q_out); + } + else + { + div_fac = (float)(1 << (-q_out)); + } + + for(int i = 0; i < (N >> 1); i++) + { + float OutRe = fOut[2 * i + 0]; + float OutIm = fOut[2 * i + 1]; + float dev_val_real = (float)(fabs((OutRe / div_fac) - (float)(out[2 * i + 0]))); + float dev_val_imag = (float)(fabs((OutIm / div_fac) - (float)(out[2 * i + 1]))); + if (max_val_out < abs(out[2 * i + 0])) + max_val_out = abs(out[2 * i + 0]); + if (max_val_out < abs(out[2 * i + 1])) + max_val_out = abs(out[2 * i + 1]); + if (dev_val_real > max_deviation) + max_deviation = dev_val_real; + if (dev_val_imag > max_deviation) + max_deviation = dev_val_imag; + } + free( in ); + free( fIn ); + free( out ); + free( fOut ); + return (( max_deviation / max_val_out) * 100); +} + +static void test_main(Word16 *imdct_lengths, Word16 num_lengths, Word16 test_type) +{ + char *test_name[]= {"IVAS MDCT", "IVAS IMDCT", "IVAS EDCT2", "IVAS NEG_EDCT2"}; + Word32 num_tests = 0, num_passed = 0, num_failed = 0; + printf("\033[0;33m"); + printf("============== Starting %s tests ================\n", test_name[test_type]); + for ( int i = num_lengths - 1; i >=0 ; i-- ) + { + float max_deviation = test_ivas_mdct_imdct( imdct_lengths[i], test_type); + num_tests++; + if (max_deviation < ALLOWED_DEVIATION) + { + printf("\033[0;32m"); + printf("[PASSED] %s test of length %d\n", test_name[test_type], imdct_lengths[i]); + num_passed++; + } + 
else + { + printf("\033[0;31m"); + printf("[FAILED] %s test of length %d Max_deviation = %.6f\n", test_name[test_type], imdct_lengths[i], max_deviation); + num_failed++; + } + } + printf("\033[0;33m"); + printf("============== Completed %s tests ================\n\n", test_name[test_type]); + printf("Summary of %s unit tests:\n", test_name[test_type]); + printf("--------------------------\n"); + printf("Total tests: %d\n", num_tests); + printf("Passed: %d\n", num_passed); + printf("\033[0;31m"); + printf("Failed: %d\n\n", num_failed); + printf("\033[0m"); +} + +void run_mdct_unit_test(void) +{ + Word16 ivas_mdct_lengths[] = {480, 320, 160,}; + Word16 ivas_imdct_lengths[] = {480, 320, 160, 80,}; + Word16 edct2_lengths[] = {64}; + test_main(&ivas_mdct_lengths[0], sizeof(ivas_mdct_lengths) / sizeof(ivas_mdct_lengths[0]), TYPE_MDCT); + test_main(&ivas_imdct_lengths[0], sizeof(ivas_imdct_lengths) / sizeof(ivas_imdct_lengths[0]), TYPE_IMDCT); + test_main(&edct2_lengths[0], 1, TYPE_EDCT2); + test_main(&edct2_lengths[0], 1, TYPE_NEG_EDCT2); + return; +} \ No newline at end of file -- GitLab