From 5ee4b22cc49976404d6af9587bf35b5c318064e8 Mon Sep 17 00:00:00 2001 From: Benjamin McDonald Date: Wed, 17 Aug 2022 10:48:35 +1000 Subject: [PATCH] ROM optimisation changes for mdft tables --- lib_com/ivas_mdft_imdft.c | 59 +++++++++++++++++++++++++++++++++++++-- lib_com/ivas_rom_com.c | 28 +++++++++---------- lib_com/ivas_rom_com.h | 16 +++++++++-- lib_com/options.h | 2 +- 4 files changed, 85 insertions(+), 20 deletions(-) diff --git a/lib_com/ivas_mdft_imdft.c b/lib_com/ivas_mdft_imdft.c index ddabf624f4..e534ecb545 100644 --- a/lib_com/ivas_mdft_imdft.c +++ b/lib_com/ivas_mdft_imdft.c @@ -50,38 +50,72 @@ static void ivas_get_mdft_twid_factors( const int16_t length, +#ifndef MDFT_ROM_OPTIMIZE const float **ppTwid_re, - const float **ppTwid_im ) + const float **ppTwid_im +#else + const float **ppTwid +#endif +) { switch ( length ) { case L_FRAME48k: +#ifndef MDFT_ROM_OPTIMIZE *ppTwid_re = &ivas_mdft_coeff_cos_twid_960[0]; *ppTwid_im = &ivas_mdft_coeff_sin_twid_960[0]; +#else + *ppTwid = &ivas_mdft_coeff_cos_twid_960[0]; +#endif break; case L_FRAME32k: +#ifndef MDFT_ROM_OPTIMIZE *ppTwid_re = &ivas_mdft_coeff_cos_twid_640[0]; *ppTwid_im = &ivas_mdft_coeff_sin_twid_640[0]; +#else + *ppTwid = &ivas_mdft_coeff_cos_twid_640[0]; +#endif + break; case L_FRAME16k: +#ifndef MDFT_ROM_OPTIMIZE *ppTwid_re = &ivas_mdft_coeff_cos_twid_320[0]; *ppTwid_im = &ivas_mdft_coeff_sin_twid_320[0]; +#else + *ppTwid = &ivas_mdft_coeff_cos_twid_320[0]; +#endif break; case IVAS_240_PT_LEN: +#ifndef MDFT_ROM_OPTIMIZE *ppTwid_re = &ivas_mdft_coeff_cos_twid_240[0]; *ppTwid_im = &ivas_mdft_coeff_sin_twid_240[0]; +#else + *ppTwid = &ivas_mdft_coeff_cos_twid_240[0]; +#endif break; case IVAS_160_PT_LEN: +#ifndef MDFT_ROM_OPTIMIZE *ppTwid_re = &ivas_mdft_coeff_cos_twid_160[0]; *ppTwid_im = &ivas_mdft_coeff_sin_twid_160[0]; +#else + *ppTwid = &ivas_mdft_coeff_cos_twid_160[0]; +#endif break; case IVAS_80_PT_LEN: +#ifndef MDFT_ROM_OPTIMIZE *ppTwid_re = &ivas_mdft_coeff_cos_twid_80[0]; *ppTwid_im = &ivas_mdft_coeff_sin_twid_80[0]; +#else + *ppTwid = &ivas_mdft_coeff_cos_twid_80[0]; +#endif break; case IVAS_40_PT_LEN: +#ifndef MDFT_ROM_OPTIMIZE *ppTwid_re = &ivas_mdft_coeff_cos_twid_40[0]; *ppTwid_im = &ivas_mdft_coeff_sin_twid_40[0]; +#else + *ppTwid = &ivas_mdft_coeff_cos_twid_40[0]; +#endif break; default: @@ -222,26 +256,45 @@ void ivas_mdft( float re[L_FRAME48k]; float im[L_FRAME48k]; int16_t j, len_by_2; +#ifndef MDFT_ROM_OPTIMIZE const float *pTwid_re, *pTwid_im; - +#else + const float *pTwid; +#endif len_by_2 = mdft_length >> 1; - ivas_get_mdft_twid_factors( mdft_length, &pTwid_re, &pTwid_im ); + ivas_get_mdft_twid_factors( mdft_length, +#ifndef MDFT_ROM_OPTIMIZE + &pTwid_re, &pTwid_im +#else + &pTwid +#endif + ); if ( mdft_length == input_length ) { for ( j = 0; j < mdft_length; j++ ) { +#ifndef MDFT_ROM_OPTIMIZE re[j] = pIn[j] * pTwid_re[j]; im[j] = -pIn[j] * pTwid_im[j]; +#else + re[j] = pIn[j] * pTwid[j]; + im[j] = -pIn[j] * pTwid[mdft_length - j]; +#endif } } else { for ( j = 0; j < mdft_length; j++ ) { +#ifndef MDFT_ROM_OPTIMIZE re[j] = pIn[j] * pTwid_re[j] - pIn[j + mdft_length] * pTwid_im[j]; im[j] = -pIn[j] * pTwid_im[j] - pIn[j + mdft_length] * pTwid_re[j]; +#else + re[j] = pIn[j] * pTwid[j] - pIn[j + mdft_length] * pTwid[mdft_length - j]; + im[j] = -pIn[j] * pTwid[mdft_length - j] - pIn[j + mdft_length] * pTwid[j]; +#endif } } diff --git a/lib_com/ivas_rom_com.c b/lib_com/ivas_rom_com.c index 00b6861c9d..bf115d8f25 100644 --- a/lib_com/ivas_rom_com.c +++ b/lib_com/ivas_rom_com.c @@ -3367,7 +3367,7 @@ const float ivas_mdft_coeff_cos_twid_960[IVAS_960_PT_LEN + 1] = 0.00654493796735196f, 0.00490871880799808f, 0.00327248650652671f, 0.00163624544362412f, 0.00000000000000000f }; - +#ifndef MDFT_ROM_OPTIMIZE const float ivas_mdft_coeff_sin_twid_960[IVAS_960_PT_LEN] = { 0.00000000000000f, 0.00163624544362405f, 0.00327248650652663f, 0.00490871880799799f, @@ -3612,7 +3612,7 @@ const float ivas_mdft_coeff_sin_twid_960[IVAS_960_PT_LEN] = 0.999978581664129f, 0.999987952167257f, 0.999994645401697f, 0.999998661349528f }; - +#endif const float ivas_mdft_coeff_cos_twid_640[IVAS_640_PT_LEN +1] = { 1.00000000000000f, 0.999996988037278f, 0.999987952167257f, 0.999972892444367f, @@ -3777,7 +3777,7 @@ const float ivas_mdft_coeff_cos_twid_640[IVAS_640_PT_LEN +1] = 0.00981731933714973f, 0.00736304124977978f, 0.00490871880799808f, 0.00245436679646048f ,0.00000000000000000f }; - +#ifndef MDFT_ROM_OPTIMIZE const float ivas_mdft_coeff_sin_twid_640[IVAS_640_PT_LEN] = { 0.00000000000000f, 0.00245436679646029f, 0.00490871880799799f, 0.00736304124977957f, @@ -3941,7 +3941,7 @@ const float ivas_mdft_coeff_sin_twid_640[IVAS_640_PT_LEN] = 0.999807240482065f, 0.999852417382795f, 0.999891571247108f, 0.999924701839145f, 0.999951808959328f, 0.999972892444367f, 0.999987952167257f, 0.999996988037278f }; - +#endif const float ivas_mdft_coeff_cos_twid_320[IVAS_320_PT_LEN + 1] = { 1.00000000000000f, 0.999987952167257f, 0.999951808959328f, 0.999891571247108f, @@ -4026,7 +4026,7 @@ const float ivas_mdft_coeff_cos_twid_320[IVAS_320_PT_LEN + 1] = 0.0196336924606283f, 0.0147256833114584f, 0.00981731933714973f, 0.00490871880799808f ,0.0000000000000000f }; - +#ifndef MDFT_ROM_OPTIMIZE const float ivas_mdft_coeff_sin_twid_320[IVAS_320_PT_LEN] = { 0.00000000000000f, 0.00490871880799799f, 0.00981731933714962f, 0.0147256833114585f, @@ -4110,7 +4110,7 @@ const float ivas_mdft_coeff_sin_twid_320[IVAS_320_PT_LEN] = 0.999229036240723f, 0.999409713092437f, 0.999566308502021f, 0.999698818696204f, 0.999807240482065f, 0.999891571247108f, 0.999951808959328f, 0.999987952167257f }; - +#endif const float ivas_mdft_coeff_cos_twid_240[IVAS_240_PT_LEN + 1] = { 1.0000000000f, 0.9999785817f, 0.9999143276f, 0.9998072405f, 0.9996573250f, 0.9994645875f, @@ -4155,7 +4155,7 @@ const float ivas_mdft_coeff_cos_twid_240[IVAS_240_PT_LEN + 1] = 0.0392598158f, 0.0327190828f, 0.0261769483f, 0.0196336925f, 0.0130895956f, 0.0065449380f, 0.000000000f }; - +#ifndef MDFT_ROM_OPTIMIZE const float ivas_mdft_coeff_sin_twid_240[IVAS_240_PT_LEN] = { 0.0000000000f, 0.0065449380f, 0.0130895956f, 0.0196336925f, 0.0261769483f, 0.0327190828f, @@ -4199,7 +4199,7 @@ const float ivas_mdft_coeff_sin_twid_240[IVAS_240_PT_LEN] = 0.9969173337f, 0.9974094913f, 0.9978589232f, 0.9982656102f, 0.9986295348f, 0.9989506814f, 0.9992290362f, 0.9994645875f, 0.9996573250f, 0.9998072405f, 0.9999143276f, 0.9999785817f }; - +#endif const float ivas_mdft_coeff_cos_twid_160[IVAS_160_PT_LEN + 1] = { 1.0000000000f, 0.9999518090f, 0.9998072405f, 0.9995663085f, 0.9992290362f, 0.9987954562f, @@ -4230,7 +4230,7 @@ const float ivas_mdft_coeff_cos_twid_160[IVAS_160_PT_LEN + 1] = 0.0980171403f, 0.0882423705f, 0.0784590957f, 0.0686682589f, 0.0588708037f, 0.0490676743f, 0.0392598158f, 0.0294481732f, 0.0196336925f, 0.0098173193f, 0.000000000f }; - +#ifndef MDFT_ROM_OPTIMIZE const float ivas_mdft_coeff_sin_twid_160[IVAS_160_PT_LEN] = { 0.0000000000f, 0.0098173193f, 0.0196336925f, 0.0294481732f, 0.0392598158f, 0.0490676743f, @@ -4261,7 +4261,7 @@ const float ivas_mdft_coeff_sin_twid_160[IVAS_160_PT_LEN] = 0.9951847267f, 0.9960990333f, 0.9969173337f, 0.9976395492f, 0.9982656102f, 0.9987954562f, 0.9992290362f, 0.9995663085f, 0.9998072405f, 0.9999518090f }; - +#endif const float ivas_mdft_coeff_cos_twid_80[IVAS_80_PT_LEN + 1] = { 1.0000000000f, 0.9998072405f, 0.9992290362f, 0.9982656102f, 0.9969173337f, 0.9951847267f, @@ -4279,7 +4279,7 @@ const float ivas_mdft_coeff_cos_twid_80[IVAS_80_PT_LEN + 1] = 0.1564344650f, 0.1370123417f, 0.1175373975f, 0.0980171403f, 0.0784590957f, 0.0588708037f, 0.0392598158f, 0.0196336925f, 0.000000000f }; - +#ifndef MDFT_ROM_OPTIMIZE const float ivas_mdft_coeff_sin_twid_80[IVAS_80_PT_LEN] = { 0.0000000000f, 0.0196336925f, 0.0392598158f, 0.0588708037f, 0.0784590957f, 0.0980171403f, @@ -4297,7 +4297,7 @@ const float ivas_mdft_coeff_sin_twid_80[IVAS_80_PT_LEN] = 0.9876883406f, 0.9905693404f, 0.9930684570f, 0.9951847267f, 0.9969173337f, 0.9982656102f, 0.9992290362f, 0.9998072405f }; - +#endif const float ivas_mdft_coeff_cos_twid_40[IVAS_40_PT_LEN + 1] = { 1.0000000000f, 0.9992290362f, 0.9969173337f, 0.9930684570f, 0.9876883406f, 0.9807852804f, @@ -4308,7 +4308,7 @@ const float ivas_mdft_coeff_cos_twid_40[IVAS_40_PT_LEN + 1] = 0.3826834324f, 0.3461170571f, 0.3090169944f, 0.2714404499f, 0.2334453639f, 0.1950903220f, 0.1564344650f, 0.1175373975f, 0.0784590957f, 0.0392598158f, 0.000000000f }; - +#ifndef MDFT_ROM_OPTIMIZE const float ivas_mdft_coeff_sin_twid_40[IVAS_40_PT_LEN] = { 0.0000000000f, 0.0392598158f, 0.0784590957f, 0.1175373975f, 0.1564344650f, 0.1950903220f, @@ -4319,7 +4319,7 @@ const float ivas_mdft_coeff_sin_twid_40[IVAS_40_PT_LEN] = 0.9238795325f, 0.9381913359f, 0.9510565163f, 0.9624552365f, 0.9723699204f, 0.9807852804f, 0.9876883406f, 0.9930684570f, 0.9969173337f, 0.9992290362f }; - +#endif const float ivas_sin_twiddle_480[IVAS_480_PT_LEN >> 1] = { -0.000818122995607253f, -0.00736304124977957f, -0.0139076440957708f, -0.0204516511845773f, diff --git a/lib_com/ivas_rom_com.h b/lib_com/ivas_rom_com.h index 9549d9e0e4..084c2d071b 100644 --- a/lib_com/ivas_rom_com.h +++ b/lib_com/ivas_rom_com.h @@ -363,21 +363,33 @@ extern const float ivas_cos_twiddle_80[IVAS_80_PT_LEN >> 1]; *------------------------------------------------------------------------------------------*/ extern const float ivas_mdft_coeff_cos_twid_240[IVAS_240_PT_LEN + 1]; +#ifndef MDFT_ROM_OPTIMIZE extern const float ivas_mdft_coeff_sin_twid_240[IVAS_240_PT_LEN]; +#endif extern const float ivas_mdft_coeff_cos_twid_160[IVAS_160_PT_LEN + 1]; +#ifndef MDFT_ROM_OPTIMIZE extern const float ivas_mdft_coeff_sin_twid_160[IVAS_160_PT_LEN]; +#endif extern const float ivas_mdft_coeff_cos_twid_80[IVAS_80_PT_LEN + 1]; +#ifndef MDFT_ROM_OPTIMIZE extern const float ivas_mdft_coeff_sin_twid_80[IVAS_80_PT_LEN]; +#endif extern const float ivas_mdft_coeff_cos_twid_40[IVAS_40_PT_LEN + 1]; +#ifndef MDFT_ROM_OPTIMIZE extern const float ivas_mdft_coeff_sin_twid_40[IVAS_40_PT_LEN]; - +#endif extern const float ivas_mdft_coeff_cos_twid_960[IVAS_960_PT_LEN + 1]; +#ifndef MDFT_ROM_OPTIMIZE extern const float ivas_mdft_coeff_sin_twid_960[IVAS_960_PT_LEN]; +#endif extern const float ivas_mdft_coeff_cos_twid_640[IVAS_640_PT_LEN + 1]; +#ifndef MDFT_ROM_OPTIMIZE extern const float ivas_mdft_coeff_sin_twid_640[IVAS_640_PT_LEN]; +#endif extern const float ivas_mdft_coeff_cos_twid_320[IVAS_320_PT_LEN + 1]; +#ifndef MDFT_ROM_OPTIMIZE extern const float ivas_mdft_coeff_sin_twid_320[IVAS_320_PT_LEN]; - +#endif extern const int16_t dirac_gains_P_idx[16]; extern const float dirac_gains_norm_term[9]; extern const float dirac_gains_Pnm[91][9]; diff --git a/lib_com/options.h b/lib_com/options.h index a51b60d748..f882080afb 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -156,7 +156,7 @@ #define FIX_I54_LS_CONVERSION /* FhG: fix incorrect downmix matrix for 5_1_4 to 5_1_2 and upmix matrix for 7_1 to 7_1_4 */ #define FIX_I25_FBE_FB_BITS /* issue 25: properly skip reading of TBE FB bits when decoder output sampling rate is not 48 kHz */ #define ORDER_BITS_ADDITION /* issue 14: Transmit SBA order and planar bits at all bitrates */ - +#define MDFT_ROM_OPTIMIZE /*Optimise ROM tables for MDFT/iMDFT*/ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ -- GitLab