From 78c80a0ca36a7e7631f198855faf0856db2c46c0 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 16:51:15 +0100 Subject: [PATCH 01/14] OPTIMIZE_FFT_STACK --- lib_com/edct_fx.c | 193 +++++++++++++++++++++++++++++++--- lib_com/fft_fx.c | 70 ++++++++++++ lib_com/fft_fx_evs.c | 64 +++++++---- lib_com/options.h | 1 + lib_com/prot_fx.h | 25 ++++- lib_com/rom_com.h | 2 + lib_com/rom_com_fx.c | 9 +- lib_com/scale_mem_fx.c | 31 ++++++ lib_com/tools_fx.c | 29 +++++ lib_dec/FEC_HQ_phase_ecu_fx.c | 4 + 10 files changed, 389 insertions(+), 39 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 55bc483e0..02ad1d64c 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -11,7 +11,9 @@ #include "stl.h" #include "math_32.h" -static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ + +static Word16 get_edxt_factor( + const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ { Word16 factor; /*Q15*/ factor = 0; @@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 7327; /*0.223 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 4230; /*0.1290 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1338; /*0.040 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1638; /*0.05 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 2317; /*0.070 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 3277; 
/*0.1 in Q15*/ move16(); +#endif } + return factor; /*Q15*/ } -static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) + +static Word16 const *get_edct_table( + const Word16 length /*Q0*/, + Word16 *q ) { Word16 const *edct_table; edct_table = NULL; @@ -590,6 +620,7 @@ void edxt_fx( move16(); cosPtr = NULL; sinPtr = NULL; + IF( EQ_16( length, 512 ) ) { cosPtr = cos_scale_tbl_512; /*Q15*/ @@ -641,10 +672,14 @@ void edxt_fx( } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_640; /*Q15*/ sinPtr = sin_scale_tbl_640; /*Q15*/ n = 16; move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -669,38 +704,58 @@ void edxt_fx( } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_960; /*Q15*/ sinPtr = sin_scale_tbl_960; /*Q15*/ n = 8; move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_1200; /*Q15*/ sinPtr = sin_scale_tbl_1200; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 2; move16(); +#endif } ELSE IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 4; move16(); +#endif } test(); @@ -708,16 +763,26 @@ void edxt_fx( { const Word16 Nm1 = sub( length, 1 ); const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /*Q0*/ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_MAX]; +#else Word32 re[L_FRAME_PLUS]; Word32 im[L_FRAME_PLUS]; +#endif IF( !synthesis ) { FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */ { +#ifdef 
OPTIMIZE_FFT_STACK + spec[k].re = x[2 * k]; /*Qx*/ + spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ + spec[k].im = spec[( Nm1 - k )].im = 0; +#else re[k] = x[2 * k]; /*Qx*/ re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ im[k] = im[( Nm1 - k )] = 0; +#endif move32(); move32(); move32(); @@ -726,26 +791,46 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_cmplx_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } IF( shr( kernelType, 1 ) ) @@ -757,12 +842,21 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ - y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[( length - k )] = 
L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } - y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else + y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* forw. DST-II */ @@ -774,16 +868,29 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ - y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else + y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } - y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else + y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } - y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ +#else + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ +#endif move32(); } ELSE /* inverse II = III */ @@ -797,12 +904,21 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 ); const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 ); - re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ - im[k] = L_sub( 
Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#else + re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#endif move32(); move32(); } - re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else + re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* DST type III */ @@ -814,23 +930,42 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /*Q15*/ const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /*Q15*/ +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#else re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#endif move32(); move32(); } - re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else + re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } - re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ + spec[0].im = spec[( length / 2 )].im = 0; +#else + re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = 
im[( length / 2 )] = 0; +#endif move32(); move32(); FOR( k = ( Nm1 / 2 ); k > 0; k-- ) { +#ifdef OPTIMIZE_FFT_STACK + spec[( length - k )].re = spec[k].re; /*Qx*/ + spec[( length - k )].im = L_negate( spec[k].im ); /*Qx*/ +#else re[( length - k )] = re[k]; /*Qx*/ im[( length - k )] = L_negate( im[k] ); /*Qx*/ +#endif move32(); move32(); } @@ -838,35 +973,63 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_cmplx_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */ { - y[2 * k] = re[k]; /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[2 * k] = spec[k].re; /*Qx*/ +#else + y[2 * k] = re[k]; /*Qx*/ +#endif move32(); IF( xSign != 0 ) { +#ifdef OPTIMIZE_FFT_STACK + y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /*Qx*/ +#else y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /*Qx*/ +#endif } ELSE { diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 97fef62e5..6eca9930f 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -6932,6 +6932,7 @@ void fft_fx( const Word16 s /* i : sign */ ) { + cmplx x[960]; FOR( Word16 j = 0; j < length; j++ ) 
@@ -7010,6 +7011,75 @@ void fft_fx( return; } + +#ifdef OPTIMIZE_FFT_STACK +void fft_cmplx_fx( + cmplx *x, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +) +{ + SWITCH( length ) + { + case 20: + fft_len20_fx( x ); + BREAK; + case 40: + fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 ); + BREAK; + case 64: + fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 ); + BREAK; + case 80: + fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 ); + BREAK; + case 100: + fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 ); + BREAK; + case 120: + fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 ); + BREAK; + case 128: + fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 ); + BREAK; + case 160: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 ); + BREAK; + case 200: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 ); + BREAK; + case 240: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 ); + BREAK; + case 256: + fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 ); + BREAK; + case 320: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 ); + BREAK; + case 400: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 ); + BREAK; + case 480: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 ); + BREAK; + case 600: + fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 ); + BREAK; + case 640: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 ); + BREAK; + case 960: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 ); + BREAK; + default: + assert( !"fft length is not supported!" 
); + } + + return; +} +#endif + + void rfft_fx( Word32 *x, /* i/o: values Qx */ const Word16 *w, /* i : window Q15 */ diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index a7b2461cb..c68b21650 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -45,23 +45,43 @@ static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); void DoRTFTn_fx( - Word32 *x, /* i/o : real part of input and output data Q(x) */ - Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ + Word32 *x, /* i/o : real part of input and output data Q(x) */ + Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ +#ifdef OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif const Word16 n /* i : size of the FFT up to 1024 */ ) { - Word16 i; Word32 z[2048], *pt; - pt = z; - FOR( i = 0; i < n; i++ ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec != NULL ) { - *pt++ = x[i]; - move16(); - *pt++ = y[i]; - move16(); + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = spec[i].re; + move16(); + *pt++ = spec[i].im; + move16(); + } } + ELSE + { +#endif + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = x[i]; + move16(); + *pt++ = y[i]; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK + } +#endif IF( EQ_16( n, 16 ) ) { @@ -92,19 +112,41 @@ void DoRTFTn_fx( assert( 0 ); } - x[0] = z[0]; - move16(); - y[0] = z[1]; - move16(); - pt = &z[2]; - FOR( i = n - 1; i >= 1; i-- ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec == NULL ) { - x[i] = *pt++; +#endif + x[0] = z[0]; move16(); - y[i] = *pt++; + y[0] = z[1]; move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + x[i] = *pt++; + move16(); + y[i] = *pt++; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK } - + ELSE + { + /* complex interface: return the result in spec[], using the same index mapping as the x[]/y[] path */ + spec[0].re = z[0]; + move32(); + spec[0].im = z[1]; + move32(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + spec[i].re = *pt++; + move32(); + spec[i].im = *pt++; + move32(); + } + } +#endif return; } @@ -124,6 +166,8 @@ static void cdftForw_fx( /* Do FFT */ cftfsub_fx( n, a, w ); + + return; } /*-----------------------------------------------------------------* diff --git a/lib_com/options.h b/lib_com/options.h index b63ee327f..d4ace21b4 100644 --- a/lib_com/options.h +++
b/lib_com/options.h @@ -95,6 +95,7 @@ #define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */ +#define OPTIMIZE_FFT_STACK /* #################### End BE switches ################################## */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index a40fba659..2bfdf6935 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -1303,6 +1303,14 @@ void scale_sig32( const Word16 exp0 /* i : exponent: x = round(x << exp) Qx xx exp */ ); +#ifdef OPTIMIZE_FFT_STACK +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +); + +#endif void Scale_sig64( Word64 x[], /* i/o: signal to scale Qx */ Word16 len, /* i : size of x[] Q0 */ @@ -4050,8 +4058,11 @@ void BASOP_cfft( cmplx *pComplexBuf, Word16 sizeOfFft, Word16 *scale, Word32 wor void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); void DoRTFTn_fx( - Word32 *x, /* i/o : real part of i and output data */ - Word32 *y, /* i/o : imaginary part of i and output data */ + Word32 *x, /* i/o : real part of i and output data */ + Word32 *y, /* i/o : imaginary part of i and output data */ +#ifdef OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif const Word16 n /* i : size of the FFT up to 1024 */ ); @@ -4113,6 +4124,13 @@ void fft_fx( const Word16 s /* i : sign */ ); +#ifdef OPTIMIZE_FFT_STACK +void fft_cmplx_fx( + cmplx *spec, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +); + +#endif void rfft_fx( Word32 *x, /* i/o: values */ const Word16 *w, /* i : window */ @@ -4130,6 +4148,9 @@ void DoRTFTn_fx_ivas( Word16 
find_guarded_bits_fx( const Word32 n ); Word16 L_norm_arr( const Word32 *arr, Word16 size ); +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size ); +#endif Word16 norm_arr( Word16 *arr, Word16 size ); Word16 W_norm_arr( Word64 *arr, Word16 size ); diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 2843d1d6b..42ede75fd 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1559,10 +1559,12 @@ extern const Word16 cos_scale_tbl_640[640]; // Q15 extern const Word16 sin_scale_tbl_640[640]; // Q15 extern const Word16 sin_scale_tbl_512[512]; // Q15 extern const Word16 cos_scale_tbl_512[512]; // Q15 +#ifndef OPTIMIZE_FFT_STACK extern const Word16 cos_scale_tbl_1200[1200]; // Q15 extern const Word16 sin_scale_tbl_1200[1200]; // Q15 extern const Word16 cos_scale_tbl_800[800]; // Q15 extern const Word16 sin_scale_tbl_800[800]; // Q15 +#endif extern const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 extern const Word16 scales_p_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 376333284..760ccf9c0 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -25778,9 +25778,9 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ -}; // Q0 + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ +}; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 
20066 }; /* (Q11) */ /* For extended frames in ACELP->HQ transitions in IVAS, map indices floor(sfms*1.25/8)=[1 2 3 5 7 10 12 15] from extended bws: */ @@ -27403,6 +27403,7 @@ const Word16 cos_scale_tbl_512[512] = /* Q15 */ 201, 100 }; +#ifndef OPTIMIZE_FFT_STACK const Word16 sin_scale_tbl_1200[1200] = { /* Q15 */ 0, 42, 85, 128, 171, 214, 257, 300, 343, 386, 428, 471, 514, 557, 600, 643, @@ -27914,7 +27915,7 @@ const Word16 cos_scale_tbl_800[800] = { /* Q15 */ 32750, 32752, 32754, 32756, 32757, 32759, 32760, 32761, 32762, 32763, 32764, 32765, 32765, 32766, 32766, 32766 }; - +#endif const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2] = /* 2 subvectors Q11*/ { { diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 01c2442a8..3df645e6a 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -337,6 +337,37 @@ void scale_sig32_r( return; } +#ifdef OPTIMIZE_FFT_STACK +/* In-place scaling of the real and imaginary parts of a complex 32-bit signal */ +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +) +{ + Word16 i; + + FOR( i = 0; i < lg; i++ ) + { + /* saturation can occur here */ + x[i].re = L_shl( x[i].re, exp0 ); + move32(); + x[i].im = L_shl( x[i].im, exp0 ); + move32(); + /* exp0 == 0 leaves the data unchanged, so the loop is left after the first element; */ + /* the index may only be forced to lg once both re and im have been handled, */ + /* otherwise x[lg] would be read and written out of bounds */ + if ( 0 == exp0 ) + { + i = lg; + } + } + + return; +} +#endif + + /*-------------------------------------------------------------------* * Rescale_mem: * diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index 47111db3e..eb9f7324f 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -5376,6 +5376,35 @@ Word16 L_norm_arr( return q; } +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( + const cmplx *arr, + Word16 size ) +{ + Word16 q = 31; + move16(); + + FOR( Word16 i = 0; i < size; i++ ) + { + Word16 q_tst; + + q_tst = norm_l( arr[i].re ); + if ( arr[i].re != 0 ) + { + q = s_min( q, q_tst );
+ } + } + + return q; +} +#endif + Word16 norm_arr( Word16 *arr, Word16 size ) diff --git a/lib_dec/FEC_HQ_phase_ecu_fx.c b/lib_dec/FEC_HQ_phase_ecu_fx.c index b76037653..1120a889b 100644 --- a/lib_dec/FEC_HQ_phase_ecu_fx.c +++ b/lib_dec/FEC_HQ_phase_ecu_fx.c @@ -2556,7 +2556,11 @@ static void fec_ecu_dft_fx( *exp = s_min( *exp, 15 ); } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( Tfr32, Tfi32, NULL, *Nfft ); +#else DoRTFTn_fx( Tfr32, Tfi32, *Nfft ); +#endif N_LP = shr( *Nfft, 1 ); L_tmp = L_deposit_l( 0 ); -- GitLab From 2ac149333f10d10884fd2a0791249f05fc757d00 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:16:27 +0100 Subject: [PATCH 02/14] HARMONIZE_DCT --- lib_com/basop_util.c | 42 ++++++ lib_com/basop_util.h | 8 ++ lib_com/edct_fx.c | 182 +++++++++++++++++++++--- lib_com/fft_fx.c | 13 +- lib_com/gs_inact_switching_fx.c | 12 ++ lib_com/options.h | 1 + lib_com/prot_fx.h | 24 +++- lib_com/tcx_mdct_fx.c | 8 ++ lib_com/trans_direct_fx.c | 16 +++ lib_com/trans_inv_fx.c | 4 + lib_dec/FEC_fx.c | 34 +++-- lib_dec/LD_music_post_filter_fx.c | 8 ++ lib_dec/core_switching_dec_fx.c | 4 + lib_dec/dec_tcx_fx.c | 14 +- lib_dec/gs_dec_amr_wb_fx.c | 11 ++ lib_dec/gs_dec_fx.c | 9 ++ lib_dec/hf_synth_fx.c | 8 ++ lib_dec/ivas_td_low_rate_dec_fx.c | 5 + lib_enc/bw_detect_fx.c | 4 + lib_enc/cod_tcx_fx.c | 12 ++ lib_enc/ext_sig_ana_fx.c | 8 ++ lib_enc/gs_enc_fx.c | 10 ++ lib_enc/ivas_td_low_rate_enc_fx.c | 9 ++ lib_rend/ivas_reverb_fft_filter_fx.c | 11 ++ lib_rend/ivas_reverb_filter_design_fx.c | 4 + 25 files changed, 423 insertions(+), 38 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 551dbdeef..dd9bf4e09 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -758,9 +758,51 @@ Word16 getScaleFactor32( /* o: measured headroom in range [ i = s_and( s_min( i_max, i_min ), 0x1F ); + return i; +} + +#ifdef OPTIMIZE_FFT_STACK +/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: 
array containing 32-bit data */ + const Word16 len_x /* i: length of the array to scan */ +) +{ + Word16 i, i_min, i_max; + Word32 x_min, x_max; + + x_max = 0; + move32(); + x_min = 0; + move32(); + FOR( i = 0; i < len_x; i++ ) + { + if ( x[i].re >= 0 ) + x_max = L_max( x_max, x[i].re ); + if ( x[i].re < 0 ) + x_min = L_min( x_min, x[i].re ); + if ( x[i].im >= 0 ) + x_max = L_max( x_max, x[i].im ); + if ( x[i].im < 0 ) + x_min = L_min( x_min, x[i].im ); + } + + i_max = 0x20; + move16(); + i_min = 0x20; + move16(); + + if ( x_max != 0 ) + i_max = norm_l( x_max ); + + if ( x_min != 0 ) + i_min = norm_l( x_min ); + + i = s_and( s_min( i_max, i_min ), 0x1F ); return i; } +#endif Word16 getScaleFactor32_copy( /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ const Word32 *x, /* i: array containing 32-bit data */ diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 426516248..06d9ee759 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -235,6 +235,14 @@ Word16 getScaleFactor32( const Word32 *x, /* i : array containing 32-bit data */ const Word16 len_x ); /* i : length of the array to scan */ +#ifdef OPTIMIZE_FFT_STACK +/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: array containing 32-bit data */ + const Word16 len_x /* i: length of the array to scan */ +); +#endif + Word16 getScaleFactor32_copy( /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ const Word32 *x, /* i: array containing 32-bit data */ const Word32 len_x ); /* i: length of the array to scan */ diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 02ad1d64c..0d544182c 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -207,14 +207,23 @@ void edct_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ - Word16 *q /* i : Q value of input signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + 
const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of input signal */ +#endif ) { Word16 i; Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -224,37 +233,94 @@ void edct_fx( /* Twiddling and Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + spec[i].re = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#else complex_buf[2 * i] = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Q(q+1) */ - - complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + spec[i].im = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#else + complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); } - *q = sub( 15, *q ); - move16(); +#ifdef HARMONIZE_DCT + IF( element_mode == EVS_MONO ) + { +#endif + *q = sub( 15, *q ); + move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( spec, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif + + tmp = div_s( 1, length ); /*Q15 */ + tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ +#ifdef HARMONIZE_DCT + } + ELSE + { + *q = sub( 31, *q ); + move16(); + tmp = sub( getScaleFactor32_cmplx( spec, len1 ), find_guarded_bits_fx( len1 ) ); + scale_sig32_cmplx( spec, len1, tmp ); + + fft_cmplx_fx( spec, len1 ); + *q = sub( *q, tmp ); + move16(); + + tmp = div_s( 4, length 
); /*Q17 */ + tmp = round_fx( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ) ); /*Q15 */ + } +#endif - tmp = div_s( 1, length ); /*Q15 */ - tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ - im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( spec[i].re, spec[i].im, tmp ); /*Q(q+1) */ + im = Madd_32_16( spec[i].im, spec[i].re, tmp ); /*Q(q+1) */ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Q(q+2)*/ move32(); y[( length - ( 1 + ( i * 2 ) ) )] = L_sub( Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ), Mult_32_16( im, edct_table[i] ) ); /*Q(q+2)*/ move32(); } /*Q(q-2) */ +#ifdef HARMONIZE_DCTaa + IF( element_mode == EVS_MONO ) + { + *q = sub( 15 + 2, *q ); + } + ELSE + { + *q = sub( 31 + 2, *q ); + } +#else *q = sub( 15 + 2, *q ); +#ifdef HARMONIZE_DCT + IF( element_mode != EVS_MONO ) + { + *q = add( *q, Q16 ); + } +#endif +#endif move16(); + return; } +#ifndef HARMONIZE_DCT void edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -314,8 +380,11 @@ void edct_ivas_fx( *q = sub( 31 + 2, *q ); move16(); + return; } +#endif + /*-------------------------------------------------------------------------* * FUNCTION : edst_fx() * @@ -340,7 +409,11 @@ void edst_fx( Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx complex_buf[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -350,25 +423,42 @@ void edst_fx( /* Twiddling and 
Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ - complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].re = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else + complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].im = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); } *q = sub( 15, *q ); move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( complex_buf, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif tmp = div_s( 1, length ); /*Q15 */ tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ - im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( complex_buf[i].re, complex_buf[i].im, tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[i].im, complex_buf[i].re, tmp ); /*Qq+1*/ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ move32(); y[( length - ( 1 + ( i * 2 ) ) )] = L_sub( 
Mult_32_16( im, edct_table[i] ), Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ @@ -380,6 +470,8 @@ void edst_fx( return; } + + /*========================================================================*/ /* FUNCTION : edct_fx() */ /*------------------------------------------------------------------------*/ @@ -404,26 +496,35 @@ void edct_16fx( const Word16 *x, /* i : input signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ + Word16 bh /* bit-headroom */ +#ifndef HARMONIZE_DCT + , const Word16 element_mode - +#endif ) { Word16 i; Word16 re[L_FRAME48k / 2]; Word16 im[L_FRAME48k / 2]; const Word16 *edct_table = NULL; +#ifndef OPTIMIZE_FFT_STACK Word16 re2[L_FRAME48k / 2]; Word16 im2[L_FRAME48k / 2]; - +#endif Word32 L_tmp, Lacc, Lmax; +#ifdef OPTIMIZE_FFT_STACK + Word16 tmp, tmp_re, fact; +#else Word16 tmp, fact; +#endif Word16 Q_edct; Word16 Len2, i2; const Word16 *px, *pt; Word16 *py; +#ifndef HARMONIZE_DCT (void) element_mode; /*COMPLETE: some eDCT sub function are missing */ +#endif IF( EQ_16( length, L_FRAME32k ) ) { @@ -477,28 +578,48 @@ void edct_16fx( { i2 = shl( i, 1 ); - L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + re[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else re2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif move16(); - L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + im[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else im2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif 
move16(); px -= 2; pt--; } IF( EQ_16( length, L_FRAME32k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT320_16fx( re, im ); +#else DoRTFT320_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT128_16fx( re, im ); +#else DoRTFT128_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME16k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT160_16fx( re, im ); +#else DoRTFT160_16fx( re2, im2 ); +#endif } ELSE { @@ -508,6 +629,18 @@ void edct_16fx( fact = round_fx( L_shl( L_tmp, 2 ) ); /*Q15 */ FOR( i = 0; i < shr( length, 1 ); i++ ) { +#ifdef OPTIMIZE_FFT_STACK + tmp = mult_r( im[i], fact ); /*Q(Qx+Q_edct) */ + tmp_re = sub_sat( re[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + tmp = mult_r( re[i], fact ); /*Q(Qx+Q_edct) */ + im[i] = add_sat( im[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + re[i] = tmp_re; + move16(); +#else tmp = mult_r( im2[i], fact ); /*Q(Qx+Q_edct) */ re[i] = sub_sat( re2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); @@ -515,6 +648,7 @@ void edct_16fx( tmp = mult_r( re2[i], fact ); /*Q(Qx+Q_edct) */ im[i] = add_sat( im2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); +#endif } /* Post-rotate and obtain the output data */ @@ -567,7 +701,11 @@ void iedct_short_fx( seg_len_div4 = shr( segment_length, 2 ); /*Q0*/ seg_len_3mul_div4 = add( seg_len_div2, seg_len_div4 ); +#ifdef HARMONIZE_DCT + edct_fx( in, alias, seg_len_div2, Q, EVS_MONO ); +#else edct_fx( in, alias, seg_len_div2, Q ); +#endif FOR( i = 0; i < seg_len_div2; i++ ) { IF( alias[i] != 0 ) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 6eca9930f..7af092cd3 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -109,11 +109,13 @@ static void fft5_8( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); static void fft4_5( Word32 *x, Word32 *y, const Word16 *Idx ); static void fft5_4( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); +#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: 
imaginary part of input and output data */ const Word16 n /* i : size of the FFT n=(2^k) up to 1024 */ ); +#endif /*-----------------------------------------------------------------* * fft15_shift2() * 15-point FFT with 2-point circular shift @@ -2630,6 +2632,7 @@ static void dctsub( return; } +#ifndef HARMONIZE_DCT /*-----------------------------------------------------------------* * edct2_fx_ivas() * @@ -2790,7 +2793,7 @@ void DoRTFTn_fx_ivas( return; } - +#endif #ifndef HQ_ALIGN_DUPLICATED_CODE void fft3_fx_ivas( const Word32 X[], // Qx @@ -4274,7 +4277,11 @@ void DoFFT_fx( DoRTFT320_fx( re2, im2 ); BREAK; case 256: +#ifdef HARMONIZE_DCT + DoRTFTn_fx( re2, im2, NULL, 256 ); +#else DoRTFTn_fx_ivas( re2, im2, 256 ); +#endif BREAK; case 240: DoRTFT240( re2, im2 ); @@ -4298,7 +4305,11 @@ void DoFFT_fx( DoRTFT80_fx( re2, im2 ); BREAK; case 64: +#ifdef HARMONIZE_DCT + DoRTFTn_fx( re2, im2, NULL, 64 ); +#else DoRTFTn_fx_ivas( re2, im2, 64 ); +#endif BREAK; case 40: DoRTFT40_fx( re2, im2 ); diff --git a/lib_com/gs_inact_switching_fx.c b/lib_com/gs_inact_switching_fx.c index d00b8e3c0..16e72cd58 100644 --- a/lib_com/gs_inact_switching_fx.c +++ b/lib_com/gs_inact_switching_fx.c @@ -89,7 +89,11 @@ void Inac_switch_ematch_fx( ELSE IF( EQ_16( coder_type, VOICED ) || EQ_16( coder_type, GENERIC ) || EQ_16( coder_type, TRANSITION ) || ( last_core != ACELP_CORE ) || NE_16( last_codec_mode, MODE1 ) || ( ( element_mode > EVS_MONO ) && EQ_16( coder_type, UNVOICED ) ) ) { /* Find spectrum and energy per band for GC and VC frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -103,7 +107,11 @@ void Inac_switch_ematch_fx( ELSE IF( ( coder_type == INACTIVE ) && inactive_coder_type_flag ) { /* Find spectrum and energy per band for inactive frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, 
L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -188,7 +196,11 @@ void Inac_switch_ematch_fx( Scale_sig( dct_exc_tmp, 240, 1 ); // Q_exc Scale_sig( exc2, 240, 1 ); // Q_exc } +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_tmp, exc2, L_frame, 5 ); +#else edct_16fx( dct_exc_tmp, exc2, L_frame, 5, element_mode ); +#endif } return; diff --git a/lib_com/options.h b/lib_com/options.h index d4ace21b4..59f6694eb 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -96,6 +96,7 @@ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */ #define OPTIMIZE_FFT_STACK +#define HARMONIZE_DCT /* #################### End BE switches ################################## */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 2bfdf6935..e5379cefa 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4014,16 +4014,22 @@ void edct_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length */ - Word16 *q /* i : Q value of i signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of i signal */ +#endif ); +#ifndef HARMONIZE_DCT void edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ Word16 *q /* i : Q value of input signal */ ); - +#endif void edst_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -4035,8 +4041,12 @@ void edct_16fx( const Word16 *x, /* i : i signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ - const Word16 element_mode ); + Word16 bh /* bit-headroom */ 
+#ifndef HARMONIZE_DCT + , + const Word16 element_mode +#endif +); void iedct_short_fx( const Word32 *in, /* i : i vector */ @@ -4138,12 +4148,13 @@ void rfft_fx( const Word16 isign /* i : sign */ ); +#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT up to 1024 */ ); - +#endif Word16 find_guarded_bits_fx( const Word32 n ); @@ -4160,6 +4171,7 @@ Flag is_zero_arr( Word32 *arr, Word16 size ); Flag is_zero_arr16( Word16 *arr, Word16 size ); Flag is_zero_arr64( Word64 *arr, Word16 size ); +#ifndef HARMONIZE_DCT void edct2_fx_ivas( const Word16 n, const Word16 isgn, @@ -4167,7 +4179,7 @@ void edct2_fx_ivas( Word32 *a, const Word16 *ip, const Word16 *w ); - +#endif void edct2_fx( Word16 n, Word16 isgn, diff --git a/lib_com/tcx_mdct_fx.c b/lib_com/tcx_mdct_fx.c index 4fd016729..d29a4ffed 100644 --- a/lib_com/tcx_mdct_fx.c +++ b/lib_com/tcx_mdct_fx.c @@ -138,7 +138,11 @@ void TCX_MDCT( *y_e = sub( 15, *y_e ); move16(); +#ifdef HARMONIZE_DCT + edct_fx( y, y, l / 2 + m + r / 2, y_e, EVS_MONO ); +#else edct_fx( y, y, l / 2 + m + r / 2, y_e ); +#endif *y_e = sub( 15 - 1, *y_e ); move16(); return; @@ -220,7 +224,11 @@ void TCX_MDCT_Inverse( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ diff --git a/lib_com/trans_direct_fx.c b/lib_com/trans_direct_fx.c index c84cd9efa..fa0a27a9a 100644 --- a/lib_com/trans_direct_fx.c +++ b/lib_com/trans_direct_fx.c @@ -103,7 +103,11 @@ void direct_transform_fx( Qs[0] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0], EVS_MONO ); 
+#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0] ); +#endif Qmin = s_min( Qs[0], Qmin ); iseg_fx = &in32_r16_fx[segment_length4]; @@ -136,7 +140,11 @@ void direct_transform_fx( Qs[seg] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg] ); +#endif Qmin = s_min( Qs[seg], Qmin ); iseg_fx += segment_length2; @@ -164,7 +172,11 @@ void direct_transform_fx( } Qs[NUM_TIME_SWITCHING_BLOCKS - 1] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[NUM_TIME_SWITCHING_BLOCKS - 1], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[NUM_TIME_SWITCHING_BLOCKS - 1] ); +#endif Qmin = s_min( Qs[NUM_TIME_SWITCHING_BLOCKS - 1], Qmin ); *Q = Qmin; @@ -183,7 +195,11 @@ void direct_transform_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in32_fx, out32_fx, L, Q, EVS_MONO ); +#else edct_fx( in32_fx, out32_fx, L, Q ); +#endif } return; diff --git a/lib_com/trans_inv_fx.c b/lib_com/trans_inv_fx.c index 34d424f26..32e188f2d 100644 --- a/lib_com/trans_inv_fx.c +++ b/lib_com/trans_inv_fx.c @@ -1122,6 +1122,10 @@ void Inverse_Transform( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in_mdct, out, L, Q, EVS_MONO ); +#else edct_fx( in_mdct, out, L, Q ); +#endif } } diff --git a/lib_dec/FEC_fx.c b/lib_dec/FEC_fx.c index fe2780a86..9674ab9c7 100644 --- a/lib_dec/FEC_fx.c +++ b/lib_dec/FEC_fx.c @@ -5,15 +5,20 @@ #include #include "options.h" /* Compilation switches */ #include "cnst.h" /* Common constants */ -#include "rom_com.h" /* Common static table prototypes */ +#include "rom_com.h" /* Common static table prototypes */ #include "rom_dec.h" /* Decoder static table prototypes */ #include "prot_fx.h" /* Function prototypes */ #include "basop_util.h" + + /*-------------------------------------------------------------------* * Local function prototypes 
*-------------------------------------------------------------------*/ + static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ); void gain_dec_bfi_fx( Word16 *past_qua_en ); + + /*======================================================================*/ /* FUNCTION : FEC_exc_estim_fx() */ /*----------------------------------------------------------------------*/ @@ -47,7 +52,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ (Word16[]) voice_factors_fx : frame error rate Q15 */ /* _ (Word16[]) FEC_pitch_fx(tmp_tc): FEC pitch Q6 */ /*-----------------------------------------------------------------------*/ - /* _ (Word16) st_fx->lp_gainp_fx : FEC -low-pass filtered pitch gain Q14 */ /* _ (Word16) st_fx->seed :FEC-seed for random generator for excitation*/ /* _ (Word16) st_fx->bfi_pitch_fx : LP filter coefficient */ @@ -57,7 +61,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ None */ /*=======================================================================*/ - void FEC_exc_estim_fx( Decoder_State *st_fx, /* i/o: Decoder static memory */ const Word16 L_frame, /* i : length of the frame */ @@ -73,7 +76,6 @@ void FEC_exc_estim_fx( Word16 *tmp_noise /* o : long-term noise energy Q0 */ ) { - Word16 exc2_buf[L_FRAME16k + MODE1_L_FIR_FER - 1]; Word16 gainCNG, new_pit /*Q0*/; /* Q3*/ Word16 exp; @@ -152,7 +154,6 @@ void FEC_exc_estim_fx( move16(); } - pitch_pred_linear_fit( st_fx->nbLostCmpt, st_fx->last_good, @@ -170,13 +171,11 @@ void FEC_exc_estim_fx( new_pit /*Q0 int*/ = shl( round_fx( predPitchLag ), 0 ); } - /*-----------------------------------------------------------------* * estimate subframe pitch values for the FEC frame *-----------------------------------------------------------------*/ /* initialize pitch to the long-term pitch */ - *tmp_tc = st_fx->bfi_pitch_fx; move16(); /*Q6*/ IF( EQ_16( L_frame, L_FRAME ) ) @@ -473,7 +472,11 @@ void FEC_exc_estim_fx( move16(); /* Transform to 
frequency domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5 ); +#else edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5, st_fx->element_mode ); +#endif /* Reset unvaluable part of the adaptive (pitch) excitation contribution */ max_len = sub( st_fx->L_frame, Diff_len ); @@ -498,6 +501,7 @@ void FEC_exc_estim_fx( /*-----------------------------------------------------------------* * Replicate the last spectrum in case the last good frame was coded by GSC *-----------------------------------------------------------------*/ + test(); test(); test(); @@ -514,7 +518,11 @@ void FEC_exc_estim_fx( *tmp_noise = shr_r( st_fx->lp_gainc_fx, 3 ); /*Q0*/ move16(); /* Transform back to time domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5 ); +#else edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5, st_fx->element_mode ); +#endif } ELSE { @@ -739,12 +747,19 @@ void FEC_exc_estim_fx( move16(); st_fx->bfi_pitch_frame = st_fx->L_frame; move16(); + return; } /*calculates some conditions for Pulse resynchronization to take place*/ -static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ) +static void pulseRes_preCalc( + Word16 *cond1, + Word16 *cond2, + Word32 *cond3, + Word16 new_pit, + Word16 Tc, + Word16 L_frame ) { Word16 tmp_pit, tmp_pit_e, tmp_frame, tmp_frame_e; Word32 tmp_pit2; @@ -773,8 +788,11 @@ static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word1 BASOP_SATURATE_WARNING_ON_EVS *cond3 = L_sub( L_mult0( -1, tmp_pit ), tmp_pit2 ); move32(); + + return; } + /*-------------------------------------------------------------------* * gain_dec_bfi() * diff --git a/lib_dec/LD_music_post_filter_fx.c b/lib_dec/LD_music_post_filter_fx.c index fc3a94a77..989a47e8d 100644 --- a/lib_dec/LD_music_post_filter_fx.c +++ b/lib_dec/LD_music_post_filter_fx.c @@ -877,7 +877,11 @@ void Prep_music_postP_fx( * EDCT and back to 16 bits 
*------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc16, dct_buffer_out, DCT_L_POST, 6 ); +#else edct_16fx( exc16, dct_buffer_out, DCT_L_POST, 6, EVS_MONO ); +#endif *qdct = Q_exc; move16(); @@ -957,7 +961,11 @@ void Post_music_postP_fx( * Go back to time domain *------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6 ); +#else edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6, EVS_MONO ); +#endif Copy( exc16 + OFFSET2, exc2, L_FRAME ); diff --git a/lib_dec/core_switching_dec_fx.c b/lib_dec/core_switching_dec_fx.c index 001cfa975..e1db6fb06 100644 --- a/lib_dec/core_switching_dec_fx.c +++ b/lib_dec/core_switching_dec_fx.c @@ -111,7 +111,11 @@ void bw_switching_pre_proc_fx( * Calculate frequency energy of 0~3.2kHz and 3.2~6.4kHz the ACELP core synthesis *-------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6 ); +#else edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6, st_fx->element_mode ); +#endif L_tmp = L_deposit_l( 0 ); FOR( i = 0; i < L_FRAME / 2; i++ ) diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 0a0810349..7ee784876 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2199,7 +2199,11 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T /* DCT */ Q = sub( 31, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( x, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -2287,7 +2291,7 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T tmp8, fullbandScale ); } /* TRANSITION_OVERLAP */ - } /* TCX-20 and TCX-only */ + } /* TCX-20 and TCX-only */ /* Window and overlap-add past frame 
if past frame is TCX */ test(); @@ -2731,7 +2735,11 @@ static void TCX_MDCT_Inverse_qwin_fx( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ @@ -3323,7 +3331,11 @@ void IMDCT_ivas_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32 ); +#endif Word16 res_m, res_e; res_e = 0; move16(); diff --git a/lib_dec/gs_dec_amr_wb_fx.c b/lib_dec/gs_dec_amr_wb_fx.c index 326c2f919..2670ebfef 100644 --- a/lib_dec/gs_dec_amr_wb_fx.c +++ b/lib_dec/gs_dec_amr_wb_fx.c @@ -450,10 +450,21 @@ void improv_amr_wb_gs_fx( * Do the excitation modification according to the content * Go back to time domain -> Overwrite exctiation *------------------------------------------------------------*/ + +#ifdef HARMONIZE_DCT + edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6 ); +#else edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6, EVS_MONO ); +#endif + gs_dec_amr_wb_fx( core_brate, seed_tcx, dct_exc_in_fx, Q_exc2, dct_exc_out_fx, Q_exc2, pitch_buf_fx, lt_voice_fac_fx, clas, coder_type ); +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6 ); +#else edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6, EVS_MONO ); +#endif + /*------------------------------------------------------------* * Redo core synthesis at 12k8 Hz with the modified excitation *------------------------------------------------------------*/ diff --git a/lib_dec/gs_dec_fx.c b/lib_dec/gs_dec_fx.c index d867298d1..e2aa1dcb1 100644 --- a/lib_dec/gs_dec_fx.c +++ b/lib_dec/gs_dec_fx.c @@ -358,7 +358,11 
@@ void decod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Reset unvaluable part of the adaptive (pitch) excitation contribution @@ -497,8 +501,13 @@ void decod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*----------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_dec/hf_synth_fx.c b/lib_dec/hf_synth_fx.c index 9c4807835..32fcb3311 100644 --- a/lib_dec/hf_synth_fx.c +++ b/lib_dec/hf_synth_fx.c @@ -716,7 +716,11 @@ void hf_synth_amr_wb_fx( Copy_Scale_sig_16_32_DEPREC( exc, exc32, L_FRAME, qdct ); /* Qexc + qdct */ qdct = add( qdct, Q_exc ); +#ifdef HARMONIZE_DCT + edct_fx( exc32, dct_exc32, L_FRAME, &qdct, EVS_MONO ); +#else edct_fx( exc32, dct_exc32, L_FRAME, &qdct ); +#endif q_tmp = Exp32Array( L_FRAME, dct_exc32 ); q_tmp = sub( q_tmp, 16 ); @@ -1006,7 +1010,11 @@ void hf_synth_amr_wb_fx( qhf = sub( q_tmp, 1 ); Copy_Scale_sig_16_32_DEPREC( dct_hb, dct_hb32, L_FRAME16k, qhf ); /* qhf + qdct */ qhf = add( qhf, qdct ); +#ifdef HARMONIZE_DCT + edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf, EVS_MONO ); +#else edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf ); +#endif q_tmp = Exp32Array( L_FRAME16k, exc16k32 ); q_tmp = sub( q_tmp, 16 ); Copy_Scale_sig_32_16( exc16k32, exc16k, L_FRAME16k, q_tmp ); /* qhf + qtmp */ diff --git a/lib_dec/ivas_td_low_rate_dec_fx.c 
b/lib_dec/ivas_td_low_rate_dec_fx.c index 916329e66..200443dbc 100644 --- a/lib_dec/ivas_td_low_rate_dec_fx.c +++ b/lib_dec/ivas_td_low_rate_dec_fx.c @@ -134,9 +134,14 @@ void tdm_low_rate_dec_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); +#else edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); +#endif IF( bwe_exc != NULL ) { diff --git a/lib_enc/bw_detect_fx.c b/lib_enc/bw_detect_fx.c index 2e04b986f..200ff9098 100644 --- a/lib_enc/bw_detect_fx.c +++ b/lib_enc/bw_detect_fx.c @@ -308,7 +308,11 @@ void bw_detect_fx( in_win32[i] = L_mult( *pt++, *pt1-- ); move32(); } +#ifdef HARMONIZE_DCT + edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct, EVS_MONO ); +#else edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct /*,st->element_mode*/ ); +#endif FOR( i = 0; i < BWD_TOTAL_WIDTH; i++ ) { diff --git a/lib_enc/cod_tcx_fx.c b/lib_enc/cod_tcx_fx.c index aff171740..5fd37f1f5 100644 --- a/lib_enc/cod_tcx_fx.c +++ b/lib_enc/cod_tcx_fx.c @@ -2490,7 +2490,11 @@ void QuantizeSpectrum_fx( /* DCT */ Q = sub( 31, *spectrum_e ); +#ifdef HARMONIZE_DCT + edct_fx( spectrum, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( spectrum, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -3722,7 +3726,11 @@ void coder_tcx_fx( Q = sub( Q, tmp2 ); /* DCT */ +#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum, L_frame, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum, L_frame, &Q ); +#endif *spectrum_e = sub( 31, Q ); move16(); } @@ -4408,7 +4416,11 @@ void InternalTCXDecoder_fx( /* DCT */ Q = sub( 31, *spectrum_e ); 
+#ifdef HARMONIZE_DCT + edct_fx( spectrum_fx, tmp_buf, L_frame, &Q, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( spectrum_fx, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ diff --git a/lib_enc/ext_sig_ana_fx.c b/lib_enc/ext_sig_ana_fx.c index d7f64c4bc..436de8531 100644 --- a/lib_enc/ext_sig_ana_fx.c +++ b/lib_enc/ext_sig_ana_fx.c @@ -374,7 +374,11 @@ void core_signal_analysis_high_bitrate_fx( Q = sub( Q, tmp2 ); /* DCT */ +#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q ); +#endif *spectrum_e = sub( 31, Q ); } ELSE @@ -951,7 +955,11 @@ void core_signal_analysis_high_bitrate_ivas_fx( Word16 Q; Q = q_out_wtda; +#ifdef HARMONIZE_DCT + edct_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q, st->element_mode ); +#else edct_ivas_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q ); +#endif hTcxEnc->spectrum_e[frameno] = sub( 31, Q ); move16(); diff --git a/lib_enc/gs_enc_fx.c b/lib_enc/gs_enc_fx.c index 493e5e8f3..db64b6345 100644 --- a/lib_enc/gs_enc_fx.c +++ b/lib_enc/gs_enc_fx.c @@ -255,8 +255,13 @@ void encod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); + edct_16fx( res, dct_res, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( res, dct_res, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Calculate energy dynamics @@ -372,8 +377,13 @@ void encod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, 
st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif IF( NE_16( st_fx->element_mode, EVS_MONO ) ) { diff --git a/lib_enc/ivas_td_low_rate_enc_fx.c b/lib_enc/ivas_td_low_rate_enc_fx.c index 8ca5a4a50..17ca57065 100644 --- a/lib_enc/ivas_td_low_rate_enc_fx.c +++ b/lib_enc/ivas_td_low_rate_enc_fx.c @@ -94,7 +94,11 @@ void tdm_low_rate_enc_fx( * DCT transform of the residual and create a subsample residual *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( res, dct_res_fx, L_FRAME, 7 ); +#else edct_16fx( res, dct_res_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * GSC encoder @@ -119,9 +123,14 @@ void tdm_low_rate_enc_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7 ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, 7 ); +#else edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7, st->element_mode ); edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_rend/ivas_reverb_fft_filter_fx.c b/lib_rend/ivas_reverb_fft_filter_fx.c index dcf13c153..edc4c0dca 100644 --- a/lib_rend/ivas_reverb_fft_filter_fx.c +++ b/lib_rend/ivas_reverb_fft_filter_fx.c @@ -100,7 +100,12 @@ static void fft_wrapper_2ch_fx( Word16 k, mirror_k; Word32 left_re_fx, left_im_fx, right_re_fx, right_im_fx; +#ifdef HARMONIZE_DCT + DoRTFTn_fx( buffer_L_fx, buffer_R_fx, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L_fx, buffer_R_fx, fft_size ); +#endif + /* separating left and right channel spectra */ buffer_L_fx[0] = L_shl( 
buffer_L_fx[0], 1 ); // Qx + 1 move32(); @@ -167,10 +172,16 @@ static void ifft_wrapper_2ch_fx( move32(); } +#ifdef HARMONIZE_DCT + DoRTFTn_fx( buffer_L, buffer_R, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L, buffer_R, fft_size ); +#endif return; } + + /*-----------------------------------------------------------------------------------------* * Function ivas_reverb_t2f_f2t_init() * diff --git a/lib_rend/ivas_reverb_filter_design_fx.c b/lib_rend/ivas_reverb_filter_design_fx.c index 6d23b0053..9d3d7af6c 100644 --- a/lib_rend/ivas_reverb_filter_design_fx.c +++ b/lib_rend/ivas_reverb_filter_design_fx.c @@ -206,7 +206,11 @@ static void calc_min_phase_fx( /* Convert back and isolate the phase. */ IF( LE_16( fft_size, 512 ) ) /* for size <= 512 using complex-value FFT (more effecient, but available only up to 512 size) */ { +#ifdef HARMONIZE_DCT + DoRTFTn_fx( pFolded_cepstrum_re, pFolded_cepstrum_im, NULL, fft_size ); +#else DoRTFTn_fx_ivas( pFolded_cepstrum_re, pFolded_cepstrum_im, fft_size ); +#endif /* Copying the img part into the output */ FOR( idx = 1; idx < half_fft_size; idx++ ) -- GitLab From ff53bfd7e17cef89bb5a1f85ab336b6a9029710d Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:27:43 +0100 Subject: [PATCH 03/14] clang-format --- lib_com/basop_util.c | 6 +++--- lib_com/basop_util.h | 4 ++-- lib_com/options.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index dd9bf4e09..77ac9fc97 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -763,9 +763,9 @@ Word16 getScaleFactor32( /* o: measured headroom in range [ #ifdef OPTIMIZE_FFT_STACK /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ -Word16 getScaleFactor32_cmplx( - cmplx *x, /* i: array containing 32-bit data */ - const Word16 len_x /* i: length of the array to scan */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: array containing 32-bit data */ + const Word16 len_x /* i: length of the array 
to scan */ ) { Word16 i, i_min, i_max; diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 06d9ee759..b2290e453 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -238,8 +238,8 @@ Word16 getScaleFactor32( #ifdef OPTIMIZE_FFT_STACK /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ Word16 getScaleFactor32_cmplx( - cmplx *x, /* i: array containing 32-bit data */ - const Word16 len_x /* i: length of the array to scan */ + cmplx *x, /* i: array containing 32-bit data */ + const Word16 len_x /* i: length of the array to scan */ ); #endif diff --git a/lib_com/options.h b/lib_com/options.h index 59f6694eb..20fcfe925 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -95,8 +95,8 @@ #define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */ -#define OPTIMIZE_FFT_STACK -#define HARMONIZE_DCT +#define OPTIMIZE_FFT_STACK /* VA: removal of intermediate FFT buffers */ +#define HARMONIZE_DCT /* VA: removal of duplicated DCT functions */ /* #################### End BE switches ################################## */ -- GitLab From 554d2d804ff02bb57ab1bff6df44f470397e6ab3 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:38:40 +0100 Subject: [PATCH 04/14] clang-format --- lib_com/edct_fx.c | 28 ++++++++++++++-------------- lib_com/prot_fx.h | 2 +- lib_com/rom_com.h | 12 ++++++------ lib_com/rom_com_fx.c | 6 +++--- lib_dec/dec_tcx_fx.c | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 0d544182c..43a61d292 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -245,7 +245,7 @@ void edct_fx( #ifdef OPTIMIZE_FFT_STACK spec[i].im = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( 
len1 - ( 1 + i ) )] ); /*Q(q+1) */ #else - complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ + complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ #endif move32(); } @@ -427,7 +427,7 @@ void edst_fx( #ifdef OPTIMIZE_FFT_STACK complex_buf[i].re = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ #else - complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ + complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ #endif move32(); @@ -984,8 +984,8 @@ void edxt_fx( y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else - y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); @@ -993,7 +993,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else - y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } @@ -1010,8 +1010,8 @@ void edxt_fx( y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else - y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], 
wRe ) ); /*Qx*/ + y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); @@ -1019,7 +1019,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else - y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } @@ -1027,7 +1027,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ #else - y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ #endif move32(); } @@ -1046,8 +1046,8 @@ void edxt_fx( spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #else - re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ - im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ + re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #endif move32(); move32(); @@ -1081,7 +1081,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else - re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } @@ -1090,7 +1090,7 @@ void edxt_fx( spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ spec[0].im = spec[( length / 2 )].im = 0; #else - re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ + re[0] 
= x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = im[( length / 2 )] = 0; #endif move32(); @@ -1158,7 +1158,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[2 * k] = spec[k].re; /*Qx*/ #else - y[2 * k] = re[k]; /*Qx*/ + y[2 * k] = re[k]; /*Qx*/ #endif move32(); IF( xSign != 0 ) diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index e5379cefa..207fd8234 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 42ede75fd..8f2ff78b6 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1553,12 +1553,12 @@ extern const Word16 ivas_sine_panning_tbl_fx[601]; // Q15 extern const Word16 ivas_sin_az_fx[361]; // Q15 // edct_fx.c -extern const Word16 sin_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_512[512]; // Q15 -extern const Word16 cos_scale_tbl_512[512]; // Q15 +extern const Word16 sin_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_512[512]; // Q15 +extern const Word16 cos_scale_tbl_512[512]; // Q15 #ifndef OPTIMIZE_FFT_STACK extern const Word16 cos_scale_tbl_1200[1200]; // Q15 extern const Word16 sin_scale_tbl_1200[1200]; // Q15 diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 760ccf9c0..81215f548 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -25778,9 +25778,9 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with 
band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ -}; // Q0 + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ +}; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 20066 }; /* (Q11) */ /* For extended frames in ACELP->HQ transitions in IVAS, map indices floor(sfms*1.25/8)=[1 2 3 5 7 10 12 15] from extended bws: */ diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 7ee784876..113e72e98 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2291,7 +2291,7 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T tmp8, fullbandScale ); } /* TRANSITION_OVERLAP */ - } /* TCX-20 and TCX-only */ + } /* TCX-20 and TCX-only */ /* Window and overlap-add past frame if past frame is TCX */ test(); -- GitLab From 113b4417b4fc9d8524f554dedb02ff6b4689319c Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:46:10 +0100 Subject: [PATCH 05/14] clang-format --- lib_com/edct_fx.c | 16 +++------------- lib_com/rom_com_fx.c | 4 ++-- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 43a61d292..b6078e295 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -297,25 +297,15 @@ void edct_fx( move32(); } /*Q(q-2) */ -#ifdef HARMONIZE_DCTaa - IF( element_mode == EVS_MONO ) - { - *q = sub( 15 + 2, *q ); - } - ELSE - { - *q = sub( 31 + 2, *q ); - } -#else *q = sub( 15 + 2, *q ); + move16(); #ifdef HARMONIZE_DCT IF( element_mode != EVS_MONO ) { *q = add( *q, 
Q16 ); + move16(); } #endif -#endif - move16(); return; } @@ -1055,7 +1045,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else - re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 81215f548..7139829a3 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -25778,8 +25778,8 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ }; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 20066 }; /* (Q11) */ -- GitLab From c8646f386627582c912d03052c4295f5d982ca4e Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:57:23 +0100 Subject: [PATCH 06/14] HARMONIZE_DCT - remove unused static functions --- lib_com/fft_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 7af092cd3..a3582a846 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -2440,6 +2440,7 @@ static void cftmdl( return; } +#ifndef HARMONIZE_DCT static void cftbsub( Word16 n, // Q0 Word32 *a, // Qx @@ -2632,7 +2633,6 @@ static void dctsub( return; } -#ifndef HARMONIZE_DCT 
/*-----------------------------------------------------------------* * edct2_fx_ivas() * -- GitLab From e6bc3d6356f936ff369fe6861697e10e59114c5f Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 19:38:52 +0100 Subject: [PATCH 07/14] fix + OPTIMIZE_FFT_STACK --- lib_com/edct_fx.c | 7 ++--- lib_com/scale_mem_fx.c | 4 +-- lib_enc/ivas_mdct_core_enc_fx.c | 55 +++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index b6078e295..bccf098bf 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -193,7 +193,7 @@ static Word16 const *get_edct_table( /*-------------------------------------------------------------------------* * FUNCTION : edct_fx() * - * PURPOSE : DCT transform + * PURPOSE : DCT transform, 32-bit version * * INPUT ARGUMENTS : * _ (Word16) length : length @@ -463,9 +463,9 @@ void edst_fx( /*========================================================================*/ -/* FUNCTION : edct_fx() */ +/* FUNCTION : edct_16fx() */ /*------------------------------------------------------------------------*/ -/* PURPOSE : DCT transform */ +/* PURPOSE : DCT transform, 16-bit version */ /*------------------------------------------------------------------------*/ /* INPUT ARGUMENTS : */ /* _ (Word16) length : length */ @@ -477,7 +477,6 @@ void edst_fx( /* OUTPUT ARGUMENTS : */ /* _ (Word16[]) y : output transform Qx */ /*------------------------------------------------------------------------*/ - /*------------------------------------------------------------------------*/ /* RETURN ARGUMENTS : */ /* _ None */ diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 3df645e6a..13118dada 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -353,13 +353,13 @@ void scale_sig32_cmplx( move32(); if ( 0 == exp0 ) { - i = lg; + break; } x[i].im = L_shl( x[i].im, exp0 ); move32(); if ( 0 == exp0 ) { - i = lg; + break; } } diff --git
a/lib_enc/ivas_mdct_core_enc_fx.c b/lib_enc/ivas_mdct_core_enc_fx.c index 96777796c..516ced089 100644 --- a/lib_enc/ivas_mdct_core_enc_fx.c +++ b/lib_enc/ivas_mdct_core_enc_fx.c @@ -1117,6 +1117,55 @@ void enc_prm_igf_mdct( return; } +#ifdef OPTIMIZE_FFT_STACK +/*-------------------------------------------------------------------* + * compute_power_spec() + * + * Power spectrum of subframe n: spectrum_fx^2 when fUseTns[n] is set, otherwise mdst_spectrum_fx^2 + spectrum_fx^2; each value is left-shifted by *q_pow (from W_norm_arr) and reduced to its upper 32 bits + *-------------------------------------------------------------------*/ + +static void compute_power_spec( + TCX_ENC_HANDLE hTcxEnc, + Word32 *mdst_spectrum_fx[NB_DIV], + Word32 powerSpec_fx[N_MAX], + Word16 *q_pow, + const Word16 n, + const Word16 L_subframeTCX ) +{ + Word16 i; + Word64 powerSpec_fx64[N_MAX]; + + IF( hTcxEnc->fUseTns[n] ) + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mult_32_32( hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + ELSE + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mac_32_32( W_mult_32_32( mdst_spectrum_fx[n][i], mdst_spectrum_fx[n][i] ), hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_shl( powerSpec_fx64[i], *q_pow ); + move64(); + powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); + move32(); + } + + return; +} +#endif + /*-------------------------------------------------------------------* * ivas_mdct_core_whitening_enc() * @@ -1152,7 +1201,9 @@ void ivas_mdct_core_whitening_enc_fx( Word32 temp_buffer[15 * L_FRAME48k / 8]; Word32 *windowedSignal_fx[CPE_CHANNELS]; Word32 *powerSpec_fx = orig_spectrum_long[0]; +#ifndef OPTIMIZE_FFT_STACK Word64 powerSpec_fx64[N_MAX]; +#endif Word16 nrg_fx; /* Q15 */ Encoder_State *st, **sts; Word32 scf_fx[CPE_CHANNELS][NB_DIV][M]; @@ -1920,6 +1971,9 @@ void ivas_mdct_core_whitening_enc_fx( move16(); FOR( n = 0; n < nSubframes; n++ ) { +#ifdef
OPTIMIZE_FFT_STACK + compute_power_spec( st->hTcxEnc, mdst_spectrum_fx[ch], powerSpec_fx, &q_pow, n, L_subframeTCX ); +#else IF( st->hTcxEnc->fUseTns[n] ) { FOR( i = 0; i < L_subframeTCX; i++ ) @@ -1946,6 +2000,7 @@ void ivas_mdct_core_whitening_enc_fx( powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); move32(); } +#endif IF( mct_on ) { FOR( i = 0; i < L_subframeTCX; i++ ) -- GitLab From 6a1be2331936a1b95e72be3b9841058e7925da4e Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 20:00:26 +0100 Subject: [PATCH 08/14] fix --- lib_com/scale_mem_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 13118dada..4f095703a 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -353,13 +353,13 @@ void scale_sig32_cmplx( move32(); if ( 0 == exp0 ) { - break; + BREAK; } x[i].im = L_shl( x[i].im, exp0 ); move32(); if ( 0 == exp0 ) { - break; + BREAK; } } -- GitLab From 4b025984bff9abf60b709cd7bd4f1218786e4d98 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 09:28:59 +0100 Subject: [PATCH 09/14] fix in DoRTFTn_fx() --- lib_com/fft_fx_evs.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index c68b21650..be4a8df83 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -32,11 +32,6 @@ static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w ); -#include "math_32.h" - -/*-----------------------------------------------------------------* - * Local functions - *-----------------------------------------------------------------*/ static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a ); static void cftfsub_fx( Word16 n, 
Word32 *a, const Word16 *w ); @@ -44,6 +39,12 @@ static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); +/*-----------------------------------------------------------------* + * DoRTFTn_fx() + * + * + *-----------------------------------------------------------------*/ + void DoRTFTn_fx( Word32 *x, /* i/o : real part of input and output data Q(x) */ Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ @@ -113,7 +114,22 @@ void DoRTFTn_fx( } #ifdef OPTIMIZE_FFT_STACK - IF( spec == NULL ) + IF( spec != NULL ) + { + spec[0].re = z[0]; + move16(); + spec[0].im = z[1]; + move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + spec[i].re = *pt++; + move16(); + spec[i].im = *pt++; + move16(); + } + } + ELSE { #endif x[0] = z[0]; -- GitLab From e53b91a99ac331c2c8d31f2a7f0c724edb169ce8 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 11:06:47 +0100 Subject: [PATCH 10/14] editorial change --- lib_com/fft_fx_evs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index be4a8df83..37f27fbe7 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -10,7 +10,7 @@ #include /*-----------------------------------------------------------------* - * Local functions + * Local constants *-----------------------------------------------------------------*/ #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */ @@ -19,6 +19,10 @@ #define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/ #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/ +/*-----------------------------------------------------------------* + * Local function prototypes + *-----------------------------------------------------------------*/ + static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft32_5_16fx( 
Word16 *x, Word16 *y, const Word16 *Idx ); -- GitLab From 69bd8cc40c0e9c63af9e3226ba50bc4c6989e870 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 13:11:51 +0100 Subject: [PATCH 11/14] revert harmonization of DoRTFTn_fx_ivas() --- lib_com/fft_fx.c | 12 ++---------- lib_com/fft_fx_evs.c | 3 ++- lib_com/prot_fx.h | 4 +--- lib_rend/ivas_reverb_fft_filter_fx.c | 8 -------- lib_rend/ivas_reverb_filter_design_fx.c | 4 ---- 5 files changed, 5 insertions(+), 26 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index a3582a846..4182a350b 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -2736,6 +2736,7 @@ void edct2_fx_ivas( } } } +#endif void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data Qx */ @@ -2743,7 +2744,6 @@ void DoRTFTn_fx_ivas( const Word16 n /* i : size of the FFT up to 1024 Q0*/ ) { - Word16 i; Word32 z[2048]; @@ -2793,7 +2793,7 @@ void DoRTFTn_fx_ivas( return; } -#endif + #ifndef HQ_ALIGN_DUPLICATED_CODE void fft3_fx_ivas( const Word32 X[], // Qx @@ -4277,11 +4277,7 @@ void DoFFT_fx( DoRTFT320_fx( re2, im2 ); BREAK; case 256: -#ifdef HARMONIZE_DCT - DoRTFTn_fx( re2, im2, NULL, 256 ); -#else DoRTFTn_fx_ivas( re2, im2, 256 ); -#endif BREAK; case 240: DoRTFT240( re2, im2 ); @@ -4305,11 +4301,7 @@ void DoFFT_fx( DoRTFT80_fx( re2, im2 ); BREAK; case 64: -#ifdef HARMONIZE_DCT - DoRTFTn_fx( re2, im2, NULL, 64 ); -#else DoRTFTn_fx_ivas( re2, im2, 64 ); -#endif BREAK; case 40: DoRTFT40_fx( re2, im2 ); diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index 37f27fbe7..a17ff1832 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -55,7 +55,7 @@ void DoRTFTn_fx( #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif - const Word16 n /* i : size of the FFT up to 1024 */ + const Word16 n /* i : size of the FFT up to 1024 */ ) { Word16 i; @@ -151,6 +151,7 @@ void DoRTFTn_fx( #ifdef OPTIMIZE_FFT_STACK } #endif + return; } diff --git a/lib_com/prot_fx.h 
b/lib_com/prot_fx.h index 207fd8234..089a7119e 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4073,7 +4073,7 @@ void DoRTFTn_fx( #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif - const Word16 n /* i : size of the FFT up to 1024 */ + const Word16 n /* i : size of the FFT up to 1024 */ ); void DoRTFT480_fx( @@ -4148,13 +4148,11 @@ void rfft_fx( const Word16 isign /* i : sign */ ); -#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT up to 1024 */ ); -#endif Word16 find_guarded_bits_fx( const Word32 n ); diff --git a/lib_rend/ivas_reverb_fft_filter_fx.c b/lib_rend/ivas_reverb_fft_filter_fx.c index edc4c0dca..6f4e3052d 100644 --- a/lib_rend/ivas_reverb_fft_filter_fx.c +++ b/lib_rend/ivas_reverb_fft_filter_fx.c @@ -100,11 +100,7 @@ static void fft_wrapper_2ch_fx( Word16 k, mirror_k; Word32 left_re_fx, left_im_fx, right_re_fx, right_im_fx; -#ifdef HARMONIZE_DCT - DoRTFTn_fx( buffer_L_fx, buffer_R_fx, NULL, fft_size ); -#else DoRTFTn_fx_ivas( buffer_L_fx, buffer_R_fx, fft_size ); -#endif /* separating left and right channel spectra */ buffer_L_fx[0] = L_shl( buffer_L_fx[0], 1 ); // Qx + 1 @@ -172,11 +168,7 @@ static void ifft_wrapper_2ch_fx( move32(); } -#ifdef HARMONIZE_DCT - DoRTFTn_fx( buffer_L, buffer_R, NULL, fft_size ); -#else DoRTFTn_fx_ivas( buffer_L, buffer_R, fft_size ); -#endif return; } diff --git a/lib_rend/ivas_reverb_filter_design_fx.c b/lib_rend/ivas_reverb_filter_design_fx.c index 9d3d7af6c..6d23b0053 100644 --- a/lib_rend/ivas_reverb_filter_design_fx.c +++ b/lib_rend/ivas_reverb_filter_design_fx.c @@ -206,11 +206,7 @@ static void calc_min_phase_fx( /* Convert back and isolate the phase. 
*/ IF( LE_16( fft_size, 512 ) ) /* for size <= 512 using complex-value FFT (more effecient, but available only up to 512 size) */ { -#ifdef HARMONIZE_DCT - DoRTFTn_fx( pFolded_cepstrum_re, pFolded_cepstrum_im, NULL, fft_size ); -#else DoRTFTn_fx_ivas( pFolded_cepstrum_re, pFolded_cepstrum_im, fft_size ); -#endif /* Copying the img part into the output */ FOR( idx = 1; idx < half_fft_size; idx++ ) -- GitLab From cb17b506aed0ef21c9ecd19d23fceed56a5347bb Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 14:36:23 +0100 Subject: [PATCH 12/14] harmonize fft_fx --- lib_com/edct_fx.c | 6 ++-- lib_com/fft_fx.c | 65 +++++++++++++++++++++++++++++++++--- lib_com/ivas_mdft_imdft_fx.c | 58 ++++++++++++++++++++++++++++++++ lib_com/prot_fx.h | 10 +++--- 4 files changed, 125 insertions(+), 14 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index bccf098bf..84793d4fd 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -273,7 +273,7 @@ void edct_fx( tmp = sub( getScaleFactor32_cmplx( spec, len1 ), find_guarded_bits_fx( len1 ) ); scale_sig32_cmplx( spec, len1, tmp ); - fft_cmplx_fx( spec, len1 ); + fft_fx( spec, len1 ); *q = sub( *q, tmp ); move16(); @@ -954,7 +954,7 @@ void edxt_fx( ELSE /* fft() doesn't support 512 */ { #ifdef OPTIMIZE_FFT_STACK - fft_cmplx_fx( spec, length ); + fft_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif @@ -1136,7 +1136,7 @@ void edxt_fx( ELSE /* fft() doesn't support 512 */ { #ifdef OPTIMIZE_FFT_STACK - fft_cmplx_fx( spec, length ); + fft_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 4182a350b..ac241e86a 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -6928,6 +6928,7 @@ static void fft_lenN( * Complex-value FFT *-----------------------------------------------------------------*/ +#ifndef HARMONIZE_DCT void fft_fx( Word32 *re, /* i/o: real part Qx */ Word32 *im, /* i/o: imag part Qx */ @@ -7013,10 +7014,8 @@ void fft_fx( return; } - 
- -#ifdef OPTIMIZE_FFT_STACK -void fft_cmplx_fx( +#else +void fft_fx( cmplx *x, /* i/o: complex data */ const Word16 length /* i : length of fft */ ) @@ -7093,6 +7092,9 @@ void rfft_fx( Word16 i, sizeOfFft2, sizeOfFft4; Word32 tmp, t1, t2, t3, t4; Word16 s1, s2; +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#endif sizeOfFft2 = shr( length, 1 ); sizeOfFft4 = shr( length, 2 ); @@ -7163,10 +7165,43 @@ void rfft_fx( SWITCH( isign ) { - case -1: +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft4; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[sizeOfFft2 - i - 1].re; + move32(); + + x[2 * i] = spec[i].im; + move32(); + x[2 * i + 1] = L_negate( spec[sizeOfFft2 - i - 1].im ); + move32(); + } + + FOR( i = 0; i < sizeOfFft2; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif + // Qx tmp = L_add( x[0], x[1] ); x[1] = L_sub( x[0], x[1] ); // Qx @@ -7222,7 +7257,27 @@ void rfft_fx( move32(); } +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft2; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif FOR( i = 0; i < length; i += 2 ) { diff --git a/lib_com/ivas_mdft_imdft_fx.c b/lib_com/ivas_mdft_imdft_fx.c index 49d1cbbf1..19774e712 100644 --- a/lib_com/ivas_mdft_imdft_fx.c +++ b/lib_com/ivas_mdft_imdft_fx.c @@ -214,7 +214,29 @@ static void ivas_ifft_cplx1_fx( move32(); } +#ifdef HARMONIZE_DCT + cmplx x[L_FRAME48k]; + + FOR( i = 0; i < length; i++ ) + { + x[i].re = re[i]; + move32(); + x[i].im = im[i]; + move32(); + } + + fft_fx( x, length ); + + FOR( i = 0; i < length; i++ 
) + { + re[i] = x[i].re; + move32(); + im[i] = x[i].im; + move32(); + } +#else fft_fx( re, im, length, 1 ); +#endif return; } @@ -233,8 +255,12 @@ void ivas_mdft_fx( const Word16 mdft_length /* i : MDFT length */ ) { +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#else Word32 re[L_FRAME48k]; Word32 im[L_FRAME48k]; +#endif Word16 j, len_by_2; const Word32 *pTwid; // Q31 len_by_2 = shr( mdft_length, 1 ); @@ -244,23 +270,53 @@ void ivas_mdft_fx( { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } ELSE { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } +#ifdef HARMONIZE_DCT + fft_fx( spec, mdft_length ); + + FOR( j = 0; j < len_by_2; j++ ) + { + pOut_re[2 * j] = spec[j].re; + move32(); + pOut_re[2 * j + 1] = spec[mdft_length - j - 1].re; + move32(); + + pOut_im[2 * j] = spec[j].im; + move32(); + pOut_im[2 * j + 1] = L_negate( spec[mdft_length - j - 1].im ); + move32(); + } +#else fft_fx( re, im, mdft_length, 1 ); FOR( j = 0; j < 
len_by_2; j++ ) { @@ -274,6 +330,8 @@ void ivas_mdft_fx( pOut_im[2 * j + 1] = L_negate( im[mdft_length - j - 1] ); // Qin move32(); } +#endif + return; } diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 089a7119e..a55beffe0 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); @@ -4128,19 +4128,17 @@ void DoFFT_fx( const Word16 length ); void fft_fx( +#ifndef HARMONIZE_DCT Word32 *re, /* i/o: real part */ Word32 *im, /* i/o: imag part */ const Word16 length, /* i : length of fft */ const Word16 s /* i : sign */ -); - -#ifdef OPTIMIZE_FFT_STACK -void fft_cmplx_fx( +#else cmplx *spec, /* i/o: complex data */ const Word16 length /* i : length of fft */ +#endif ); -#endif void rfft_fx( Word32 *x, /* i/o: values */ const Word16 *w, /* i : window */ -- GitLab From ba9026abb9a05c10ff178d2c450835b53c6cdda6 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 16:10:33 +0100 Subject: [PATCH 13/14] fix --- lib_com/basop_util.c | 45 ++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 77ac9fc97..5187ec49d 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -768,23 +768,27 @@ Word16 getScaleFactor32_cmplx( const Word16 len_x /* i: length of the array to scan */ ) { - Word16 i, i_min, i_max; - Word32 x_min, x_max; + Word16 i, i_min, i_max, i_re, i_im; + Word32 x_min_re, x_max_re, x_min_im, x_max_im; - x_max = 0; + x_max_re = 0; move32(); - x_min = 0; + x_min_re = 0; + move32(); + x_max_im = 0; + move32(); + x_min_im = 0; move32(); FOR( i = 0; i < len_x; i++ ) { if ( x[i].re >= 0 ) - x_max = L_max( x_max, x[i].re ); + x_max_re = L_max( x_max_re, x[i].re ); if ( x[i].re < 0 ) - x_min = L_min( x_min, x[i].re ); + x_min_re = 
L_min( x_min_re, x[i].re ); if ( x[i].im >= 0 ) - x_max = L_max( x_max, x[i].im ); + x_max_im = L_max( x_max_im, x[i].im ); if ( x[i].im < 0 ) - x_min = L_min( x_min, x[i].im ); + x_min_im = L_min( x_min_im, x[i].im ); } i_max = 0x20; @@ -792,15 +796,28 @@ Word16 getScaleFactor32_cmplx( i_min = 0x20; move16(); - if ( x_max != 0 ) - i_max = norm_l( x_max ); + if ( x_max_re != 0 ) + i_max = norm_l( x_max_re ); - if ( x_min != 0 ) - i_min = norm_l( x_min ); + if ( x_min_re != 0 ) + i_min = norm_l( x_min_re ); - i = s_and( s_min( i_max, i_min ), 0x1F ); + i_re = s_and( s_min( i_max, i_min ), 0x1F ); - return i; + i_max = 0x20; + move16(); + i_min = 0x20; + move16(); + + if ( x_max_im != 0 ) + i_max = norm_l( x_max_im ); + + if ( x_min_im != 0 ) + i_min = norm_l( x_min_im ); + + i_im = s_and( s_min( i_max, i_min ), 0x1F ); + + return s_min(i_re, i_im); } #endif -- GitLab From 5da1c71bab0e91c893a237dfa82e7341f480f54f Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 16:13:47 +0100 Subject: [PATCH 14/14] clang-format --- lib_com/basop_util.c | 2 +- lib_com/prot_fx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 5187ec49d..73581f372 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -817,7 +817,7 @@ Word16 getScaleFactor32_cmplx( i_im = s_and( s_min( i_max, i_min ), 0x1F ); - return s_min(i_re, i_im); + return s_min( i_re, i_im ); } #endif diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index a55beffe0..ea6a1fd0a 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); -- GitLab