From 78c80a0ca36a7e7631f198855faf0856db2c46c0 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 16:51:15 +0100 Subject: [PATCH 01/23] OPTIMIZE_FFT_STACK --- lib_com/edct_fx.c | 193 +++++++++++++++++++++++++++++++--- lib_com/fft_fx.c | 70 ++++++++++++ lib_com/fft_fx_evs.c | 64 +++++++---- lib_com/options.h | 1 + lib_com/prot_fx.h | 25 ++++- lib_com/rom_com.h | 2 + lib_com/rom_com_fx.c | 9 +- lib_com/scale_mem_fx.c | 31 ++++++ lib_com/tools_fx.c | 29 +++++ lib_dec/FEC_HQ_phase_ecu_fx.c | 4 + 10 files changed, 389 insertions(+), 39 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 55bc483e0..02ad1d64c 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -11,7 +11,9 @@ #include "stl.h" #include "math_32.h" -static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ + +static Word16 get_edxt_factor( + const Word16 length ) /* Returns value of sqrtf(2.f/length) in Q15 */ { Word16 factor; /*Q15*/ factor = 0; @@ -53,8 +55,12 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 7327; /*0.223 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -73,33 +79,57 @@ static Word16 get_edxt_factor( Word16 length ) /* Returns value of sqrtf(2.f/len } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 4230; /*0.1290 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1338; /*0.040 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 1638; /*0.05 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 2317; /*0.070 in Q15*/ move16(); +#endif } ELSE IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else factor = 3277; 
/*0.1 in Q15*/ move16(); +#endif } + return factor; /*Q15*/ } -static Word16 const *get_edct_table( Word16 length /*Q0*/, Word16 *q ) + +static Word16 const *get_edct_table( + const Word16 length /*Q0*/, + Word16 *q ) { Word16 const *edct_table; edct_table = NULL; @@ -590,6 +620,7 @@ void edxt_fx( move16(); cosPtr = NULL; sinPtr = NULL; + IF( EQ_16( length, 512 ) ) { cosPtr = cos_scale_tbl_512; /*Q15*/ @@ -641,10 +672,14 @@ void edxt_fx( } ELSE IF( EQ_16( length, 40 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_640; /*Q15*/ sinPtr = sin_scale_tbl_640; /*Q15*/ n = 16; move16(); +#endif } ELSE IF( EQ_16( length, 960 ) ) { @@ -669,38 +704,58 @@ void edxt_fx( } ELSE IF( EQ_16( length, 120 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_960; /*Q15*/ sinPtr = sin_scale_tbl_960; /*Q15*/ n = 8; move16(); +#endif } ELSE IF( EQ_16( length, 1200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_1200; /*Q15*/ sinPtr = sin_scale_tbl_1200; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 800 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 1; move16(); +#endif } ELSE IF( EQ_16( length, 400 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 2; move16(); +#endif } ELSE IF( EQ_16( length, 200 ) ) { +#ifdef OPTIMIZE_FFT_STACK + assert( 0 ); +#else cosPtr = cos_scale_tbl_800; /*Q15*/ sinPtr = sin_scale_tbl_800; /*Q15*/ n = 4; move16(); +#endif } test(); @@ -708,16 +763,26 @@ void edxt_fx( { const Word16 Nm1 = sub( length, 1 ); const Word16 xSign = sub( imult1616( 2, shr( kernelType, 1 ) ), 1 ); /*Q0*/ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_MAX]; +#else Word32 re[L_FRAME_PLUS]; Word32 im[L_FRAME_PLUS]; +#endif IF( !synthesis ) { FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* pre-modulation of audio input */ { +#ifdef 
OPTIMIZE_FFT_STACK + spec[k].re = x[2 * k]; /*Qx*/ + spec[( Nm1 - k )].re = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ + spec[k].im = spec[( Nm1 - k )].im = 0; +#else re[k] = x[2 * k]; /*Qx*/ re[( Nm1 - k )] = Mpy_32_16_1( x[( ( k * 2 ) + 1 )], shl_sat( xSign, 15 ) ); /*Qx*/ im[k] = im[( Nm1 - k )] = 0; +#endif move32(); move32(); move32(); @@ -726,26 +791,46 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_cmplx_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } IF( shr( kernelType, 1 ) ) @@ -757,12 +842,21 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ - y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[( length - k )] = 
L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } - y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else + y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* forw. DST-II */ @@ -774,16 +868,29 @@ void edxt_fx( const Word16 wRe = cosPtr[( k * n )]; /*Q15*/ const Word16 wIm = sinPtr[( k * n )]; /*Q15*/ - y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ +#else + y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ +#endif move32(); move32(); } - y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ +#else + y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } - y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ +#else + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ +#endif move32(); } ELSE /* inverse II = III */ @@ -797,12 +904,21 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[imult1616( k, n )], 1 ); const Word16 wIm = shr( sinPtr[imult1616( k, n )], 1 ); - re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ - im[k] = L_sub( 
Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#else + re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ +#endif move32(); move32(); } - re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else + re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } ELSE /* DST type III */ @@ -814,23 +930,42 @@ void edxt_fx( const Word16 wRe = shr( cosPtr[( k * n )], 1 ); /*Q15*/ const Word16 wIm = shr( sinPtr[( k * n )], 1 ); /*Q15*/ +#ifdef OPTIMIZE_FFT_STACK + spec[k].re = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ + spec[k].im = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#else re[k] = L_add( Mpy_32_16_1( x[( Nm1 - k )], wRe ), Mpy_32_16_1( x[( k - 1 )], wIm ) ); /*Qx*/ im[k] = L_sub( Mpy_32_16_1( x[( k - 1 )], wRe ), Mpy_32_16_1( x[( Nm1 - k )], wIm ) ); /*Qx*/ +#endif move32(); move32(); } - re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#else + re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ +#endif move32(); } - re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ + spec[0].im = spec[( length / 2 )].im = 0; +#else + re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = 
im[( length / 2 )] = 0; +#endif move32(); move32(); FOR( k = ( Nm1 / 2 ); k > 0; k-- ) { +#ifdef OPTIMIZE_FFT_STACK + spec[( length - k )].re = spec[k].re; /*Qx*/ + spec[( length - k )].im = L_negate( spec[k].im ); /*Qx*/ +#else re[( length - k )] = re[k]; /*Qx*/ im[( length - k )] = L_negate( im[k] ); /*Qx*/ +#endif move32(); move32(); } @@ -838,35 +973,63 @@ void edxt_fx( IF( EQ_16( length, 512 ) ) { /* Scaling down re and im buffers to avoid overflow in DoRTFTn_fx if the minimum headroom is less than 4 bits */ +#ifdef OPTIMIZE_FFT_STACK + hdrm = L_norm_arr_cmplx( spec, 512 ); +#else hdrm = s_min( L_norm_arr( re, 512 ), L_norm_arr( im, 512 ) ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = sub( hdrm, 4 ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( NULL, NULL, spec, 512 ); +#else DoRTFTn_fx( re, im, 512 ); +#endif IF( LT_16( hdrm, 4 ) ) { tmp = negate( tmp ); +#ifdef OPTIMIZE_FFT_STACK + scale_sig32_cmplx( spec, 512, tmp ); +#else scale_sig32( re, 512, tmp ); scale_sig32( im, 512, tmp ); +#endif } } ELSE /* fft() doesn't support 512 */ { +#ifdef OPTIMIZE_FFT_STACK + fft_cmplx_fx( spec, length ); +#else fft_fx( re, im, length, 1 ); +#endif } FOR( k = ( Nm1 / 2 ); k >= 0; k-- ) /* post-modulation of FFT output */ { - y[2 * k] = re[k]; /*Qx*/ +#ifdef OPTIMIZE_FFT_STACK + y[2 * k] = spec[k].re; /*Qx*/ +#else + y[2 * k] = re[k]; /*Qx*/ +#endif move32(); IF( xSign != 0 ) { +#ifdef OPTIMIZE_FFT_STACK + y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( spec[( Nm1 - k )].re, shl_sat( xSign, 15 ) ); /*Qx*/ +#else y[( ( k * 2 ) + 1 )] = Mpy_32_16_1( re[( Nm1 - k )], shl_sat( xSign, 15 ) ); /*Qx*/ +#endif } ELSE { diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 97fef62e5..6eca9930f 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -6932,6 +6932,7 @@ void fft_fx( const Word16 s /* i : sign */ ) { + cmplx x[960]; FOR( Word16 j = 0; j < length; j++ ) 
@@ -7010,6 +7011,75 @@ void fft_fx( return; } + +#ifdef OPTIMIZE_FFT_STACK +void fft_cmplx_fx( + cmplx *x, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +) +{ + SWITCH( length ) + { + case 20: + fft_len20_fx( x ); + BREAK; + case 40: + fft_lenN( x, FFT_RotVector_640_fx, 640, 5, 8, 8, 40 ); + BREAK; + case 64: + fft_lenN( x, FFT_RotVector_256_fx, 256, 8, 8, 8, 64 ); + BREAK; + case 80: + fft_lenN( x, FFT_RotVector_640_fx, 640, 10, 8, 4, 40 ); + BREAK; + case 100: + fft_lenN( x, FFT_RotVector_400_fx, 400, 10, 10, 4, 40 ); + BREAK; + case 120: + fft_lenN( x, FFT_RotVector_960_fx, 960, 15, 8, 4, 60 ); + BREAK; + case 128: + fft_lenN( x, FFT_RotVector_256_fx, 256, 16, 8, 4, 64 ); + BREAK; + case 160: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 8, 2, 40 ); + BREAK; + case 200: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 10, 2, 40 ); + BREAK; + case 240: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 8, 2, 60 ); + BREAK; + case 256: + fft_lenN( x, FFT_RotVector_256_fx, 256, 32, 8, 2, 64 ); + BREAK; + case 320: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 16, 2, 40 ); + BREAK; + case 400: + fft_lenN( x, FFT_RotVector_400_fx, 400, 20, 20, 2, 40 ); + BREAK; + case 480: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 16, 2, 60 ); + BREAK; + case 600: + fft_lenN( x, FFT_RotVector_600_fx, 600, 30, 20, 2, 60 ); + BREAK; + case 640: + fft_lenN( x, FFT_RotVector_640_fx, 640, 20, 32, 2, 40 ); + BREAK; + case 960: + fft_lenN( x, FFT_RotVector_960_fx, 960, 30, 32, 2, 60 ); + BREAK; + default: + assert( !"fft length is not supported!" 
); + } + + return; +} +#endif + + void rfft_fx( Word32 *x, /* i/o: values Qx */ const Word16 *w, /* i : window Q15 */ diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index a7b2461cb..c68b21650 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -45,23 +45,43 @@ static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); void DoRTFTn_fx( - Word32 *x, /* i/o : real part of input and output data Q(x) */ - Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ + Word32 *x, /* i/o : real part of input and output data Q(x) */ + Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ +#ifdef OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif const Word16 n /* i : size of the FFT up to 1024 */ ) { - Word16 i; Word32 z[2048], *pt; - pt = z; - FOR( i = 0; i < n; i++ ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec != NULL ) { - *pt++ = x[i]; - move16(); - *pt++ = y[i]; - move16(); + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = spec[i].re; + move16(); + *pt++ = spec[i].im; + move16(); + } } + ELSE + { +#endif + pt = z; + FOR( i = 0; i < n; i++ ) + { + *pt++ = x[i]; + move16(); + *pt++ = y[i]; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK + } +#endif IF( EQ_16( n, 16 ) ) { @@ -92,19 +112,41 @@ void DoRTFTn_fx( assert( 0 ); } - x[0] = z[0]; - move16(); - y[0] = z[1]; - move16(); - pt = &z[2]; - FOR( i = n - 1; i >= 1; i-- ) +#ifdef OPTIMIZE_FFT_STACK + IF( spec == NULL ) { - x[i] = *pt++; +#endif + x[0] = z[0]; move16(); - y[i] = *pt++; + y[0] = z[1]; move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + x[i] = *pt++; + move16(); + y[i] = *pt++; + move16(); + } +#ifdef OPTIMIZE_FFT_STACK } + ELSE + { + /* complex interface: copy the result from z[] back to spec[]; bins 1..n-1 are taken in descending index order, exactly as done for x[]/y[] above */ + spec[0].re = z[0]; + move32(); + spec[0].im = z[1]; + move32(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + spec[i].re = *pt++; + move32(); + spec[i].im = *pt++; + move32(); + } + } - +#endif return; } @@ -124,6 +166,8 @@ static void cdftForw_fx( /* Do FFT */ cftfsub_fx( n, a, w ); + + return; } /*-----------------------------------------------------------------* diff --git a/lib_com/options.h b/lib_com/options.h index b63ee327f..d4ace21b4 100644 --- a/lib_com/options.h +++ 
b/lib_com/options.h @@ -95,6 +95,7 @@ #define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */ +#define OPTIMIZE_FFT_STACK /* #################### End BE switches ################################## */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index a40fba659..2bfdf6935 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -1303,6 +1303,14 @@ void scale_sig32( const Word16 exp0 /* i : exponent: x = round(x << exp) Qx xx exp */ ); +#ifdef OPTIMIZE_FFT_STACK +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +); + +#endif void Scale_sig64( Word64 x[], /* i/o: signal to scale Qx */ Word16 len, /* i : size of x[] Q0 */ @@ -4050,8 +4058,11 @@ void BASOP_cfft( cmplx *pComplexBuf, Word16 sizeOfFft, Word16 *scale, Word32 wor void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); void DoRTFTn_fx( - Word32 *x, /* i/o : real part of i and output data */ - Word32 *y, /* i/o : imaginary part of i and output data */ + Word32 *x, /* i/o : real part of i and output data */ + Word32 *y, /* i/o : imaginary part of i and output data */ +#ifdef OPTIMIZE_FFT_STACK + cmplx *spec, /* i/o : complex input and output data */ +#endif const Word16 n /* i : size of the FFT up to 1024 */ ); @@ -4113,6 +4124,13 @@ void fft_fx( const Word16 s /* i : sign */ ); +#ifdef OPTIMIZE_FFT_STACK +void fft_cmplx_fx( + cmplx *spec, /* i/o: complex data */ + const Word16 length /* i : length of fft */ +); + +#endif void rfft_fx( Word32 *x, /* i/o: values */ const Word16 *w, /* i : window */ @@ -4130,6 +4148,9 @@ void DoRTFTn_fx_ivas( Word16 
find_guarded_bits_fx( const Word32 n ); Word16 L_norm_arr( const Word32 *arr, Word16 size ); +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( const cmplx *arr, Word16 size ); +#endif Word16 norm_arr( Word16 *arr, Word16 size ); Word16 W_norm_arr( Word64 *arr, Word16 size ); diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 2843d1d6b..42ede75fd 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1559,10 +1559,12 @@ extern const Word16 cos_scale_tbl_640[640]; // Q15 extern const Word16 sin_scale_tbl_640[640]; // Q15 extern const Word16 sin_scale_tbl_512[512]; // Q15 extern const Word16 cos_scale_tbl_512[512]; // Q15 +#ifndef OPTIMIZE_FFT_STACK extern const Word16 cos_scale_tbl_1200[1200]; // Q15 extern const Word16 sin_scale_tbl_1200[1200]; // Q15 extern const Word16 cos_scale_tbl_800[800]; // Q15 extern const Word16 sin_scale_tbl_800[800]; // Q15 +#endif extern const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 extern const Word16 scales_p_ivas_fx[][MAX_NO_SCALES * 2]; // Q11 diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 376333284..760ccf9c0 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -25778,9 +25778,9 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ -}; // Q0 + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ +}; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 
20066 }; /* (Q11) */ /* For extended frames in ACELP->HQ transitions in IVAS, map indices floor(sfms*1.25/8)=[1 2 3 5 7 10 12 15] from extended bws: */ @@ -27403,6 +27403,7 @@ const Word16 cos_scale_tbl_512[512] = /* Q15 */ 201, 100 }; +#ifndef OPTIMIZE_FFT_STACK const Word16 sin_scale_tbl_1200[1200] = { /* Q15 */ 0, 42, 85, 128, 171, 214, 257, 300, 343, 386, 428, 471, 514, 557, 600, 643, @@ -27914,7 +27915,7 @@ const Word16 cos_scale_tbl_800[800] = { /* Q15 */ 32750, 32752, 32754, 32756, 32757, 32759, 32760, 32761, 32762, 32763, 32764, 32765, 32765, 32766, 32766, 32766 }; - +#endif const Word16 scales_ivas_fx[][MAX_NO_SCALES * 2] = /* 2 subvectors Q11*/ { { diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 01c2442a8..3df645e6a 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -337,6 +337,37 @@ void scale_sig32_r( return; } +#ifdef OPTIMIZE_FFT_STACK +void scale_sig32_cmplx( + cmplx x[], /* i/o: signal to scale Qx */ + const Word16 lg, /* i : size of x[] Q0 */ + const Word16 exp0 /* i : exponent: x = round(x << exp) Qx exp */ +) +{ + Word16 i; + + FOR( i = 0; i < lg; i++ ) + { + /* saturation can occur here */ + x[i].re = L_shl( x[i].re, exp0 ); + move32(); + x[i].im = L_shl( x[i].im, exp0 ); + move32(); + /* exp0 == 0 leaves the data unchanged, so the loop is left after the + * first element. The index is only set to lg once both .re and .im + * have been handled; changing it between the two updates would make + * the .im access touch x[lg], one element past the buffer. */ + if ( 0 == exp0 ) + { + i = lg; + } + } + + return; +} +#endif + + /*-------------------------------------------------------------------* * Rescale_mem: * diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index 47111db3e..eb9f7324f 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -5376,6 +5376,35 @@ Word16 L_norm_arr( return q; } +#ifdef OPTIMIZE_FFT_STACK +Word16 L_norm_arr_cmplx( + const cmplx *arr, + Word16 size ) +{ + Word16 q = 31; + move16(); + + FOR( Word16 i = 0; i < size; i++ ) + { + Word16 q_tst; + + q_tst = norm_l( arr[i].re ); + if ( arr[i].re != 0 ) + { + q = s_min( q, q_tst ); + } + + q_tst = norm_l( arr[i].im ); + if ( arr[i].im != 0 ) + { + q = s_min( q, q_tst ); 
+ } + } + + return q; +} +#endif + Word16 norm_arr( Word16 *arr, Word16 size ) diff --git a/lib_dec/FEC_HQ_phase_ecu_fx.c b/lib_dec/FEC_HQ_phase_ecu_fx.c index b76037653..1120a889b 100644 --- a/lib_dec/FEC_HQ_phase_ecu_fx.c +++ b/lib_dec/FEC_HQ_phase_ecu_fx.c @@ -2556,7 +2556,11 @@ static void fec_ecu_dft_fx( *exp = s_min( *exp, 15 ); } +#ifdef OPTIMIZE_FFT_STACK + DoRTFTn_fx( Tfr32, Tfi32, NULL, *Nfft ); +#else DoRTFTn_fx( Tfr32, Tfi32, *Nfft ); +#endif N_LP = shr( *Nfft, 1 ); L_tmp = L_deposit_l( 0 ); -- GitLab From 2ac149333f10d10884fd2a0791249f05fc757d00 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:16:27 +0100 Subject: [PATCH 02/23] HARMONIZE_DCT --- lib_com/basop_util.c | 42 ++++++ lib_com/basop_util.h | 8 ++ lib_com/edct_fx.c | 182 +++++++++++++++++++++--- lib_com/fft_fx.c | 13 +- lib_com/gs_inact_switching_fx.c | 12 ++ lib_com/options.h | 1 + lib_com/prot_fx.h | 24 +++- lib_com/tcx_mdct_fx.c | 8 ++ lib_com/trans_direct_fx.c | 16 +++ lib_com/trans_inv_fx.c | 4 + lib_dec/FEC_fx.c | 34 +++-- lib_dec/LD_music_post_filter_fx.c | 8 ++ lib_dec/core_switching_dec_fx.c | 4 + lib_dec/dec_tcx_fx.c | 14 +- lib_dec/gs_dec_amr_wb_fx.c | 11 ++ lib_dec/gs_dec_fx.c | 9 ++ lib_dec/hf_synth_fx.c | 8 ++ lib_dec/ivas_td_low_rate_dec_fx.c | 5 + lib_enc/bw_detect_fx.c | 4 + lib_enc/cod_tcx_fx.c | 12 ++ lib_enc/ext_sig_ana_fx.c | 8 ++ lib_enc/gs_enc_fx.c | 10 ++ lib_enc/ivas_td_low_rate_enc_fx.c | 9 ++ lib_rend/ivas_reverb_fft_filter_fx.c | 11 ++ lib_rend/ivas_reverb_filter_design_fx.c | 4 + 25 files changed, 423 insertions(+), 38 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 551dbdeef..dd9bf4e09 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -758,9 +758,51 @@ Word16 getScaleFactor32( /* o: measured headroom in range [ i = s_and( s_min( i_max, i_min ), 0x1F ); + return i; +} + +#ifdef OPTIMIZE_FFT_STACK +/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: 
array containing 32-bit data */ + const Word16 len_x /* i: length of the array to scan */ +) +{ + Word16 i, i_min, i_max; + Word32 x_min, x_max; + + x_max = 0; + move32(); + x_min = 0; + move32(); + FOR( i = 0; i < len_x; i++ ) + { + if ( x[i].re >= 0 ) + x_max = L_max( x_max, x[i].re ); + if ( x[i].re < 0 ) + x_min = L_min( x_min, x[i].re ); + if ( x[i].im >= 0 ) + x_max = L_max( x_max, x[i].im ); + if ( x[i].im < 0 ) + x_min = L_min( x_min, x[i].im ); + } + + i_max = 0x20; + move16(); + i_min = 0x20; + move16(); + + if ( x_max != 0 ) + i_max = norm_l( x_max ); + + if ( x_min != 0 ) + i_min = norm_l( x_min ); + + i = s_and( s_min( i_max, i_min ), 0x1F ); return i; } +#endif Word16 getScaleFactor32_copy( /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ const Word32 *x, /* i: array containing 32-bit data */ diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 426516248..06d9ee759 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -235,6 +235,14 @@ Word16 getScaleFactor32( const Word32 *x, /* i : array containing 32-bit data */ const Word16 len_x ); /* i : length of the array to scan */ +#ifdef OPTIMIZE_FFT_STACK +/* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: array containing 32-bit data */ + const Word16 len_x /* i: length of the array to scan */ +); +#endif + Word16 getScaleFactor32_copy( /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ const Word32 *x, /* i: array containing 32-bit data */ const Word32 len_x ); /* i: length of the array to scan */ diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 02ad1d64c..0d544182c 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -207,14 +207,23 @@ void edct_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ - Word16 *q /* i : Q value of input signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + 
const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of input signal */ +#endif ) { Word16 i; Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx spec[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -224,37 +233,94 @@ void edct_fx( /* Twiddling and Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + spec[i].re = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#else complex_buf[2 * i] = Madd_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Q(q+1) */ - - complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + spec[i].im = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#else + complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ +#endif move32(); } - *q = sub( 15, *q ); - move16(); +#ifdef HARMONIZE_DCT + IF( element_mode == EVS_MONO ) + { +#endif + *q = sub( 15, *q ); + move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( spec, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif + + tmp = div_s( 1, length ); /*Q15 */ + tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ +#ifdef HARMONIZE_DCT + } + ELSE + { + *q = sub( 31, *q ); + move16(); + tmp = sub( getScaleFactor32_cmplx( spec, len1 ), find_guarded_bits_fx( len1 ) ); + scale_sig32_cmplx( spec, len1, tmp ); + + fft_cmplx_fx( spec, len1 ); + *q = sub( *q, tmp ); + move16(); + + tmp = div_s( 4, length 
); /*Q17 */ + tmp = round_fx( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ) ); /*Q15 */ + } +#endif - tmp = div_s( 1, length ); /*Q15 */ - tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ - im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( spec[i].re, spec[i].im, tmp ); /*Q(q+1) */ + im = Madd_32_16( spec[i].im, spec[i].re, tmp ); /*Q(q+1) */ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Q(q+1) */ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Q(q+1) */ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Q(q+2)*/ move32(); y[( length - ( 1 + ( i * 2 ) ) )] = L_sub( Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ), Mult_32_16( im, edct_table[i] ) ); /*Q(q+2)*/ move32(); } /*Q(q-2) */ +#ifdef HARMONIZE_DCTaa + IF( element_mode == EVS_MONO ) + { + *q = sub( 15 + 2, *q ); + } + ELSE + { + *q = sub( 31 + 2, *q ); + } +#else *q = sub( 15 + 2, *q ); +#ifdef HARMONIZE_DCT + IF( element_mode != EVS_MONO ) + { + *q = add( *q, Q16 ); + } +#endif +#endif move16(); + return; } +#ifndef HARMONIZE_DCT void edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -314,8 +380,11 @@ void edct_ivas_fx( *q = sub( 31 + 2, *q ); move16(); + return; } +#endif + /*-------------------------------------------------------------------------* * FUNCTION : edst_fx() * @@ -340,7 +409,11 @@ void edst_fx( Word32 re; Word32 im; const Word16 *edct_table; /*Q16 */ +#ifdef OPTIMIZE_FFT_STACK + cmplx complex_buf[L_FRAME_PLUS / 2]; +#else Word32 complex_buf[2 * ( L_FRAME48k / 2 + 240 )]; +#endif Word32 L_tmp; Word16 tmp; Word16 len1; @@ -350,25 +423,42 @@ void edst_fx( /* Twiddling and 
Pre-rotate */ FOR( i = 0; i < len1; i++ ) { - L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ - complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[( length - ( 1 + ( i * 2 ) ) )], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].re = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else + complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); - L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ + L_tmp = Mult_32_16( x[2 * i], edct_table[i] ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + complex_buf[i].im = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#else complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( length - ( 1 + ( i * 2 ) ) )], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ +#endif move32(); } *q = sub( 15, *q ); move16(); +#ifdef OPTIMIZE_FFT_STACK + BASOP_cfft( complex_buf, len1, q, y ); +#else BASOP_cfft( (cmplx *) complex_buf, len1, q, y ); +#endif tmp = div_s( 1, length ); /*Q15 */ tmp = round_fx( L_shl( L_mult( tmp, 19302 /*0.75f * EVS_PI in Q13*/ ), 2 ) ); /*Q15 */ FOR( i = 0; i < len1; i++ ) { - re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ - im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#ifdef OPTIMIZE_FFT_STACK + re = Msub_32_16( complex_buf[i].re, complex_buf[i].im, tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[i].im, complex_buf[i].re, tmp ); /*Qq+1*/ +#else + re = Msub_32_16( complex_buf[2 * i], complex_buf[( ( i * 2 ) + 1 )], tmp ); /*Qq+1*/ + im = Madd_32_16( complex_buf[( ( i * 2 ) + 1 )], complex_buf[2 * i], tmp ); /*Qq+1*/ +#endif y[2 * i] = L_add( Mult_32_16( re, edct_table[i] ), Mult_32_16( im, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ move32(); y[( length - ( 1 + ( i * 2 ) ) )] = L_sub( 
Mult_32_16( im, edct_table[i] ), Mult_32_16( re, edct_table[( len1 - ( 1 + i ) )] ) ); /*Qq+2*/ @@ -380,6 +470,8 @@ void edst_fx( return; } + + /*========================================================================*/ /* FUNCTION : edct_fx() */ /*------------------------------------------------------------------------*/ @@ -404,26 +496,35 @@ void edct_16fx( const Word16 *x, /* i : input signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ + Word16 bh /* bit-headroom */ +#ifndef HARMONIZE_DCT + , const Word16 element_mode - +#endif ) { Word16 i; Word16 re[L_FRAME48k / 2]; Word16 im[L_FRAME48k / 2]; const Word16 *edct_table = NULL; +#ifndef OPTIMIZE_FFT_STACK Word16 re2[L_FRAME48k / 2]; Word16 im2[L_FRAME48k / 2]; - +#endif Word32 L_tmp, Lacc, Lmax; +#ifdef OPTIMIZE_FFT_STACK + Word16 tmp, tmp_re, fact; +#else Word16 tmp, fact; +#endif Word16 Q_edct; Word16 Len2, i2; const Word16 *px, *pt; Word16 *py; +#ifndef HARMONIZE_DCT (void) element_mode; /*COMPLETE: some eDCT sub function are missing */ +#endif IF( EQ_16( length, L_FRAME32k ) ) { @@ -477,28 +578,48 @@ void edct_16fx( { i2 = shl( i, 1 ); - L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( x[i2], edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_mac_sat( L_tmp, *px, *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + re[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else re2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif move16(); - L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ - Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ + L_tmp = L_mult( *px, edct_table[i] ); /*Q(Qx+16) */ + Lacc = L_msu_sat( L_tmp, x[i2], *pt ); /*Q(Qx+16) */ +#ifdef OPTIMIZE_FFT_STACK + im[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#else im2[i] = round_fx_sat( L_shl_sat( Lacc, Q_edct ) ); /* Q(Qx+Q_edct) */ +#endif 
move16(); px -= 2; pt--; } IF( EQ_16( length, L_FRAME32k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT320_16fx( re, im ); +#else DoRTFT320_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT128_16fx( re, im ); +#else DoRTFT128_16fx( re2, im2 ); +#endif } ELSE IF( EQ_16( length, L_FRAME16k ) ) { +#ifdef OPTIMIZE_FFT_STACK + DoRTFT160_16fx( re, im ); +#else DoRTFT160_16fx( re2, im2 ); +#endif } ELSE { @@ -508,6 +629,18 @@ void edct_16fx( fact = round_fx( L_shl( L_tmp, 2 ) ); /*Q15 */ FOR( i = 0; i < shr( length, 1 ); i++ ) { +#ifdef OPTIMIZE_FFT_STACK + tmp = mult_r( im[i], fact ); /*Q(Qx+Q_edct) */ + tmp_re = sub_sat( re[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + tmp = mult_r( re[i], fact ); /*Q(Qx+Q_edct) */ + im[i] = add_sat( im[i], tmp ); /*Q(Qx+Q_edct) */ + move16(); + + re[i] = tmp_re; + move16(); +#else tmp = mult_r( im2[i], fact ); /*Q(Qx+Q_edct) */ re[i] = sub_sat( re2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); @@ -515,6 +648,7 @@ void edct_16fx( tmp = mult_r( re2[i], fact ); /*Q(Qx+Q_edct) */ im[i] = add_sat( im2[i], tmp ); /*Q(Qx+Q_edct) */ move16(); +#endif } /* Post-rotate and obtain the output data */ @@ -567,7 +701,11 @@ void iedct_short_fx( seg_len_div4 = shr( segment_length, 2 ); /*Q0*/ seg_len_3mul_div4 = add( seg_len_div2, seg_len_div4 ); +#ifdef HARMONIZE_DCT + edct_fx( in, alias, seg_len_div2, Q, EVS_MONO ); +#else edct_fx( in, alias, seg_len_div2, Q ); +#endif FOR( i = 0; i < seg_len_div2; i++ ) { IF( alias[i] != 0 ) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 6eca9930f..7af092cd3 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -109,11 +109,13 @@ static void fft5_8( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); static void fft4_5( Word32 *x, Word32 *y, const Word16 *Idx ); static void fft5_4( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); +#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: 
imaginary part of input and output data */ const Word16 n /* i : size of the FFT n=(2^k) up to 1024 */ ); +#endif /*-----------------------------------------------------------------* * fft15_shift2() * 15-point FFT with 2-point circular shift @@ -2630,6 +2632,7 @@ static void dctsub( return; } +#ifndef HARMONIZE_DCT /*-----------------------------------------------------------------* * edct2_fx_ivas() * @@ -2790,7 +2793,7 @@ void DoRTFTn_fx_ivas( return; } - +#endif #ifndef HQ_ALIGN_DUPLICATED_CODE void fft3_fx_ivas( const Word32 X[], // Qx @@ -4274,7 +4277,11 @@ void DoFFT_fx( DoRTFT320_fx( re2, im2 ); BREAK; case 256: +#ifdef HARMONIZE_DCT + DoRTFTn_fx( re2, im2, NULL, 256 ); +#else DoRTFTn_fx_ivas( re2, im2, 256 ); +#endif BREAK; case 240: DoRTFT240( re2, im2 ); @@ -4298,7 +4305,11 @@ void DoFFT_fx( DoRTFT80_fx( re2, im2 ); BREAK; case 64: +#ifdef HARMONIZE_DCT + DoRTFTn_fx( re2, im2, NULL, 64 ); +#else DoRTFTn_fx_ivas( re2, im2, 64 ); +#endif BREAK; case 40: DoRTFT40_fx( re2, im2 ); diff --git a/lib_com/gs_inact_switching_fx.c b/lib_com/gs_inact_switching_fx.c index d00b8e3c0..16e72cd58 100644 --- a/lib_com/gs_inact_switching_fx.c +++ b/lib_com/gs_inact_switching_fx.c @@ -89,7 +89,11 @@ void Inac_switch_ematch_fx( ELSE IF( EQ_16( coder_type, VOICED ) || EQ_16( coder_type, GENERIC ) || EQ_16( coder_type, TRANSITION ) || ( last_core != ACELP_CORE ) || NE_16( last_codec_mode, MODE1 ) || ( ( element_mode > EVS_MONO ) && EQ_16( coder_type, UNVOICED ) ) ) { /* Find spectrum and energy per band for GC and VC frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -103,7 +107,11 @@ void Inac_switch_ematch_fx( ELSE IF( ( coder_type == INACTIVE ) && inactive_coder_type_flag ) { /* Find spectrum and energy per band for inactive frames */ +#ifdef HARMONIZE_DCT + edct_16fx( exc2, dct_exc_tmp, 
L_frame, 5 ); +#else edct_16fx( exc2, dct_exc_tmp, L_frame, 5, element_mode ); +#endif Ener_per_band_comp_fx( dct_exc_tmp, Ener_per_bd, Q_exc, MBANDS_GN, 1, L_frame ); @@ -188,7 +196,11 @@ void Inac_switch_ematch_fx( Scale_sig( dct_exc_tmp, 240, 1 ); // Q_exc Scale_sig( exc2, 240, 1 ); // Q_exc } +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_tmp, exc2, L_frame, 5 ); +#else edct_16fx( dct_exc_tmp, exc2, L_frame, 5, element_mode ); +#endif } return; diff --git a/lib_com/options.h b/lib_com/options.h index d4ace21b4..59f6694eb 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -96,6 +96,7 @@ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */ #define OPTIMIZE_FFT_STACK +#define HARMONIZE_DCT /* #################### End BE switches ################################## */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 2bfdf6935..e5379cefa 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4014,16 +4014,22 @@ void edct_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length */ - Word16 *q /* i : Q value of i signal */ +#ifdef HARMONIZE_DCT + Word16 *q, /* i : Q value of input signal */ + const Word16 element_mode /* i : element mode */ +#else + Word16 *q /* i : Q value of i signal */ +#endif ); +#ifndef HARMONIZE_DCT void edct_ivas_fx( const Word32 *x, /* i : input signal Qq */ Word32 *y, /* o : output transform Qq */ Word16 length, /* i : length Q0*/ Word16 *q /* i : Q value of input signal */ ); - +#endif void edst_fx( const Word32 *x, /* i : i signal Qq */ Word32 *y, /* o : output transform Qq */ @@ -4035,8 +4041,12 @@ void edct_16fx( const Word16 *x, /* i : i signal Qx */ Word16 *y, /* o : output transform Qx */ Word16 length, /* i : length */ - Word16 bh, /* bit-headroom */ - const Word16 element_mode ); + Word16 bh /* bit-headroom */ 
+#ifndef HARMONIZE_DCT + , + const Word16 element_mode +#endif +); void iedct_short_fx( const Word32 *in, /* i : i vector */ @@ -4138,12 +4148,13 @@ void rfft_fx( const Word16 isign /* i : sign */ ); +#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT up to 1024 */ ); - +#endif Word16 find_guarded_bits_fx( const Word32 n ); @@ -4160,6 +4171,7 @@ Flag is_zero_arr( Word32 *arr, Word16 size ); Flag is_zero_arr16( Word16 *arr, Word16 size ); Flag is_zero_arr64( Word64 *arr, Word16 size ); +#ifndef HARMONIZE_DCT void edct2_fx_ivas( const Word16 n, const Word16 isgn, @@ -4167,7 +4179,7 @@ void edct2_fx_ivas( Word32 *a, const Word16 *ip, const Word16 *w ); - +#endif void edct2_fx( Word16 n, Word16 isgn, diff --git a/lib_com/tcx_mdct_fx.c b/lib_com/tcx_mdct_fx.c index 4fd016729..d29a4ffed 100644 --- a/lib_com/tcx_mdct_fx.c +++ b/lib_com/tcx_mdct_fx.c @@ -138,7 +138,11 @@ void TCX_MDCT( *y_e = sub( 15, *y_e ); move16(); +#ifdef HARMONIZE_DCT + edct_fx( y, y, l / 2 + m + r / 2, y_e, EVS_MONO ); +#else edct_fx( y, y, l / 2 + m + r / 2, y_e ); +#endif *y_e = sub( 15 - 1, *y_e ); move16(); return; @@ -220,7 +224,11 @@ void TCX_MDCT_Inverse( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ diff --git a/lib_com/trans_direct_fx.c b/lib_com/trans_direct_fx.c index c84cd9efa..fa0a27a9a 100644 --- a/lib_com/trans_direct_fx.c +++ b/lib_com/trans_direct_fx.c @@ -103,7 +103,11 @@ void direct_transform_fx( Qs[0] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0], EVS_MONO ); 
+#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[0] ); +#endif Qmin = s_min( Qs[0], Qmin ); iseg_fx = &in32_r16_fx[segment_length4]; @@ -136,7 +140,11 @@ void direct_transform_fx( Qs[seg] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[seg] ); +#endif Qmin = s_min( Qs[seg], Qmin ); iseg_fx += segment_length2; @@ -164,7 +172,11 @@ void direct_transform_fx( } Qs[NUM_TIME_SWITCHING_BLOCKS - 1] = *Q; move16(); +#ifdef HARMONIZE_DCT + edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[NUM_TIME_SWITCHING_BLOCKS - 1], EVS_MONO ); +#else edct_fx( dctin32_fx, oseg_fx, segment_length2, &Qs[NUM_TIME_SWITCHING_BLOCKS - 1] ); +#endif Qmin = s_min( Qs[NUM_TIME_SWITCHING_BLOCKS - 1], Qmin ); *Q = Qmin; @@ -183,7 +195,11 @@ void direct_transform_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in32_fx, out32_fx, L, Q, EVS_MONO ); +#else edct_fx( in32_fx, out32_fx, L, Q ); +#endif } return; diff --git a/lib_com/trans_inv_fx.c b/lib_com/trans_inv_fx.c index 34d424f26..32e188f2d 100644 --- a/lib_com/trans_inv_fx.c +++ b/lib_com/trans_inv_fx.c @@ -1122,6 +1122,10 @@ void Inverse_Transform( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( in_mdct, out, L, Q, EVS_MONO ); +#else edct_fx( in_mdct, out, L, Q ); +#endif } } diff --git a/lib_dec/FEC_fx.c b/lib_dec/FEC_fx.c index fe2780a86..9674ab9c7 100644 --- a/lib_dec/FEC_fx.c +++ b/lib_dec/FEC_fx.c @@ -5,15 +5,20 @@ #include #include "options.h" /* Compilation switches */ #include "cnst.h" /* Common constants */ -#include "rom_com.h" /* Common static table prototypes */ +#include "rom_com.h" /* Common static table prototypes */ #include "rom_dec.h" /* Decoder static table prototypes */ #include "prot_fx.h" /* Function prototypes */ #include "basop_util.h" + + /*-------------------------------------------------------------------* * Local function prototypes 
*-------------------------------------------------------------------*/ + static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ); void gain_dec_bfi_fx( Word16 *past_qua_en ); + + /*======================================================================*/ /* FUNCTION : FEC_exc_estim_fx() */ /*----------------------------------------------------------------------*/ @@ -47,7 +52,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ (Word16[]) voice_factors_fx : frame error rate Q15 */ /* _ (Word16[]) FEC_pitch_fx(tmp_tc): FEC pitch Q6 */ /*-----------------------------------------------------------------------*/ - /* _ (Word16) st_fx->lp_gainp_fx : FEC -low-pass filtered pitch gain Q14 */ /* _ (Word16) st_fx->seed :FEC-seed for random generator for excitation*/ /* _ (Word16) st_fx->bfi_pitch_fx : LP filter coefficient */ @@ -57,7 +61,6 @@ void gain_dec_bfi_fx( Word16 *past_qua_en ); /* _ None */ /*=======================================================================*/ - void FEC_exc_estim_fx( Decoder_State *st_fx, /* i/o: Decoder static memory */ const Word16 L_frame, /* i : length of the frame */ @@ -73,7 +76,6 @@ void FEC_exc_estim_fx( Word16 *tmp_noise /* o : long-term noise energy Q0 */ ) { - Word16 exc2_buf[L_FRAME16k + MODE1_L_FIR_FER - 1]; Word16 gainCNG, new_pit /*Q0*/; /* Q3*/ Word16 exp; @@ -152,7 +154,6 @@ void FEC_exc_estim_fx( move16(); } - pitch_pred_linear_fit( st_fx->nbLostCmpt, st_fx->last_good, @@ -170,13 +171,11 @@ void FEC_exc_estim_fx( new_pit /*Q0 int*/ = shl( round_fx( predPitchLag ), 0 ); } - /*-----------------------------------------------------------------* * estimate subframe pitch values for the FEC frame *-----------------------------------------------------------------*/ /* initialize pitch to the long-term pitch */ - *tmp_tc = st_fx->bfi_pitch_fx; move16(); /*Q6*/ IF( EQ_16( L_frame, L_FRAME ) ) @@ -473,7 +472,11 @@ void FEC_exc_estim_fx( move16(); /* Transform to 
frequency domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5 ); +#else edct_16fx( exc, exc_dct_in, st_fx->L_frame, 5, st_fx->element_mode ); +#endif /* Reset unvaluable part of the adaptive (pitch) excitation contribution */ max_len = sub( st_fx->L_frame, Diff_len ); @@ -498,6 +501,7 @@ void FEC_exc_estim_fx( /*-----------------------------------------------------------------* * Replicate the last spectrum in case the last good frame was coded by GSC *-----------------------------------------------------------------*/ + test(); test(); test(); @@ -514,7 +518,11 @@ void FEC_exc_estim_fx( *tmp_noise = shr_r( st_fx->lp_gainc_fx, 3 ); /*Q0*/ move16(); /* Transform back to time domain */ +#ifdef HARMONIZE_DCT + edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5 ); +#else edct_16fx( exc_dct_in, exc, st_fx->L_frame, 5, st_fx->element_mode ); +#endif } ELSE { @@ -739,12 +747,19 @@ void FEC_exc_estim_fx( move16(); st_fx->bfi_pitch_frame = st_fx->L_frame; move16(); + return; } /*calculates some conditions for Pulse resynchronization to take place*/ -static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word16 new_pit, Word16 Tc, Word16 L_frame ) +static void pulseRes_preCalc( + Word16 *cond1, + Word16 *cond2, + Word32 *cond3, + Word16 new_pit, + Word16 Tc, + Word16 L_frame ) { Word16 tmp_pit, tmp_pit_e, tmp_frame, tmp_frame_e; Word32 tmp_pit2; @@ -773,8 +788,11 @@ static void pulseRes_preCalc( Word16 *cond1, Word16 *cond2, Word32 *cond3, Word1 BASOP_SATURATE_WARNING_ON_EVS *cond3 = L_sub( L_mult0( -1, tmp_pit ), tmp_pit2 ); move32(); + + return; } + /*-------------------------------------------------------------------* * gain_dec_bfi() * diff --git a/lib_dec/LD_music_post_filter_fx.c b/lib_dec/LD_music_post_filter_fx.c index fc3a94a77..989a47e8d 100644 --- a/lib_dec/LD_music_post_filter_fx.c +++ b/lib_dec/LD_music_post_filter_fx.c @@ -877,7 +877,11 @@ void Prep_music_postP_fx( * EDCT and back to 16 bits 
*------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc16, dct_buffer_out, DCT_L_POST, 6 ); +#else edct_16fx( exc16, dct_buffer_out, DCT_L_POST, 6, EVS_MONO ); +#endif *qdct = Q_exc; move16(); @@ -957,7 +961,11 @@ void Post_music_postP_fx( * Go back to time domain *------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6 ); +#else edct_16fx( dct_buffer_in, exc16, DCT_L_POST, 6, EVS_MONO ); +#endif Copy( exc16 + OFFSET2, exc2, L_FRAME ); diff --git a/lib_dec/core_switching_dec_fx.c b/lib_dec/core_switching_dec_fx.c index 001cfa975..e1db6fb06 100644 --- a/lib_dec/core_switching_dec_fx.c +++ b/lib_dec/core_switching_dec_fx.c @@ -111,7 +111,11 @@ void bw_switching_pre_proc_fx( * Calculate frequency energy of 0~3.2kHz and 3.2~6.4kHz the ACELP core synthesis *-------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6 ); +#else edct_16fx( old_syn_12k8_16k_fx, syn_dct_fx, L_FRAME, 6, st_fx->element_mode ); +#endif L_tmp = L_deposit_l( 0 ); FOR( i = 0; i < L_FRAME / 2; i++ ) diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 0a0810349..7ee784876 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2199,7 +2199,11 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T /* DCT */ Q = sub( 31, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( x, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -2287,7 +2291,7 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T tmp8, fullbandScale ); } /* TRANSITION_OVERLAP */ - } /* TCX-20 and TCX-only */ + } /* TCX-20 and TCX-only */ /* Window and overlap-add past frame 
if past frame is TCX */ test(); @@ -2731,7 +2735,11 @@ static void TCX_MDCT_Inverse_qwin_fx( R2 = shr( r, 1 ); x_e = sub( 15, x_e ); +#ifdef HARMONIZE_DCT + edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e, EVS_MONO ); +#else edct_fx( x, tmp_buf + L2, add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &x_e ); +#endif x_e = sub( 15, x_e ); fac = TCX_MDCT_Inverse_GetScaleFactor( add( add( shr( l, 1 ), m ), shr( r, 1 ) ), &fac_e ); /* exp(fac_e) */ @@ -3323,7 +3331,11 @@ void IMDCT_ivas_fx( } ELSE { +#ifdef HARMONIZE_DCT + edct_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( x_fx, xn_buf_fx_32 + add( shr( overlap, 1 ), nz ), L_frame, &q_xn_buf_fx_32 ); +#endif Word16 res_m, res_e; res_e = 0; move16(); diff --git a/lib_dec/gs_dec_amr_wb_fx.c b/lib_dec/gs_dec_amr_wb_fx.c index 326c2f919..2670ebfef 100644 --- a/lib_dec/gs_dec_amr_wb_fx.c +++ b/lib_dec/gs_dec_amr_wb_fx.c @@ -450,10 +450,21 @@ void improv_amr_wb_gs_fx( * Do the excitation modification according to the content * Go back to time domain -> Overwrite exctiation *------------------------------------------------------------*/ + +#ifdef HARMONIZE_DCT + edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6 ); +#else edct_16fx( exc2_fx, dct_exc_in_fx, L_FRAME, 6, EVS_MONO ); +#endif + gs_dec_amr_wb_fx( core_brate, seed_tcx, dct_exc_in_fx, Q_exc2, dct_exc_out_fx, Q_exc2, pitch_buf_fx, lt_voice_fac_fx, clas, coder_type ); +#ifdef HARMONIZE_DCT + edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6 ); +#else edct_16fx( dct_exc_out_fx, exc2_fx, L_FRAME, 6, EVS_MONO ); +#endif + /*------------------------------------------------------------* * Redo core synthesis at 12k8 Hz with the modified excitation *------------------------------------------------------------*/ diff --git a/lib_dec/gs_dec_fx.c b/lib_dec/gs_dec_fx.c index d867298d1..e2aa1dcb1 100644 --- a/lib_dec/gs_dec_fx.c +++ b/lib_dec/gs_dec_fx.c @@ -358,7 +358,11 
@@ void decod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Reset unvaluable part of the adaptive (pitch) excitation contribution @@ -497,8 +501,13 @@ void decod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*----------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_dec/hf_synth_fx.c b/lib_dec/hf_synth_fx.c index 9c4807835..32fcb3311 100644 --- a/lib_dec/hf_synth_fx.c +++ b/lib_dec/hf_synth_fx.c @@ -716,7 +716,11 @@ void hf_synth_amr_wb_fx( Copy_Scale_sig_16_32_DEPREC( exc, exc32, L_FRAME, qdct ); /* Qexc + qdct */ qdct = add( qdct, Q_exc ); +#ifdef HARMONIZE_DCT + edct_fx( exc32, dct_exc32, L_FRAME, &qdct, EVS_MONO ); +#else edct_fx( exc32, dct_exc32, L_FRAME, &qdct ); +#endif q_tmp = Exp32Array( L_FRAME, dct_exc32 ); q_tmp = sub( q_tmp, 16 ); @@ -1006,7 +1010,11 @@ void hf_synth_amr_wb_fx( qhf = sub( q_tmp, 1 ); Copy_Scale_sig_16_32_DEPREC( dct_hb, dct_hb32, L_FRAME16k, qhf ); /* qhf + qdct */ qhf = add( qhf, qdct ); +#ifdef HARMONIZE_DCT + edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf, EVS_MONO ); +#else edct_fx( dct_hb32, exc16k32, L_FRAME16k, &qhf ); +#endif q_tmp = Exp32Array( L_FRAME16k, exc16k32 ); q_tmp = sub( q_tmp, 16 ); Copy_Scale_sig_32_16( exc16k32, exc16k, L_FRAME16k, q_tmp ); /* qhf + qtmp */ diff --git a/lib_dec/ivas_td_low_rate_dec_fx.c 
b/lib_dec/ivas_td_low_rate_dec_fx.c index 916329e66..200443dbc 100644 --- a/lib_dec/ivas_td_low_rate_dec_fx.c +++ b/lib_dec/ivas_td_low_rate_dec_fx.c @@ -134,9 +134,14 @@ void tdm_low_rate_dec_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ) ); +#else edct_16fx( dct_epit, exc, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, find_guarded_bits_fx( L_FRAME ), IVAS_CPE_TD ); +#endif IF( bwe_exc != NULL ) { diff --git a/lib_enc/bw_detect_fx.c b/lib_enc/bw_detect_fx.c index 2e04b986f..200ff9098 100644 --- a/lib_enc/bw_detect_fx.c +++ b/lib_enc/bw_detect_fx.c @@ -308,7 +308,11 @@ void bw_detect_fx( in_win32[i] = L_mult( *pt++, *pt1-- ); move32(); } +#ifdef HARMONIZE_DCT + edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct, EVS_MONO ); +#else edct_fx( in_win32, spect32, BWD_TOTAL_WIDTH, &Q_dct /*,st->element_mode*/ ); +#endif FOR( i = 0; i < BWD_TOTAL_WIDTH; i++ ) { diff --git a/lib_enc/cod_tcx_fx.c b/lib_enc/cod_tcx_fx.c index aff171740..5fd37f1f5 100644 --- a/lib_enc/cod_tcx_fx.c +++ b/lib_enc/cod_tcx_fx.c @@ -2490,7 +2490,11 @@ void QuantizeSpectrum_fx( /* DCT */ Q = sub( 31, *spectrum_e ); +#ifdef HARMONIZE_DCT + edct_fx( spectrum, tmp_buf, L_frame, &Q, EVS_MONO ); +#else edct_fx( spectrum, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ @@ -3722,7 +3726,11 @@ void coder_tcx_fx( Q = sub( Q, tmp2 ); /* DCT */ +#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum, L_frame, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum, L_frame, &Q ); +#endif *spectrum_e = sub( 31, Q ); move16(); } @@ -4408,7 +4416,11 @@ void InternalTCXDecoder_fx( /* DCT */ Q = sub( 31, *spectrum_e ); 
+#ifdef HARMONIZE_DCT + edct_fx( spectrum_fx, tmp_buf, L_frame, &Q, IVAS_SCE /* just cannot be EVS_MONO */ ); +#else edct_ivas_fx( spectrum_fx, tmp_buf, L_frame, &Q ); +#endif /* scale by sqrt(L / NORM_MDCT_FACTOR) */ tmp1 = mult_r( shl( L_frame, 4 ), 26214 /*128.f / NORM_MDCT_FACTOR Q15*/ ); /* 4Q11 */ diff --git a/lib_enc/ext_sig_ana_fx.c b/lib_enc/ext_sig_ana_fx.c index d7f64c4bc..436de8531 100644 --- a/lib_enc/ext_sig_ana_fx.c +++ b/lib_enc/ext_sig_ana_fx.c @@ -374,7 +374,11 @@ void core_signal_analysis_high_bitrate_fx( Q = sub( Q, tmp2 ); /* DCT */ +#ifdef HARMONIZE_DCT + edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q, EVS_MONO ); +#else edct_fx( tmp_buf, spectrum[frameno], L_subframe, &Q ); +#endif *spectrum_e = sub( 31, Q ); } ELSE @@ -951,7 +955,11 @@ void core_signal_analysis_high_bitrate_ivas_fx( Word16 Q; Q = q_out_wtda; +#ifdef HARMONIZE_DCT + edct_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q, st->element_mode ); +#else edct_ivas_fx( tcx20Win_32, hTcxEnc->spectrum_fx[frameno], L_subframe, &Q ); +#endif hTcxEnc->spectrum_e[frameno] = sub( 31, Q ); move16(); diff --git a/lib_enc/gs_enc_fx.c b/lib_enc/gs_enc_fx.c index 493e5e8f3..db64b6345 100644 --- a/lib_enc/gs_enc_fx.c +++ b/lib_enc/gs_enc_fx.c @@ -255,8 +255,13 @@ void encod_audio_fx( * DCT transform *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( exc, dct_epit, st_fx->L_frame, 7 ); + edct_16fx( res, dct_res, st_fx->L_frame, 7 ); +#else edct_16fx( exc, dct_epit, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( res, dct_res, st_fx->L_frame, 7, st_fx->element_mode ); +#endif /*---------------------------------------------------------------* * Calculate energy dynamics @@ -372,8 +377,13 @@ void encod_audio_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit, exc, st_fx->L_frame, 7 ); + edct_16fx( exc_wo_nf, exc_wo_nf, 
st_fx->L_frame, 7 ); +#else edct_16fx( dct_epit, exc, st_fx->L_frame, 7, st_fx->element_mode ); edct_16fx( exc_wo_nf, exc_wo_nf, st_fx->L_frame, 7, st_fx->element_mode ); +#endif IF( NE_16( st_fx->element_mode, EVS_MONO ) ) { diff --git a/lib_enc/ivas_td_low_rate_enc_fx.c b/lib_enc/ivas_td_low_rate_enc_fx.c index 8ca5a4a50..17ca57065 100644 --- a/lib_enc/ivas_td_low_rate_enc_fx.c +++ b/lib_enc/ivas_td_low_rate_enc_fx.c @@ -94,7 +94,11 @@ void tdm_low_rate_enc_fx( * DCT transform of the residual and create a subsample residual *---------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( res, dct_res_fx, L_FRAME, 7 ); +#else edct_16fx( res, dct_res_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * GSC encoder @@ -119,9 +123,14 @@ void tdm_low_rate_enc_fx( * iDCT transform *--------------------------------------------------------------------------------------*/ +#ifdef HARMONIZE_DCT + edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7 ); + edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, 7 ); +#else edct_16fx( dct_epit_fx, exc_fx, L_FRAME, 7, st->element_mode ); edct_16fx( exc_wo_nf_fx, exc_wo_nf_fx, L_FRAME, 7, st->element_mode ); +#endif /*--------------------------------------------------------------------------------------* * Remove potential pre-echo in case an onset has been detected diff --git a/lib_rend/ivas_reverb_fft_filter_fx.c b/lib_rend/ivas_reverb_fft_filter_fx.c index dcf13c153..edc4c0dca 100644 --- a/lib_rend/ivas_reverb_fft_filter_fx.c +++ b/lib_rend/ivas_reverb_fft_filter_fx.c @@ -100,7 +100,12 @@ static void fft_wrapper_2ch_fx( Word16 k, mirror_k; Word32 left_re_fx, left_im_fx, right_re_fx, right_im_fx; +#ifdef HARMONIZE_DCT + DoRTFTn_fx( buffer_L_fx, buffer_R_fx, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L_fx, buffer_R_fx, fft_size ); +#endif + /* separating left and right channel spectra */ buffer_L_fx[0] = L_shl( 
buffer_L_fx[0], 1 ); // Qx + 1 move32(); @@ -167,10 +172,16 @@ static void ifft_wrapper_2ch_fx( move32(); } +#ifdef HARMONIZE_DCT + DoRTFTn_fx( buffer_L, buffer_R, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L, buffer_R, fft_size ); +#endif return; } + + /*-----------------------------------------------------------------------------------------* * Function ivas_reverb_t2f_f2t_init() * diff --git a/lib_rend/ivas_reverb_filter_design_fx.c b/lib_rend/ivas_reverb_filter_design_fx.c index 6d23b0053..9d3d7af6c 100644 --- a/lib_rend/ivas_reverb_filter_design_fx.c +++ b/lib_rend/ivas_reverb_filter_design_fx.c @@ -206,7 +206,11 @@ static void calc_min_phase_fx( /* Convert back and isolate the phase. */ IF( LE_16( fft_size, 512 ) ) /* for size <= 512 using complex-value FFT (more effecient, but available only up to 512 size) */ { +#ifdef HARMONIZE_DCT + DoRTFTn_fx( pFolded_cepstrum_re, pFolded_cepstrum_im, NULL, fft_size ); +#else DoRTFTn_fx_ivas( pFolded_cepstrum_re, pFolded_cepstrum_im, fft_size ); +#endif /* Copying the img part into the output */ FOR( idx = 1; idx < half_fft_size; idx++ ) -- GitLab From ff53bfd7e17cef89bb5a1f85ab336b6a9029710d Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:27:43 +0100 Subject: [PATCH 03/23] clang-format --- lib_com/basop_util.c | 6 +++--- lib_com/basop_util.h | 4 ++-- lib_com/options.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index dd9bf4e09..77ac9fc97 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -763,9 +763,9 @@ Word16 getScaleFactor32( /* o: measured headroom in range [ #ifdef OPTIMIZE_FFT_STACK /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ -Word16 getScaleFactor32_cmplx( - cmplx *x, /* i: array containing 32-bit data */ - const Word16 len_x /* i: length of the array to scan */ +Word16 getScaleFactor32_cmplx( + cmplx *x, /* i: array containing 32-bit data */ + const Word16 len_x /* i: length of the array 
to scan */ ) { Word16 i, i_min, i_max; diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h index 06d9ee759..b2290e453 100644 --- a/lib_com/basop_util.h +++ b/lib_com/basop_util.h @@ -238,8 +238,8 @@ Word16 getScaleFactor32( #ifdef OPTIMIZE_FFT_STACK /* o: measured headroom in range [0..31], 0 if all x[i] == 0 */ Word16 getScaleFactor32_cmplx( - cmplx *x, /* i: array containing 32-bit data */ - const Word16 len_x /* i: length of the array to scan */ + cmplx *x, /* i: array containing 32-bit data */ + const Word16 len_x /* i: length of the array to scan */ ); #endif diff --git a/lib_com/options.h b/lib_com/options.h index 59f6694eb..20fcfe925 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -95,8 +95,8 @@ #define FIX_1525_UNINIT_FORMAT_SWITCHING_DEC /* VA: float issue 1525: fix reading of uninitialized memory in format switching at the decoder */ #define HARMONIZE_2446_CON_TCX_FX /* FhG: basop issue: 2446 harmonization of function con_tcx_fx() */ #define FIX_2433_ARITH_OVERFLOW_IN_QMETA_ENC /* Nokia: Fix to convert non-converted binary operations */ -#define OPTIMIZE_FFT_STACK -#define HARMONIZE_DCT +#define OPTIMIZE_FFT_STACK /* VA: removal of intermediate FFT buffers */ +#define HARMONIZE_DCT /* VA: removal of duplicated DCT functions */ /* #################### End BE switches ################################## */ -- GitLab From 554d2d804ff02bb57ab1bff6df44f470397e6ab3 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:38:40 +0100 Subject: [PATCH 04/23] clang-format --- lib_com/edct_fx.c | 28 ++++++++++++++-------------- lib_com/prot_fx.h | 2 +- lib_com/rom_com.h | 12 ++++++------ lib_com/rom_com_fx.c | 6 +++--- lib_dec/dec_tcx_fx.c | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 0d544182c..43a61d292 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -245,7 +245,7 @@ void edct_fx( #ifdef OPTIMIZE_FFT_STACK spec[i].im = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( 
len1 - ( 1 + i ) )] ); /*Q(q+1) */ #else - complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ + complex_buf[( ( i * 2 ) + 1 )] = Msub_32_16( L_tmp, x[( i * 2 )], edct_table[( len1 - ( 1 + i ) )] ); /*Q(q+1) */ #endif move32(); } @@ -427,7 +427,7 @@ void edst_fx( #ifdef OPTIMIZE_FFT_STACK complex_buf[i].re = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ #else - complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ + complex_buf[2 * i] = Madd_32_16( L_tmp, x[2 * i], edct_table[( len1 - ( 1 + i ) )] ); /*Qq+1*/ #endif move32(); @@ -984,8 +984,8 @@ void edxt_fx( y[k] /*pt 1*/ = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[( length - k )] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else - y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ + y[k] /*pt 1*/ = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[( length - k )] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); @@ -993,7 +993,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[( length / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else - y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + y[( length / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } @@ -1010,8 +1010,8 @@ void edxt_fx( y[( Nm1 - k )] = L_add( Mpy_32_16_1( spec[k].re, wRe ), Mpy_32_16_1( spec[k].im, wIm ) ); /*Qx*/ y[k - 1] = L_sub( Mpy_32_16_1( spec[k].re, wIm ), Mpy_32_16_1( spec[k].im, wRe ) ); /*Qx*/ #else - y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ - y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], 
wRe ) ); /*Qx*/ + y[( Nm1 - k )] = L_add( Mpy_32_16_1( re[k], wRe ), Mpy_32_16_1( im[k], wIm ) ); /*Qx*/ + y[k - 1] = L_sub( Mpy_32_16_1( re[k], wIm ), Mpy_32_16_1( im[k], wRe ) ); /*Qx*/ #endif move32(); move32(); @@ -1019,7 +1019,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[( Nm1 / 2 )] = Mpy_32_16_1( spec[( length / 2 )].re, INV_SQRT_2_Q15 ); /*Qx*/ #else - y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + y[( Nm1 / 2 )] = Mpy_32_16_1( re[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } @@ -1027,7 +1027,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( spec[0].re, 1 ); /*Qx*/ #else - y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ + y[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )] = L_shr( re[0], 1 ); /*Qx*/ #endif move32(); } @@ -1046,8 +1046,8 @@ void edxt_fx( spec[k].re = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ spec[k].im = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #else - re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ - im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ + re[k] = L_add( Mpy_32_16_1( x[k], wRe ), Mpy_32_16_1( x[( length - k )], wIm ) ); /*Qx*/ + im[k] = L_sub( Mpy_32_16_1( x[( length - k )], wRe ), Mpy_32_16_1( x[k], wIm ) ); /*Qx*/ #endif move32(); move32(); @@ -1081,7 +1081,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else - re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + re[( length / 2 )] = Mpy_32_16_1( x[( Nm1 / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } @@ -1090,7 +1090,7 @@ void edxt_fx( spec[0].re = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ spec[0].im = spec[( length / 2 )].im = 0; #else - re[0] = x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ + re[0] 
= x[( Nm1 - ( Nm1 * ( kernelType / 2 ) ) )]; /*Qx*/ im[0] = im[( length / 2 )] = 0; #endif move32(); @@ -1158,7 +1158,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK y[2 * k] = spec[k].re; /*Qx*/ #else - y[2 * k] = re[k]; /*Qx*/ + y[2 * k] = re[k]; /*Qx*/ #endif move32(); IF( xSign != 0 ) diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index e5379cefa..207fd8234 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 42ede75fd..8f2ff78b6 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1553,12 +1553,12 @@ extern const Word16 ivas_sine_panning_tbl_fx[601]; // Q15 extern const Word16 ivas_sin_az_fx[361]; // Q15 // edct_fx.c -extern const Word16 sin_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_960[960]; // Q15 -extern const Word16 cos_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_640[640]; // Q15 -extern const Word16 sin_scale_tbl_512[512]; // Q15 -extern const Word16 cos_scale_tbl_512[512]; // Q15 +extern const Word16 sin_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_960[960]; // Q15 +extern const Word16 cos_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_640[640]; // Q15 +extern const Word16 sin_scale_tbl_512[512]; // Q15 +extern const Word16 cos_scale_tbl_512[512]; // Q15 #ifndef OPTIMIZE_FFT_STACK extern const Word16 cos_scale_tbl_1200[1200]; // Q15 extern const Word16 sin_scale_tbl_1200[1200]; // Q15 diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 760ccf9c0..81215f548 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -25778,9 +25778,9 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with 
band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ -}; // Q0 + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ +}; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 20066 }; /* (Q11) */ /* For extended frames in ACELP->HQ transitions in IVAS, map indices floor(sfms*1.25/8)=[1 2 3 5 7 10 12 15] from extended bws: */ diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c index 7ee784876..113e72e98 100644 --- a/lib_dec/dec_tcx_fx.c +++ b/lib_dec/dec_tcx_fx.c @@ -2291,7 +2291,7 @@ void IMDCT_fx( Word32 *x, Word16 x_e, Word16 *old_syn_overl, Word16 *syn_Overl_T tmp8, fullbandScale ); } /* TRANSITION_OVERLAP */ - } /* TCX-20 and TCX-only */ + } /* TCX-20 and TCX-only */ /* Window and overlap-add past frame if past frame is TCX */ test(); -- GitLab From 113b4417b4fc9d8524f554dedb02ff6b4689319c Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:46:10 +0100 Subject: [PATCH 05/23] clang-format --- lib_com/edct_fx.c | 16 +++------------- lib_com/rom_com_fx.c | 4 ++-- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index 43a61d292..b6078e295 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -297,25 +297,15 @@ void edct_fx( move32(); } /*Q(q-2) */ -#ifdef HARMONIZE_DCTaa - IF( element_mode == EVS_MONO ) - { - *q = sub( 15 + 2, *q ); - } - ELSE - { - *q = sub( 31 + 2, *q ); - } -#else *q = sub( 15 + 2, *q ); + move16(); #ifdef HARMONIZE_DCT IF( element_mode != EVS_MONO ) { *q = add( *q, 
Q16 ); + move16(); } #endif -#endif - move16(); return; } @@ -1055,7 +1045,7 @@ void edxt_fx( #ifdef OPTIMIZE_FFT_STACK spec[( length / 2 )].re = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #else - re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ + re[( length / 2 )] = Mpy_32_16_1( x[( length / 2 )], INV_SQRT_2_Q15 ); /*Qx*/ #endif move32(); } diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 81215f548..7139829a3 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -25778,8 +25778,8 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ }; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 20066 }; /* (Q11) */ -- GitLab From c8646f386627582c912d03052c4295f5d982ca4e Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 18:57:23 +0100 Subject: [PATCH 06/23] HARMONIZE_DCT - remove unused static functions --- lib_com/fft_fx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 7af092cd3..a3582a846 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -2440,6 +2440,7 @@ static void cftmdl( return; } +#ifndef HARMONIZE_DCT static void cftbsub( Word16 n, // Q0 Word32 *a, // Qx @@ -2632,7 +2633,6 @@ static void dctsub( return; } -#ifndef HARMONIZE_DCT 
/*-----------------------------------------------------------------* * edct2_fx_ivas() * -- GitLab From e6bc3d6356f936ff369fe6861697e10e59114c5f Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 19:38:52 +0100 Subject: [PATCH 07/23] fix + OPTIMIZE_FFT_STACK --- lib_com/edct_fx.c | 7 ++--- lib_com/scale_mem_fx.c | 4 +-- lib_enc/ivas_mdct_core_enc_fx.c | 55 +++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index b6078e295..bccf098bf 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -193,7 +193,7 @@ static Word16 const *get_edct_table( /*-------------------------------------------------------------------------* * FUNCTION : edct_fx() * - * PURPOSE : DCT transform + * PURPOSE : DCT transform, 32-bit version * * INPUT ARGUMENTS : * _ (Word16) length : length @@ -463,9 +463,9 @@ void edst_fx( /*========================================================================*/ -/* FUNCTION : edct_fx() */ +/* FUNCTION : edct_16fx() */ /*------------------------------------------------------------------------*/ -/* PURPOSE : DCT transform */ +/* PURPOSE : DCT transform, 32-bit version */ /*------------------------------------------------------------------------*/ /* INPUT ARGUMENTS : */ /* _ (Word16) length : length */ @@ -477,7 +477,6 @@ void edst_fx( /* OUTPUT ARGUMENTS : */ /* _ (Word16[]) y : output transform Qx */ /*------------------------------------------------------------------------*/ - /*------------------------------------------------------------------------*/ /* RETURN ARGUMENTS : */ /* _ None */ diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 3df645e6a..13118dada 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -353,13 +353,13 @@ void scale_sig32_cmplx( move32(); if ( 0 == exp0 ) { - i = lg; + break; } x[i].im = L_shl( x[i].im, exp0 ); move32(); if ( 0 == exp0 ) { - i = lg; + break; } } diff --git 
a/lib_enc/ivas_mdct_core_enc_fx.c b/lib_enc/ivas_mdct_core_enc_fx.c index 96777796c..516ced089 100644 --- a/lib_enc/ivas_mdct_core_enc_fx.c +++ b/lib_enc/ivas_mdct_core_enc_fx.c @@ -1117,6 +1117,55 @@ void enc_prm_igf_mdct( return; } +#ifdef OPTIMIZE_FFT_STACK +/*-------------------------------------------------------------------* + * compute_power_spec() + * + * + *-------------------------------------------------------------------*/ + +static void compute_power_spec( + TCX_ENC_HANDLE hTcxEnc, + Word32 *mdst_spectrum_fx[NB_DIV], + Word32 powerSpec_fx[N_MAX], + Word16 *q_pow, + const Word16 n, + const Word16 L_subframeTCX ) +{ + Word16 i; + Word64 powerSpec_fx64[N_MAX]; + + IF( hTcxEnc->fUseTns[n] ) + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mult_32_32( hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + ELSE + { + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_mac_32_32( W_mult_32_32( mdst_spectrum_fx[n][i], mdst_spectrum_fx[n][i] ), hTcxEnc->spectrum_fx[n][i], hTcxEnc->spectrum_fx[n][i] ); + move64(); + } + *q_pow = W_norm_arr( powerSpec_fx64, L_subframeTCX ); + } + + FOR( i = 0; i < L_subframeTCX; i++ ) + { + powerSpec_fx64[i] = W_shl( powerSpec_fx64[i], *q_pow ); + move64(); + powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); + move32(); + } + + return; +} +#endif + /*-------------------------------------------------------------------* * ivas_mdct_core_whitening_enc() * @@ -1152,7 +1201,9 @@ void ivas_mdct_core_whitening_enc_fx( Word32 temp_buffer[15 * L_FRAME48k / 8]; Word32 *windowedSignal_fx[CPE_CHANNELS]; Word32 *powerSpec_fx = orig_spectrum_long[0]; +#ifndef OPTIMIZE_FFT_STACK Word64 powerSpec_fx64[N_MAX]; +#endif Word16 nrg_fx; /* Q15 */ Encoder_State *st, **sts; Word32 scf_fx[CPE_CHANNELS][NB_DIV][M]; @@ -1920,6 +1971,9 @@ void ivas_mdct_core_whitening_enc_fx( move16(); FOR( n = 0; n < nSubframes; n++ ) { +#ifdef 
OPTIMIZE_FFT_STACK + compute_power_spec( st->hTcxEnc, mdst_spectrum_fx[ch], powerSpec_fx, &q_pow, n, L_subframeTCX ); +#else IF( st->hTcxEnc->fUseTns[n] ) { FOR( i = 0; i < L_subframeTCX; i++ ) @@ -1946,6 +2000,7 @@ void ivas_mdct_core_whitening_enc_fx( powerSpec_fx[i] = W_extract_h( powerSpec_fx64[i] ); move32(); } +#endif IF( mct_on ) { FOR( i = 0; i < L_subframeTCX; i++ ) -- GitLab From 6a1be2331936a1b95e72be3b9841058e7925da4e Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 16 Mar 2026 20:00:26 +0100 Subject: [PATCH 08/23] fix --- lib_com/scale_mem_fx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 13118dada..4f095703a 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -353,13 +353,13 @@ void scale_sig32_cmplx( move32(); if ( 0 == exp0 ) { - break; + BREAK; } x[i].im = L_shl( x[i].im, exp0 ); move32(); if ( 0 == exp0 ) { - break; + BREAK; } } -- GitLab From 4b025984bff9abf60b709cd7bd4f1218786e4d98 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 09:28:59 +0100 Subject: [PATCH 09/23] fix in DoRTFTn_fx() --- lib_com/fft_fx_evs.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index c68b21650..be4a8df83 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -32,11 +32,6 @@ static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w ); -#include "math_32.h" - -/*-----------------------------------------------------------------* - * Local functions - *-----------------------------------------------------------------*/ static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a ); static void cftfsub_fx( Word16 n, 
Word32 *a, const Word16 *w ); @@ -44,6 +39,12 @@ static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); +/*-----------------------------------------------------------------* + * DoRTFTn_fx() + * + * + *-----------------------------------------------------------------*/ + void DoRTFTn_fx( Word32 *x, /* i/o : real part of input and output data Q(x) */ Word32 *y, /* i/o : imaginary part of input and output data Q(x) */ @@ -113,7 +114,22 @@ void DoRTFTn_fx( } #ifdef OPTIMIZE_FFT_STACK - IF( spec == NULL ) + IF( spec != NULL ) + { + spec[0].re = z[0]; + move16(); + spec[0].im = z[1]; + move16(); + pt = &z[2]; + FOR( i = n - 1; i >= 1; i-- ) + { + spec[i].re = *pt++; + move16(); + spec[i].im = *pt++; + move16(); + } + } + ELSE { #endif x[0] = z[0]; -- GitLab From e53b91a99ac331c2c8d31f2a7f0c724edb169ce8 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 11:06:47 +0100 Subject: [PATCH 10/23] editorial change --- lib_com/fft_fx_evs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index be4a8df83..37f27fbe7 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -10,7 +10,7 @@ #include /*-----------------------------------------------------------------* - * Local functions + * Local constants *-----------------------------------------------------------------*/ #define FFT3_ONE_THIRD 21845 /* 1/3 in Q16 */ @@ -19,6 +19,10 @@ #define KP951056516_16FX 2042378325 /* EDCT & EMDCT constants Q31*/ #define KP587785252_16FX 1262259213 /* EDCT & EMDCT constants Q31*/ +/*-----------------------------------------------------------------* + * Local function prototypes + *-----------------------------------------------------------------*/ + static void fft5_shift4_16fx( Word16 n1, Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft32_5_16fx( 
Word16 *x, Word16 *y, const Word16 *Idx ); -- GitLab From 69bd8cc40c0e9c63af9e3226ba50bc4c6989e870 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 13:11:51 +0100 Subject: [PATCH 11/23] revert harmonization of DoRTFTn_fx_ivas() --- lib_com/fft_fx.c | 12 ++---------- lib_com/fft_fx_evs.c | 3 ++- lib_com/prot_fx.h | 4 +--- lib_rend/ivas_reverb_fft_filter_fx.c | 8 -------- lib_rend/ivas_reverb_filter_design_fx.c | 4 ---- 5 files changed, 5 insertions(+), 26 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index a3582a846..4182a350b 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -2736,6 +2736,7 @@ void edct2_fx_ivas( } } } +#endif void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data Qx */ @@ -2743,7 +2744,6 @@ void DoRTFTn_fx_ivas( const Word16 n /* i : size of the FFT up to 1024 Q0*/ ) { - Word16 i; Word32 z[2048]; @@ -2793,7 +2793,7 @@ void DoRTFTn_fx_ivas( return; } -#endif + #ifndef HQ_ALIGN_DUPLICATED_CODE void fft3_fx_ivas( const Word32 X[], // Qx @@ -4277,11 +4277,7 @@ void DoFFT_fx( DoRTFT320_fx( re2, im2 ); BREAK; case 256: -#ifdef HARMONIZE_DCT - DoRTFTn_fx( re2, im2, NULL, 256 ); -#else DoRTFTn_fx_ivas( re2, im2, 256 ); -#endif BREAK; case 240: DoRTFT240( re2, im2 ); @@ -4305,11 +4301,7 @@ void DoFFT_fx( DoRTFT80_fx( re2, im2 ); BREAK; case 64: -#ifdef HARMONIZE_DCT - DoRTFTn_fx( re2, im2, NULL, 64 ); -#else DoRTFTn_fx_ivas( re2, im2, 64 ); -#endif BREAK; case 40: DoRTFT40_fx( re2, im2 ); diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index 37f27fbe7..a17ff1832 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -55,7 +55,7 @@ void DoRTFTn_fx( #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif - const Word16 n /* i : size of the FFT up to 1024 */ + const Word16 n /* i : size of the FFT up to 1024 */ ) { Word16 i; @@ -151,6 +151,7 @@ void DoRTFTn_fx( #ifdef OPTIMIZE_FFT_STACK } #endif + return; } diff --git a/lib_com/prot_fx.h 
b/lib_com/prot_fx.h index 207fd8234..089a7119e 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4073,7 +4073,7 @@ void DoRTFTn_fx( #ifdef OPTIMIZE_FFT_STACK cmplx *spec, /* i/o : complex input and output data */ #endif - const Word16 n /* i : size of the FFT up to 1024 */ + const Word16 n /* i : size of the FFT up to 1024 */ ); void DoRTFT480_fx( @@ -4148,13 +4148,11 @@ void rfft_fx( const Word16 isign /* i : sign */ ); -#ifndef HARMONIZE_DCT void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT up to 1024 */ ); -#endif Word16 find_guarded_bits_fx( const Word32 n ); diff --git a/lib_rend/ivas_reverb_fft_filter_fx.c b/lib_rend/ivas_reverb_fft_filter_fx.c index edc4c0dca..6f4e3052d 100644 --- a/lib_rend/ivas_reverb_fft_filter_fx.c +++ b/lib_rend/ivas_reverb_fft_filter_fx.c @@ -100,11 +100,7 @@ static void fft_wrapper_2ch_fx( Word16 k, mirror_k; Word32 left_re_fx, left_im_fx, right_re_fx, right_im_fx; -#ifdef HARMONIZE_DCT - DoRTFTn_fx( buffer_L_fx, buffer_R_fx, NULL, fft_size ); -#else DoRTFTn_fx_ivas( buffer_L_fx, buffer_R_fx, fft_size ); -#endif /* separating left and right channel spectra */ buffer_L_fx[0] = L_shl( buffer_L_fx[0], 1 ); // Qx + 1 @@ -172,11 +168,7 @@ static void ifft_wrapper_2ch_fx( move32(); } -#ifdef HARMONIZE_DCT - DoRTFTn_fx( buffer_L, buffer_R, NULL, fft_size ); -#else DoRTFTn_fx_ivas( buffer_L, buffer_R, fft_size ); -#endif return; } diff --git a/lib_rend/ivas_reverb_filter_design_fx.c b/lib_rend/ivas_reverb_filter_design_fx.c index 9d3d7af6c..6d23b0053 100644 --- a/lib_rend/ivas_reverb_filter_design_fx.c +++ b/lib_rend/ivas_reverb_filter_design_fx.c @@ -206,11 +206,7 @@ static void calc_min_phase_fx( /* Convert back and isolate the phase. 
*/ IF( LE_16( fft_size, 512 ) ) /* for size <= 512 using complex-value FFT (more effecient, but available only up to 512 size) */ { -#ifdef HARMONIZE_DCT - DoRTFTn_fx( pFolded_cepstrum_re, pFolded_cepstrum_im, NULL, fft_size ); -#else DoRTFTn_fx_ivas( pFolded_cepstrum_re, pFolded_cepstrum_im, fft_size ); -#endif /* Copying the img part into the output */ FOR( idx = 1; idx < half_fft_size; idx++ ) -- GitLab From cb17b506aed0ef21c9ecd19d23fceed56a5347bb Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 14:36:23 +0100 Subject: [PATCH 12/23] harmonize fft_fx --- lib_com/edct_fx.c | 6 ++-- lib_com/fft_fx.c | 65 +++++++++++++++++++++++++++++++++--- lib_com/ivas_mdft_imdft_fx.c | 58 ++++++++++++++++++++++++++++++++ lib_com/prot_fx.h | 10 +++--- 4 files changed, 125 insertions(+), 14 deletions(-) diff --git a/lib_com/edct_fx.c b/lib_com/edct_fx.c index bccf098bf..84793d4fd 100644 --- a/lib_com/edct_fx.c +++ b/lib_com/edct_fx.c @@ -273,7 +273,7 @@ void edct_fx( tmp = sub( getScaleFactor32_cmplx( spec, len1 ), find_guarded_bits_fx( len1 ) ); scale_sig32_cmplx( spec, len1, tmp ); - fft_cmplx_fx( spec, len1 ); + fft_fx( spec, len1 ); *q = sub( *q, tmp ); move16(); @@ -954,7 +954,7 @@ void edxt_fx( ELSE /* fft() doesn't support 512 */ { #ifdef OPTIMIZE_FFT_STACK - fft_cmplx_fx( spec, length ); + fft_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif @@ -1136,7 +1136,7 @@ void edxt_fx( ELSE /* fft() doesn't support 512 */ { #ifdef OPTIMIZE_FFT_STACK - fft_cmplx_fx( spec, length ); + fft_fx( spec, length ); #else fft_fx( re, im, length, 1 ); #endif diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 4182a350b..ac241e86a 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -6928,6 +6928,7 @@ static void fft_lenN( * Complex-value FFT *-----------------------------------------------------------------*/ +#ifndef HARMONIZE_DCT void fft_fx( Word32 *re, /* i/o: real part Qx */ Word32 *im, /* i/o: imag part Qx */ @@ -7013,10 +7014,8 @@ void fft_fx( return; } - 
- -#ifdef OPTIMIZE_FFT_STACK -void fft_cmplx_fx( +#else +void fft_fx( cmplx *x, /* i/o: complex data */ const Word16 length /* i : length of fft */ ) @@ -7093,6 +7092,9 @@ void rfft_fx( Word16 i, sizeOfFft2, sizeOfFft4; Word32 tmp, t1, t2, t3, t4; Word16 s1, s2; +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#endif sizeOfFft2 = shr( length, 1 ); sizeOfFft4 = shr( length, 2 ); @@ -7163,10 +7165,43 @@ void rfft_fx( SWITCH( isign ) { - case -1: +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft4; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[sizeOfFft2 - i - 1].re; + move32(); + + x[2 * i] = spec[i].im; + move32(); + x[2 * i + 1] = L_negate( spec[sizeOfFft2 - i - 1].im ); + move32(); + } + + FOR( i = 0; i < sizeOfFft2; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif + // Qx tmp = L_add( x[0], x[1] ); x[1] = L_sub( x[0], x[1] ); // Qx @@ -7222,7 +7257,27 @@ void rfft_fx( move32(); } +#ifdef HARMONIZE_DCT + FOR( i = 0; i < sizeOfFft2; i++ ) + { + spec[i].re = x[2 * i]; + move32(); + spec[i].im = x[2 * i + 1]; + move32(); + } + + fft_fx( spec, sizeOfFft2 ); + + FOR( i = 0; i < sizeOfFft2; i++ ) + { + x[2 * i] = spec[i].re; + move32(); + x[2 * i + 1] = spec[i].im; + move32(); + } +#else fft_fx( x, x + 1, sizeOfFft2, 2 ); +#endif FOR( i = 0; i < length; i += 2 ) { diff --git a/lib_com/ivas_mdft_imdft_fx.c b/lib_com/ivas_mdft_imdft_fx.c index 49d1cbbf1..19774e712 100644 --- a/lib_com/ivas_mdft_imdft_fx.c +++ b/lib_com/ivas_mdft_imdft_fx.c @@ -214,7 +214,29 @@ static void ivas_ifft_cplx1_fx( move32(); } +#ifdef HARMONIZE_DCT + cmplx x[L_FRAME48k]; + + FOR( i = 0; i < length; i++ ) + { + x[i].re = re[i]; + move32(); + x[i].im = im[i]; + move32(); + } + + fft_fx( x, length ); + + FOR( i = 0; i < length; i++ 
) + { + re[i] = x[i].re; + move32(); + im[i] = x[i].im; + move32(); + } +#else fft_fx( re, im, length, 1 ); +#endif return; } @@ -233,8 +255,12 @@ void ivas_mdft_fx( const Word16 mdft_length /* i : MDFT length */ ) { +#ifdef HARMONIZE_DCT + cmplx spec[L_FRAME48k]; +#else Word32 re[L_FRAME48k]; Word32 im[L_FRAME48k]; +#endif Word16 j, len_by_2; const Word32 *pTwid; // Q31 len_by_2 = shr( mdft_length, 1 ); @@ -244,23 +270,53 @@ void ivas_mdft_fx( { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = Mpy_32_32( pIn[j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } ELSE { FOR( j = 0; j < mdft_length; j++ ) { +#ifdef HARMONIZE_DCT + spec[j].re = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); + spec[j].im = Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin + move32(); +#else re[j] = Msub_32_32( Mpy_32_32( pIn[j], pTwid[j] ), pIn[add( mdft_length, j )], pTwid[mdft_length - j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); im[j] = Msub_32_32( Mpy_32_32( L_negate( pIn[j] ), pTwid[mdft_length - j] ), pIn[mdft_length + j], pTwid[j] ); // ( Qin + Q31 ) - Q31 -> Qin move32(); +#endif } } +#ifdef HARMONIZE_DCT + fft_fx( spec, mdft_length ); + + FOR( j = 0; j < len_by_2; j++ ) + { + pOut_re[2 * j] = spec[j].re; + move32(); + pOut_re[2 * j + 1] = spec[mdft_length - j - 1].re; + move32(); + + pOut_im[2 * j] = spec[j].im; + move32(); + pOut_im[2 * j + 1] = L_negate( spec[mdft_length - j - 1].im ); + move32(); + } +#else fft_fx( re, im, mdft_length, 1 ); FOR( j = 0; j < 
len_by_2; j++ ) { @@ -274,6 +330,8 @@ void ivas_mdft_fx( pOut_im[2 * j + 1] = L_negate( im[mdft_length - j - 1] ); // Qin move32(); } +#endif + return; } diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 089a7119e..a55beffe0 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); @@ -4128,19 +4128,17 @@ void DoFFT_fx( const Word16 length ); void fft_fx( +#ifndef HARMONIZE_DCT Word32 *re, /* i/o: real part */ Word32 *im, /* i/o: imag part */ const Word16 length, /* i : length of fft */ const Word16 s /* i : sign */ -); - -#ifdef OPTIMIZE_FFT_STACK -void fft_cmplx_fx( +#else cmplx *spec, /* i/o: complex data */ const Word16 length /* i : length of fft */ +#endif ); -#endif void rfft_fx( Word32 *x, /* i/o: values */ const Word16 *w, /* i : window */ -- GitLab From ba9026abb9a05c10ff178d2c450835b53c6cdda6 Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 16:10:33 +0100 Subject: [PATCH 13/23] fix --- lib_com/basop_util.c | 45 ++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 77ac9fc97..5187ec49d 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -768,23 +768,27 @@ Word16 getScaleFactor32_cmplx( const Word16 len_x /* i: length of the array to scan */ ) { - Word16 i, i_min, i_max; - Word32 x_min, x_max; + Word16 i, i_min, i_max, i_re, i_im; + Word32 x_min_re, x_max_re, x_min_im, x_max_im; - x_max = 0; + x_max_re = 0; move32(); - x_min = 0; + x_min_re = 0; + move32(); + x_max_im = 0; + move32(); + x_min_im = 0; move32(); FOR( i = 0; i < len_x; i++ ) { if ( x[i].re >= 0 ) - x_max = L_max( x_max, x[i].re ); + x_max_re = L_max( x_max_re, x[i].re ); if ( x[i].re < 0 ) - x_min = L_min( x_min, x[i].re ); + x_min_re = 
L_min( x_min_re, x[i].re ); if ( x[i].im >= 0 ) - x_max = L_max( x_max, x[i].im ); + x_max_im = L_max( x_max_im, x[i].im ); if ( x[i].im < 0 ) - x_min = L_min( x_min, x[i].im ); + x_min_im = L_min( x_min_im, x[i].im ); } i_max = 0x20; @@ -792,15 +796,28 @@ Word16 getScaleFactor32_cmplx( i_min = 0x20; move16(); - if ( x_max != 0 ) - i_max = norm_l( x_max ); + if ( x_max_re != 0 ) + i_max = norm_l( x_max_re ); - if ( x_min != 0 ) - i_min = norm_l( x_min ); + if ( x_min_re != 0 ) + i_min = norm_l( x_min_re ); - i = s_and( s_min( i_max, i_min ), 0x1F ); + i_re = s_and( s_min( i_max, i_min ), 0x1F ); - return i; + i_max = 0x20; + move16(); + i_min = 0x20; + move16(); + + if ( x_max_im != 0 ) + i_max = norm_l( x_max_im ); + + if ( x_min_im != 0 ) + i_min = norm_l( x_min_im ); + + i_im = s_and( s_min( i_max, i_min ), 0x1F ); + + return s_min(i_re, i_im); } #endif -- GitLab From 5da1c71bab0e91c893a237dfa82e7341f480f54f Mon Sep 17 00:00:00 2001 From: vaclav Date: Tue, 17 Mar 2026 16:13:47 +0100 Subject: [PATCH 14/23] clang-format --- lib_com/basop_util.c | 2 +- lib_com/prot_fx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c index 5187ec49d..73581f372 100644 --- a/lib_com/basop_util.c +++ b/lib_com/basop_util.c @@ -817,7 +817,7 @@ Word16 getScaleFactor32_cmplx( i_im = s_and( s_min( i_max, i_min ), 0x1F ); - return s_min(i_re, i_im); + return s_min( i_re, i_im ); } #endif diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index a55beffe0..ea6a1fd0a 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); -- GitLab From af2d6a5ac05670f6d4a22d7f00cc7ffa9d4ed9ca Mon Sep 17 00:00:00 2001 From: vaclav Date: Wed, 18 Mar 2026 09:38:48 +0100 Subject: [PATCH 15/23] HARMONIZE_DoRTFTn --- 
lib_com/fft_fx.c | 54 +++++++++++++++++++++---- lib_com/fft_fx_evs.c | 30 ++++++++++++++ lib_com/options.h | 1 + lib_com/prot_fx.h | 14 ++++++- lib_com/rom_com.h | 2 + lib_com/rom_com_fx.c | 12 ++++-- lib_rend/ivas_reverb_fft_filter_fx.c | 8 ++++ lib_rend/ivas_reverb_filter_design_fx.c | 4 ++ 8 files changed, 113 insertions(+), 12 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index ac241e86a..4e13ec1bc 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -51,8 +51,6 @@ * Local constants *-----------------------------------------------------------------*/ -#define Mpy_32_xx Mpy_32_16_1 - #define FFTC( x ) WORD322WORD16( (Word32) x ) /* DCT related */ @@ -83,17 +81,18 @@ #define FFT_C165 ( FFTC( 0x30fbc54d ) ) /* FL2WORD32( 3.826834323650898e-1) COS_3PI_DIV8 Q15*/ #define FFT_C166 ( FFTC( 0xcf043ab3 ) ) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 Q15*/ -#define SCALEFACTOR16 ( 0 ) -#define SCALEFACTOR20 ( 0 ) + /*-----------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------*/ +#ifndef HARMONIZE_DoRTFTn static void cdftForw( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); static void bitrv2_SR( Word16 n, const Word16 *ip, Word32 *a ); static void cftfsub( Word16 n, Word32 *a, const Word16 *w ); static void cft1st( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl( Word16 n, Word16 l, Word32 *a, const Word16 *w ); +#endif static void fft16_ivas( Word32 *x, Word32 *y, const Word16 *Idx ); static void fft5_shift1( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); static void fft8( Word32 *x, Word32 *y, const Word16 *Idx ); @@ -1088,7 +1087,11 @@ static void fft64( move32(); } +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 128, z, Ip_fft64, w_fft64_fx ); +#else cdftForw( 128, z, Ip_fft64, w_fft64_fx ); +#endif FOR( i = 0; i < 64; i++ ) { @@ -1130,7 +1133,11 @@ static void fft32_15( move32(); } +#ifdef HARMONIZE_DoRTFTn + 
cdftForw_fx( 64, z, Ip_fft32, w_fft32_fx ); +#else cdftForw( 64, z, Ip_fft32, w_fft32_fx ); +#endif FOR( i = 0; i < 32; i++ ) { @@ -1171,7 +1178,11 @@ static void fft32_5( move32(); } +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 64, z, Ip_fft32, w_fft32_fx ); +#else cdftForw( 64, z, Ip_fft32, w_fft32_fx ); +#endif FOR( i = 0; i < 32; i++ ) { @@ -1212,7 +1223,11 @@ static void fft16_ivas( move32(); } +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 32, z, Ip_fft16, w_fft16_fx ); +#else cdftForw( 32, z, Ip_fft16, w_fft16_fx ); +#endif FOR( i = 0; i < 16; i++ ) { @@ -1253,7 +1268,11 @@ static void fft8( move32(); } +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 16, z, Ip_fft8, w_fft8_fx ); +#else cdftForw( 16, z, Ip_fft8, w_fft8_fx ); +#endif FOR( i = 0; i < 8; i++ ) { @@ -1292,7 +1311,11 @@ static void fft8_5( move32(); } +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 16, z, Ip_fft8, w_fft8_fx ); +#else cdftForw( 16, z, Ip_fft8, w_fft8_fx ); +#endif FOR( i = 0; i < 8; i++ ) { @@ -1433,7 +1456,11 @@ static void fft4_5( move32(); } +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 8, z, Ip_fft4, w_fft4_fx ); +#else cdftForw( 8, z, Ip_fft4, w_fft4_fx ); +#endif FOR( i = 0; i < 4; i++ ) { @@ -1760,7 +1787,11 @@ void DoRTFT128_fx( move32(); } +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 256, z, Ip_fft128, w_fft128_fx ); +#else cdftForw( 256, z, Ip_fft128, w_fft128_fx ); +#endif x[0] = z[0]; move32(); @@ -1777,6 +1808,7 @@ void DoRTFT128_fx( return; } +#ifndef HARMONIZE_DoRTFTn /*-----------------------------------------------------------------* * cdftForw() * Main fuction of Complex Discrete Fourier Transform @@ -2439,7 +2471,7 @@ static void cftmdl( return; } - +#endif #ifndef HARMONIZE_DCT static void cftbsub( Word16 n, // Q0 @@ -2737,7 +2769,7 @@ void edct2_fx_ivas( } } #endif - +#ifndef HARMONIZE_DoRTFTn void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y, /* i/o: imaginary part of input and output data Qx */ @@ -2793,7 +2825,7 @@ void DoRTFTn_fx_ivas( return; } - 
+#endif #ifndef HQ_ALIGN_DUPLICATED_CODE void fft3_fx_ivas( const Word32 X[], // Qx @@ -4277,7 +4309,11 @@ void DoFFT_fx( DoRTFT320_fx( re2, im2 ); BREAK; case 256: +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( re2, im2, NULL, 256 ); +#else DoRTFTn_fx_ivas( re2, im2, 256 ); +#endif BREAK; case 240: DoRTFT240( re2, im2 ); @@ -4301,7 +4337,11 @@ void DoFFT_fx( DoRTFT80_fx( re2, im2 ); BREAK; case 64: +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( re2, im2, NULL, 64 ); +#else DoRTFTn_fx_ivas( re2, im2, 64 ); +#endif BREAK; case 40: DoRTFT40_fx( re2, im2 ); diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index a17ff1832..9f057fdb5 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -36,7 +36,9 @@ static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w ); +#ifndef HARMONIZE_DoRTFTn static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); +#endif static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a ); static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w ); static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w ); @@ -90,27 +92,51 @@ void DoRTFTn_fx( IF( EQ_16( n, 16 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft16, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 32 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft32, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 64 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft64, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 128 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft128, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 256 ) 
) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft256, w_fft512_fx_evs ); +#endif } ELSE IF( EQ_16( n, 512 ) ) { +#ifdef HARMONIZE_DoRTFTn + cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx ); +#else cdftForw_fx( 2 * n, z, Ip_fft512, w_fft512_fx_evs ); +#endif } ELSE { @@ -159,7 +185,11 @@ void DoRTFTn_fx( * cdftForw_fx() * Main fuction of Complex Discrete Fourier Transform *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +void cdftForw_fx( +#else static void cdftForw_fx( +#endif Word16 n, /* i : data length of real and imag */ Word32 *a, /* i/o : input/output data Q(q)*/ const Word16 *ip, /* i : work area for bit reversal */ diff --git a/lib_com/options.h b/lib_com/options.h index 4e47693c7..646fb67da 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -117,6 +117,7 @@ #define FIX_FLOAT_1528_5MS_REND_ISM_META_DELAY_COMPENSATION /* Nokia: float issue 1528: Fixes incorrect compensation for ISM metadata delay in 5ms TD rendering */ #define FIX_2398_PRECISSION_ORIENTATION_TRACKING /* FhG: use refinement of Sqrt32 within certain functions*/ #define FIX_2462_PARCOR_FIX /* VA: issue 2462: Fix bug in calculating parcor coefficient in Calc_rc0_h() */ +#define HARMONIZE_DoRTFTn /* VA: harmonize functions DoRTFTn_fx() and DoRTFTn_fx_ivas() */ /* ##################### End NON-BE switches ########################### */ diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index ef0ea856a..f8c850cba 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4067,6 +4067,15 @@ void fft16( Word32 *re, Word32 *im, Word16 s, Word16 bScale ); void BASOP_cfft( cmplx *pComplexBuf, Word16 sizeOfFft, Word16 *scale, Word32 workBuffer[2 * BASOP_CFFT_MAX_LENGTH] ); void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); +#ifdef HARMONIZE_DoRTFTn +void cdftForw_fx( + Word16 n, /* i : data length of real and imag */ + Word32 *a, /* i/o : input/output data 
Q(q)*/ + const Word16 *ip, /* i : work area for bit reversal */ + const Word16 *w /* i : cos/sin table Q14*/ +); +#endif + void DoRTFTn_fx( Word32 *x, /* i/o : real part of i and output data */ Word32 *y, /* i/o : imaginary part of i and output data */ @@ -4146,12 +4155,13 @@ void rfft_fx( const Word16 isign /* i : sign */ ); +#ifndef HARMONIZE_DoRTFTn void DoRTFTn_fx_ivas( Word32 *x, /* i/o: real part of input and output data */ Word32 *y, /* i/o: imaginary part of input and output data */ const Word16 n /* i : size of the FFT up to 1024 */ ); - +#endif Word16 find_guarded_bits_fx( const Word32 n ); Word16 L_norm_arr( const Word32 *arr, Word16 size ); @@ -5253,7 +5263,7 @@ void IGFCommonFuncsMDCTSquareSpec( Word16 indexOffset, /**< in: Q0 | index offset */ const Word16 element_mode /**< in: | IVAS element mode type */ #else - Word16 indexOffset /**< in: Q0 | index offset */ + Word16 indexOffset /**< in: Q0 | index offset */ #endif ); diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 8f2ff78b6..7ffc65217 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1080,7 +1080,9 @@ extern const Word16 Ip_fft128[10]; // Q0 extern const Word32 w_fft128_16fx[64]; // Q30 extern const Word16 Ip_fft256[10]; // Q0 extern const Word16 Ip_fft512[18]; // Q0 +#ifndef HARMONIZE_DoRTFTn extern const Word16 w_fft512_fx_evs[256]; // Q14 +#endif extern const Word16 Idx_dortft40[40]; // Q0 extern const Word16 Odx_fft8_5[8]; // Q0 extern const Word16 ip_edct2_64[6]; // Q0 diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index 7139829a3..db6a1e093 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -18437,7 +18437,11 @@ const Word16 Ip_fft256[10] = {128, 1, 0, 256, 128, 384, 64, 320,192, 448}; // const Word16 Ip_fft512[18] = {256, 1, 0, 512, 256, 768, 128, 640,384, 896, 64, 576, 320, 832, 192, 704,448, 960}; // Q0 +#ifdef HARMONIZE_DoRTFTn +const Word16 w_fft512_fx[256] =//Q14 +#else const Word16 w_fft512_fx_evs[256] =//Q14 +#endif { 16384, 0, 11585, 11585, 
15137, 6270, 6270, 15137, 16069, 3196, 9102, 13623, 13623, 9102, 3196, 16069, @@ -25778,9 +25782,9 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ -}; // Q0 + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ +}; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 20066 }; /* (Q11) */ /* For extended frames in ACELP->HQ transitions in IVAS, map indices floor(sfms*1.25/8)=[1 2 3 5 7 10 12 15] from extended bws: */ @@ -31461,6 +31465,7 @@ const Word16 w_fft256_fx[128] = { SHC( 0x7ff6 ), }; +#ifndef HARMONIZE_DoRTFTn const Word16 w_fft512_fx[256] = { // Q15 SHC( 0x7fff ), @@ -31721,6 +31726,7 @@ const Word16 w_fft512_fx[256] = { SHC( 0x7ffd ), }; +#endif const Word16 FFT_RotVector_960_fx[1860] = { // Q15 SHC( 0x7fff ), diff --git a/lib_rend/ivas_reverb_fft_filter_fx.c b/lib_rend/ivas_reverb_fft_filter_fx.c index 6f4e3052d..c8bd561dd 100644 --- a/lib_rend/ivas_reverb_fft_filter_fx.c +++ b/lib_rend/ivas_reverb_fft_filter_fx.c @@ -100,7 +100,11 @@ static void fft_wrapper_2ch_fx( Word16 k, mirror_k; Word32 left_re_fx, left_im_fx, right_re_fx, right_im_fx; +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( buffer_L_fx, buffer_R_fx, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L_fx, buffer_R_fx, fft_size ); +#endif /* separating left and right channel spectra */ buffer_L_fx[0] = L_shl( buffer_L_fx[0], 1 ); // Qx + 1 @@ -168,7 +172,11 @@ 
static void ifft_wrapper_2ch_fx( move32(); } +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( buffer_L, buffer_R, NULL, fft_size ); +#else DoRTFTn_fx_ivas( buffer_L, buffer_R, fft_size ); +#endif return; } diff --git a/lib_rend/ivas_reverb_filter_design_fx.c b/lib_rend/ivas_reverb_filter_design_fx.c index 6d23b0053..c783d3e78 100644 --- a/lib_rend/ivas_reverb_filter_design_fx.c +++ b/lib_rend/ivas_reverb_filter_design_fx.c @@ -206,7 +206,11 @@ static void calc_min_phase_fx( /* Convert back and isolate the phase. */ IF( LE_16( fft_size, 512 ) ) /* for size <= 512 using complex-value FFT (more effecient, but available only up to 512 size) */ { +#ifdef HARMONIZE_DoRTFTn + DoRTFTn_fx( pFolded_cepstrum_re, pFolded_cepstrum_im, NULL, fft_size ); +#else DoRTFTn_fx_ivas( pFolded_cepstrum_re, pFolded_cepstrum_im, fft_size ); +#endif /* Copying the img part into the output */ FOR( idx = 1; idx < half_fft_size; idx++ ) -- GitLab From 89ccc023d3a4a09071cce7de1b09c49353175e01 Mon Sep 17 00:00:00 2001 From: vaclav Date: Wed, 18 Mar 2026 09:45:22 +0100 Subject: [PATCH 16/23] clang-format --- lib_com/prot_fx.h | 2 +- lib_com/rom_com.h | 50 ++++++++++++++++++++++---------------------- lib_com/rom_com_fx.c | 6 +++--- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index f8c850cba..c52a8decd 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); diff --git a/lib_com/rom_com.h b/lib_com/rom_com.h index 7ffc65217..121d5ea59 100644 --- a/lib_com/rom_com.h +++ b/lib_com/rom_com.h @@ -1062,34 +1062,34 @@ extern const Word16 Gain_dic3_NB_fx[]; /*Q12 */ * FFT transform *------------------------------------------------------------------------------*/ -extern const Word16 Odx_fft64[64]; // Q0 -extern const Word16 
Ip_fft64[6]; // Q0 -extern const Word16 Odx_fft32_15[32]; // Q0 -extern const Word32 w_fft32_16fx[16]; // Q30 -extern const Word16 Ip_fft32[6]; // Q0 -extern const Word16 Odx_fft32_5[32]; // Q0 -extern const Word16 Odx_fft16[16]; // Q0 -extern const Word16 Ip_fft16[6]; // Q0 -extern const Word16 Ip_fft8[6]; // Q0 -extern const Word16 Idx_dortft80[80]; // Q0 -extern const Word16 Idx_dortft120[120]; // Q0 -extern const Word16 Idx_dortft160[160]; // Q0 -extern const Word16 Idx_dortft320[320]; // Q0 -extern const Word16 Idx_dortft480[480]; // Q0 -extern const Word16 Ip_fft128[10]; // Q0 -extern const Word32 w_fft128_16fx[64]; // Q30 -extern const Word16 Ip_fft256[10]; // Q0 -extern const Word16 Ip_fft512[18]; // Q0 +extern const Word16 Odx_fft64[64]; // Q0 +extern const Word16 Ip_fft64[6]; // Q0 +extern const Word16 Odx_fft32_15[32]; // Q0 +extern const Word32 w_fft32_16fx[16]; // Q30 +extern const Word16 Ip_fft32[6]; // Q0 +extern const Word16 Odx_fft32_5[32]; // Q0 +extern const Word16 Odx_fft16[16]; // Q0 +extern const Word16 Ip_fft16[6]; // Q0 +extern const Word16 Ip_fft8[6]; // Q0 +extern const Word16 Idx_dortft80[80]; // Q0 +extern const Word16 Idx_dortft120[120]; // Q0 +extern const Word16 Idx_dortft160[160]; // Q0 +extern const Word16 Idx_dortft320[320]; // Q0 +extern const Word16 Idx_dortft480[480]; // Q0 +extern const Word16 Ip_fft128[10]; // Q0 +extern const Word32 w_fft128_16fx[64]; // Q30 +extern const Word16 Ip_fft256[10]; // Q0 +extern const Word16 Ip_fft512[18]; // Q0 #ifndef HARMONIZE_DoRTFTn extern const Word16 w_fft512_fx_evs[256]; // Q14 #endif -extern const Word16 Idx_dortft40[40]; // Q0 -extern const Word16 Odx_fft8_5[8]; // Q0 -extern const Word16 ip_edct2_64[6]; // Q0 -extern const Word16 w_edct2_64_fx[80]; /*Q14 */ -extern const Word16 Idx_dortft20[20]; // Q0 -extern const Word16 Odx_fft4_5[4]; // Q0 -extern const Word16 Ip_fft4[6]; // Q0 +extern const Word16 Idx_dortft40[40]; // Q0 +extern const Word16 Odx_fft8_5[8]; // Q0 +extern const Word16 
ip_edct2_64[6]; // Q0 +extern const Word16 w_edct2_64_fx[80]; /*Q14 */ +extern const Word16 Idx_dortft20[20]; // Q0 +extern const Word16 Odx_fft4_5[4]; // Q0 +extern const Word16 Ip_fft4[6]; // Q0 /*----------------------------------------------------------------------------------* * FEC for HQ core diff --git a/lib_com/rom_com_fx.c b/lib_com/rom_com_fx.c index db6a1e093..5cb8858c3 100644 --- a/lib_com/rom_com_fx.c +++ b/lib_com/rom_com_fx.c @@ -25782,9 +25782,9 @@ const Word16 mfreq_loc_div_25[] = { 7, 15, 31, 47, 63, 79, 95, 111, 127, 143, 15 /* % idx= 0 1 2 3 4 5 6 7; */ /* call with band_len_idx[sfm_size>>3] */ const Word16 band_len_idx[1 + ( MAX_SFM_LEN_FX / 8 )] = { - /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ - -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ -}; // Q0 + /*sfm/8*/ /*1*/ /*2 */ /*3 */ /*4 */ /*6 */ /*8 */ /*10*/ /*12*/ + -1, 0 /*8*/, 1 /*16*/, 2 /*24*/, 3 /*32*/, 4 /*40*/, 5 /*48 */, -1, 6 /*64 */, -1, 7 /*80*/, -1, 8 /*96*/ +}; // Q0 const Word16 band_len_ener_shift[9] = { 1, 2, 2, 2, 3, 3, 3, 4 /*sfm==80*/, 4 /*sfm==96*/ }; // Q0 /* 96 requires 1 bit more than 48 */ const Word16 fine_gain_pred_sqrt_bw[9] = { 5793, 8192, 10033, 11585, 12953, 14189, 16384, 18318, 20066 }; /* (Q11) */ /* For extended frames in ACELP->HQ transitions in IVAS, map indices floor(sfms*1.25/8)=[1 2 3 5 7 10 12 15] from extended bws: */ -- GitLab From ed09a9c46864426871114c9c997beebfa5b9bf7b Mon Sep 17 00:00:00 2001 From: vaclav Date: Wed, 18 Mar 2026 09:48:58 +0100 Subject: [PATCH 17/23] clang-format --- lib_com/prot_fx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index c52a8decd..3d650a344 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4018,7 +4018,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + 
Word16 *q /* i : Q value of i signal */ #endif ); @@ -5263,7 +5263,7 @@ void IGFCommonFuncsMDCTSquareSpec( Word16 indexOffset, /**< in: Q0 | index offset */ const Word16 element_mode /**< in: | IVAS element mode type */ #else - Word16 indexOffset /**< in: Q0 | index offset */ + Word16 indexOffset /**< in: Q0 | index offset */ #endif ); -- GitLab From 3982d1d7ca26c0d79d4fbbd85df88fac93aefead Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 19 Mar 2026 11:54:04 +0100 Subject: [PATCH 18/23] keep cdftForw() --- lib_com/fft_fx.c | 50 ++++++++++++------------------------ lib_com/fft_fx_evs.c | 14 +++++----- lib_com/ivas_mdct_imdct_fx.c | 27 ++++++++++++++++--- lib_com/ivas_prot_fx.h | 2 ++ lib_com/prot_fx.h | 9 +++---- 5 files changed, 54 insertions(+), 48 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 2adbb283f..1fd82432a 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -86,13 +86,14 @@ * Local function prototypes *-----------------------------------------------------------------*/ -#ifndef HARMONIZE_DoRTFTn static void cdftForw( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); +#ifndef HARMONIZE_DoRTFTn static void bitrv2_SR( Word16 n, const Word16 *ip, Word32 *a ); +#endif static void cftfsub( Word16 n, Word32 *a, const Word16 *w ); static void cft1st( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl( Word16 n, Word16 l, Word32 *a, const Word16 *w ); -#endif + static void fft16_ivas( Word32 *x, Word32 *y, const Word16 *Idx ); static void fft5_shift1( Word16 n1, Word32 *zRe, Word32 *zIm, const Word16 *Idx ); static void fft8( Word32 *x, Word32 *y, const Word16 *Idx ); @@ -1087,11 +1088,7 @@ static void fft64( move32(); } -#ifdef HARMONIZE_DoRTFTn - cdftForw_fx( 128, z, Ip_fft64, w_fft64_fx ); -#else cdftForw( 128, z, Ip_fft64, w_fft64_fx ); -#endif FOR( i = 0; i < 64; i++ ) { @@ -1133,11 +1130,7 @@ static void fft32_15( move32(); } -#ifdef HARMONIZE_DoRTFTn - cdftForw_fx( 64, z, Ip_fft32, w_fft32_fx ); -#else cdftForw( 64, 
z, Ip_fft32, w_fft32_fx ); -#endif FOR( i = 0; i < 32; i++ ) { @@ -1178,11 +1171,7 @@ static void fft32_5( move32(); } -#ifdef HARMONIZE_DoRTFTn - cdftForw_fx( 64, z, Ip_fft32, w_fft32_fx ); -#else cdftForw( 64, z, Ip_fft32, w_fft32_fx ); -#endif FOR( i = 0; i < 32; i++ ) { @@ -1223,11 +1212,7 @@ static void fft16_ivas( move32(); } -#ifdef HARMONIZE_DoRTFTn - cdftForw_fx( 32, z, Ip_fft16, w_fft16_fx ); -#else cdftForw( 32, z, Ip_fft16, w_fft16_fx ); -#endif FOR( i = 0; i < 16; i++ ) { @@ -1268,11 +1253,7 @@ static void fft8( move32(); } -#ifdef HARMONIZE_DoRTFTn - cdftForw_fx( 16, z, Ip_fft8, w_fft8_fx ); -#else cdftForw( 16, z, Ip_fft8, w_fft8_fx ); -#endif FOR( i = 0; i < 8; i++ ) { @@ -1311,11 +1292,7 @@ static void fft8_5( move32(); } -#ifdef HARMONIZE_DoRTFTn - cdftForw_fx( 16, z, Ip_fft8, w_fft8_fx ); -#else cdftForw( 16, z, Ip_fft8, w_fft8_fx ); -#endif FOR( i = 0; i < 8; i++ ) { @@ -1456,7 +1433,7 @@ static void fft4_5( move32(); } -#ifdef HARMONIZE_DoRTFTn +#ifdef HARMONIZE_DoRTFTnAA cdftForw_fx( 8, z, Ip_fft4, w_fft4_fx ); #else cdftForw( 8, z, Ip_fft4, w_fft4_fx ); @@ -1787,11 +1764,7 @@ void DoRTFT128_fx( move32(); } -#ifdef HARMONIZE_DoRTFTn - cdftForw_fx( 256, z, Ip_fft128, w_fft128_fx ); -#else cdftForw( 256, z, Ip_fft128, w_fft128_fx ); -#endif x[0] = z[0]; move32(); @@ -1808,7 +1781,7 @@ void DoRTFT128_fx( return; } -#ifndef HARMONIZE_DoRTFTn + /*-----------------------------------------------------------------* * cdftForw() * Main fuction of Complex Discrete Fourier Transform @@ -1818,16 +1791,24 @@ static void cdftForw( Word16 n, /* i : data length of real and imag Q0 */ Word32 *a, /* i/o: input/output data Qx */ const Word16 *ip, /* i : work area for bit reversal Qx */ - const Word16 *w /* i : cos/sin table Q15 */ + const Word16 *w /* i : cos/sin table Q15 */ ) { /* bit reversal */ +#ifdef HARMONIZE_DoRTFTn + bitrv2_SR_fx( n, ip + 2, a ); +#else bitrv2_SR( n, ip + 2, a ); +#endif /* Do FFT */ cftfsub( n, a, w ); + + return; } + +#ifndef 
HARMONIZE_DoRTFTn /*-----------------------------------------------------------------* * bitrv2_SR() * Bit reversal @@ -2029,6 +2010,7 @@ static void bitrv2_SR( return; } +#endif /*-----------------------------------------------------------------* * cftfsub() @@ -2471,7 +2453,7 @@ static void cftmdl( return; } -#endif + #ifndef HARMONIZE_DCT static void cftbsub( Word16 n, // Q0 diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index 9f057fdb5..8c773b580 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -36,10 +36,10 @@ static void fft64_16fx( Word16 *x, Word16 *y, const Word16 *Idx ); static void fft5_32_16fx( Word16 *zRe, Word16 *zIm, const Word16 *Idx ); static void cdftForw_16fx( Word16 n, Word16 *a, const Word16 *ip, const Word32 *w ); -#ifndef HARMONIZE_DoRTFTn static void cdftForw_fx( Word16 n, Word32 *a, const Word16 *ip, const Word16 *w ); -#endif +#ifndef HARMONIZE_DoRTFTn static void bitrv2_SR_fx( Word16 n, const Word16 *ip, Word32 *a ); +#endif static void cftfsub_fx( Word16 n, Word32 *a, const Word16 *w ); static void cft1st_fx( Word16 n, Word32 *a, const Word16 *w ); static void cftmdl_fx( Word16 n, Word16 l, Word32 *a, const Word16 *w ); @@ -185,11 +185,8 @@ void DoRTFTn_fx( * cdftForw_fx() * Main fuction of Complex Discrete Fourier Transform *-----------------------------------------------------------------*/ -#ifdef HARMONIZE_DoRTFTn -void cdftForw_fx( -#else + static void cdftForw_fx( -#endif Word16 n, /* i : data length of real and imag */ Word32 *a, /* i/o : input/output data Q(q)*/ const Word16 *ip, /* i : work area for bit reversal */ @@ -209,7 +206,12 @@ static void cdftForw_fx( * bitrv2_SR_fx() * Bit reversal *-----------------------------------------------------------------*/ + +#ifdef HARMONIZE_DoRTFTn +void bitrv2_SR_fx( +#else static void bitrv2_SR_fx( +#endif Word16 n, /* i : data length of real and imag */ const Word16 *ip, /* i/o : work area for bit reversal */ Word32 *a /* i/o : input/output data Q(q)*/ diff --git 
a/lib_com/ivas_mdct_imdct_fx.c b/lib_com/ivas_mdct_imdct_fx.c index fde3edadd..8c6e7e48c 100644 --- a/lib_com/ivas_mdct_imdct_fx.c +++ b/lib_com/ivas_mdct_imdct_fx.c @@ -52,9 +52,17 @@ #define IVAS_MDCT_SCALING_GAIN_16k_Q31 0X00003193 /* 5.909703592235439e-06f */ #define IVAS_MDCT_SCALING_GAIN_16k_Q46 0x18C97EF4 - #define IVAS_ONE_BY_IMDCT_SCALING_GAIN_Q16 0x08432A51 /* 1 / 2115.165304808f */ +#ifdef HARMONIZE_DoRTFTn + +/*------------------------------------------------------------------------------------------* + * Local functions prototypes + *------------------------------------------------------------------------------------------*/ + +static void ivas_get_twid_factors_fx( const Word16 length, const Word16 **pTwid_re, const Word16 **pTwid_im ); + +#endif /*-----------------------------------------------------------------------------------------* * Function ivas_tda_fx() @@ -154,7 +162,11 @@ void ivas_mdct_fx( len_by_2 = shr( length, 1 ); ivas_mdct_scaling_gain = ivas_get_mdct_scaling_gain_fx( len_by_2 ); // Q46 +#ifdef HARMONIZE_DoRTFTn + ivas_get_twid_factors_fx( length, &pTwid_re, &pTwid_im ); +#else ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im ); +#endif FOR( i = 0; i < len_by_2; i++ ) { @@ -299,8 +311,11 @@ void ivas_imdct_fx( Word32 im[IVAS_480_PT_LEN]; len_by_2 = shr( length, 1 ); +#ifdef HARMONIZE_DoRTFTn + ivas_get_twid_factors_fx( length, &pTwid_re, &pTwid_im ); +#else ivas_get_twid_factors_fx1( length, &pTwid_re, &pTwid_im ); - +#endif FOR( i = 0; i < len_by_2; i++ ) { re[i] = L_add( Mpy_32_16_1( pIn[length - 2 * i - 1], pTwid_re[i] ), Mpy_32_16_1( pIn[2 * i], pTwid_im[i] ) ); /*stl_arr_index Q24*/ @@ -310,6 +325,7 @@ void ivas_imdct_fx( } ivas_ifft_cplx( &re[0], &im[0], len_by_2 ); + IF( len_by_2 > 0 ) { *q_out = sub( *q_out, Q15 ); @@ -351,11 +367,16 @@ void ivas_imdct_fx( /*-----------------------------------------------------------------------------------------* - * Function ivas_get_twid_factors_fx1() + * Function 
ivas_get_twid_factors_fx() * * Sets/Maps the fft twiddle tables based on fft length *-----------------------------------------------------------------------------------------*/ + +#ifdef HARMONIZE_DoRTFTn +static void ivas_get_twid_factors_fx( +#else void ivas_get_twid_factors_fx1( +#endif const Word16 length, // Q0 const Word16 **pTwid_re, // Q15 const Word16 **pTwid_im ) // Q15 diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 17d4ba062..a28f20d87 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -216,12 +216,14 @@ void ivas_dct_windowing_fx( Word32 *pTemp_lfe ); +#ifndef HARMONIZE_DoRTFTn void ivas_get_twid_factors_fx1( const Word16 length, // Q0 const Word16 **pTwid_re, // Q15 const Word16 **pTwid_im ); +#endif Word32 ivas_get_mdct_scaling_gain_fx( const Word16 dct_len_by_2 ); diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 3314abf66..f85fc9f35 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -4044,11 +4044,10 @@ void BASOP_cfft( cmplx *pComplexBuf, Word16 sizeOfFft, Word16 *scale, Word32 wor void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ); #ifdef HARMONIZE_DoRTFTn -void cdftForw_fx( - Word16 n, /* i : data length of real and imag */ - Word32 *a, /* i/o : input/output data Q(q)*/ - const Word16 *ip, /* i : work area for bit reversal */ - const Word16 *w /* i : cos/sin table Q14*/ +void bitrv2_SR_fx( + Word16 n, /* i : data length of real and imag Q0 */ + const Word16 *ip, /* i/o: work area for bit reversal Q0 */ + Word32 *a /* i/o: input/output data Qx */ ); #endif -- GitLab From 6e83726f7ccb18bfce490bcc66fb1f6ff020884a Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 19 Mar 2026 11:59:50 +0100 Subject: [PATCH 19/23] comments --- lib_com/fft_fx.c | 2 +- lib_com/fft_fx_evs.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 1fd82432a..6776f4ead 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -1784,7 +1784,7 @@ 
void DoRTFT128_fx( /*-----------------------------------------------------------------* * cdftForw() - * Main fuction of Complex Discrete Fourier Transform + * Main function of Complex Discrete Fourier Transform *-----------------------------------------------------------------*/ static void cdftForw( diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index 8c773b580..82b47b73e 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -183,7 +183,7 @@ void DoRTFTn_fx( /*-----------------------------------------------------------------* * cdftForw_fx() - * Main fuction of Complex Discrete Fourier Transform + * Main function of Complex Discrete Fourier Transform, 32-bit data *-----------------------------------------------------------------*/ static void cdftForw_fx( @@ -1544,9 +1544,11 @@ void DoRTFT128_16fx( return; } + + /*-----------------------------------------------------------------* * cdftForw() - * Main fuction of Complex Discrete Fourier Transform + * Main function of Complex Discrete Fourier Transform, 16-bit data *-----------------------------------------------------------------*/ static void cdftForw_16fx( Word16 n, /* i : data length of real and imag */ -- GitLab From ba15be346ba2b65b781fc521acb60b36f2198f5a Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 19 Mar 2026 12:43:14 +0100 Subject: [PATCH 20/23] remove unused tables ivas_cos_twiddle_80_fx[] and ivas_sin_twiddle_80_fx[] --- lib_com/ivas_mdct_imdct_fx.c | 2 ++ lib_com/ivas_rom_com.h | 2 ++ lib_com/ivas_rom_com_fx.c | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib_com/ivas_mdct_imdct_fx.c b/lib_com/ivas_mdct_imdct_fx.c index 8c6e7e48c..304ed9fcc 100644 --- a/lib_com/ivas_mdct_imdct_fx.c +++ b/lib_com/ivas_mdct_imdct_fx.c @@ -396,11 +396,13 @@ void ivas_get_twid_factors_fx1( *pTwid_re = (const Word16 *) &ivas_cos_twiddle_160_fx[0]; // Q15 *pTwid_im = (const Word16 *) &ivas_sin_twiddle_160_fx[0]; // Q15 } +#ifndef HARMONIZE_DoRTFTn ELSE IF( EQ_16( length, 80 ) ) 
{ *pTwid_re = (const Word16 *) &ivas_cos_twiddle_80_fx[0]; // Q15 *pTwid_im = (const Word16 *) &ivas_sin_twiddle_80_fx[0]; // Q15 } +#endif ELSE { assert( !"Not supported FFT length!" ); diff --git a/lib_com/ivas_rom_com.h b/lib_com/ivas_rom_com.h index 6081e450a..1ac8737ac 100644 --- a/lib_com/ivas_rom_com.h +++ b/lib_com/ivas_rom_com.h @@ -502,8 +502,10 @@ extern const Word16 ivas_sin_twiddle_320_fx[IVAS_320_PT_LEN >> 1]; extern const Word16 ivas_cos_twiddle_320_fx[IVAS_320_PT_LEN >> 1]; extern const Word16 ivas_sin_twiddle_160_fx[IVAS_160_PT_LEN >> 1]; extern const Word16 ivas_cos_twiddle_160_fx[IVAS_160_PT_LEN >> 1]; +#ifndef HARMONIZE_DoRTFTn extern const Word16 ivas_sin_twiddle_80_fx[IVAS_80_PT_LEN >> 1]; extern const Word16 ivas_cos_twiddle_80_fx[IVAS_80_PT_LEN >> 1]; +#endif extern const Word16 nf_tw_smoothing_coeffs_fx[N_LTP_GAIN_MEMS]; extern const Word32 dft_res_gains_q_fx[][2]; extern const Word16 dft_res_cod_alpha_fx[STEREO_DFT_BAND_MAX]; diff --git a/lib_com/ivas_rom_com_fx.c b/lib_com/ivas_rom_com_fx.c index 4ef543df7..3aa42c5a6 100644 --- a/lib_com/ivas_rom_com_fx.c +++ b/lib_com/ivas_rom_com_fx.c @@ -3044,6 +3044,7 @@ const Word16 ivas_cos_twiddle_160_fx[ IVAS_160_PT_LEN >> 1 ] = { SHC( 0x13b6 ), SHC( 0x1139 ), SHC( 0x0ebb ), SHC( 0x0c3b ), SHC( 0x09ba ), SHC( 0x0738 ), SHC( 0x04b6 ), SHC( 0x0232 ), }; +#ifndef HARMONIZE_DoRTFTn const Word16 ivas_sin_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = { SHC( 0xff60 ), SHC( 0xfa59 ), SHC( 0xf555 ), SHC( 0xf055 ), SHC( 0xeb5c ), SHC( 0xe66a ), SHC( 0xe183 ), SHC( 0xdca7 ), SHC( 0xd7da ), SHC( 0xd31c ), SHC( 0xce70 ), SHC( 0xc9d8 ), SHC( 0xc555 ), SHC( 0xc0e9 ), SHC( 0xbc96 ), SHC( 0xb85e ), @@ -3059,7 +3060,7 @@ const Word16 ivas_cos_twiddle_80_fx[ IVAS_80_PT_LEN >> 1 ] = { SHC( 0x4aba ), SHC( 0x4696 ), SHC( 0x4257 ), SHC( 0x3dfe ), SHC( 0x398c ), SHC( 0x3504 ), SHC( 0x3067 ), SHC( 0x2bb6 ), SHC( 0x26f4 ), SHC( 0x2223 ), SHC( 0x1d45 ), SHC( 0x185a ), SHC( 0x1367 ), SHC( 0x0e6b ), SHC( 0x096a ), SHC( 0x0465 ), }; 
- +#endif const Word16 nf_tw_smoothing_coeffs_fx[N_LTP_GAIN_MEMS] = { 13107, 6553, 6553, 6553 -- GitLab From f75daa53a610a8cb4001e6052efd7ae674681b64 Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 19 Mar 2026 13:02:01 +0100 Subject: [PATCH 21/23] define local functions as static --- lib_com/fft_fx.c | 33 ++++++++++++++++++++++++++++++++- lib_com/prot_fx.h | 4 +++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 6776f4ead..4ee552c4a 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -1559,7 +1559,11 @@ static void fft5_4( * a low complexity 2-dimensional DFT of 80 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT80_fx( +#else void DoRTFT80_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1586,7 +1590,11 @@ void DoRTFT80_fx( * a low complexity 2-dimensional DFT of 120 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT120_fx( +#else void DoRTFT120_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1613,7 +1621,11 @@ void DoRTFT120_fx( * a low complexity 2-dimensional DFT of 160 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT160_fx( +#else void DoRTFT160_fx( +#endif Word32 x[], /* i/o: real part of input and output data Qx */ Word32 y[] /* i/o: imaginary part of input and output data Qx */ ) @@ -1640,7 +1652,11 @@ void DoRTFT160_fx( * a low complexity 2-dimensional DFT of 320 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT320_fx( +#else void DoRTFT320_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: 
imaginary part of input and output data Qx */ ) @@ -1667,7 +1683,11 @@ void DoRTFT320_fx( * a low complexity 2-dimensional DFT of 480 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT480_fx( +#else void DoRTFT480_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1694,7 +1714,11 @@ void DoRTFT480_fx( * a low complexity 2-dimensional DFT of 40 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT40_fx( +#else void DoRTFT40_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1720,7 +1744,11 @@ void DoRTFT40_fx( * a low complexity 2-dimensional DFT of 20 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT20_fx( +#else void DoRTFT20_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) @@ -1747,12 +1775,15 @@ void DoRTFT20_fx( * FFT with 128 points *-----------------------------------------------------------------*/ +#ifdef HARMONIZE_DoRTFTn +static void DoRTFT128_fx( +#else void DoRTFT128_fx( +#endif Word32 *x, /* i/o: real part of input and output data Qx */ Word32 *y /* i/o: imaginary part of input and output data Qx */ ) { - Word16 i; Word32 z[256]; diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index f85fc9f35..7207380e4 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -3994,7 +3994,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); @@ -4060,6 +4060,7 @@ void DoRTFTn_fx( const Word16 n /* i : size of the FFT up to 1024 */ ); 
+#ifndef HARMONIZE_DoRTFTn void DoRTFT480_fx( Word32 *x, /* i/o: real part of input and output data */ Word32 *y /* i/o: imaginary part of input and output data */ @@ -4100,6 +4101,7 @@ void DoRTFT20_fx( Word32 *y /* i/o: imaginary part of input and output data */ ); +#endif Word16 RFFTN_fx( Word32 *data, const Word16 *sine_table, -- GitLab From 766495894633cb050e47ccfde535f93c67fd0f0e Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 19 Mar 2026 15:11:36 +0100 Subject: [PATCH 22/23] cleaning --- lib_com/fft_fx.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 4ee552c4a..18acad865 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -1433,11 +1433,7 @@ static void fft4_5( move32(); } -#ifdef HARMONIZE_DoRTFTnAA - cdftForw_fx( 8, z, Ip_fft4, w_fft4_fx ); -#else cdftForw( 8, z, Ip_fft4, w_fft4_fx ); -#endif FOR( i = 0; i < 4; i++ ) { -- GitLab From d5189c75312e6d747a07d798c754612c88d6cda3 Mon Sep 17 00:00:00 2001 From: vaclav Date: Thu, 19 Mar 2026 15:13:50 +0100 Subject: [PATCH 23/23] clang-format --- lib_com/prot_fx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h index 7207380e4..629c6fd35 100644 --- a/lib_com/prot_fx.h +++ b/lib_com/prot_fx.h @@ -3994,7 +3994,7 @@ void edct_fx( Word16 *q, /* i : Q value of input signal */ const Word16 element_mode /* i : element mode */ #else - Word16 *q /* i : Q value of i signal */ + Word16 *q /* i : Q value of i signal */ #endif ); -- GitLab