From defa4937cc74af9928015fd67aa64d8aabcedbdd Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 4 Mar 2025 13:42:55 +0530 Subject: [PATCH] Optimization of stereo decode path 32kHz @32kbps --- lib_com/fft_fx.c | 334 ++++++++++++++++++++++++++++ lib_com/hp50_fx.c | 66 ++++++ lib_com/modif_fs.c | 94 ++++++++ lib_com/mslvq_com_fx.c | 50 +++++ lib_com/options.h | 1 + lib_com/scale_mem_fx.c | 13 ++ lib_com/trans_inv_fx.c | 31 ++- lib_dec/acelp_core_dec_ivas_fx.c | 18 +- lib_dec/ivas_stereo_cng_dec.c | 19 +- lib_dec/ivas_stereo_dft_dec_fx.c | 167 ++++++++++++-- lib_dec/ivas_stereo_switching_dec.c | 38 +++- lib_dec/ivas_stereo_td_dec.c | 24 +- 12 files changed, 833 insertions(+), 22 deletions(-) diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c index 1b095e346..acadbb06b 100644 --- a/lib_com/fft_fx.c +++ b/lib_com/fft_fx.c @@ -4727,6 +4727,16 @@ static void fft_len16( cmplx t[4]; cmplx y[16]; +#ifdef OPT_STEREO_32KBPS_V1 + s[0] = x[0]; // Qx + move64(); + s[1] = x[4]; // Qx + move64(); + s[2] = x[8]; // Qx + move64(); + s[3] = x[12]; // Qx + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_shr( x[0], SCALEFACTOR16 ); // Qx move64(); s[1] = CL_shr( x[4], SCALEFACTOR16 ); // Qx @@ -4735,6 +4745,7 @@ static void fft_len16( move64(); s[3] = CL_shr( x[12], SCALEFACTOR16 ); // Qx move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ t[0] = CL_add( s[0], s[2] ); move64(); @@ -4754,6 +4765,16 @@ static void fft_len16( y[3] = CL_add( t[1], t[3] ); move64(); +#ifdef OPT_STEREO_32KBPS_V1 + s[0] = x[1]; // Qx + move64(); + s[1] = x[5]; // Qx + move64(); + s[2] = x[9]; // Qx + move64(); + s[3] = x[13]; // Qx + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_shr( x[1], SCALEFACTOR16 ); // Qx move64(); s[1] = CL_shr( x[5], SCALEFACTOR16 ); // Qx @@ -4762,6 +4783,7 @@ static void fft_len16( move64(); s[3] = CL_shr( x[13], SCALEFACTOR16 ); // Qx move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ t[0] = CL_add( s[0], s[2] ); move64(); @@ -4781,6 +4803,16 @@ static void fft_len16( y[7] = CL_add( t[1], t[3] ); move64(); +#ifdef OPT_STEREO_32KBPS_V1 + s[0] = x[2]; // Qx + move64(); + s[1] = x[6]; // Qx + move64(); + s[2] = x[10]; // Qx + move64(); + s[3] = x[14]; // Qx + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_shr( x[2], SCALEFACTOR16 ); // Qx move64(); s[1] = CL_shr( x[6], SCALEFACTOR16 ); // Qx @@ -4789,6 +4821,7 @@ static void fft_len16( move64(); s[3] = CL_shr( x[14], SCALEFACTOR16 ); // Qx move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ t[0] = CL_add( s[0], s[2] ); move64(); @@ -4810,6 +4843,16 @@ static void fft_len16( y[11] = CL_add( t[1], t[3] ); move64(); +#ifdef OPT_STEREO_32KBPS_V1 + s[0] = x[3]; // Qx + move64(); + s[1] = x[7]; // Qx + move64(); + s[2] = x[11]; // Qx + move64(); + s[3] = x[15]; // Qx + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_shr( x[3], SCALEFACTOR16 ); // Qx move64(); s[1] = CL_shr( x[7], SCALEFACTOR16 ); // Qx @@ -4818,6 +4861,7 @@ static void fft_len16( move64(); s[3] = CL_shr( x[15], SCALEFACTOR16 ); // Qx move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ t[0] = CL_add( s[0], s[2] ); move64(); @@ -4978,6 +5022,18 @@ static void fft_len20_fx( cmplx tt[4]; cmplx y[20]; +#ifdef OPT_STEREO_32KBPS_V1 + xx[0] = x[0]; // Qx + move64(); + xx[1] = x[16]; // Qx + move64(); + xx[2] = x[12]; // Qx + move64(); + xx[3] = x[8]; // Qx + move64(); + xx[4] = x[4]; // Qx + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ xx[0] = CL_shr( x[0], SCALEFACTOR20 ); // Qx move64(); xx[1] = CL_shr( x[16], SCALEFACTOR20 ); // Qx @@ -4988,6 +5044,7 @@ static void fft_len20_fx( move64(); xx[4] = CL_shr( x[4], SCALEFACTOR20 ); // Qx move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_add( xx[1], xx[4] ); move64(); @@ -5023,6 +5080,18 @@ static void fft_len20_fx( y[12] = CL_msu_j( s[2], s[3] ); move64(); +#ifdef OPT_STEREO_32KBPS_V1 + xx[0] = x[5]; + move64(); + xx[1] = x[1]; + move64(); + xx[2] = x[17]; + move64(); + xx[3] = x[13]; + move64(); + xx[4] = x[9]; + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ xx[0] = CL_shr( x[5], SCALEFACTOR20 ); move64(); xx[1] = CL_shr( x[1], SCALEFACTOR20 ); @@ -5033,6 +5102,7 @@ static void fft_len20_fx( move64(); xx[4] = CL_shr( x[9], SCALEFACTOR20 ); move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_add( xx[1], xx[4] ); move64(); @@ -5068,6 +5138,18 @@ static void fft_len20_fx( y[13] = CL_msu_j( s[2], s[3] ); move64(); +#ifdef OPT_STEREO_32KBPS_V1 + xx[0] = x[10]; + move64(); + xx[1] = x[6]; + move64(); + xx[2] = x[2]; + move64(); + xx[3] = x[18]; + move64(); + xx[4] = x[14]; + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ xx[0] = CL_shr( x[10], SCALEFACTOR20 ); move64(); xx[1] = CL_shr( x[6], SCALEFACTOR20 ); @@ -5078,6 +5160,7 @@ static void fft_len20_fx( move64(); xx[4] = CL_shr( x[14], SCALEFACTOR20 ); move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_add( xx[1], xx[4] ); move64(); @@ -5113,6 +5196,18 @@ static void fft_len20_fx( y[14] = CL_msu_j( s[2], s[3] ); move64(); +#ifdef OPT_STEREO_32KBPS_V1 + xx[0] = x[15]; + move64(); + xx[1] = x[11]; + move64(); + xx[2] = x[7]; + move64(); + xx[3] = x[3]; + move64(); + xx[4] = x[19]; + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ xx[0] = CL_shr( x[15], SCALEFACTOR20 ); move64(); xx[1] = CL_shr( x[11], SCALEFACTOR20 ); @@ -5123,6 +5218,7 @@ static void fft_len20_fx( move64(); xx[4] = CL_shr( x[19], SCALEFACTOR20 ); move64(); +#endif /* OPT_STEREO_32KBPS_V1 */ s[0] = CL_add( xx[1], xx[4] ); move64(); @@ -6501,6 +6597,239 @@ static void fft_lenN( cmplx s[8]; cmplx y[8]; +#ifdef OPT_STEREO_32KBPS_V1 + y[1] = xx[1 * dim1]; + move64(); + y[2] = xx[2 * dim1]; + move64(); + y[3] = xx[3 * dim1]; + move64(); + y[4] = xx[4 * dim1]; + move64(); + y[5] = xx[5 * dim1]; + move64(); + y[6] = xx[6 * dim1]; + move64(); + y[7] = xx[7 * dim1]; + move64(); + + test(); + test(); + IF( EQ_16( dim1, 8 ) || EQ_16( dim1, 16 ) || EQ_16( dim1, 32 ) ) + { + FOR( i = 0; i < dim1; i++ ) + { + { + y[0] = xx[i]; + move64(); + }; + IF( i > 0 ) + { + { + y[1] = CL_mac_j( CL_scale( xx[( i + ( 1 * dim1 ) )], W[( ( ( sc * i ) + ( ( sc * 1 ) * dim1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 1 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( sc * 1 ) * dim1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[2] = CL_mac_j( CL_scale( xx[( i + ( 2 * dim1 ) )], W[( ( ( sc * i ) + ( ( sc * 2 ) * dim1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 2 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( sc * 2 ) * dim1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[3] = CL_mac_j( CL_scale( xx[( i + ( 3 * dim1 ) )], W[( ( ( sc * i ) + ( ( sc * 3 ) * dim1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 3 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( sc * 3 ) * dim1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[4] = CL_mac_j( CL_scale( xx[( i + ( 4 * dim1 ) )], W[( ( ( sc * i ) + ( ( sc * 4 ) * dim1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 4 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( sc * 4 ) * dim1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[5] = CL_mac_j( CL_scale( xx[( i + ( 5 * dim1 ) )], W[( ( ( sc * i ) + ( ( sc * 5 ) * dim1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 5 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( sc * 5 ) * dim1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[6] = CL_mac_j( CL_scale( xx[( i + ( 6 * dim1 ) )], W[( ( ( sc * i ) + ( ( sc * 6 ) * dim1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 6 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( sc * 6 ) * dim1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[7] = CL_mac_j( CL_scale( xx[( i + ( 7 * dim1 ) )], W[( ( ( sc * i ) + ( ( sc * 7 ) * dim1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 7 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( sc * 7 ) * dim1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + } + + t[0] = CL_add( y[0], y[4] ); + move64(); + t[1] = CL_sub( y[0], y[4] ); + move64(); + t[2] = CL_add( y[1], y[5] ); + move64(); + t[3] = CL_sub( y[1], y[5] ); + move64(); + t[4] = CL_add( y[2], y[6] ); + move64(); + t[5] = CL_sub( y[2], y[6] ); + move64(); + t[6] = CL_add( y[3], y[7] ); + move64(); + t[7] = CL_sub( y[3], y[7] ); + move64(); + + s[0] = CL_add( t[0], t[4] ); + move64(); + s[2] = CL_sub( t[0], t[4] ); + move64(); + s[4] = CL_mac_j( t[1], t[5] ); + move64(); + s[5] = CL_msu_j( t[1], t[5] ); + move64(); + s[1] = CL_add( t[2], t[6] ); + move64(); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); + + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); + t[1] = CL_sub( t[3], t[7] ); + move64(); + + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); // Qx + move64(); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); // Qx + move64(); + s[7] = CL_conjugate( s[7] ); + move64(); + + x[i] = CL_add( s[0], s[1] ); + move64(); + x[( i + ( 1 * dim1 ) )] = CL_add( s[5], s[6] ); + move64(); + x[( i + ( 2 * dim1 ) )] = CL_sub( s[2], s[3] ); + move64(); + x[( i + ( 3 * dim1 ) )] = CL_add( s[4], s[7] ); + move64(); + x[( i + ( 4 * dim1 ) )] = CL_sub( s[0], s[1] ); + move64(); + x[( i + ( 5 * dim1 ) )] = CL_sub( s[5], s[6] ); + move64(); + x[( i + ( 6 * dim1 ) )] = CL_add( s[2], s[3] ); + move64(); + x[( i + ( 7 * dim1 ) )] = CL_sub( s[4], s[7] ); + move64(); + } + } + ELSE + { + FOR( i = 0; i < dim1; i++ ) + { + { + y[0] = xx[i]; + move64(); + }; + IF( i > 0 ) + { + { + y[1] = CL_mac_j( CL_scale( xx[( i + ( 1 * dim1 ) )], W[( ( ( sc * i ) + ( ( ( sc * 1 ) * dim1 ) << 1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 1 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( ( sc * 1 ) * dim1 ) << 1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[2] = CL_mac_j( CL_scale( xx[( i + ( 2 * dim1 ) )], W[( ( ( sc * i ) + ( ( ( sc * 2 ) * dim1 ) << 1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 2 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( ( sc * 2 ) * dim1 ) << 1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[3] = CL_mac_j( CL_scale( xx[( i + ( 3 * dim1 ) )], W[( ( ( sc * i ) + ( ( ( sc * 3 ) * dim1 ) << 1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 3 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( ( sc * 3 ) * dim1 ) << 1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[4] = CL_mac_j( CL_scale( xx[( i + ( 4 * dim1 ) )], W[( ( ( sc * i ) + ( ( ( sc * 4 ) * dim1 ) << 1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 4 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( ( sc * 4 ) * dim1 ) << 1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[5] = CL_mac_j( CL_scale( xx[( i + ( 5 * dim1 ) )], W[( ( ( sc * i ) + ( ( ( sc * 5 ) * dim1 ) << 1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 5 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( ( sc * 5 ) * dim1 ) << 1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[6] = CL_mac_j( CL_scale( xx[( i + ( 6 * dim1 ) )], W[( ( ( sc * i ) + ( ( ( sc * 6 ) * dim1 ) << 1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 6 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( ( sc * 6 ) * dim1 ) << 1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + { + y[7] = CL_mac_j( CL_scale( xx[( i + ( 7 * dim1 ) )], W[( ( ( sc * i ) + ( ( ( sc * 7 ) * dim1 ) << 1 ) ) - Woff )] ), + CL_scale( xx[( i + ( 7 * dim1 ) )], W[( ( ( ( sc * i ) + ( ( ( sc * 7 ) * dim1 ) << 1 ) ) + 1 ) - Woff )] ) ); // Qx + move64(); + }; + } + + t[0] = CL_add( y[0], y[4] ); + move64(); + t[1] = CL_sub( y[0], y[4] ); + move64(); + t[2] = CL_add( y[1], y[5] ); + move64(); + t[3] = CL_sub( y[1], y[5] ); + move64(); + t[4] = CL_add( y[2], y[6] ); + move64(); + t[5] = CL_sub( y[2], y[6] ); + move64(); + t[6] = CL_add( y[3], y[7] ); + move64(); + t[7] = CL_sub( y[3], y[7] ); + move64(); + + s[0] = CL_add( t[0], t[4] ); + move64(); + s[2] = CL_sub( t[0], t[4] ); + move64(); + s[4] = CL_mac_j( t[1], t[5] ); + move64(); + s[5] = CL_msu_j( t[1], t[5] ); + move64(); + s[1] = CL_add( t[2], t[6] ); + move64(); + s[3] = CL_swap_real_imag( CL_sub( CL_conjugate( t[2] ), CL_conjugate( t[6] ) ) ); + move64(); + + t[0] = CL_swap_real_imag( CL_add( t[3], t[7] ) ); + move64(); + t[1] = CL_sub( t[3], t[7] ); + move64(); + + s[6] = CL_scale( CL_add( CL_conjugate( t[0] ), t[1] ), FFT_C81 ); // Qx + move64(); + s[7] = CL_scale( CL_sub( t[0], CL_conjugate( t[1] ) ), FFT_C81 ); // Qx + move64(); + s[7] = CL_conjugate( s[7] ); + move64(); + + x[i] = CL_add( s[0], s[1] ); + move64(); + x[( i + ( 1 * dim1 ) )] = CL_add( s[5], s[6] ); + move64(); + x[( i + ( 2 * dim1 ) )] = CL_sub( s[2], s[3] ); + move64(); + x[( i + ( 3 * dim1 ) )] = CL_add( s[4], s[7] ); + move64(); + x[( i + ( 4 * dim1 ) )] = CL_sub( s[0], s[1] ); + move64(); + x[( i + ( 5 * dim1 ) )] = CL_sub( s[5], s[6] ); + move64(); + x[( i + ( 6 * dim1 ) )] = CL_add( s[2], s[3] ); + move64(); + x[( i + ( 7 * dim1 ) )] = CL_sub( s[4], s[7] ); + move64(); + } + } +#else /* OPT_STEREO_32KBPS_V1 */ test(); test(); test(); @@ -6781,6 +7110,7 @@ static void fft_lenN( move64(); } } +#endif /* OPT_STEREO_32KBPS_V1 */ BREAK; } @@ -7173,7 +7503,11 @@ void rfft_fx( move32(); x[( length - ( i << 1 ) )] = Mpy_32_16_1( L_add( t1, t3 ), 16384 /*0.5.Q15*/ ); move32(); +#ifdef OPT_STEREO_32KBPS_V1 + x[( ( length - ( i << 1 ) ) + 1 )] = Mpy_32_16_1( L_add( t2, t4 ), -16384 /*0.5.Q15*/ ); +#else /* OPT_STEREO_32KBPS_V1 */ x[( ( length - ( i << 1 ) ) + 1 )] = Mpy_32_16_1( L_negate( L_add( t2, t4 ) ), 16384 /*0.5.Q15*/ ); +#endif /* OPT_STEREO_32KBPS_V1 */ move32(); } diff --git a/lib_com/hp50_fx.c b/lib_com/hp50_fx.c index 84bbf8b3c..d13835f4c 100644 --- a/lib_com/hp50_fx.c +++ b/lib_com/hp50_fx.c @@ -469,8 +469,14 @@ void hp20_fx_32( { Word16 i; Word32 a1_fx, a2_fx, b1_fx, b2_fx; +#ifdef OPT_STEREO_32KBPS_V1 + Word16 Qy1, Qy2, Qmin; + Word64 y0_fx64, y1_fx64, y2_fx64; + Word32 x0, x1, x2; +#else /* OPT_STEREO_32KBPS_V1 */ Word16 Qx0, Qx1, Qx2, Qy1, Qprev_y1, Qy2, Qprev_y2, Qmin; Word64 x0_fx64, x1_fx64, x2_fx64, y0_fx64, y1_fx64, y2_fx64, R1, R2, R3, R4, R5; +#endif /* OPT_STEREO_32KBPS_V1 */ IF( EQ_32( Fs, 8000 ) ) { @@ -521,15 +527,64 @@ void hp20_fx_32( move32(); move32(); +#ifdef OPT_STEREO_32KBPS_V1 + y1_fx64 = W_add( W_deposit32_l( mem_fx[0] ), W_deposit32_h( mem_fx[1] ) ); + y2_fx64 = W_add( W_deposit32_l( mem_fx[2] ), W_deposit32_h( mem_fx[3] ) ); + + x0 = mem_fx[4]; + move32(); + x1 = mem_fx[5]; + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ Qprev_y1 = extract_l( mem_fx[4] ); Qprev_y2 = extract_l( mem_fx[5] ); y1_fx64 = W_deposit32_l( mem_fx[0] ); y2_fx64 = W_deposit32_l( mem_fx[1] ); x0_fx64 = W_deposit32_l( mem_fx[2] ); x1_fx64 = W_deposit32_l( mem_fx[3] ); +#endif /* OPT_STEREO_32KBPS_V1 */ FOR( i = 0; i < lg; i++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + x2 = x1; + move32(); + x1 = x0; + move32(); + x0 = signal_fx[i]; + move32(); + + Qy1 = W_norm( y1_fx64 ); + if ( y1_fx64 == 0 ) + { + Qy1 = 62; + move16(); + } + + Qy2 = W_norm( y2_fx64 ); + if ( y2_fx64 == 0 ) + { + Qy2 = 62; + move16(); + } + + Qmin = s_min( Qy1, Qy2 ); + + Qmin = sub( Qmin, 34 ); + + y0_fx64 = W_mac_32_32( W_mult_32_32( W_shl_sat_l( y1_fx64, Qmin ), a1_fx ), W_shl_sat_l( y2_fx64, Qmin ), a2_fx ); // Qmin + Q29 + Q30 + 1 + + Word64 temp = W_mac_32_32( W_mac_32_32( W_mult_32_32( x2, b2_fx ), x1, b1_fx ), x0, b2_fx ); // Q30 + Word64 y0_fx = W_shr( y0_fx64, add( Qmin, Q30 ) ); // Q30 + y0_fx64 = W_add( temp, y0_fx ); // Q30 + signal_fx[i] = W_shl_sat_l( y0_fx64, -Q30 ); + move32(); + + y2_fx64 = y1_fx64; + move64(); + y1_fx64 = y0_fx64; + move64(); +#else /* OPT_STEREO_32KBPS_V1 */ x2_fx64 = x1_fx64; move64(); x1_fx64 = x0_fx64; @@ -611,8 +666,17 @@ void hp20_fx_32( move64(); move16(); move16(); +#endif /* OPT_STEREO_32KBPS_V1 */ } +#ifdef OPT_STEREO_32KBPS_V1 + mem_fx[0] = W_extract_l( y1_fx64 ); + mem_fx[1] = W_extract_h( y1_fx64 ); + mem_fx[2] = W_extract_l( y2_fx64 ); + mem_fx[3] = W_extract_h( y2_fx64 ); + mem_fx[4] = x0; + mem_fx[5] = x1; +#else /* OPT_STEREO_32KBPS_V1 */ Qy1 = W_norm( y1_fx64 ); test(); IF( y1_fx64 != 0 && LT_16( Qy1, 32 ) ) @@ -635,6 +699,8 @@ void hp20_fx_32( mem_fx[3] = W_extract_l( x1_fx64 ); mem_fx[4] = Qprev_y1; mem_fx[5] = Qprev_y2; +#endif /* OPT_STEREO_32KBPS_V1 */ + move32(); move32(); move32(); diff --git a/lib_com/modif_fs.c b/lib_com/modif_fs.c index 4af93522d..1b2836252 100644 --- a/lib_com/modif_fs.c +++ b/lib_com/modif_fs.c @@ -115,6 +115,22 @@ void Decimate_allpass_steep_fx32( /* upper allpass filter chain */ FOR( k = 0; k < N / 2; k++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + temp[0] = Madd_32_16( mem[0], in[2 * k], AP1_STEEP_FX[0] ); // Qx + move32(); + mem[0] = Msub_32_16( in[2 * k], temp[0], AP1_STEEP_FX[0] ); // Qx + move32(); + + temp[1] = Madd_32_16( mem[1], temp[0], AP1_STEEP_FX[1] ); // Qx + move32(); + mem[1] = Msub_32_16( temp[0], temp[1], AP1_STEEP_FX[1] ); // Qx + move32(); + + out[k] = Madd_32_16( mem[ALLPASSSECTIONS_STEEP - 1], temp[ALLPASSSECTIONS_STEEP - 2], AP1_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ); // Qx + move32(); + mem[ALLPASSSECTIONS_STEEP - 1] = Msub_32_16( temp[ALLPASSSECTIONS_STEEP - 2], out[k], AP1_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ); // Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ temp[0] = L_add( mem[0], Mpy_32_16_1( in[2 * k], AP1_STEEP_FX[0] ) ); // Qx move32(); mem[0] = L_sub( in[2 * k], Mpy_32_16_1( temp[0], AP1_STEEP_FX[0] ) ); // Qx @@ -129,17 +145,31 @@ void Decimate_allpass_steep_fx32( move32(); mem[ALLPASSSECTIONS_STEEP - 1] = L_sub( temp[ALLPASSSECTIONS_STEEP - 2], Mpy_32_16_1( out[k], AP1_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ) ); // Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ } /* lower allpass filter chain */ +#ifdef OPT_STEREO_32KBPS_V1 + temp[0] = Madd_32_16( mem[ALLPASSSECTIONS_STEEP], mem[2 * ALLPASSSECTIONS_STEEP], AP2_STEEP_FX[0] ); // Qx + move32(); + mem[ALLPASSSECTIONS_STEEP] = Msub_32_16( mem[2 * ALLPASSSECTIONS_STEEP], temp[0], AP2_STEEP_FX[0] ); // Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ temp[0] = L_add( mem[ALLPASSSECTIONS_STEEP], Mpy_32_16_1( mem[2 * ALLPASSSECTIONS_STEEP], AP2_STEEP_FX[0] ) ); // Qx move32(); mem[ALLPASSSECTIONS_STEEP] = L_sub( mem[2 * ALLPASSSECTIONS_STEEP], Mpy_32_16_1( temp[0], AP2_STEEP_FX[0] ) ); // Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ /* for better performance, unroll this loop */ FOR( n = 1; n < ALLPASSSECTIONS_STEEP - 1; n++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + temp[n] = Madd_32_16( mem[ALLPASSSECTIONS_STEEP + n], temp[n - 1], AP2_STEEP_FX[n] ); // Qx + move32(); + mem[ALLPASSSECTIONS_STEEP + 1] = Msub_32_16( temp[n - 1], temp[n], AP2_STEEP_FX[n] ); // Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ temp[n] = L_add( mem[ALLPASSSECTIONS_STEEP + n], Mpy_32_16_1( temp[n - 1], AP2_STEEP_FX[n] ) ); // Qx move32(); /*if ( fabs( temp[n] ) < 1e-12 ) @@ -148,26 +178,48 @@ void Decimate_allpass_steep_fx32( }*/ mem[ALLPASSSECTIONS_STEEP + 1] = L_sub( temp[n - 1], Mpy_32_16_1( temp[n], AP2_STEEP_FX[n] ) ); // Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ } +#ifdef OPT_STEREO_32KBPS_V1 + temp[ALLPASSSECTIONS_STEEP - 1] = Madd_32_16( mem[2 * ALLPASSSECTIONS_STEEP - 1], temp[ALLPASSSECTIONS_STEEP - 2], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ); // Qx + move32(); + + mem[2 * ALLPASSSECTIONS_STEEP - 1] = Msub_32_16( temp[ALLPASSSECTIONS_STEEP - 2], temp[ALLPASSSECTIONS_STEEP - 1], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ); // Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ temp[ALLPASSSECTIONS_STEEP - 1] = L_add( mem[2 * ALLPASSSECTIONS_STEEP - 1], Mpy_32_16_1( temp[ALLPASSSECTIONS_STEEP - 2], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ) ); // Qx move32(); mem[2 * ALLPASSSECTIONS_STEEP - 1] = L_sub( temp[ALLPASSSECTIONS_STEEP - 2], Mpy_32_16_1( temp[ALLPASSSECTIONS_STEEP - 1], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ) ); // Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ out[0] = W_round48_L( W_mac_32_16( W_mult_32_16( out[0], 16384 /*0.5 in Q15*/ ), temp[ALLPASSSECTIONS_STEEP - 1], 16384 /*0.5 in Q15*/ ) ); // Qx move32(); FOR( k = 1; k < N / 2; k++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + temp[0] = Madd_32_16( mem[ALLPASSSECTIONS_STEEP], in[2 * k - 1], AP2_STEEP_FX[0] ); // Qx + move32(); + mem[ALLPASSSECTIONS_STEEP] = Msub_32_16( in[2 * k - 1], temp[0], AP2_STEEP_FX[0] ); // Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ temp[0] = L_add( mem[ALLPASSSECTIONS_STEEP], Mpy_32_16_1( in[sub( shl( k, 1 ), 1 )], AP2_STEEP_FX[0] ) ); // Qx move32(); mem[ALLPASSSECTIONS_STEEP] = L_sub( in[sub( shl( k, 1 ), 1 )], Mpy_32_16_1( temp[0], AP2_STEEP_FX[0] ) ); // Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ /* for better performance, unroll this loop */ FOR( n = 1; n < ALLPASSSECTIONS_STEEP - 1; n++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + temp[n] = Madd_32_16( mem[ALLPASSSECTIONS_STEEP + n], temp[n - 1], AP2_STEEP_FX[n] ); // Qx + move32(); + mem[ALLPASSSECTIONS_STEEP + n] = Msub_32_16( temp[n - 1], temp[n], AP2_STEEP_FX[n] ); // Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ temp[n] = L_add( mem[ALLPASSSECTIONS_STEEP + n], Mpy_32_16_1( temp[n - 1], AP2_STEEP_FX[n] ) ); // Qx move32(); /*if ( fabs( temp[n] ) < 1e-12 ) @@ -176,12 +228,20 @@ void Decimate_allpass_steep_fx32( }*/ mem[ALLPASSSECTIONS_STEEP + n] = L_sub( temp[n - 1], Mpy_32_16_1( temp[n], AP2_STEEP_FX[n] ) ); // Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ } +#ifdef OPT_STEREO_32KBPS_V1 + temp[ALLPASSSECTIONS_STEEP - 1] = Madd_32_16( mem[2 * ALLPASSSECTIONS_STEEP - 1], temp[ALLPASSSECTIONS_STEEP - 2], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ); // Qx + move32(); + mem[2 * ALLPASSSECTIONS_STEEP - 1] = Msub_32_16( temp[ALLPASSSECTIONS_STEEP - 2], temp[ALLPASSSECTIONS_STEEP - 1], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ); // Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ temp[ALLPASSSECTIONS_STEEP - 1] = L_add( mem[2 * ALLPASSSECTIONS_STEEP - 1], Mpy_32_16_1( temp[ALLPASSSECTIONS_STEEP - 2], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ) ); // Qx move32(); mem[2 * ALLPASSSECTIONS_STEEP - 1] = L_sub( temp[ALLPASSSECTIONS_STEEP - 2], Mpy_32_16_1( temp[ALLPASSSECTIONS_STEEP - 1], AP2_STEEP_FX[ALLPASSSECTIONS_STEEP - 1] ) ); // Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ out[k] = W_round48_L( W_mac_32_16( W_mult_32_16( out[k], 16384 /*0.5 in Q15*/ ), temp[ALLPASSSECTIONS_STEEP - 1], 16384 /*0.5 in Q15*/ ) ); // Qx move32(); } @@ -211,12 +271,21 @@ void interpolate_3_over_2_allpass_32( FOR( i = 0; i < len; i++ ) { /* Upper branch */ +#ifdef OPT_STEREO_32KBPS_V1 + Vu[0] = Madd_32_16( mem[0], L_sub( input[i], mem[1] ), filt_coeff[0] ); // Qx + Q15 - Q15 -> Qx + move32(); + Vu[1] = Madd_32_16( mem[1], L_sub( Vu[0], mem[2] ), filt_coeff[1] ); // Qx + Q15 - Q15 -> Qx + move32(); + mem[3] = Madd_32_16( mem[2], L_sub( Vu[1], mem[3] ), filt_coeff[2] ); // Qx + Q15 - Q15 -> Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ Vu[0] = L_add( mem[0], Mpy_32_16_1( L_sub( input[i], mem[1] ), filt_coeff[0] ) ); // Qx + Q15 - Q15 -> Qx move32(); Vu[1] = L_add( mem[1], Mpy_32_16_1( L_sub( Vu[0], mem[2] ), filt_coeff[1] ) ); // Qx + Q15 - Q15 -> Qx move32(); mem[3] = L_add( mem[2], Mpy_32_16_1( L_sub( Vu[1], mem[3] ), filt_coeff[2] ) ); // Qx + Q15 - Q15 -> Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ mem[1] = Vu[0]; // Qx move32(); @@ -226,12 +295,21 @@ void interpolate_3_over_2_allpass_32( move32(); /* Middle branch */ +#ifdef OPT_STEREO_32KBPS_V1 + Vm[0] = Madd_32_16( mem[0], L_sub( input[i], mem[4] ), filt_coeff[3] ); // Qx + Q15 - Q15 -> Qx + move32(); + Vm[1] = Madd_32_16( mem[4], L_sub( Vm[0], mem[5] ), filt_coeff[4] ); // Qx + Q15 - Q15 -> Qx + move32(); + mem[6] = Madd_32_16( mem[5], L_sub( Vm[1], mem[6] ), filt_coeff[5] ); // Qx + Q15 - Q15 -> Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ Vm[0] = L_add( mem[0], Mpy_32_16_1( L_sub( input[i], mem[4] ), filt_coeff[3] ) ); // Qx + Q15 - Q15 -> Qx move32(); Vm[1] = L_add( mem[4], Mpy_32_16_1( L_sub( Vm[0], mem[5] ), filt_coeff[4] ) ); // Qx + Q15 - Q15 -> Qx move32(); mem[6] = L_add( mem[5], Mpy_32_16_1( L_sub( Vm[1], mem[6] ), filt_coeff[5] ) ); // Qx + Q15 - Q15 -> Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ mem[4] = Vm[0]; // Qx move32(); @@ -241,12 +319,21 @@ void interpolate_3_over_2_allpass_32( move32(); /* Lower branch */ +#ifdef OPT_STEREO_32KBPS_V1 + Vl[0] = Madd_32_16( mem[0], L_sub( input[i], mem[7] ), filt_coeff[6] ); // Qx + Q15 - Q15 -> Qx + move32(); + Vl[1] = Madd_32_16( mem[7], L_sub( Vl[0], mem[8] ), filt_coeff[7] ); // Qx + Q15 - Q15 -> Qx + move32(); + mem[9] = Madd_32_16( mem[8], L_sub( Vl[1], mem[9] ), filt_coeff[8] ); // Qx + Q15 - Q15 -> Qx + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ Vl[0] = L_add( mem[0], Mpy_32_16_1( L_sub( input[i], mem[7] ), filt_coeff[6] ) ); // Qx + Q15 - Q15 -> Qx move32(); Vl[1] = L_add( mem[7], Mpy_32_16_1( L_sub( Vl[0], mem[8] ), filt_coeff[7] ) ); // Qx + Q15 - Q15 -> Qx move32(); mem[9] = L_add( mem[8], Mpy_32_16_1( L_sub( Vl[1], mem[9] ), filt_coeff[8] ) ); // Qx + Q15 - Q15 -> Qx move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ mem[0] = input[i]; // Qx move32(); @@ -265,10 +352,17 @@ void interpolate_3_over_2_allpass_32( { mem_temp = out1_buff[2 * i]; move32(); +#ifdef OPT_STEREO_32KBPS_V1 + out[i] = Madd_32_16( Mpy_32_16_1( L_add( mem_temp, mem[10] ), 1550 ), L_add( mem[11], mem[14] ), -4965 ); // Qx + Q15 - Q15 -> Qx + // 0.0473147f in Q15 -> 1550, -0.151521f in Q15 -> -4965 + out[i] = Madd_32_16( out[i], L_add( mem[12], mem[13] ), 20125 ); + // 0.614152f in Q15 -> 20125 +#else /* OPT_STEREO_32KBPS_V1 */ out[i] = L_add( Mpy_32_16_1( L_add( mem_temp, mem[10] ), 1550 ), Mpy_32_16_1( L_add( mem[11], mem[14] ), -4965 ) ); // Qx + Q15 - Q15 -> Qx // 0.0473147f in Q15 -> 1550, -0.151521f in Q15 -> -4965 out[i] = L_add( out[i], Mpy_32_16_1( L_add( mem[12], mem[13] ), 20125 ) ); // 0.614152f in Q15 -> 20125 +#endif /* OPT_STEREO_32KBPS_V1 */ mem[10] = mem[11]; // Qx move32(); mem[11] = mem[12]; // Qx diff --git a/lib_com/mslvq_com_fx.c b/lib_com/mslvq_com_fx.c index 14c9b6d97..d97830a5f 100644 --- a/lib_com/mslvq_com_fx.c +++ b/lib_com/mslvq_com_fx.c @@ -122,9 +122,33 @@ void init_lvq_fx( ) { Word16 i, j; +#ifdef OPT_STEREO_32KBPS_V1 + Word16 k; +#endif /* OPT_STEREO_32KBPS_V1 */ /* safety-net mode */ FOR( i = 0; i < MAX_NO_MODES; i++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + FOR( ( j = 0, k = 0 ); j < MAX_NO_SCALES; ( j++, k++ ) ) + { + if ( ( no_lead_fx[i][j] <= 0 ) ) + { + j = MAX_NO_SCALES; + } + } + no_scales[i][0] = k; + move16(); + + FOR( k = 0; j < MAX_NO_SCALES << 1; ( j++, k++ ) ) + { + if ( no_lead_fx[i][j] <= 0 ) + { + j = MAX_NO_SCALES << 1; + } + } + no_scales[i][1] = k; + move16(); +#else /* OPT_STEREO_32KBPS_V1 */ j = 0; move16(); test(); @@ -143,10 +167,35 @@ void init_lvq_fx( } no_scales[i][1] = sub( j, MAX_NO_SCALES ); move16(); +#endif /* OPT_STEREO_32KBPS_V1 */ } /* predictive mode */ FOR( i = 0; i < MAX_NO_MODES_p; i++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + FOR( ( j = 0, k = 0 ); j < MAX_NO_SCALES; ( j++, k++ ) ) + { + + if ( ( no_lead_p_fx[i][j] <= 0 ) ) + { + j = MAX_NO_SCALES; + } + } + no_scales_p[i][0] = k; + move16(); + + FOR( k = 0; j < MAX_NO_SCALES << 1; ( j++, k++ ) ) + { + + if ( ( no_lead_p_fx[i][j] <= 0 ) ) + { + j = MAX_NO_SCALES << 1; + } + } + + no_scales_p[i][1] = k; + move16(); +#else /* OPT_STEREO_32KBPS_V1 */ j = 0; move16(); WHILE( ( LT_16( j, MAX_NO_SCALES ) ) && ( no_lead_p_fx[i][j] > 0 ) ) @@ -165,6 +214,7 @@ void init_lvq_fx( } no_scales_p[i][1] = sub( j, MAX_NO_SCALES ); move16(); +#endif /* OPT_STEREO_32KBPS_V1 */ } /* index offsets for each truncation */ init_offset_fx( offset_scale1, offset_scale2, offset_scale1_p, offset_scale2_p, no_scales, no_scales_p ); diff --git a/lib_com/options.h b/lib_com/options.h index b24ac8267..d3570004d 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -172,4 +172,5 @@ #define FIX_1301_CORRECT_TD_CNST /* VA: Fix 1301, correct wrong constant in TD stereo */ #define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */ #define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH /* FhG: issue 708: fix crash in OSBA BR switching with long test vectors */ +//#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #endif diff --git a/lib_com/scale_mem_fx.c b/lib_com/scale_mem_fx.c index 002821050..2ce2ffd12 100644 --- a/lib_com/scale_mem_fx.c +++ b/lib_com/scale_mem_fx.c @@ -307,8 +307,15 @@ void scale_sig32( /* saturation can occur here */ x[i] = L_shl( x[i], exp0 ); move32(); +#ifdef OPT_STEREO_32KBPS_V1 + if ( 0 == exp0 ) + { + i = lg; + } +#endif /* OPT_STEREO_32KBPS_V1 */ } } + void scale_sig32_r( Word32 x[], /* i/o: signal to scale Qx */ const Word16 lg, /* i : size of x[] Q0 */ @@ -322,6 +329,12 @@ void scale_sig32_r( /* saturation can occur here */ x[i] = L_shl_r( x[i], exp0 ); move32(); +#ifdef OPT_STEREO_32KBPS_V1 + if ( 0 == exp0 ) + { + i = lg; + } +#endif /* OPT_STEREO_32KBPS_V1 */ } } diff --git a/lib_com/trans_inv_fx.c b/lib_com/trans_inv_fx.c index fbe0f0d6d..0033e2c9e 100644 --- a/lib_com/trans_inv_fx.c +++ b/lib_com/trans_inv_fx.c @@ -84,7 +84,9 @@ void preecho_sb_fx( UWord16 tmp_u16; Word32 mean_prev_hb_fx_loc, mean_prev_nc_fx_loc, mean_prev_fx_loc; /* */ Word16 q16p1, qmemp1, qtmp; - +#ifdef OPT_STEREO_32KBPS_V1 + Word16 shift_q = sub( 15, q_sig32 ); +#endif /* OPT_STEREO_32KBPS_V1 */ q16p1 = add( q_sig16, 1 ); qmemp1 = q16p1; @@ -137,6 +139,18 @@ void preecho_sb_fx( /* len3xLp20 = framelength/2-(short)((float)framelength*N_ZERO_MDCT/FRAME_SIZE_MS); in float*/ fxptr1 = imdct_mem_fx; +#ifdef OPT_STEREO_32KBPS_V1 + FOR( i = 0; i < len3xLp20; i++ ) + { + *fxptr1++ = negate( extract_h( L_shl_sat( wtda_audio_fx[len3xLp20 - 1 - i], shift_q ) ) ); /*Q-1*/ + move16(); /*convert to Word16 Q-1 with saturation (saturation not a problem here) */ + } + FOR( i = 0; i < framelength >> 1; i++ ) + { + *fxptr1++ = negate( extract_h( L_shl_sat( wtda_audio_fx[i], shift_q ) ) ); /*Q-1*/ + move16(); /*convert to Word16 Q-1 with saturation (saturation not a problem here) */ + } +#else /* OPT_STEREO_32KBPS_V1 */ fx32ptr1 = wtda_audio_fx + len3xLp20 - 1; /*q_sig32*/ FOR( i = 0; i < len3xLp20; i++ ) { @@ -148,7 +162,10 @@ void preecho_sb_fx( *fxptr1++ = negate( extract_h( L_shl_sat( wtda_audio_fx[i], sub( 15, q_sig32 ) ) ) ); /*Q-1*/ move16(); /*convert to Word16 Q-1 with saturation (saturation not a problem here) */ } +#endif /* OPT_STEREO_32KBPS_V1 */ + qmemp1 = 0; /*already in q-1*/ + move16(); subframelength = shr( framelength, LOG2_NUMSF ); /*Q0*/ subsubframelength = shr( subframelength, log2_num_subsubframes ); /*Q0*/ @@ -391,6 +408,17 @@ void preecho_sb_fx( move16(); FOR( i = 1; i <= NUMSF; i++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + max_es_hb_fx = L_max( max_es_hb_fx, es_mdct_hb_fx[i] ); /* max energy low band, 8 present and 1 future subframes */ + + max_es_fx = L_max( max_es_fx, es_mdct_fx[i] ); /* max energy low band, 8 present and 1 future subframes */ + + if ( GE_32( es_mdct_fx[i], max_es_fx ) ) /* '=' to handle the first window*/ + { + maxind = i; + move16(); + } +#else /* OPT_STEREO_32KBPS_V1 */ IF( GE_32( es_mdct_hb_fx[i], max_es_hb_fx ) ) /* '=' to handle the first window*/ { max_es_hb_fx = L_add( es_mdct_hb_fx[i], 0 ); /* max energy low band, 8 present and 1 future subframes */ @@ -402,6 +430,7 @@ void preecho_sb_fx( maxind = i; move16(); } +#endif /* OPT_STEREO_32KBPS_V1 */ } cnt2 = cnt5 = 0; diff --git a/lib_dec/acelp_core_dec_ivas_fx.c b/lib_dec/acelp_core_dec_ivas_fx.c index 6e56bc154..5c16bc3e9 100644 --- a/lib_dec/acelp_core_dec_ivas_fx.c +++ b/lib_dec/acelp_core_dec_ivas_fx.c @@ -1908,7 +1908,11 @@ ivas_error acelp_core_dec_ivas_fx( scale_sig32( realBuffer_fx[i], CLDFB_NO_CHANNELS_MAX, Q_real ); // Q_real scale_sig32( imagBuffer_fx[i], CLDFB_NO_CHANNELS_MAX, Q_imag ); // Q_imag } - scale_sig32_r( st->cldfbSynHB->cldfb_state_fx, st->cldfbSynHB->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); // (Q_real-1) +#ifdef OPT_STEREO_32KBPS_V1 + scale_sig32( st->cldfbSynHB->cldfb_state_fx, st->cldfbSynHB->p_filter_length, sub( Q_real, Q11 ) ); // Q10 - > (Q_real-1) +#else /* OPT_STEREO_32KBPS_V1 */ + scale_sig32_r( st->cldfbSynHB->cldfb_state_fx, st->cldfbSynHB->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); // (Q_real-1) +#endif /* OPT_STEREO_32KBPS_V1 */ st->cldfbSynHB->Q_cldfb_state = sub( Q_real, 1 ); move16(); #ifndef MSAN_FIX @@ -1989,7 +1993,11 @@ ivas_error acelp_core_dec_ivas_fx( scale_sig32( realBuffer_fx[i], CLDFB_NO_CHANNELS_MAX, Q_real ); // Q_real scale_sig32( imagBuffer_fx[i], CLDFB_NO_CHANNELS_MAX, Q_real ); // Q_real } - scale_sig32_r( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); //(Q_real - 1) +#ifdef OPT_STEREO_32KBPS_V1 + scale_sig32( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( Q_real, Q11 ) ); // Q10 - > (Q_real-1) +#else /* OPT_STEREO_32KBPS_V1 */ + scale_sig32_r( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); //(Q_real - 1) +#endif /* OPT_STEREO_32KBPS_V1 */ st->cldfbSyn->Q_cldfb_state = sub( Q_real, 1 ); move16(); #ifndef MSAN_FIX @@ -2096,7 +2104,11 @@ ivas_error acelp_core_dec_ivas_fx( scale_sig32( realBuffer_fx[i], CLDFB_NO_CHANNELS_MAX, Q_real ); // Q_real scale_sig32( imagBuffer_fx[i], CLDFB_NO_CHANNELS_MAX, Q_real ); // Q_real } - scale_sig32_r( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); //(Q_real - 1) +#ifdef OPT_STEREO_32KBPS_V1 + scale_sig32( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( Q_real, Q11 ) ); // Q10 - > (Q_real-1) +#else /* OPT_STEREO_32KBPS_V1 */ + scale_sig32_r( st->cldfbSyn->cldfb_state_fx, st->cldfbSyn->p_filter_length, sub( sub( Q_real, 1 ), Q10 ) ); //(Q_real - 1) +#endif /* OPT_STEREO_32KBPS_V1 */ #ifndef MSAN_FIX Scale_sig32( synth_fx, L_FRAME48k, Q_real - 1 ); #endif diff --git a/lib_dec/ivas_stereo_cng_dec.c b/lib_dec/ivas_stereo_cng_dec.c index c8985d7cd..b3bddae59 100644 --- a/lib_dec/ivas_stereo_cng_dec.c +++ b/lib_dec/ivas_stereo_cng_dec.c @@ -1450,7 +1450,11 @@ void stereo_cna_update_params_fx( { IF( EQ_16( hCPE->nchan_out, 1 ) ) { +#ifdef OPT_STEREO_32KBPS_V1 + c_LR_fx = MAX_32; +#else /* OPT_STEREO_32KBPS_V1 */ c_LR_fx = MAX_WORD16; +#endif /* OPT_STEREO_32KBPS_V1 */ move32(); c_ILD_fx = 0; move32(); @@ -1512,8 +1516,12 @@ void stereo_cna_update_params_fx( dotLR_fx = Mpy_32_32( W_extract_l( dotLR_fx ), energy_xy_fx ); /* dotLR_fx_q + ((31 - temp_res_q) - 31)) */ dotLR_fx_q = add( dotLR_fx_q, sub( sub( 31, temp_res_q ), 31 ) ); dotLR_fx = W_deposit32_l( L_shl_sat( W_extract_l( dotLR_fx ), sub( 31, dotLR_fx_q ) ) ); /* Q31 */ - /* estimate L/R correlation factor and ILD in time domain */ + /* estimate L/R correlation factor and ILD in time domain */ +#ifdef OPT_STEREO_32KBPS_V1 + c_LR_fx = W_extract_l( dotLR_fx ); /* Q31 */ +#else /* OPT_STEREO_32KBPS_V1 */ c_LR_fx = extract_h( W_extract_l( dotLR_fx ) ); /* Q15 */ +#endif /* OPT_STEREO_32KBPS_V1 */ temp_res_q = 0; move16(); @@ -1552,15 +1560,24 @@ void stereo_cna_update_params_fx( /* update of long-term ILD and LR correlation factors for stereo CNA */ IF( !hFdCngDec->first_cna_noise_updated ) { +#ifdef OPT_STEREO_32KBPS_V1 + hFdCngDec->cna_LR_LT_fx = extract_h( c_LR_fx ); +#else /* OPT_STEREO_32KBPS_V1 */ hFdCngDec->cna_LR_LT_fx = extract_l( c_LR_fx ); +#endif /* OPT_STEREO_32KBPS_V1 */ move16(); hFdCngDec->cna_ILD_LT_fx = extract_l( c_ILD_fx ); move16(); } ELSE { +#ifdef OPT_STEREO_32KBPS_V1 + hFdCngDec->cna_LR_LT_fx = extract_h( L_add_sat( Mpy_32_16_1( STEREO_CNA_LR_CORR_LT_FILT_FX, hFdCngDec->cna_LR_LT_fx ), + Mpy_32_32( ONE_IN_Q31 - STEREO_CNA_LR_CORR_LT_FILT_FX, c_LR_fx ) ) ); /* Q31 */ +#else /* OPT_STEREO_32KBPS_V1 */ hFdCngDec->cna_LR_LT_fx = extract_h( L_add_sat( Mpy_32_16_1( STEREO_CNA_LR_CORR_LT_FILT_FX, hFdCngDec->cna_LR_LT_fx ), Mpy_32_16_1( L_sub( ONE_IN_Q31, STEREO_CNA_LR_CORR_LT_FILT_FX ), extract_l( c_LR_fx ) ) ) ); /* Q31 */ +#endif /* OPT_STEREO_32KBPS_V1 */ move16(); hFdCngDec->cna_ILD_LT_fx = extract_h( L_add_sat( Mpy_32_16_1( STEREO_CNA_ILD_LT_FILT_FX, hFdCngDec->cna_ILD_LT_fx ), diff --git a/lib_dec/ivas_stereo_dft_dec_fx.c b/lib_dec/ivas_stereo_dft_dec_fx.c index ca4cfdb69..7c1c892aa 100644 --- a/lib_dec/ivas_stereo_dft_dec_fx.c +++ b/lib_dec/ivas_stereo_dft_dec_fx.c @@ -1519,8 +1519,10 @@ void stereo_dft_dec_fx( HANDLE_FD_CNG_COM hFdCngCom = hFdCngDec->hFdCngCom; Word16 *cna_seed = &( hFdCngCom->seed ); Word32 DFT_W, DFT_Y; +#ifndef OPT_STEREO_32KBPS_V1 Word16 q_samp_ratio = Q15; move16(); +#endif /* OPT_STEREO_32KBPS_V1 */ output_frame = (Word16) Mpy_32_32( L_add( st0->output_Fs, FRAMES_PER_SEC_BY_2 ), ONE_BY_FRAMES_PER_SEC_Q31 ); /* Q0 */ @@ -1528,8 +1530,12 @@ void stereo_dft_dec_fx( * Initialization *-----------------------------------------------------------------*/ +#ifdef OPT_STEREO_32KBPS_V1 + samp_ratio = divide3232( st0->sr_core, st0->output_Fs ); // Q15 +#else /* OPT_STEREO_32KBPS_V1 */ samp_ratio = (Word16) BASOP_Util_Divide3232_Scale( st0->sr_core, st0->output_Fs, &q_samp_ratio ); samp_ratio = shr( samp_ratio, sub( Q15 - Q12, q_samp_ratio ) ); +#endif /* OPT_STEREO_32KBPS_V1 */ stop = shr( STEREO_DFT32MS_N_32k, 1 ); @@ -2102,10 +2108,17 @@ void stereo_dft_dec_fx( /*Nyquist Freq.*/ IF( EQ_16( hStereoDft->band_limits[b], shr( hStereoDft->NFFT, 1 ) ) ) { +#ifdef OPT_STEREO_32KBPS_V1 + DFT_L[1] = Madd_32_16( pDFT_DMX[1], pDFT_DMX[1], g ); /* qDFT */ + move32(); + DFT_R[1] = Msub_32_16( pDFT_DMX[1], pDFT_DMX[1], g ); /* qDFT */ + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ DFT_L[1] = L_add( pDFT_DMX[1], Mpy_32_16_1( pDFT_DMX[1], g ) ); /* qDFT */ move32(); DFT_R[1] = L_sub( pDFT_DMX[1], Mpy_32_16_1( pDFT_DMX[1], g ) ); /* qDFT */ move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ DFT_L[1] = Mpy_32_16_1( DFT_L[1], INV_SQRT2_FX_Q15 ); /* qDFT */ move32(); DFT_R[1] = Mpy_32_16_1( DFT_R[1], INV_SQRT2_FX_Q15 ); /* qDFT */ @@ -2238,27 +2251,52 @@ void stereo_dft_dec_fx( q_cna_level = sub( Q31, add( q_cna_level, Q16 ) ); /* generate comfort noise from gaussian noise and add to the decoded DFT spectrum */ +#ifdef OPT_STEREO_32KBPS_V1 + Word16 shift = sub( q_cna_level, hStereoDft->q_dft ); +#endif /* OPT_STEREO_32KBPS_V1 */ N1 = L_shl( Mpy_32_16_1( scale_fact, rand_gauss_fix( &ftmp, cna_seed ) ), Q2 ); /* q_cna_level */ N2 = L_shl( Mpy_32_16_1( scale_fact, rand_gauss_fix( &ftmp, cna_seed ) ), Q2 ); /* q_cna_level */ - l_tmp = L_add( Madd_32_16( N1, N1, g ), Mpy_32_16_1( N2, gamma ) ); /* q_cna_level */ - l_tmp = L_shr( l_tmp, sub( q_cna_level, hStereoDft->q_dft ) ); /* q_dft */ - DFT_L[2 * i] = L_add( DFT_L[2 * i], l_tmp ); /* q_dft */ +#ifdef OPT_STEREO_32KBPS_V1 + l_tmp = Madd_32_16( Madd_32_16( N1, N1, g ), N2, gamma ); /* q_cna_level */ + l_tmp = L_shr( l_tmp, shift ); /* q_dft */ +#else /* OPT_STEREO_32KBPS_V1 */ + l_tmp = L_add( Madd_32_16( N1, N1, g ), Mpy_32_16_1( N2, gamma ) ); /* q_cna_level */ + l_tmp = L_shr( l_tmp, sub( q_cna_level, hStereoDft->q_dft ) ); /* q_dft */ +#endif /* OPT_STEREO_32KBPS_V1 */ + DFT_L[2 * i] = L_add( DFT_L[2 * i], l_tmp ); /* q_dft */ move32(); +#ifdef OPT_STEREO_32KBPS_V1 + l_tmp = Msub_32_16( Msub_32_16( N1, N1, g ), N2, gamma ); /* q_cna_level */ + l_tmp = L_shr( l_tmp, shift ); /* q_dft */ + DFT_R[2 * i] = L_add( DFT_R[2 * i], l_tmp ); /* q_dft */ +#else /* OPT_STEREO_32KBPS_V1 */ l_tmp = L_sub( Msub_32_16( N1, N1, g ), Mpy_32_16_1( N2, gamma ) ); /* q_cna_level */ l_tmp = L_shr( l_tmp, sub( q_cna_level, hStereoDft->q_dft ) ); /* q_dft */ DFT_R[2 * i] = L_add( DFT_R[2 * i], l_tmp ); /* q_dft */ +#endif /* OPT_STEREO_32KBPS_V1 */ move32(); N1 = L_shl( Mpy_32_16_1( scale_fact, rand_gauss_fix( &ftmp, cna_seed ) ), Q2 ); /* q_cna_level */ N2 = L_shl( Mpy_32_16_1( scale_fact, rand_gauss_fix( &ftmp, cna_seed ) ), Q2 ); /* q_cna_level */ - l_tmp = L_add( Madd_32_16( N1, N1, g ), Mpy_32_16_1( N2, gamma ) ); /* q_cna_level */ - l_tmp = L_shr( l_tmp, sub( q_cna_level, hStereoDft->q_dft ) ); /* q_dft */ - DFT_L[2 * i + 1] = L_add( DFT_L[2 * i + 1], l_tmp ); /* q_dft */ +#ifdef OPT_STEREO_32KBPS_V1 + l_tmp = Madd_32_16( Madd_32_16( N1, N1, g ), N2, gamma ); /* q_cna_level */ + l_tmp = L_shr( l_tmp, shift ); /* q_dft */ + DFT_L[2 * i + 1] = L_add( DFT_L[2 * i + 1], l_tmp ); /* q_dft */ + move32(); + l_tmp = Msub_32_16( Msub_32_16( N1, N1, g ), N2, gamma ); /* q_cna_level */ + l_tmp = L_shr( l_tmp, shift ); /* q_dft */ + DFT_R[2 * i + 1] = L_add( DFT_R[2 * i + 1], l_tmp ); /* q_dft */ + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ + l_tmp = L_add( Madd_32_16( N1, N1, g ), Mpy_32_16_1( N2, gamma ) ); /* q_cna_level */ + l_tmp = L_shr( l_tmp, sub( q_cna_level, hStereoDft->q_dft ) ); /* q_dft */ + DFT_L[2 * i + 1] = L_add( DFT_L[2 * i + 1], l_tmp ); /* q_dft */ move32(); l_tmp = L_sub( Msub_32_16( N1, N1, g ), Mpy_32_16_1( N2, gamma ) ); /* q_cna_level */ l_tmp = L_shr( l_tmp, sub( q_cna_level, hStereoDft->q_dft ) ); /* q_dft */ DFT_R[2 * i + 1] = L_add( DFT_R[2 * i + 1], l_tmp ); /* q_dft */ move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ } } @@ -2346,8 +2384,12 @@ static void stereo_dft_compute_td_stefi_params_fx( return; } +#ifdef OPT_STEREO_32KBPS_V1 + bin0 = round_fx( L_mult0( hStereoDft->NFFT, samp_ratio ) ); +#else /* OPT_STEREO_32KBPS_V1 */ bin0 = extract_l( L_shr_r( Mpy_32_16_1( (Word32) hStereoDft->NFFT, samp_ratio ), 1 ) ); /* Q0 */ bin0 = shl( bin0, Q3 ); +#endif /* OPT_STEREO_32KBPS_V1 */ bin0 = s_min( bin0, hStereoDft->band_limits[hStereoDft->nbands] ); /* Q0 */ b = hStereoDft->nbands; /* Q0 */ move16(); @@ -2512,14 +2554,18 @@ static void stereo_dft_dequantize_ipd_fx( *-------------------------------------------------------------------------*/ void stereo_dft_generate_res_pred_fx( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ - const Word16 samp_ratio, /* i : sampling ratio Q13*/ - Word32 *pDFT_DMX, /* i : downmix signal qDFT*/ - Word32 *DFT_PRED_RES, /* o : residual prediction signal qDFT*/ - Word32 *pPredGain, /* i : residual prediction gains Q31*/ - const Word16 k, /* i : subframe index Q0*/ - Word32 *ap_filt_DMX, /* i : enhanced stereo filling signal qDFT*/ - Word16 *stop, /* o : last FD stereo filling bin Q0*/ - const Word16 bfi /* i : BFI flag Q0*/ +#ifdef OPT_STEREO_32KBPS_V1 + const Word16 samp_ratio, /* i : sampling ratio Q15*/ +#else /* OPT_STEREO_32KBPS_V1 */ + const Word16 samp_ratio, /* i : sampling ratio Q13*/ +#endif /* OPT_STEREO_32KBPS_V1 */ + Word32 *pDFT_DMX, /* i : downmix signal qDFT*/ + Word32 *DFT_PRED_RES, /* o : residual prediction signal qDFT*/ + Word32 *pPredGain, /* i : residual prediction gains Q31*/ + const Word16 k, /* i : subframe index Q0*/ + Word32 *ap_filt_DMX, /* i : enhanced stereo filling signal qDFT*/ + Word16 *stop, /* o : last FD stereo filling bin Q0*/ + const Word16 bfi /* i : BFI flag Q0*/ ) { /* general variables */ @@ -2529,7 +2575,10 @@ void stereo_dft_generate_res_pred_fx( Word16 lb_stefi_start_band; /* variables for enhanced stereo filling */ - Word16 norm_fac, q_norm_fac, lim_norm_fac; + Word16 norm_fac, q_norm_fac; +#ifndef OPT_STEREO_32KBPS_V1 + Word16 lim_norm_fac; +#endif /* OPT_STEREO_32KBPS_V1 */ Word16 q_sqrt; Word16 alpha; // gain_limit; @@ -2543,8 +2592,12 @@ void stereo_dft_generate_res_pred_fx( Word32 pred_gain_avg; Word32 g2; Word16 nbands_respred; +#ifdef OPT_STEREO_32KBPS_V1 + Word16 q_new, diff; +#else /* OPT_STEREO_32KBPS_V1 */ q_norm_fac = 0; move16(); +#endif /* OPT_STEREO_32KBPS_V1 */ push_wmops( "gen_respred" ); /* smoothing and limiting parameters */ @@ -2566,8 +2619,12 @@ void stereo_dft_generate_res_pred_fx( /* In ACELP mode the downmix signal is not available in bandwidth extension area. * * Therefore, the downmix energy in the corresponding subbands is estimated. */ +#ifdef OPT_STEREO_32KBPS_V1 + bin0 = round_fx( L_mult0( hStereoDft->NFFT, samp_ratio ) ); +#else /* OPT_STEREO_32KBPS_V1 */ bin0 = (Word16) ( L_shr( L_add( L_mult0( hStereoDft->NFFT, samp_ratio ), ONE_IN_Q12 ), Q12 + 1 ) ); /* Q0 */ move16(); +#endif /* OPT_STEREO_32KBPS_V1 */ bin0 = s_min( bin0, hStereoDft->band_limits[hStereoDft->nbands] ); /* Q0 */ b = hStereoDft->nbands; move16(); @@ -2611,37 +2668,78 @@ void stereo_dft_generate_res_pred_fx( /* calculate band energies (low band only in case of ACELP) */ FOR( i = hStereoDft->band_limits[b]; i < min( hStereoDft->band_limits[b + 1], bin0 ); i++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + dmx_nrg_64bit = W_mac_32_32( W_mac_32_32( dmx_nrg_64bit, pDFT_DMX[2 * i], pDFT_DMX[2 * i] ), + pDFT_DMX[2 * i + 1], pDFT_DMX[2 * i + 1] ); /* 2 * q_dft + 1 */ + rev_nrg_64bit = W_mac_32_32( W_mac_32_32( rev_nrg_64bit, ap_filt_DMX[2 * i], ap_filt_DMX[2 * i] ), + ap_filt_DMX[2 * i + 1], ap_filt_DMX[2 * i + 1] ); /* 2 * q_dft + 1 */ +#else /* OPT_STEREO_32KBPS_V1 */ dmx_nrg_64bit = W_add( dmx_nrg_64bit, W_add( W_mult0_32_32( pDFT_DMX[2 * i], pDFT_DMX[2 * i] ), W_mult0_32_32( pDFT_DMX[2 * i + 1], pDFT_DMX[2 * i + 1] ) ) ); /* 2 * q_dft */ rev_nrg_64bit = W_add( rev_nrg_64bit, W_add( W_mult0_32_32( ap_filt_DMX[2 * i], ap_filt_DMX[2 * i] ), W_mult0_32_32( ap_filt_DMX[2 * i + 1], ap_filt_DMX[2 * i + 1] ) ) ); /* 2 * q_dft */ +#endif /* OPT_STEREO_32KBPS_V1 */ } + +#ifdef OPT_STEREO_32KBPS_V1 + q_new = add( shl( hStereoDft->q_dft, 1 ), 1 ); + q_shift = W_norm( dmx_nrg_64bit ); + dmx_nrg = W_shl_sat_l( dmx_nrg_64bit, sub( q_shift, 32 ) ); // 2 * hStereoDft->q_dft + 1 - (q_shift - 32) + dmx_nrg_q = add( q_new, sub( q_shift, 32 ) ); + q_shift = W_norm( rev_nrg_64bit ); + rev_nrg = W_shl_sat_l( rev_nrg_64bit, sub( q_shift, 32 ) ); // 2 * hStereoDft->q_dft + 1 - (q_shift - 32) + rev_nrg_q = add( q_new, sub( q_shift, 32 ) ); + move16(); +#else /* OPT_STEREO_32KBPS_V1 */ q_shift = W_norm( dmx_nrg_64bit ); dmx_nrg = W_extract_l( W_shl( dmx_nrg_64bit, sub( q_shift, 32 ) ) ); // 2 * hStereoDft->q_dft + (q_shift - 32) dmx_nrg_q = add( imult1616( 2, hStereoDft->q_dft ), sub( q_shift, 32 ) ); q_shift = W_norm( rev_nrg_64bit ); rev_nrg = W_extract_l( W_shl( rev_nrg_64bit, sub( q_shift, 32 ) ) ); // 2 * hStereoDft->q_dft + (q_shift - 32) rev_nrg_q = add( imult1616( 2, hStereoDft->q_dft ), sub( q_shift, 32 ) ); +#endif /* OPT_STEREO_32KBPS_V1 */ /* Reach a common Q for dmx_nrg and rev_nrg */ q_com = s_min( dmx_nrg_q, rev_nrg_q ); dmx_nrg = L_shl( dmx_nrg, sub( q_com, dmx_nrg_q ) ); /* q_com */ rev_nrg = L_shl( rev_nrg, sub( q_com, rev_nrg_q ) ); /* q_com */ + +#ifdef OPT_STEREO_32KBPS_V1 + diff = sub( hStereoDft->q_smoothed_nrg, q_com ); + IF( diff < 0 ) +#else /* OPT_STEREO_32KBPS_V1 */ IF( LT_16( hStereoDft->q_smoothed_nrg, q_com ) ) +#endif /* OPT_STEREO_32KBPS_V1 */ { +#ifdef OPT_STEREO_32KBPS_V1 + rev_nrg = L_shl( rev_nrg, shl( diff, 1 ) ); + dmx_nrg = L_shl( dmx_nrg, shl( diff, 1 ) ); +#else /* OPT_STEREO_32KBPS_V1 */ rev_nrg = L_shr( rev_nrg, shl( sub( q_com, hStereoDft->q_smoothed_nrg ), 1 ) ); dmx_nrg = L_shr( dmx_nrg, shl( sub( q_com, hStereoDft->q_smoothed_nrg ), 1 ) ); +#endif /* OPT_STEREO_32KBPS_V1 */ q_com = hStereoDft->q_smoothed_nrg; move16(); } +#ifdef OPT_STEREO_32KBPS_V1 + ELSE +#else /* OPT_STEREO_32KBPS_V1 */ ELSE IF( GT_16( hStereoDft->q_smoothed_nrg, q_com ) ) +#endif /* OPT_STEREO_32KBPS_V1 */ { +#ifdef OPT_STEREO_32KBPS_V1 + hStereoDft->smooth_res_nrg_fx[b] = L_shr( hStereoDft->smooth_res_nrg_fx[b], shl( diff, 1 ) ); /* hStereoDft->q_smoothed_nrg */ + move32(); + hStereoDft->smooth_dmx_nrg_fx[b] = L_shr( hStereoDft->smooth_dmx_nrg_fx[b], shl( diff, 1 ) ); /* hStereoDft->q_smoothed_nrg */ + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ hStereoDft->smooth_res_nrg_fx[b] = L_shr( hStereoDft->smooth_res_nrg_fx[b], shl( sub( hStereoDft->q_smoothed_nrg, q_com ), 1 ) ); /* hStereoDft->q_smoothed_nrg */ move32(); hStereoDft->smooth_dmx_nrg_fx[b] = L_shr( hStereoDft->smooth_dmx_nrg_fx[b], shl( sub( hStereoDft->q_smoothed_nrg, q_com ), 1 ) ); /* hStereoDft->q_smoothed_nrg */ move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ q_smoothed_nrg_local[b] = q_com; move16(); } @@ -2651,9 +2749,38 @@ void stereo_dft_generate_res_pred_fx( hStereoDft->smooth_dmx_nrg_fx[b] = Madd_32_16( Mpy_32_16_1( hStereoDft->smooth_dmx_nrg_fx[b], alpha ), dmx_nrg, sub( (Word16) ( 0x7FFF ), alpha ) ); /* hStereoDft->q_smoothed_nrg */ move32(); +#ifdef OPT_STEREO_32KBPS_V1 + // Compute norm_fac in Q14 + norm_fac = MAX_16; + move16(); +#endif /* OPT_STEREO_32KBPS_V1 */ /* normalization factor */ IF( hStereoDft->smooth_res_nrg_fx[b] != 0 ) { +#ifdef OPT_STEREO_32KBPS_V1 + norm_fac = 0; + move16(); + IF( hStereoDft->smooth_dmx_nrg_fx[b] != 0 ) + { + Word16 quo, quo_e; + Word32 prod; + + norm_fac = BASOP_Util_Divide3232_Scale( hStereoDft->smooth_dmx_nrg_fx[b], hStereoDft->smooth_res_nrg_fx[b], &q_norm_fac ); /* q_norm_fac */ + norm_fac = Sqrt16( norm_fac, &q_norm_fac ); + + quo = BASOP_Util_Divide1616_Scale( 32767, norm_fac, &quo_e ); /* q_norm_fac */ + quo_e = sub( quo_e, q_norm_fac ); + quo = shl_sat( quo, sub( quo_e, 1 ) ); // Q14 + quo = s_max( 13107 /*0.8 in Q14 */, quo ); + quo = s_min( quo, 20480 /* 1.25 in Q14*/ ); + + prod = L_mult( norm_fac, quo ); // exp:q_norm_fac+1 + // Bring to Q30 + prod = L_shl_sat( prod, q_norm_fac ); + + norm_fac = extract_h( prod ); + } +#else /* OPT_STEREO_32KBPS_V1 */ norm_fac = BASOP_Util_Divide3232_Scale( hStereoDft->smooth_dmx_nrg_fx[b], hStereoDft->smooth_res_nrg_fx[b], &q_norm_fac ); /* q_norm_fac */ norm_fac = Sqrt16( norm_fac, &q_norm_fac ); IF( norm_fac != 0 ) @@ -2709,7 +2836,9 @@ void stereo_dft_generate_res_pred_fx( } } } +#endif /* OPT_STEREO_32KBPS_V1 */ } +#ifndef OPT_STEREO_32KBPS_V1 ELSE { norm_fac = MAX_16; @@ -2717,13 +2846,21 @@ void stereo_dft_generate_res_pred_fx( q_norm_fac = Q1; move16(); } +#endif /* OPT_STEREO_32KBPS_V1 */ FOR( i = hStereoDft->band_limits[b]; i < min( hStereoDft->band_limits[b + 1], bin0 ); i++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + DFT_PRED_RES[2 * i] = L_shl( Mpy_32_32( Mpy_32_16_1( pPredGain[b], norm_fac ), ap_filt_DMX[2 * i] ), 1 ); /* q_dft */ + move32(); + DFT_PRED_RES[2 * i + 1] = L_shl( Mpy_32_32( Mpy_32_16_1( pPredGain[b], norm_fac ), ap_filt_DMX[2 * i + 1] ), 1 ); /* q_dft */ + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ DFT_PRED_RES[2 * i] = L_shl( Mpy_32_32( Mpy_32_16_1( pPredGain[b], norm_fac ), ap_filt_DMX[2 * i] ), q_norm_fac ); /* q_dft */ move32(); DFT_PRED_RES[2 * i + 1] = L_shl( Mpy_32_32( Mpy_32_16_1( pPredGain[b], norm_fac ), ap_filt_DMX[2 * i + 1] ), q_norm_fac ); /* q_dft */ move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ } } diff --git a/lib_dec/ivas_stereo_switching_dec.c b/lib_dec/ivas_stereo_switching_dec.c index c6466183b..c8a59cd6a 100644 --- a/lib_dec/ivas_stereo_switching_dec.c +++ b/lib_dec/ivas_stereo_switching_dec.c @@ -2343,15 +2343,20 @@ static Word32 ncross_corr_self_fx( { Word64 c_c_fx; Word32 c_c_fx_return; + Word64 energy_x_fx, energy_y_fx; +#ifndef OPT_STEREO_32KBPS_V1 Word16 c_c_fx_q; - Word64 energy_xy_fx, energy_x_fx, energy_y_fx; + Word64 energy_xy_fx; +#endif /* OPT_STEREO_32KBPS_V1 */ UWord16 j; Word32 *signal_a_fx, *signal_b_fx; Word32 temp_x, temp_y; Word16 headroom_left_x, headroom_left_y; +#ifndef OPT_STEREO_32KBPS_V1 Word16 x_inv_q, y_inv_q; Word16 x_q, y_q; Word16 res_q; +#endif /* OPT_STEREO_32KBPS_V1 */ c_c_fx = 0; move64(); energy_x_fx = 0; @@ -2362,13 +2367,43 @@ static Word32 ncross_corr_self_fx( signal_b_fx = &signal_fx[y]; /* Q11 */ FOR( j = 0; j < corr_len; j += subsampling ) { +#ifdef OPT_STEREO_32KBPS_V1 + c_c_fx = W_mac_32_32( c_c_fx, signal_a_fx[j], signal_b_fx[j] ); /* 2 * Q11 + 1*/ + energy_x_fx = W_mac_32_32( energy_x_fx, signal_a_fx[j], signal_a_fx[j] ); /* 2 * Q11+ 1 */ + energy_y_fx = W_mac_32_32( energy_y_fx, signal_b_fx[j], signal_b_fx[j] ); /* 2 * Q11+ 1 */ +#else /* OPT_STEREO_32KBPS_V1 */ c_c_fx = W_add( c_c_fx, W_mult0_32_32( ( signal_a_fx[j] ), ( signal_b_fx[j] ) ) ); /* 2 * Q11 */ energy_x_fx = W_add( energy_x_fx, W_mult0_32_32( ( signal_a_fx[j] ), ( signal_a_fx[j] ) ) ); /* 2 * Q11 */ energy_y_fx = W_add( energy_y_fx, W_mult0_32_32( ( signal_b_fx[j] ), ( signal_b_fx[j] ) ) ); /* 2 * Q11 */ +#endif /* OPT_STEREO_32KBPS_V1 */ } headroom_left_x = W_norm( energy_x_fx ); headroom_left_y = W_norm( energy_y_fx ); +#ifdef OPT_STEREO_32KBPS_V1 + temp_x = W_extract_h( W_shl( energy_x_fx, headroom_left_x ) ); // Q23 + headroom_left_x -32 + temp_y = W_extract_h( W_shl( energy_y_fx, headroom_left_y ) ); // Q23 + headroom_left_y -32 + Word64 prod = W_mult0_32_32( temp_x, temp_y ); // Q(headroom_left_x + headroom_left_y - 18) + Word16 q_prod = W_norm( prod ); + Word32 energy = W_extract_h( W_shl( prod, q_prod ) ); // Q(headroom_left_x + headroom_left_y + q_prod - 18) - 32 + q_prod = sub( 81, add( add( headroom_left_x, headroom_left_y ), q_prod ) ); + energy = Sqrt32( energy, &q_prod ); + + IF( LT_32( energy, L_shl_sat( 1, sub( 31, q_prod ) ) ) ) + { + c_c_fx_return = W_shl_sat_l( c_c_fx, 31 - ( 2 * OUTPUT_Q + 1 ) ); // Q31 + } + ELSE + { + // Maximize c_c_fx + Word16 q_cc = W_norm( c_c_fx ); + Word32 num = W_extract_h( W_shl( c_c_fx, q_cc ) ); // Q(23 + q_cc - 32) -> e(40 - q_cc) + Word16 quo_e; + num = BASOP_Util_Divide3232_Scale_cadence( num, energy, &quo_e ); + quo_e = add( sub( sub( 40, q_cc ), q_prod ), quo_e ); + c_c_fx_return = L_shl_sat( num, quo_e ); // Q31 + } +#else /* OPT_STEREO_32KBPS_V1 */ IF( LT_16( headroom_left_x, 32 ) ) { energy_x_fx = W_shr( energy_x_fx, sub( 32, headroom_left_x ) ); /* 2 * Q11 - (32 -headroom_left_x) */ @@ -2440,6 +2475,7 @@ static Word32 ncross_corr_self_fx( c_c_fx_return = W_extract_l( c_c_fx ); /* Q31 */ move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ return c_c_fx_return; } diff --git a/lib_dec/ivas_stereo_td_dec.c b/lib_dec/ivas_stereo_td_dec.c index c975da8d6..e31c9f6bc 100644 --- a/lib_dec/ivas_stereo_td_dec.c +++ b/lib_dec/ivas_stereo_td_dec.c @@ -384,7 +384,7 @@ void tdm_upmix_plain_fx( const Word32 PCh_2_L_fx[], /* i : primary channel Qx*/ const Word32 SCh_2_R_fx[], /* i : secondary channel Qx*/ const Word32 LR_ratio_fx, /* i : mixing ratio Q31*/ - const Word32 inv_den_LR_ratio_fx, /* i : inverse mixing ration Q31*/ + const Word32 inv_den_LR_ratio_fx, /* i : inverse mixing ration Q30*/ const Word16 start_index, /* i : start index Q0*/ const Word16 end_index, /* i : end index Q0*/ const Word16 plus_minus_flag /* i : plus/minus flag Q0*/ @@ -396,16 +396,37 @@ void tdm_upmix_plain_fx( { FOR( i = start_index; i < end_index; i++ ) { +#ifdef OPT_STEREO_32KBPS_V1 + Word32 temp_left = Madd_32_32( SCh_2_R_fx[i], L_sub( PCh_2_L_fx[i], SCh_2_R_fx[i] ), LR_ratio_fx ); /* Qx */ + Left_fx[i] = W_shl_sat_l( W_mult0_32_32( temp_left, inv_den_LR_ratio_fx ), -30 ); /* Qx */ + move32(); + Word32 temp_right = Msub_32_32( PCh_2_L_fx[i], L_add( PCh_2_L_fx[i], SCh_2_R_fx[i] ), LR_ratio_fx ); /* Qx */ + Right_fx[i] = W_shl_sat_l( W_mult0_32_32( temp_right, inv_den_LR_ratio_fx ), -30 ); /* Qx */ + move32(); +#else /* OPT_STEREO_32KBPS_V1 */ Word32 temp_left = L_add( Mpy_32_32( L_sub( PCh_2_L_fx[i], SCh_2_R_fx[i] ), LR_ratio_fx ), SCh_2_R_fx[i] ); /* Qx */ Left_fx[i] = L_shl_sat( Mpy_32_32( temp_left, inv_den_LR_ratio_fx ), 1 ); /* Qx + 1 */ move32(); Word32 temp_right = L_add( Mpy_32_32( L_add( PCh_2_L_fx[i], SCh_2_R_fx[i] ), L_negate( LR_ratio_fx ) ), PCh_2_L_fx[i] ); /* Qx */ Right_fx[i] = L_shl_sat( Mpy_32_32( temp_right, inv_den_LR_ratio_fx ), 1 ); /* Qx + 1 */ move32(); +#endif /* OPT_STEREO_32KBPS_V1 */ } } ELSE { +#ifdef OPT_STEREO_32KBPS_V1 + Word32 inv_den_LR_ratio_fx_neg = L_negate( inv_den_LR_ratio_fx ); + FOR( i = start_index; i < end_index; i++ ) + { + Word32 temp_left = Msub_32_32( SCh_2_R_fx[i], L_add( PCh_2_L_fx[i], SCh_2_R_fx[i] ), LR_ratio_fx ); /* Qx */ + Left_fx[i] = W_shl_sat_l( W_mult0_32_32( temp_left, inv_den_LR_ratio_fx_neg ), -30 ); /* Qx */ + move32(); + Word32 temp_right = Msub_32_32( PCh_2_L_fx[i], L_sub( PCh_2_L_fx[i], SCh_2_R_fx[i] ), LR_ratio_fx ); /* Qx */ + Right_fx[i] = W_shl_sat_l( W_mult0_32_32( temp_right, inv_den_LR_ratio_fx_neg ), -30 ); /* Qx */ + move32(); + } +#else /* OPT_STEREO_32KBPS_V1 */ FOR( i = start_index; i < end_index; i++ ) { Word32 temp_left = L_sub( Mpy_32_32( L_add( PCh_2_L_fx[i], SCh_2_R_fx[i] ), LR_ratio_fx ), SCh_2_R_fx[i] ); /* Qx */ @@ -415,6 +436,7 @@ void tdm_upmix_plain_fx( Right_fx[i] = L_shl_sat( Mpy_32_32( temp_right, inv_den_LR_ratio_fx ), 1 ); /* Qx + 1 */ move32(); } +#endif /* OPT_STEREO_32KBPS_V1 */ } return; -- GitLab