diff --git a/lib_enc/ivas_stereo_mdct_stereo_enc_fx.c b/lib_enc/ivas_stereo_mdct_stereo_enc_fx.c index a61b2b0106170638a85e848080f6176e5ce661d5..9a1198e20608799d30d2c37da62da198d0eb75f0 100644 --- a/lib_enc/ivas_stereo_mdct_stereo_enc_fx.c +++ b/lib_enc/ivas_stereo_mdct_stereo_enc_fx.c @@ -836,52 +836,67 @@ void convertToMS_fx( /*! r: SQ gain */ static Word32 SQ_gain_estimate_stereo_fx( // e_res - Word32 xL_fx[], /* i : L vector to quantize Q31-e_xL*/ - Word16 e_xL, - Word32 xR_fx[], /* i : R vector to quantize Q31-e_xR*/ - Word16 e_xR, + const Word32 xL_fx[], /* i : L vector to quantize Q31-e_xL*/ + const Word16 e_xL, + const Word32 xR_fx[], /* i : R vector to quantize Q31-e_xR*/ + const Word16 e_xR, const Word16 nbitsSQ, /* i : number of bits targeted Q0*/ const Word16 lg, /* i : vector size (2048 max) Q0*/ Word16 *e_res ) { - Word16 i, q, iter, e_ener, e_tmp; + Word16 i, q, iter, e_ener, e_xL_2, e_xR_2, s; Word32 ener_fx, tmp_32, target_fx, fac_fx, offset_fx; - Word32 en_fx[N_MAX / 2]; // Q(26) + Word32 en_fx[N_MAX / 2]; // Q25 Word16 lg2, lg_4, lg2_4; + Word64 W_tmp, _0_01; lg_4 = shr( lg, 2 ); /* Q0 */ lg2_4 = shl( lg_4, 1 ); /* Q0 */ lg2 = shl( lg2_4, 2 ); /* Q0 */ i = 0; move16(); + e_xL_2 = shl( e_xL, 1 ); + e_xR_2 = shl( e_xR, 1 ); + _0_01 = W_shr( 21474836 /* 0.01 in Q31 */, sub( e_xL_2, 32 ) ); // 0.01 in 2*(Q of specL/R) + 1 - set32_fx( en_fx, 21474836 /* 0.01 in Q31 */, idiv1616( N_MAX, 2 ) ); + set32_fx( en_fx, 335544 /* 0.01 in Q25 */, ( N_MAX / 2 ) ); /* energy of quadruples with 9dB offset */ /* ignore that we may take no all lines into account, max. 3 lines at the upper end of the spectrum can be missed (if lg is not a multiple of 4, happens also in SQGain()*/ FOR( q = 0; q < lg_4; q++ ) { - ener_fx = BASOP_Util_Add_Mant32Exp( 21474836 /*0.01 in Q15*/, 0, Mpy_32_32( xL_fx[i], xL_fx[i] ), e_xL * 2, &e_ener ); /* Q31-e_ener */ - ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xL_fx[i + 1], xL_fx[i + 1] ), e_xL * 2, &e_ener ); /* Q31-e_ener */ - ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xL_fx[i + 2], xL_fx[i + 2] ), e_xL * 2, &e_ener ); /* Q31-e_ener */ - ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xL_fx[i + 3], xL_fx[i + 3] ), e_xL * 2, &e_ener ); /* Q31-e_ener */ - en_fx[q] = BASOP_Util_Log2( ener_fx ); /* saves a MAC */ + W_tmp = W_mac_32_32( _0_01, xL_fx[i], xL_fx[i] ); // 2 * e_xL + W_tmp = W_mac_32_32( W_tmp, xL_fx[i + 1], xL_fx[i + 1] ); // 2 * e_xL + W_tmp = W_mac_32_32( W_tmp, xL_fx[i + 2], xL_fx[i + 2] ); // 2 * e_xL + W_tmp = W_mac_32_32( W_tmp, xL_fx[i + 3], xL_fx[i + 3] ); // 2 * e_xL + + s = W_norm( W_tmp ); + ener_fx = W_extract_h( W_shl( W_tmp, s ) ); + e_ener = sub( e_xL_2, s ); + + en_fx[q] = BASOP_Util_Log2( ener_fx ); /* saves a MAC */ move32(); - en_fx[q] = Mpy_32_16_1( L_add( e_ener * ONE_IN_Q25, en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25) + en_fx[q] = Mpy_32_16_1( L_add( L_shl( e_ener, Q25 ), en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25) move32(); i = add( i, 4 ); } i = 0; + move16(); FOR( ; q < lg2_4; q++ ) { - ener_fx = BASOP_Util_Add_Mant32Exp( 21474836 /*0.01 Q15*/, 0, Mpy_32_32( xR_fx[i], xR_fx[i] ), e_xR * 2, &e_ener ); /* Q31-e_ener */ - ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xR_fx[i + 1], xR_fx[i + 1] ), e_xR * 2, &e_ener ); /* Q31-e_ener */ - ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xR_fx[i + 2], xR_fx[i + 2] ), e_xR * 2, &e_ener ); /* Q31-e_ener */ - ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xR_fx[i + 3], xR_fx[i + 3] ), e_xR * 2, &e_ener ); /* Q31-e_ener */ - en_fx[q] = BASOP_Util_Log2( ener_fx ); /* saves a MAC */ + W_tmp = W_mac_32_32( _0_01, xR_fx[i], xR_fx[i] ); // 2 * e_xR + W_tmp = W_mac_32_32( W_tmp, xR_fx[i + 1], xR_fx[i + 1] ); // 2 * e_xR + W_tmp = W_mac_32_32( W_tmp, xR_fx[i + 2], xR_fx[i + 2] ); // 2 * e_xR + W_tmp = W_mac_32_32( W_tmp, xR_fx[i + 3], xR_fx[i + 3] ); // 2 * e_xR + + s = W_norm( W_tmp ); + ener_fx = W_extract_h( W_shl( W_tmp, s ) ); + e_ener = sub( e_xR_2, s ); + + en_fx[q] = BASOP_Util_Log2( ener_fx ); /* saves a MAC */ move32(); - en_fx[q] = Mpy_32_16_1( L_add( e_ener * ONE_IN_Q25, en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25) + en_fx[q] = Mpy_32_16_1( L_add( L_shl( e_ener, Q25 ), en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25) move32(); i = add( i, 4 ); } @@ -898,24 +913,20 @@ static Word32 SQ_gain_estimate_stereo_fx( // e_res { fac_fx = L_shr( fac_fx, 1 ); /* Q25 */ offset_fx = L_sub( offset_fx, fac_fx ); /* Q25 */ - ener_fx = 0; - move32(); - e_ener = 0; - move16(); + W_tmp = 0; + move64(); FOR( i = 0; i < lg2_4; i++ ) { tmp_32 = L_sub( en_fx[i], offset_fx ); /* Q25 */ - e_tmp = 6; - move16(); /* avoid SV with 1 bin of amp < 0.5f */ IF( GT_32( tmp_32, 10066329 /*0.3 Q25*/ ) ) { - ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, tmp_32, e_tmp, &e_ener ); /* Q31-e_ener */ + W_tmp = W_add( W_tmp, W_deposit32_l( tmp_32 ) ); /* Q25 */ /* if ener is above target -> break and increase offset */ - IF( L_shl_sat( ener_fx, sub( e_ener, Q13 ) ) > target_fx ) + IF( GT_64( W_tmp, W_shl( W_deposit32_l( target_fx ), 7 ) ) ) { offset_fx = L_add( offset_fx, fac_fx ); /* Q25 */ BREAK;