Commit bcc85e76 authored by Sandesh Venkatesh
Browse files

Merge branch '3gpp_issue_1464_fix' into 'main'

Precision improvements in SQ_gain_estimate_stereo_fx [allow regression]

See merge request !1394
parents 27af412d e0213868
Loading
Loading
Loading
Loading
Loading
+38 −27
Original line number Diff line number Diff line
@@ -836,52 +836,67 @@ void convertToMS_fx(

/*! r: SQ gain */
static Word32 SQ_gain_estimate_stereo_fx( // e_res
    Word32 xL_fx[],                       /* i  : L vector to quantize        Q31-e_xL*/
    Word16 e_xL,
    Word32 xR_fx[], /* i  : R vector to quantize        Q31-e_xR*/
    Word16 e_xR,
    const Word32 xL_fx[],                 /* i  : L vector to quantize        Q31-e_xL*/
    const Word16 e_xL,
    const Word32 xR_fx[], /* i  : R vector to quantize        Q31-e_xR*/
    const Word16 e_xR,
    const Word16 nbitsSQ, /* i  : number of bits targeted     Q0*/
    const Word16 lg,      /* i  : vector size (2048 max)      Q0*/
    Word16 *e_res )
{
    Word16 i, q, iter, e_ener, e_tmp;
    Word16 i, q, iter, e_ener, e_xL_2, e_xR_2, s;
    Word32 ener_fx, tmp_32, target_fx, fac_fx, offset_fx;
    Word32 en_fx[N_MAX / 2]; // Q(26)
    Word32 en_fx[N_MAX / 2]; // Q25
    Word16 lg2, lg_4, lg2_4;
    Word64 W_tmp, _0_01;

    lg_4 = shr( lg, 2 );    /* Q0 */
    lg2_4 = shl( lg_4, 1 ); /* Q0 */
    lg2 = shl( lg2_4, 2 );  /* Q0 */
    i = 0;
    move16();
    e_xL_2 = shl( e_xL, 1 );
    e_xR_2 = shl( e_xR, 1 );
    _0_01 = W_shr( 21474836 /* 0.01 in Q31 */, sub( e_xL_2, 32 ) ); // 0.01 in 2*(Q of specL/R) + 1

    set32_fx( en_fx, 21474836 /* 0.01 in Q31 */, idiv1616( N_MAX, 2 ) );
    set32_fx( en_fx, 335544 /* 0.01 in Q25 */, ( N_MAX / 2 ) );

    /* energy of quadruples with 9dB offset */
    /* ignore that we may not take all lines into account: max. 3 lines at the upper end of the spectrum can be missed if lg is not a multiple of 4 (this also happens in SQGain()) */

    FOR( q = 0; q < lg_4; q++ )
    {
        ener_fx = BASOP_Util_Add_Mant32Exp( 21474836 /*0.01 in Q15*/, 0, Mpy_32_32( xL_fx[i], xL_fx[i] ), e_xL * 2, &e_ener ); /* Q31-e_ener */
        ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xL_fx[i + 1], xL_fx[i + 1] ), e_xL * 2, &e_ener );     /* Q31-e_ener */
        ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xL_fx[i + 2], xL_fx[i + 2] ), e_xL * 2, &e_ener );     /* Q31-e_ener */
        ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xL_fx[i + 3], xL_fx[i + 3] ), e_xL * 2, &e_ener );     /* Q31-e_ener */
        W_tmp = W_mac_32_32( _0_01, xL_fx[i], xL_fx[i] );         // 2 * e_xL
        W_tmp = W_mac_32_32( W_tmp, xL_fx[i + 1], xL_fx[i + 1] ); // 2 * e_xL
        W_tmp = W_mac_32_32( W_tmp, xL_fx[i + 2], xL_fx[i + 2] ); // 2 * e_xL
        W_tmp = W_mac_32_32( W_tmp, xL_fx[i + 3], xL_fx[i + 3] ); // 2 * e_xL

        s = W_norm( W_tmp );
        ener_fx = W_extract_h( W_shl( W_tmp, s ) );
        e_ener = sub( e_xL_2, s );

        en_fx[q] = BASOP_Util_Log2( ener_fx ); /* saves a MAC */
        move32();
        en_fx[q] = Mpy_32_16_1( L_add( e_ener * ONE_IN_Q25, en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25)
        en_fx[q] = Mpy_32_16_1( L_add( L_shl( e_ener, Q25 ), en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25)
        move32();
        i = add( i, 4 );
    }
    i = 0;
    move16();
    FOR( ; q < lg2_4; q++ )
    {
        ener_fx = BASOP_Util_Add_Mant32Exp( 21474836 /*0.01 Q15*/, 0, Mpy_32_32( xR_fx[i], xR_fx[i] ), e_xR * 2, &e_ener ); /* Q31-e_ener */
        ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xR_fx[i + 1], xR_fx[i + 1] ), e_xR * 2, &e_ener );  /* Q31-e_ener */
        ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xR_fx[i + 2], xR_fx[i + 2] ), e_xR * 2, &e_ener );  /* Q31-e_ener */
        ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, Mpy_32_32( xR_fx[i + 3], xR_fx[i + 3] ), e_xR * 2, &e_ener );  /* Q31-e_ener */
        W_tmp = W_mac_32_32( _0_01, xR_fx[i], xR_fx[i] );         // 2 * e_xR
        W_tmp = W_mac_32_32( W_tmp, xR_fx[i + 1], xR_fx[i + 1] ); // 2 * e_xR
        W_tmp = W_mac_32_32( W_tmp, xR_fx[i + 2], xR_fx[i + 2] ); // 2 * e_xR
        W_tmp = W_mac_32_32( W_tmp, xR_fx[i + 3], xR_fx[i + 3] ); // 2 * e_xR

        s = W_norm( W_tmp );
        ener_fx = W_extract_h( W_shl( W_tmp, s ) );
        e_ener = sub( e_xR_2, s );

        en_fx[q] = BASOP_Util_Log2( ener_fx ); /* saves a MAC */
        move32();
        en_fx[q] = Mpy_32_16_1( L_add( e_ener * ONE_IN_Q25, en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25)
        en_fx[q] = Mpy_32_16_1( L_add( L_shl( e_ener, Q25 ), en_fx[q] ), 9864 /* log10(2) in Q15 */ ); // Q(25)
        move32();
        i = add( i, 4 );
    }
@@ -898,24 +913,20 @@ static Word32 SQ_gain_estimate_stereo_fx( // e_res
    {
        fac_fx = L_shr( fac_fx, 1 );            /* Q25 */
        offset_fx = L_sub( offset_fx, fac_fx ); /* Q25 */
        ener_fx = 0;
        move32();
        e_ener = 0;
        move16();
        W_tmp = 0;
        move64();

        FOR( i = 0; i < lg2_4; i++ )
        {
            tmp_32 = L_sub( en_fx[i], offset_fx ); /* Q25 */
            e_tmp = 6;
            move16();

            /* avoid SV with 1 bin of amp < 0.5f */
            IF( GT_32( tmp_32, 10066329 /*0.3 Q25*/ ) )
            {
                ener_fx = BASOP_Util_Add_Mant32Exp( ener_fx, e_ener, tmp_32, e_tmp, &e_ener ); /* Q31-e_ener */
                W_tmp = W_add( W_tmp, W_deposit32_l( tmp_32 ) ); /* Q25 */

                /* if ener is above target -> break and increase offset */
                IF( L_shl_sat( ener_fx, sub( e_ener, Q13 ) ) > target_fx )
                IF( GT_64( W_tmp, W_shl( W_deposit32_l( target_fx ), 7 ) ) )
                {
                    offset_fx = L_add( offset_fx, fac_fx ); /* Q25 */
                    BREAK;