Commit d8cd608d authored by Sandesh Venkatesh
Browse files

LTV test optimizations - 01072025

parent c7a85855
Loading
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -485,14 +485,14 @@ const Word32 dft_res_gains_q_fx[][2] = {

// Q13
// Table of 64 Word16 entries, laid out 8 values per row.
// NOTE(review): this is a diff rendering without +/- markers. Per the hunk
// header (-485,14 +485,14) and the "+8 -8" counter, the first eight rows
// below are the values being removed and the last eight rows are their
// replacements; only one set of eight rows exists in the real file.
// NOTE(review): where the two sets differ, the new magnitude is exactly one
// larger (e.g. 3112 -> 3113, -1310 -> -1311), which looks like a switch from
// truncation to round-to-nearest when converting to Q13 - confirm.
const Word16 McMASA_LFEGain_vectors_fx_q13[64] = {
    3112, 2703, 1556, 1638, -1310, -1802, -2867, -2785,
    4096, 4096, 4096, 4096, -6553, -3276, 8355, 819,
    -4096, -4096, -4096, -4096, -4587, -983, -6389, 11141,
    -8355, 9666, -4669, 2703, 5898, -9256, 7946, -5079,
    -7454, 7618, 8192, -9011, 14172, -1884, -6389, -6881,
    7782, -13107, -2785, 7618, 7127, 3850, -15564, 4259,
    5488, 11632, -7946, -10158, 6799, 4751, 4997, -16711,
    -6553, -12943, 6717, 11632, -17530, 2129, 6881, 8355
    3113, 2703, 1556, 1638, -1311, -1802, -2867, -2785,
    4096, 4096, 4096, 4096, -6554, -3277, 8356, 819,
    -4096, -4096, -4096, -4096, -4588, -983, -6390, 11141,
    -8356, 9667, -4669, 2703, 5898, -9257, 7946, -5079,
    -7455, 7619, 8192, -9011, 14172, -1884, -6390, -6881,
    7782, -13107, -2785, 7619, 7127, 3850, -15565, 4260,
    5489, 11633, -7946, -10158, 6799, 4751, 4997, -16712,
    -6554, -12943, 6717, 11633, -17531, 2130, 6881, 8356
};

// Q25
+1 −0
Original line number Diff line number Diff line
@@ -79,6 +79,7 @@


/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
#define OPT_MCH_DEC_V1_NBE
#define OPT_MCH_DEC_V1_BE
#define OPT_MCT_ENC_V2_NBE
#define OPT_SBA_DEC_V2_NBE
+43 −0
Original line number Diff line number Diff line
@@ -886,7 +886,11 @@ Word16 computeMixingMatrices_fx(
    move16();
    FOR( i = 1; i < lengthCx; i++ )
    {
#ifdef OPT_MCH_DEC_V1_NBE
        IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
#else  /* OPT_MCH_DEC_V1_NBE */
        IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
#endif /* OPT_MCH_DEC_V1_NBE */
        {
            limit_fx = svd_s_buffer_fx[i];
            move32();
@@ -896,6 +900,7 @@ Word16 computeMixingMatrices_fx(
    }

    limit_e = add( limit_e, reg_Sx_e );

#ifdef OPT_MCH_DEC_V1_BE
    limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_Sx_fx );
#else  /* OPT_MCH_DEC_V1_BE */
@@ -905,7 +910,11 @@ Word16 computeMixingMatrices_fx(

    FOR( i = 0; i < lengthCx; ++i )
    {
#ifdef OPT_MCH_DEC_V1_NBE
        IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
#else  /* OPT_MCH_DEC_V1_NBE */
        IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
#endif /* OPT_MCH_DEC_V1_NBE */
        {
            svd_s_buffer_fx[i] = limit_fx;
            move32();
@@ -950,9 +959,16 @@ Word16 computeMixingMatrices_fx(
    matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );


#ifdef OPT_MCH_DEC_V1_NBE
    Word16 com_e = sub( limit_e, Cy_hat_diag_e );
#endif /* OPT_MCH_DEC_V1_NBE */
    FOR( i = 0; i < lengthCy; ++i )
    {
#ifdef OPT_MCH_DEC_V1_NBE
        IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
#else  /* OPT_MCH_DEC_V1_NBE */
        IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
#endif /* OPT_MCH_DEC_V1_NBE */
        {
            limit_fx = Cy_hat_diag_fx[i];
            move32();
@@ -968,11 +984,19 @@ Word16 computeMixingMatrices_fx(
#endif                                                          /* OPT_MCH_DEC_V1_BE */
    limit_e = add( limit_e, reg_ghat_e );

#ifdef OPT_MCH_DEC_V1_NBE
    com_e = sub( Cy_hat_diag_e, limit_e );
#endif /* OPT_MCH_DEC_V1_NBE */
    FOR( i = 0; i < lengthCy; ++i )
    {
        Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
        move16();

#ifdef OPT_MCH_DEC_V1_NBE
        IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
#else  /* OPT_MCH_DEC_V1_NBE */
        IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
#endif /* OPT_MCH_DEC_V1_NBE */
        {
            Cy_hat_diag_fx[i] = limit_fx;
            move32();
@@ -1392,7 +1416,11 @@ Word16 computeMixingMatricesResidual_fx(

    FOR( i = 0; i < lengthCx; ++i )
    {
#ifdef OPT_MCH_DEC_V1_NBE
        IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
#else  /* OPT_MCH_DEC_V1_NBE */
        IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
#endif /* OPT_MCH_DEC_V1_NBE */
        {
            div_tmp = Kx_fx[i];
            move32();
@@ -1433,9 +1461,16 @@ Word16 computeMixingMatricesResidual_fx(
    Cy_hat_diag_e = Cx_e;
    move16();

#ifdef OPT_MCH_DEC_V1_NBE
    Word16 com_e = sub( limit_e, Cy_hat_diag_e );
#endif /* OPT_MCH_DEC_V1_NBE */
    FOR( i = 0; i < lengthCy; ++i )
    {
#ifdef OPT_MCH_DEC_V1_NBE
        IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
#else  /* OPT_MCH_DEC_V1_NBE */
        IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
#endif /* OPT_MCH_DEC_V1_NBE */
        {
            limit_fx = Cy_hat_diag_fx[i];
            move32();
@@ -1453,11 +1488,19 @@ Word16 computeMixingMatricesResidual_fx(
    limit_e = add( limit_e, reg_ghat_e );

    /* Computing G_hat */

#ifdef OPT_MCH_DEC_V1_NBE
    com_e = sub( Cy_hat_diag_e, limit_e );
#endif /* OPT_MCH_DEC_V1_NBE */
    FOR( i = 0; i < lengthCy; ++i )
    {
        Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
        move16();
#ifdef OPT_MCH_DEC_V1_NBE
        IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
#else                                                                  /* OPT_MCH_DEC_V1_NBE */
        IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
#endif                                                                 /* OPT_MCH_DEC_V1_NBE */
        {
            Cy_hat_diag_fx[i] = limit_fx;
            move32();
+5 −5
Original line number Diff line number Diff line
@@ -1339,14 +1339,14 @@ void ivas_ism_param_dec_tc_gain_ajust_fx(
        {
            gain_fx = 0;
            move16();
            tmp_e1 = 31;
            tmp_e1 = 0;
            move16();
        }
        ELSE
        { /*handling denominator equals to zero*/
            gain_fx = 1;
        ELSE /*handling denominator equals to zero*/
        {
            gain_fx = 32767; //(max value of Word16 in Q0)
            move16();
            tmp_e1 = -32767; //(-1.0f in Q15) + 1
            tmp_e1 = 15;
            move16();
        }
    }
+114 −4
Original line number Diff line number Diff line
@@ -322,7 +322,11 @@ Word16 svd_fx(
        move16();
        FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
        {
#ifdef OPT_MCH_DEC_V1_NBE
            IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
#else  /* OPT_MCH_DEC_V1_NBE */
            IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 1], singularValues_fx_e[iCh + 1] ) < 0 )
#endif /* OPT_MCH_DEC_V1_NBE */
            {
                condition = 1;
                move16();
@@ -428,13 +432,23 @@ static Word16 BidagonalDiagonalisation_fx(
            FOR( jCh = iCh; jCh >= 0; jCh-- )
            {
                split = sub( jCh, 1 ); /* Q0 */
#ifdef OPT_MCH_DEC_V1_NBE
                Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
                IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
#else                                                                                                                                                                         /* OPT_MCH_DEC_V1_NBE */
                IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) )                   /* is secDiag[ch] vanishing compared to eps_x */
#endif                                                                                                                                                                        /* OPT_MCH_DEC_V1_NBE */
                {
                    found_split = 0;
                    move16();
                    BREAK;
                }
#ifdef OPT_MCH_DEC_V1_NBE
                com_e = s_max( singularValues_new_e[split], eps_x_e );
                IF( LE_32( L_shr( L_abs( singularValues_fx[split] ), sub( com_e, singularValues_new_e[split] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[split] vanishing compared to eps_x */
#else                                                                                                                                                                                           /* OPT_MCH_DEC_V1_NBE */
                IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
#endif                                                                                                                                                                                          /* OPT_MCH_DEC_V1_NBE */
                {
                    BREAK;
                }
@@ -469,7 +483,12 @@ static Word16 BidagonalDiagonalisation_fx(
                    g_e = add( s_e, secDiag_new_e[kCh] );
                    secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
                    secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
#ifdef OPT_MCH_DEC_V1_NBE
                    Word16 com_e = s_max( g_e, eps_x_e );
                    IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
#else  /* OPT_MCH_DEC_V1_NBE */
                    IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) )                                            /* is g vanishing compared to eps_x */
#endif /* OPT_MCH_DEC_V1_NBE */
                    {
                        BREAK;
                    }
@@ -929,9 +948,15 @@ static void biDiagonalReductionLeft_fx(
            Word16 invVal_e;
            Word32 invVal;
            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
#ifdef OPT_MCH_DEC_V1_NBE
            Word64 temp = 0;
            move64();
            Word16 max_e = MIN_16;
#else  /* OPT_MCH_DEC_V1_NBE */
            norm_x = 0;
            move32();
            norm_x_e = 0;
#endif /* OPT_MCH_DEC_V1_NBE */
            move16();
            FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
            {
@@ -940,8 +965,25 @@ static void biDiagonalReductionLeft_fx(
                move32();
                singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
                move16();
#ifdef OPT_MCH_DEC_V1_NBE
                max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] );
#else  /* OPT_MCH_DEC_V1_NBE */
                norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
#endif /* OPT_MCH_DEC_V1_NBE */
            }

#ifdef OPT_MCH_DEC_V1_NBE
            FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
            {
                temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) );
            }

            Word16 nrm = W_norm( temp );
            nrm = sub( nrm, 32 );
            norm_x = W_shl_sat_l( temp, nrm );
            norm_x_e = sub( add( max_e, max_e ), nrm );
#endif /* OPT_MCH_DEC_V1_NBE */

            IF( GT_16( norm_x_e, 0 ) )
            {
                norm_x = MAX_32;
@@ -969,6 +1011,30 @@ static void biDiagonalReductionLeft_fx(

            FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
            {
#ifdef OPT_MCH_DEC_V1_NBE
                Word16 max2_e = MIN_16;
                max_e = MIN_16;
                move16();
                move16();
                temp = 0;
                move64();

                FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                {
                    max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* running max of singularVectors2_e[jCh][currChannel] over jCh */
                    max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] );       /* running max of singularVectors2_e[jCh][iCh] over jCh */
                }
                max_e = add( max_e, max2_e );

                FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                {
                    temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) );
                }
                nrm = W_norm( temp );
                nrm = sub( nrm, 32 );
                norm_x = W_shl_sat_l( temp, nrm );
                norm_x_e = sub( max_e, nrm );
#else  /* OPT_MCH_DEC_V1_NBE */
                norm_x = 0;
                move32();
                norm_x_e = 0;
@@ -977,6 +1043,7 @@ static void biDiagonalReductionLeft_fx(
                {
                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
                }
#endif /* OPT_MCH_DEC_V1_NBE */

                f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
                f_e = add( invVal_e, sub( norm_x_e, r_e ) );
@@ -1228,8 +1295,16 @@ static void singularVectorsAccumulationLeft_fx(
                move32();
            }
        }
#ifdef OPT_MCH_DEC_V1_NBE
        Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
        singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
        move32();
        singularVectors_Left_e[nCh][nCh] = exp;
        move16();
#else  /* OPT_MCH_DEC_V1_NBE */
        singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
        move32();
#endif /* OPT_MCH_DEC_V1_NBE */
    }
    // fclose(fp);
    FOR( nCh = 0; nCh < nChannelsL; nCh++ )
@@ -1292,21 +1367,56 @@ static void singularVectorsAccumulationRight_fx(

                FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
                {
#ifdef OPT_MCH_DEC_V1_NBE
                    Word64 norm_val = 0;
                    move64();
                    Word16 maxL_e = MIN_16;
                    Word16 maxR_e = MIN_16;
                    Word16 maxR2_e = MIN_16;
                    move16();
                    move16();
                    move16();
                    FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
                    {
                        maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
                        maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
                        maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
                    }
#else  /* OPT_MCH_DEC_V1_NBE */
                    norm_y = 0;
                    move32();
                    norm_y_e = 0;
                    move16();
#endif /* OPT_MCH_DEC_V1_NBE */

                    FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
                    {
#ifdef OPT_MCH_DEC_V1_NBE
                        norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
#else  /* OPT_MCH_DEC_V1_NBE */
                        norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e );                               /* exp(norm_y_e) */
#endif /* OPT_MCH_DEC_V1_NBE */
                    }
#ifdef OPT_MCH_DEC_V1_NBE
                    norm_y_e = W_norm( norm_val );
                    norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
                    norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );

                    Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
#endif                                                      /* OPT_MCH_DEC_V1_NBE */
                    FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
                    {
#ifdef OPT_MCH_DEC_V1_NBE
                        Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
                        Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
                        singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
                        move32();
                        singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
#else                                                                                                        /* OPT_MCH_DEC_V1_NBE */
                        singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* exp(sing_right_exp) */
                        move32();
                        singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */
#endif                                                                                                       /* OPT_MCH_DEC_V1_NBE */
                        move32();
                        sing_right_exp[k][iCh] = 0;
                        move16();
Loading