Commit 6c5ed9f1 authored by Sandesh Venkatesh
Browse files

Optimization in ISM Dec path - Non bit exact

parent d58c4fe1
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@
/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
#define OPT_MCT_ENC_V2_BE
#define OPT_MCH_DEC_V1_NBE
/* OPT_MASA_DEC_V1_NBE: selects the alternative code branches guarded by this switch in
 * GenerateFilter_fx() and TDREND_firfilt_fx(). The introducing commit is titled
 * "Optimization in ISM Dec path - Non bit exact", so results are presumably not bit-exact
 * with the branches used when the switch is undefined.
 * NOTE(review): the switch name says MASA while the commit title refers to the ISM decoder
 * path - confirm the naming is intended. */
#define OPT_MASA_DEC_V1_NBE
#define OPT_MCT_ENC_48KB_NBE
#define OPT_MCH_DEC_V1_BE
#define OPT_MCT_ENC_V2_NBE
+119 −2
Original line number Diff line number Diff line
@@ -193,7 +193,6 @@ static void GenerateFilter_fx(
)
{
    Word16 qp, p, k, i;
    Word32 index;
    Word16 AzIdx[HRTF_MODEL_BSPLINE_NUM_COEFFS][HRTF_MODEL_BSPLINE_NUM_COEFFS], EvIdx[HRTF_MODEL_BSPLINE_NUM_COEFFS]; /* non-zero basis functions */
    Word16 num_az_idx[HRTF_MODEL_BSPLINE_NUM_COEFFS];
    Word16 num_ev_idx;
@@ -255,6 +254,16 @@ static void GenerateFilter_fx(
    move16();
    FOR( p = 0; p < num_ev_idx; p++ )
    {
#ifdef OPT_MASA_DEC_V1_NBE
        Word32 expt = L_shl_sat( modelEval->elevBfVec_fx[p], 1 );
        FOR( i = 0; i < num_az_idx[p]; i++ )
        {
            modelEval->BM_fx[qp + i] = Mpy_32_32( expt, modelEval->azimBfVec_fx[p][i] ); /*Q30 - ( Q30 * 2 - 31 )*/ // Q30
            move32();
            BM_idx[qp + i] = add( model->azim_start_idx[EvIdx[p]], AzIdx[p][i] );
            move16();
        }
#else  /* OPT_MASA_DEC_V1_NBE */
        FOR( i = 0; i < num_az_idx[p]; i++ )
        {
            modelEval->BM_fx[add( qp, i )] = L_shl( Mpy_32_32( modelEval->elevBfVec_fx[p], modelEval->azimBfVec_fx[p][i] ), 1 ); /*Q30 - ( Q30 * 2 - 31 )*/ // Q30
@@ -262,12 +271,21 @@ static void GenerateFilter_fx(
            BM_idx[add( qp, i )] = add( model->azim_start_idx[EvIdx[p]], AzIdx[p][i] );
            move16();
        }
#endif /* OPT_MASA_DEC_V1_NBE */
        qp = add( qp, num_az_idx[p] );
    }

#ifdef OPT_MASA_DEC_V1_NBE
    Word16 expL = add( model->AlphaL_e, 1 );
    Word16 expR = add( model->AlphaR_e, 1 );
    BMEnergiesL_e = add( model->EL_e, 2 );
    BMEnergiesR_e = add( model->ER_e, 2 );
#endif /* OPT_MASA_DEC_V1_NBE */

    /* Compute HR filters, approximate optimized model evaluation */
    FOR( iSec = 0; iSec < HRTF_MODEL_N_SECTIONS; iSec++ )
    {
#ifndef OPT_MASA_DEC_V1_NBE
        ETotL = 0;
        move32();
        ETotR = 0;
@@ -288,45 +306,102 @@ static void GenerateFilter_fx(
        BMEnergiesL_e = add( model->EL_e, 2 );
        BMEnergiesR_e = add( model->ER_e, 2 );

#else  /* OPT_MASA_DEC_V1_NBE */
        Word64 temp1 = 0;
        move64();
        Word64 temp2 = 0;
        move64();
#endif /* OPT_MASA_DEC_V1_NBE */
        /* Energy is precalculated part updated with square of BM value. Store index for sorting */
        FOR( i = 0; i < qp; i++ )
        {
#ifdef OPT_MASA_DEC_V1_NBE
            modelEval->BMEnergiesL[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->EL_fx[( iSec * model->AlphaN ) + BM_idx[i]] ); // exp: model->EL_e + 2
            modelEval->BMEnergiesR[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->ER_fx[( iSec * model->AlphaN ) + BM_idx[i]] ); // exp: model->ER_e + 2
#else                                                                                                                                                                          /* OPT_MASA_DEC_V1_NBE */
            modelEval->BMEnergiesL[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->EL_fx[add( i_mult( iSec, model->AlphaN ), BM_idx[i] )] ); // exp: model->EL_e + 2
            modelEval->BMEnergiesR[i].val_fx = Mpy_32_32( Mpy_32_32( modelEval->BM_fx[i], modelEval->BM_fx[i] ) /*Q29*/, model->ER_fx[add( i_mult( iSec, model->AlphaN ), BM_idx[i] )] ); // exp: model->ER_e + 2
#endif                                                                                                                                                                         /* OPT_MASA_DEC_V1_NBE */
            move32();
            move32();
            modelEval->BMEnergiesL[i].i = i;
            move16();
            modelEval->BMEnergiesR[i].i = i;
            move16();

#ifndef OPT_MASA_DEC_V1_NBE
            ETotL = BASOP_Util_Add_Mant32Exp( ETotL, ETotL_e, modelEval->BMEnergiesL[i].val_fx, BMEnergiesL_e, &ETotL_e );
            ETotR = BASOP_Util_Add_Mant32Exp( ETotR, ETotR_e, modelEval->BMEnergiesR[i].val_fx, BMEnergiesR_e, &ETotR_e );
        }
#else  /* OPT_MASA_DEC_V1_NBE */
            temp1 = W_add( temp1, modelEval->BMEnergiesL[i].val_fx );                                                                                                                     // BMEnergiesL_e
            temp2 = W_add( temp2, modelEval->BMEnergiesR[i].val_fx );                                                                                                                     // BMEnergiesR_e
#endif /* OPT_MASA_DEC_V1_NBE */
        }
#ifdef OPT_MASA_DEC_V1_NBE
        ETotL_e = W_norm( temp1 );
        ETotL_e = sub( ETotL_e, 32 );
        ETotL = W_shl_sat_l( temp1, ETotL_e );
        ETotL_e = sub( BMEnergiesL_e, ETotL_e );

        ETotR_e = W_norm( temp2 );
        ETotR_e = sub( ETotR_e, 32 );
        ETotR = W_shl_sat_l( temp2, ETotR_e );
        ETotR_e = sub( BMEnergiesR_e, ETotR_e );
#endif /* OPT_MASA_DEC_V1_NBE */

        /* Number of basis components actually used. */
        p = s_min( HRTF_MODEL_N_CPTS_VAR[iSec], qp );
        SkipSmallest_ValueIndex_fx( modelEval->UseIndsL, modelEval->BMEnergiesL, qp, sub( qp, p ) );
        SkipSmallest_ValueIndex_fx( modelEval->UseIndsR, modelEval->BMEnergiesR, qp, sub( qp, p ) );

#ifndef OPT_MASA_DEC_V1_NBE
        /* Account for lost energy */
        FOR( i = 0; i < p; i++ )
        {
            ESynL = BASOP_Util_Add_Mant32Exp( ESynL, ESynL_e, modelEval->BMEnergiesL[modelEval->UseIndsL[i]].val_fx, BMEnergiesL_e, &ESynL_e );
            ESynR = BASOP_Util_Add_Mant32Exp( ESynR, ESynR_e, modelEval->BMEnergiesR[modelEval->UseIndsR[i]].val_fx, BMEnergiesR_e, &ESynR_e );
        }
#else /* OPT_MASA_DEC_V1_NBE */
        temp1 = 0;
        move64();
        temp2 = 0;
        move64();

        /* Account for lost energy */
        FOR( i = 0; i < p; i++ )
        {
            temp1 = W_add( temp1, modelEval->BMEnergiesL[modelEval->UseIndsL[i]].val_fx ); // BMEnergiesL_e
            temp2 = W_add( temp2, modelEval->BMEnergiesR[modelEval->UseIndsR[i]].val_fx ); // BMEnergiesR_e
        }
        ESynL_e = W_norm( temp1 );
        ESynL_e = sub( ESynL_e, 32 );
        ESynL = W_shl_sat_l( temp1, ESynL_e );
        ESynL_e = sub( BMEnergiesL_e, ESynL_e );

        ESynR_e = W_norm( temp2 );
        ESynR_e = sub( ESynR_e, 32 );
        ESynR = W_shl_sat_l( temp2, ESynR_e );
        ESynR_e = sub( BMEnergiesR_e, ESynR_e );

#endif /* OPT_MASA_DEC_V1_NBE */

        tmp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( ETotL, ESynL, &ScaleL_e ) );
        ScaleL_e = add( ScaleL_e, sub( ETotL_e, ESynL_e ) );
        ScaleL = Sqrt32( tmp32, &ScaleL_e );
#ifdef OPT_MASA_DEC_V1_NBE
        ScaleL_e = sub( ScaleL_e, 1 );
#endif /* OPT_MASA_DEC_V1_NBE */

        tmp32 = L_deposit_h( BASOP_Util_Divide3232_Scale( ETotR, ESynR, &ScaleR_e ) );
        ScaleR_e = add( ScaleR_e, sub( ETotR_e, ESynR_e ) );
        ScaleR = Sqrt32( tmp32, &ScaleR_e );
#ifdef OPT_MASA_DEC_V1_NBE
        ScaleR_e = sub( ScaleR_e, 1 );
#endif /* OPT_MASA_DEC_V1_NBE */

        /* Build using only the most energetic components. */
        FOR( k = model->iSecFirst[iSec]; k <= model->iSecLast[iSec]; k++ )
        {
#ifndef OPT_MASA_DEC_V1_NBE
            modelEval->hrfModL_fx[k] = 0;
            move32();
            modelEval->hrfModR_fx[k] = 0;
@@ -337,6 +412,7 @@ static void GenerateFilter_fx(
            tmp_hrfModR_e = 0;
            move16();

            Word32 index;
            FOR( i = 0; i < p; i++ )
            {
                index = L_add( BM_idx[modelEval->BMEnergiesL[modelEval->UseIndsL[i]].i], imult3216( model->AlphaN, k ) );
@@ -348,6 +424,42 @@ static void GenerateFilter_fx(
                modelEval->hrfModR_fx[k] = BASOP_Util_Add_Mant32Exp( modelEval->hrfModR_fx[k], tmp_hrfModR_e, tmp32, add( model->AlphaR_e, 1 ), &tmp_hrfModR_e );
                move32();
            }
#else /* OPT_MASA_DEC_V1_NBE */
            temp1 = 0;
            move64();
            temp2 = 0;
            move64();

            FOR( i = 0; i < p; i++ )
            {
                temp1 = W_add( temp1, Mpy_32_32( modelEval->BM_fx[modelEval->BMEnergiesL[modelEval->UseIndsL[i]].i], model->AlphaL_fx[BM_idx[modelEval->BMEnergiesL[modelEval->UseIndsL[i]].i] + ( model->AlphaN * k )] ) ); // add(model->AlphaL_e, 1)
                temp2 = W_add( temp2, Mpy_32_32( modelEval->BM_fx[modelEval->BMEnergiesR[modelEval->UseIndsR[i]].i], model->AlphaR_fx[BM_idx[modelEval->BMEnergiesR[modelEval->UseIndsR[i]].i] + ( model->AlphaN * k )] ) ); // add(model->AlphaR_e, 1)
            }

            tmp_hrfModL_e = W_norm( temp1 );
            tmp_hrfModL_e = sub( tmp_hrfModL_e, 32 );
            modelEval->hrfModL_fx[k] = W_shl_sat_l( temp1, tmp_hrfModL_e );
            move32();
            tmp_hrfModL_e = sub( expL, tmp_hrfModL_e );
            if ( temp1 == 0 )
            {
                tmp_hrfModL_e = 0;
                move16();
            }

            tmp_hrfModR_e = W_norm( temp2 );
            tmp_hrfModR_e = sub( tmp_hrfModR_e, 32 );
            modelEval->hrfModR_fx[k] = W_shl_sat_l( temp2, tmp_hrfModR_e );
            move32();
            tmp_hrfModR_e = sub( expR, tmp_hrfModR_e );

            if ( temp2 == 0 )
            {
                tmp_hrfModR_e = 0;
                move16();
            }

#endif /* OPT_MASA_DEC_V1_NBE */
            /* Account for lost energy */
            modelEval->hrfModL_fx[k] = Mpy_32_32( modelEval->hrfModL_fx[k], ScaleL );
            move32();
@@ -355,8 +467,13 @@ static void GenerateFilter_fx(
            move32();

            /* NOTE: Assuming that finally, hrfMod values will be <= 1. Hence making it Q30 */
#ifdef OPT_MASA_DEC_V1_NBE
            modelEval->hrfModL_fx[k] = L_shl( modelEval->hrfModL_fx[k], add( tmp_hrfModL_e, ScaleL_e ) ); // assuming Q30
            modelEval->hrfModR_fx[k] = L_shl( modelEval->hrfModR_fx[k], add( tmp_hrfModR_e, ScaleR_e ) ); // assuming Q30
#else                                                                                                     /* OPT_MASA_DEC_V1_NBE */
            modelEval->hrfModL_fx[k] = L_shl( modelEval->hrfModL_fx[k], sub( add( tmp_hrfModL_e, ScaleL_e ), 1 ) ); // assuming Q30
            modelEval->hrfModR_fx[k] = L_shl( modelEval->hrfModR_fx[k], sub( add( tmp_hrfModR_e, ScaleR_e ), 1 ) ); // assuming Q30
#endif /* OPT_MASA_DEC_V1_NBE */                                                                          //
            move32();
            move32();
        }
+43 −3
Original line number Diff line number Diff line
@@ -292,7 +292,9 @@ void TDREND_firfilt_fx(
    Word32 step_fx /* Q31 */, gain_tmp_fx /* Q31 */, gain_delta_fx /* Q30 */;
    Word16 tmp_e;
    Word64 tmp64_fx;

#ifdef OPT_MASA_DEC_V1_NBE
    Word16 shift = sub( filter_e, 32 );
#endif                                                                                              /* OPT_MASA_DEC_V1_NBE */
    gain_delta_fx = L_sub( Gain_fx, prevGain_fx );                                                  // Q30
    step_fx = L_deposit_h( BASOP_Util_Divide3232_Scale( gain_delta_fx, subframe_length, &tmp_e ) ); // exp(tmp_e)
    tmp_e = sub( tmp_e, Q30 );
@@ -306,12 +308,18 @@ void TDREND_firfilt_fx(
    Copy32( signal_fx + add( sub( subframe_length, filterlength ), 1 ), mem_fx, sub( filterlength, 1 ) ); /* Update memory for next frame */ // Qx

    /* Convolution */
#ifdef OPT_MASA_DEC_V1_NBE
    FOR( i = 0; i < intp_count; i++ )
#else  /* OPT_MASA_DEC_V1_NBE */
    FOR( i = 0; i < subframe_length; i++ )
#endif /* OPT_MASA_DEC_V1_NBE */
    {
        tmp64_fx = 0;
        move64();
#ifndef OPT_MASA_DEC_V1_NBE
        tmp_e = 0;
        move16();
#endif                              /* OPT_MASA_DEC_V1_NBE */
        p_tmp_fx = p_signal_fx + i; // Qx
        p_filter_fx = filter_fx;    // exp(filter_e)

@@ -324,18 +332,50 @@ void TDREND_firfilt_fx(
        }

        // This is done to keep the output Q same as input Q for signal
#ifdef OPT_MASA_DEC_V1_NBE
        tmp_fx = W_shl_sat_l( tmp64_fx, shift ); // Qx
#else                                            /* OPT_MASA_DEC_V1_NBE */
        tmp64_fx = W_shl( tmp64_fx, filter_e );                          // Qx + 32
        tmp_fx = W_extract_h( tmp64_fx );                                // Qx
#endif                                           /* OPT_MASA_DEC_V1_NBE */

        /* Apply linear gain interpolation in case of abrupt gain changes */
        gain_tmp_fx = L_add_sat( gain_tmp_fx, step_fx ); /* Saturating values which just exceeds 1, Q31*/
        signal_fx[i] = Mpy_32_32( tmp_fx, gain_tmp_fx ); // Qx
        move32();
#ifndef OPT_MASA_DEC_V1_NBE
        IF( LT_16( i, intp_count ) )
        {
            v_add_fx( filter_fx, filter_delta_fx, filter_fx, filterlength ); // exp(filter_e)
        }
    }
#else  /* OPT_MASA_DEC_V1_NBE */
        v_add_fx( filter_fx, filter_delta_fx, filter_fx, filterlength ); // exp(filter_e)
    }
    FOR( ; i < subframe_length; i++ )
    {
        tmp64_fx = 0;
        move64();
        p_tmp_fx = p_signal_fx + i; // Qx
        p_filter_fx = filter_fx;    // exp(filter_e)


        FOR( j = 0; j < filterlength; j++ )
        {
            tmp64_fx = W_mac_32_32( tmp64_fx, *p_filter_fx, *p_tmp_fx ); // Qx + (Q31 - filter_e) + 1
            p_filter_fx++;                                               // exp(filter_e)
            p_tmp_fx--;                                                  // Qx
        }

        // This is done to keep the output Q same as input Q for signal
        tmp_fx = W_shl_sat_l( tmp64_fx, shift ); // Qx

        /* Apply linear gain interpolation in case of abrupt gain changes */
        gain_tmp_fx = L_add_sat( gain_tmp_fx, step_fx ); /* Saturating values which just exceeds 1, Q31*/
        signal_fx[i] = Mpy_32_32( tmp_fx, gain_tmp_fx ); // Qx
        move32();
    }
#endif /* OPT_MASA_DEC_V1_NBE */

    return;
}