Commit 291c363a authored by Sandesh Venkatesh
Browse files

LTV for optimizations of hp20

parent 0df52ff5
Loading
Loading
Loading
Loading
Loading
+30 −64
Original line number Diff line number Diff line
@@ -458,14 +458,8 @@ void hp20_fx_32(
{
    Word16 i;
    Word32 a1_fx, a2_fx, b1_fx, b2_fx;
#ifdef OPT_STEREO_32KBPS_V1
    Word16 Qy1, Qy2, Qmin;
    Word64 y0_fx64, y1_fx64, y2_fx64;
    Word32 x0, x1, x2;
#else  /* OPT_STEREO_32KBPS_V1 */
    Word16 Qx0, Qx1, Qx2, Qy1, Qprev_y1, Qy2, Qprev_y2, Qmin;
    Word64 x0_fx64, x1_fx64, x2_fx64, y0_fx64, y1_fx64, y2_fx64, R1, R2, R3, R4, R5;
#endif /* OPT_STEREO_32KBPS_V1 */

    IF( EQ_32( Fs, 8000 ) )
    {
@@ -516,64 +510,16 @@ void hp20_fx_32(
    move32();
    move32();

#ifdef OPT_STEREO_32KBPS_V1
    y1_fx64 = W_add( W_deposit32_l( mem_fx[0] ), W_deposit32_h( mem_fx[1] ) );
    y2_fx64 = W_add( W_deposit32_l( mem_fx[2] ), W_deposit32_h( mem_fx[3] ) );

    x0 = mem_fx[4];
    move32();
    x1 = mem_fx[5];
    move32();
#else  /* OPT_STEREO_32KBPS_V1 */
    Qprev_y1 = extract_l( mem_fx[4] );
    Qprev_y2 = extract_l( mem_fx[5] );
    y1_fx64 = W_deposit32_l( mem_fx[0] );
    y2_fx64 = W_deposit32_l( mem_fx[1] );
    x0_fx64 = W_deposit32_l( mem_fx[2] );
    x1_fx64 = W_deposit32_l( mem_fx[3] );
#endif /* OPT_STEREO_32KBPS_V1 */

    FOR( i = 0; i < lg; i++ )
    {
#ifdef OPT_STEREO_32KBPS_V1
        x2 = x1;
        move32();
        x1 = x0;
        move32();
        x0 = signal_fx[i];
        move32();

        Qy1 = W_norm( y1_fx64 );
        if ( y1_fx64 == 0 )
        {
            Qy1 = 62;
            move16();
        }

        Qy2 = W_norm( y2_fx64 );
        if ( y2_fx64 == 0 )
        {
            Qy2 = 62;
            move16();
        }

        Qmin = s_min( Qy1, Qy2 );

        Qmin = sub( Qmin, 34 );

        y0_fx64 = W_mac_32_32( W_mult_32_32( W_shl_sat_l( y1_fx64, Qmin ), a1_fx ), W_shl_sat_l( y2_fx64, Qmin ), a2_fx ); // Qmin + Q29 + Q30 + 1

        Word64 temp = W_mac_32_32( W_mac_32_32( W_mult_32_32( x2, b2_fx ), x1, b1_fx ), x0, b2_fx ); // Q30
        Word64 y0_fx = W_shr( y0_fx64, add( Qmin, Q30 ) );                                           // Q30
        y0_fx64 = W_add( temp, y0_fx );                                                              // Q30
        signal_fx[i] = W_shl_sat_l( y0_fx64, -Q30 );
        move32();

        y2_fx64 = y1_fx64;
        move64();
        y1_fx64 = y0_fx64;
        move64();
#else  /* OPT_STEREO_32KBPS_V1 */
        x2_fx64 = x1_fx64;
        move64();
        x1_fx64 = x0_fx64;
@@ -587,7 +533,11 @@ void hp20_fx_32(
            move16();
        }
        Qy1 = sub( Qy1, 34 );
#ifdef OPT_STEREO_32KBPS_V1
        R1 = W_mult0_32_32( W_shl_sat_l( y1_fx64, Qy1 ), a1_fx );
#else /* OPT_STEREO_32KBPS_V1 */
        R1 = W_mult0_32_32( W_extract_l( W_shl( y1_fx64, Qy1 ) ), a1_fx );
#endif /* OPT_STEREO_32KBPS_V1 */
        Qy1 = add( Qy1, Qprev_y1 );

        Qy2 = W_norm( y2_fx64 );
@@ -597,7 +547,11 @@ void hp20_fx_32(
            move16();
        }
        Qy2 = sub( Qy2, 34 );
#ifdef OPT_STEREO_32KBPS_V1
        R2 = W_mult0_32_32( W_shl_sat_l( y2_fx64, Qy2 ), a2_fx );
#else /* OPT_STEREO_32KBPS_V1 */
        R2 = W_mult0_32_32( W_extract_l( W_shl( y2_fx64, Qy2 ) ), a2_fx );
#endif /* OPT_STEREO_32KBPS_V1 */
        Qy2 = add( Qy2, Qprev_y2 );

        Qx0 = W_norm( x0_fx64 );
@@ -607,7 +561,9 @@ void hp20_fx_32(
            move16();
        }
        Qx0 = sub( Qx0, 34 );
#ifndef OPT_STEREO_32KBPS_V1
        R3 = W_mult0_32_32( W_extract_l( W_shl( x0_fx64, Qx0 ) ), b2_fx );
#endif /* OPT_STEREO_32KBPS_V1 */

        Qx1 = W_norm( x1_fx64 );
        if ( x1_fx64 == 0 )
@@ -616,7 +572,9 @@ void hp20_fx_32(
            move16();
        }
        Qx1 = sub( Qx1, 34 );
#ifndef OPT_STEREO_32KBPS_V1
        R4 = W_mult0_32_32( W_extract_l( W_shl( x1_fx64, Qx1 ) ), b1_fx );
#endif /* OPT_STEREO_32KBPS_V1 */

        Qx2 = W_norm( x2_fx64 );
        if ( x2_fx64 == 0 )
@@ -625,6 +583,7 @@ void hp20_fx_32(
            move16();
        }
        Qx2 = sub( Qx2, 34 );
#ifndef OPT_STEREO_32KBPS_V1
        R5 = W_mult0_32_32( W_extract_l( W_shl( x2_fx64, Qx2 ) ), b2_fx );

        Qmin = s_min( Qy1, Qy2 );
@@ -632,20 +591,37 @@ void hp20_fx_32(
        y0_fx64 = W_add( W_shr( R1, sub( Qy1, Qmin ) ), W_shr( R2, sub( Qy2, Qmin ) ) );

        Qmin = s_min( Qmin, Qx0 );
#else /* OPT_STEREO_32KBPS_V1 */

        Word16 Qy = s_min(Qy1, Qy2);

        y0_fx64 = W_add(W_shr(R1, sub(Qy1, Qy)), W_shr(R2, sub(Qy2, Qy)));

        Qmin = s_min( Qy, Qx0 );
#endif /* OPT_STEREO_32KBPS_V1 */
        Qmin = s_min( Qmin, Qx1 );
        Qmin = s_min( Qmin, Qx2 );

#ifdef OPT_STEREO_32KBPS_V1
        R3 = W_mult0_32_32( W_shl_sat_l( x0_fx64, Qmin ), b2_fx );
        R4 = W_mult0_32_32( W_shl_sat_l( x1_fx64, Qmin ), b1_fx );
        R5 = W_mult0_32_32( W_shl_sat_l( x2_fx64, Qmin ), b2_fx );
        y0_fx64 = W_add( W_shr( y0_fx64, sub( Qy, Qmin ) ), W_add(R3, W_add( R4, R5 ) ) );
#else /* OPT_STEREO_32KBPS_V1 */
        y0_fx64 = W_add( W_shr( y0_fx64, sub( s_min( Qy1, Qy2 ), Qmin ) ), W_add( W_shr( R3, sub( Qx0, Qmin ) ), W_add( W_shr( R4, sub( Qx1, Qmin ) ), W_shr( R5, sub( Qx2, Qmin ) ) ) ) );
#endif /* OPT_STEREO_32KBPS_V1 */

        y0_fx64 = W_shr( y0_fx64, 29 );

        signal_fx[i] = W_extract_l( W_shr( y0_fx64, Qmin ) );
        move32();
#ifndef OPT_STEREO_32KBPS_V1
        IF( signal_fx[i] < 0 )
        {
            signal_fx[i] = L_add( signal_fx[i], 1 );
            move32();
        }
#endif /* OPT_STEREO_32KBPS_V1 */

        y2_fx64 = y1_fx64;
        y1_fx64 = y0_fx64;
@@ -655,17 +631,8 @@ void hp20_fx_32(
        move64();
        move16();
        move16();
#endif /* OPT_STEREO_32KBPS_V1 */
    }

#ifdef OPT_STEREO_32KBPS_V1
    mem_fx[0] = W_extract_l( y1_fx64 );
    mem_fx[1] = W_extract_h( y1_fx64 );
    mem_fx[2] = W_extract_l( y2_fx64 );
    mem_fx[3] = W_extract_h( y2_fx64 );
    mem_fx[4] = x0;
    mem_fx[5] = x1;
#else  /* OPT_STEREO_32KBPS_V1 */
    Qy1 = W_norm( y1_fx64 );
    test();
    IF( y1_fx64 != 0 && LT_16( Qy1, 32 ) )
@@ -688,7 +655,6 @@ void hp20_fx_32(
    mem_fx[3] = W_extract_l( x1_fx64 );
    mem_fx[4] = Qprev_y1;
    mem_fx[5] = Qprev_y2;
#endif /* OPT_STEREO_32KBPS_V1 */

    move32();
    move32();
+23 −8
Original line number Diff line number Diff line
@@ -128,18 +128,26 @@ void init_lvq_fx(
    FOR( i = 0; i < MAX_NO_MODES; i++ )
    {
#ifdef OPT_STEREO_32KBPS_V1
        FOR( ( j = 0, k = 0 ); j < MAX_NO_SCALES; ( j++, k++ ) )
        FOR( ( j = 0, k = 0 ); j < MAX_NO_SCALES; j++ )
        {
            if ( ( no_lead_fx[i][j] <= 0 ) )
            if ( no_lead_fx[i][j] > 0 )
            {
                j = MAX_NO_SCALES;
                k++;
            }
            if ( no_lead_fx[i][j] <= 0 )
            {
                j = MAX_NO_SCALES - 1;
            }
        }
        no_scales[i][0] = k;
        move16();

        FOR( k = 0; j < MAX_NO_SCALES << 1; ( j++, k++ ) )
        FOR( k = 0; j < MAX_NO_SCALES << 1; j++ )
        {
            if ( no_lead_fx[i][j] > 0 )
            {
                k++;
            }
            if ( no_lead_fx[i][j] <= 0 )
            {
                j = MAX_NO_SCALES << 1;
@@ -172,19 +180,26 @@ void init_lvq_fx(
    FOR( i = 0; i < MAX_NO_MODES_p; i++ )
    {
#ifdef OPT_STEREO_32KBPS_V1
        FOR( ( j = 0, k = 0 ); j < MAX_NO_SCALES; ( j++, k++ ) )
        FOR( ( j = 0, k = 0 ); j < MAX_NO_SCALES; j++ )
        {

            if ( no_lead_p_fx[i][j] > 0 )
            {
                k++;
            }
            if ( ( no_lead_p_fx[i][j] <= 0 ) )
            {
                j = MAX_NO_SCALES;
                j = MAX_NO_SCALES - 1;
            }
        }
        no_scales_p[i][0] = k;
        move16();

        FOR( k = 0; j < MAX_NO_SCALES << 1; ( j++, k++ ) )
        FOR( k = 0; j < MAX_NO_SCALES << 1; j++ )
        {
            if ( no_lead_p_fx[i][j] > 0 )
            {
                k++;
            }

            if ( ( no_lead_p_fx[i][j] <= 0 ) )
            {
+1 −1
Original line number Diff line number Diff line
@@ -68,7 +68,7 @@
#endif

/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
//#define OPT_STEREO_32KBPS_V1                    /* Optimization made in stereo decoding path for 32kbps decoding */
#define OPT_STEREO_32KBPS_V1                    /* Optimization made in stereo decoding path for 32kbps decoding */
#define OPT_AVOID_STATE_BUF_RESCALE             /* Optimization made to avoid rescale of synth state buffer */
#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx                 /*FhG: WMOPS tuning, nonbe*/
#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot   /*FhG: WMOPS tuning, nonbe*/
+35 −4
Original line number Diff line number Diff line
@@ -666,14 +666,45 @@ ivas_error front_vad_spar_fx(

        old_pitch = st->pitch[1];
        move16();
        Scale_sig( wsp_fx, 368, sub( Q8, Q_inp_12k8 ) ); // Q8
        pitch_ol_ivas_fx( st->pitch, st->voicing_fx, &st->old_pitch, &st->old_corr_fx, corr_shift_fx, &st->old_thres_fx, &st->delta_pit, st->old_wsp2_fx, wsp_fx, st->mem_decim2_fx, relE_fx, st->clas, st->input_bwidth, st->Opt_SC_VBR, Q8 );
        Word16 shift, Q_wsp;
        Word16 shift1 = norm_arr( old_wsp_fx, L_WSP_MEM );
        Word16 shift2 = norm_arr( wsp_fx, L_WSP - L_WSP_MEM );
        maximum_abs_16_fx( old_wsp_fx, L_WSP_MEM, &shift );
        if ( !shift )
        {
            shift1 = Q15;
            move16();
        }
        maximum_abs_16_fx( wsp_fx, L_WSP - L_WSP_MEM, &shift );
        if ( !shift )
        {
            shift2 = Q15;
            move16();
        }

        shift = s_min( Q15, s_min( add( Q_inp_12k8, shift1 ), add( Q_inp_12k8, shift2 ) ) );
        shift = s_min( shift, add( norm_arr( st->mem_decim2_fx, 3 ), st->Q_old_wsp2 ) );
        shift = s_min( shift, add( norm_arr( st->old_wsp2_fx, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM ), st->Q_old_wsp2 ) );

        scale_sig( old_wsp_fx, L_WSP_MEM, sub( shift, Q_inp_12k8 ) );
        scale_sig( wsp_fx, L_WSP - L_WSP_MEM, sub( shift, Q_inp_12k8 ) );

        Q_wsp = shift;
        move16();

        scale_sig( st->mem_decim2_fx, 3, sub( Q_wsp, st->Q_old_wsp2 ) );                                    // Q( mem_decim ) = Q( old_wsp2 )
        scale_sig( st->old_wsp2_fx, ( L_WSP_MEM - L_INTERPOL ) / OPL_DECIM, sub( Q_wsp, st->Q_old_wsp2 ) ); // Q_wsp

        st->Q_old_wsp2 = Q_wsp;
        move16();

        pitch_ol_ivas_fx( st->pitch, st->voicing_fx, &st->old_pitch, &st->old_corr_fx, corr_shift_fx, &st->old_thres_fx, &st->delta_pit, st->old_wsp2_fx, wsp_fx, st->mem_decim2_fx, relE_fx, st->clas, st->input_bwidth, st->Opt_SC_VBR, Q_wsp );

        /* Updates for adaptive lag window memory */
        st->old_pitch_la = st->pitch[2]; /* Q0 */
        move16();
        Scale_sig( wsp_fx, 368, Q9 - Q8 ); /* Q9 */
        StableHighPitchDetect_ivas_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, Q12, st->lgBin_E_fx );

        StableHighPitchDetect_ivas_fx( &flag_spitch, st->pitch, st->voicing_fx, wsp_fx, st->localVAD, &st->voicing_sm_fx, &st->voicing0_sm_fx, &st->LF_EnergyRatio_sm_fx, &st->predecision_flag, &st->diff_sm_fx, &st->energy_sm_fx, Q_wsp, st->lgBin_E_fx );
        IF( st->hSpMusClas != NULL )
        {
            Word16 dummy_int;
+15 −16
Original line number Diff line number Diff line
@@ -230,6 +230,7 @@ static void ivas_binaural_reverb_setReverbTimes_fx(
    Word16 tmp, tmp_exp, scale, tmpVal_exp, attenuationFactorPerSample_exp, attenuationFactorPerSampleSq_exp, energyBuildup_exp, currentEnergy_exp, intendedEnergy_exp, actualizedEnergy_exp;
    Word16 sine_inp, norm, div_exp1, div1, sine, binCenterFreq_exp;
    Word16 reverb_exp = 0;
    Word32 tmp32;
    move16();

    hReverb->binRend_RandNext = (UWord16) BIN_REND_RANDOM_SEED;
@@ -376,14 +377,13 @@ static void ivas_binaural_reverb_setReverbTimes_fx(

                UWord16 ret_binRend = binRend_rand( hReverb );

                tmp = BASOP_Util_Divide3232_Scale( ret_binRend, PCM16_TO_FLT_FAC_FX, &tmp_exp );
                L_tmp = BASOP_Util_Add_Mant32Exp( L_deposit_h( tmp ), tmp_exp, L_negate( 1073741824 ), 0, &exp );
                L_tmp = Mpy_32_32( L_tmp, 214748364 ); // exp + 0
                tmp32 = BASOP_Util_Divide3232_Scale_cadence( ret_binRend, PCM16_TO_FLT_FAC_FX, &tmp_exp );
                L_tmp = BASOP_Util_Add_Mant32Exp( tmp32, tmp_exp, L_negate( 1073741824 ), 0, &exp );
                L_tmp = Mpy_32_32( L_tmp, 214748365 ); // exp + 0
                L_tmp = BASOP_Util_Add_Mant32Exp( L_tmp, exp, currentEnergy_fx, currentEnergy_exp, &exp );
                energyBuildup_fx = BASOP_Util_Add_Mant32Exp( energyBuildup_fx, energyBuildup_exp, L_tmp, exp, &energyBuildup_exp );
                IF( energyBuildup_fx >= 0 ) /* A new filter tap is added at this condition */
                {
                    IF( ( BASOP_Util_Cmp_Mant32Exp( energyBuildup_fx, energyBuildup_exp, 1, 31 ) > 0 ) )

                IF( ( BASOP_Util_Cmp_Mant32Exp( energyBuildup_fx, energyBuildup_exp, 1, 31 ) >= 0 ) )
                {
                    /* Four efficient phase operations: n*pi/2, n=0,1,2,3 */
                    hReverb->tapPhaseShiftType[bin][ch][tap] = (Word16) ( binRend_rand( hReverb ) % 4 );
@@ -399,7 +399,6 @@ static void ivas_binaural_reverb_setReverbTimes_fx(

                    actualizedEnergy_fx = BASOP_Util_Add_Mant32Exp( actualizedEnergy_fx, actualizedEnergy_exp, 1073741824, 1, &actualizedEnergy_exp );
                }
                }

                currentEnergy_fx = BASOP_Util_Add_Mant32Exp( currentEnergy_fx, currentEnergy_exp, 0, 0, &currentEnergy_exp );
                currentEnergy_fx = Mpy_32_32( currentEnergy_fx, attenuationFactorPerSampleSq_fx );