Commit 3ad19ded authored by Fabian Bauer's avatar Fabian Bauer
Browse files

introduced STAGE2, but still not functional and inactive - implemented speedup...

introduced STAGE2, but still not functional and inactive - implemented speedup for STAGE0 and activated
parent 834a5383
Loading
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -83,8 +83,9 @@
#define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat            /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic              /*FhG: reduces maintenance complexity & reduces WMOPS & prepares another speedup patch*/

#define FIX_1439_SPEEDUP_synthesise_fb_high_band_fx             /*FhG: reduces WMOPS - bit-exact*/

#define FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic              /*FhG: reduces maintenance complexity & reduces WMOPS & prepares STAGE2 patch*/
//#define FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2       /*FhG: reduces WMOPS* /

#endif
+4 −0
Original line number Diff line number Diff line
@@ -3258,7 +3258,11 @@ void interp_code_4over2_fx(
void wb_tbe_extras_reset_synth_fx( Word16 state_lsyn_filt_shb[], Word16 state_lsyn_filt_dwn_shb[], Word16 state_32and48k_WB_upsample[], Word16 state_resamp_HB[] );
void elliptic_bpf_48k_generic_fx(
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    int isIVAS;
#endif
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
    Word16 IsUpsampled3,
    Word16 input_fx[], /* i  : input signal                             Q_input_fx*/
#else
    const Word16 input_fx[], /* i  : input signal                             Q_input_fx*/
+116 −11
Original line number Diff line number Diff line
@@ -6695,7 +6695,11 @@ void wb_tbe_extras_reset_synth_fx(
 *-------------------------------------------------------------------*/

void elliptic_bpf_48k_generic_fx(
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    int isIVAS;
#endif
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
    Word16 IsUpsampled3,
    Word16 input_fx[], /* i  : input signal                             Q_input_fx*/
#else
    const Word16 input_fx[], /* i  : input signal                             Q_input_fx*/
@@ -6742,6 +6746,26 @@ void elliptic_bpf_48k_generic_fx(
    }

#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        Word64 W_tmpX;
        Word64 W_tmpY;
        W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] );
        W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] );
        W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] );
        W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] );
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] );
        W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] );
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] );
        W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] );
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] );
        L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/
        move32();
    }
#else
    IF( !IsUpsampled3 )
    {
        FOR( i = 0; i < L_FRAME48k; i++ )
        {
            L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
@@ -6755,6 +6779,24 @@ void elliptic_bpf_48k_generic_fx(
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
        }
    }
    ELSE
    {
        FOR( i = 0; i < L_FRAME48k; i++ )
        {
            L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
            //L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            //L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            //L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
        }
    }
#endif
#else
    L_tmpX = L_shr( L_mult( memory_fx[0][0], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( memory_fx[0][1], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
@@ -6827,6 +6869,26 @@ void elliptic_bpf_48k_generic_fx(
    move32();

#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        Word64 W_tmpX;
        Word64 W_tmpY;
        W_tmpX = W_mac_32_16( 0, L_tmp[i - 4], full_band_bpf_fx[1][4] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i - 3], full_band_bpf_fx[1][3] );
        W_tmpY = W_msu_32_16( 0, L_tmp2[i - 1], full_band_bpf_fx[4][1] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i - 2], full_band_bpf_fx[1][2] );
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 2], full_band_bpf_fx[4][2] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i - 1], full_band_bpf_fx[1][1] );
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 3], full_band_bpf_fx[4][3] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i], full_band_bpf_fx[1][0] );
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 4], full_band_bpf_fx[4][4] );
        // L_tmp2[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 + 16 ), W_shl( W_tmpY, 2 - 16 ) ) );
        L_tmp2[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) );
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) );
    }
#else
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[1][4] ), 3 );                              /*Q_input_fx + 11 + 13 - 15 -3*/
@@ -6841,6 +6903,7 @@ void elliptic_bpf_48k_generic_fx(
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) );
    }
#endif
#else
    L_tmpX = L_shr( Mult_32_16( memory2_fx[1][0], full_band_bpf_fx[1][4] ), 3 );                             /*Q_input_fx + 11 + 13 - 15 -3*/
    L_tmpX = L_add( L_shr( Mult_32_16( memory2_fx[1][1], full_band_bpf_fx[1][3] ), 3 ), L_tmpX );            /*Q_input_fx + 11 + 13 - 15 -3*/
@@ -6925,6 +6988,30 @@ void elliptic_bpf_48k_generic_fx(
        move32();
    }
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    FOR( 0 = 4; i < L_FRAME48k; i++ )
    {
        Word64 W_tmpX;
        Word64 W_tmpY;

        W_tmpX = W_mac_32_16( 0, L_tmp2[i - 4], full_band_bpf_fx[2][4] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 3], full_band_bpf_fx[2][3] );
        W_tmpY = W_msu_32_16( 0, L_output[i - 1], full_band_bpf_fx[5][1] );

        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 2], full_band_bpf_fx[2][2] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 2], full_band_bpf_fx[5][2] );

        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 1], full_band_bpf_fx[2][1] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 3], full_band_bpf_fx[5][3] );

        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i], full_band_bpf_fx[2][0] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 4], full_band_bpf_fx[5][4] );
        // L_output[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 + 16 ), W_shl( W_tmpY, 2 - 16 ) ) );
        L_output[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) );
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
    }
#else
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[2][4] ), 3 );                            /*Q_input_fx + 6 +Q_temp+13 -15 -3 */
@@ -6942,6 +7029,7 @@ void elliptic_bpf_48k_generic_fx(
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
    }
#endif
#else
    L_tmpX = L_shr( Mult_32_16( memory2_fx_2[0], full_band_bpf_fx[2][4] ), 3 );                               /* *Q_input_fx+6 +Q_temp +13 -15 -3 */
    L_tmpX = L_add_sat( L_shr( Mult_32_16( memory2_fx_2[1], full_band_bpf_fx[2][3] ), 3 ), L_tmpX );          /*Q_input_fx + 6 +Q_temp+13 -15 -3*/
@@ -7108,12 +7196,29 @@ void synthesise_fb_high_band_fx(
    IF( EQ_16( L_frame, L_FRAME16k ) )
    {
        /* for 16kHz ACELP core */
        elliptic_bpf_48k_generic_fx( excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_3_fx );
        elliptic_bpf_48k_generic_fx(
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
            1 // isIVAS
#endif
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
            1, // IsUpsampled3
#endif
            excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_3_fx

        );
    }
    ELSE
    {
        /* for 12.8kHz ACELP core */
        elliptic_bpf_48k_generic_fx( excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_1_fx );
        elliptic_bpf_48k_generic_fx(
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
            1 // isIVAS
#endif
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
            1, // IsUpsampled3
#endif
            excitation_in_interp3, &exp_tmp, tmp, bpf_memory, bpf_memory_Q, full_band_bpf_1_fx
        );
    }
    temp1 = sum2_fx_mod( tmp, L_FRAME48k );

+22 −3
Original line number Diff line number Diff line
@@ -7330,7 +7330,14 @@ void fb_tbe_enc_fx(

    Copy_Scale_sig( new_input, input_fhb, L_FRAME48k, exp_temp );

    elliptic_bpf_48k_generic_fx( input_fhb, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx );
    elliptic_bpf_48k_generic_fx(
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
        1 // isIVAS
#endif
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
        0, // IsUpsampled3
#endif
        input_fhb, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx );
    Sample_Delay_HP = NS2SA( 48000, ACELP_LOOK_NS + DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) - L_FRAME48k / 2;

    IF( NE_16( st->last_extl, FB_TBE ) )
@@ -7459,11 +7466,23 @@ void fb_tbe_enc_ivas_fx(

    IF( EQ_16( st->element_mode, IVAS_CPE_DFT ) )
    {
        elliptic_bpf_48k_generic_fx( input_fhb_new, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx );
        elliptic_bpf_48k_generic_fx( 
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
        1 // isIVAS
#endif
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
        0, // IsUpsampled3
#endif
          input_fhb_new, &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx );
    }
    ELSE
    {
        elliptic_bpf_48k_generic_fx( input_fhb_new + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ), &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx );
        elliptic_bpf_48k_generic_fx( input_fhb_new + NS2SA( 48000, DELAY_FIR_RESAMPL_NS ), &exp_temp, tmp_vec, hBWE_TD->elliptic_bpf_2_48k_mem_fx, hBWE_TD->elliptic_bpf_2_48k_mem_fx_Q, full_band_bpf_2_fx 
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    , 1 // IsUpsampled3
    , 1 // isIVAS
#endif 
        );
    }

    test();