Commit 2d6e3ac7 authored by Fabian Bauer's avatar Fabian Bauer
Browse files

Merge branch...

Merge branch '1439-complexity-overhead-stereo-to-stereo-32kbps-fb-float-vs-basop-nonBE' of ssh://forge.3gpp.org:29419/sa4/audio/ivas-basop into 1439-complexity-overhead-stereo-to-stereo-32kbps-fb-float-vs-basop-nonBE - maybe it doesnt work
parents eeb01697 e1d19cec
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -72,6 +72,11 @@
#define FIX_1378_ACELP_OUT_OF_BOUNDS

/* Note: each compile switch (FIX_1101_...) is independent from the other ones */

#define DIV32_OPT_NEWTON                               /* FhG: faster 32 by 32 bit division */ 
#define	MERGE_REQ


//#define OPT_STEREO_32KBPS_V1                    /* Optimization made in stereo decoding path for 32kbps decoding */
#define OPT_AVOID_STATE_BUF_RESCALE             /* Optimization made to avoid rescale of synth state buffer */
#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx                 /*FhG: WMOPS tuning, nonbe*/
+149 −25
Original line number Diff line number Diff line
@@ -6881,11 +6881,18 @@ void elliptic_bpf_48k_generic_fx(
#else
    FOR( i = 0; i < 4; i++ )
    {
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
        memory_fx0 = extract_l( memory_fx2[0][i] );
        input_fx[i - 4] = shl_sat( memory_fx0, sub( *Q_input_fx, memory_fx_Q[0] ) );
#else
        memory_fx0[0][i] = extract_l( memory_fx2[0][i] );
        memory_fx[0][i] = shl_sat( memory_fx0[0][i], sub( *Q_input_fx, memory_fx_Q[0] ) );
        memory2_fx[1][i] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) );
        memory2_fx[2][i] = L_shl_sat( memory_fx2[2][i], sub( add( *Q_input_fx, 6 ), memory_fx_Q[2] ) );
        memory2_fx[3][i] = L_shl_sat( memory_fx2[3][i], sub( add( *Q_input_fx, 1 ), memory_fx_Q[3] ) );
#endif
        L_tmp[i - 4] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) );
        L_tmp2[i - 4] = L_shl_sat( memory_fx2[2][i], sub( add( *Q_input_fx, 6 ), memory_fx_Q[2] ) );
        // memory2_fx[3][i] = L_shl_sat( memory_fx2[3][i], sub( add( *Q_input_fx, 1 ), memory_fx_Q[3] ) );
        move32();
        move32();
        move32();
        move32();
        move32();
@@ -7020,6 +7027,37 @@ void elliptic_bpf_48k_generic_fx(
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][1], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[0] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][0], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();

            L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
            i++;

    L_tmpX = L_shr( L_mult( memory_fx[0][2], full_band_bpf_fx[0][4] ), 3 );                                 /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( memory_fx[0][3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );                /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[0], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );                    /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );                    /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[2], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );                    /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[1], full_band_bpf_fx[3][1] ), 2 ) );           /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[0], full_band_bpf_fx[3][2] ), 2 ) );           /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[2] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][2], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();

    L_tmpX = L_shr( L_mult( memory_fx[0][3], full_band_bpf_fx[0][4] ), 3 );                                 /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[0], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );                    /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[1], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );                    /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[2], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );                    /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[3], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );                    /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[2], full_band_bpf_fx[3][1] ), 2 ) );           /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[1], full_band_bpf_fx[3][2] ), 2 ) );           /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[0], full_band_bpf_fx[3][3] ), 2 ) );           /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][3], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();

    L_tmpX = L_shr( L_mult( memory_fx[0][1], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
@@ -7028,9 +7066,9 @@ void elliptic_bpf_48k_generic_fx(
    L_tmpX = L_add( L_shr( L_mult( input_fx[0], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );                /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( input_fx[1], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );                /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[0], full_band_bpf_fx[3][1] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][3], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][2], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[1] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][1], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[3 - 4], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[2 - 4], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[1] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[1 - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();

    L_tmpX = L_shr( L_mult( memory_fx[0][2], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
@@ -7040,8 +7078,8 @@ void elliptic_bpf_48k_generic_fx(
    L_tmpX = L_add( L_shr( L_mult( input_fx[2], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );                /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[1], full_band_bpf_fx[3][1] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[0], full_band_bpf_fx[3][2] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[2] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][2], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[3 - 4], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[2] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[2 - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();

    L_tmpX = L_shr( L_mult( memory_fx[0][3], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
@@ -7052,8 +7090,10 @@ void elliptic_bpf_48k_generic_fx(
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[2], full_band_bpf_fx[3][1] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[1], full_band_bpf_fx[3][2] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[0], full_band_bpf_fx[3][3] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[1][3], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[3 - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();


    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
@@ -7168,19 +7208,19 @@ void elliptic_bpf_48k_generic_fx(
    L_tmp2[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[2][3], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */ /*14 + Q_input_fx - shift_flag*/
    move32();
    L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[3] ) );

    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[1][4] ), 3 );                              /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[1][3] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[4][1] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[1][2] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 2], full_band_bpf_fx[4][2] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[1][1] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[4][3] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i], full_band_bpf_fx[1][0] ), 3 ), L_tmpX );             /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmp2[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
        L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) );
    }
#endif

@@ -7258,12 +7298,51 @@ void elliptic_bpf_48k_generic_fx(
#else  /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic*/
    FOR( j = 0; j < 4; j++ )
    {
        memory2_fx_2[j] = L_shl_sat( memory_fx2[2][j], sub( add( add( *Q_input_fx, 6 ), Q_temp ), memory_fx_Q[2] ) );
        memory2_fx_3[j] = L_shl_sat( memory_fx2[3][j], sub( add( add( *Q_input_fx, 1 ), Q_temp ), memory_fx_Q[3] ) );
        L_tmp2[j - 4] = L_shl_sat( memory_fx2[2][j], sub( add( add( *Q_input_fx, 6 ), Q_temp ), memory_fx_Q[2] ) );
        L_output[j - 4] = L_shl_sat( memory_fx2[3][j], sub( add( add( *Q_input_fx, 1 ), Q_temp ), memory_fx_Q[3] ) );
        move32();
        move32();
        move32();
    }
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
    L_tmpMax = L_add( 0, 0 );
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_func1 /*use subfunc*/
    elliptic_bpf_48k_generic_func1( L_tmp2, L_output, &full_band_bpf_fx[2], 0, &L_tmpMax );
#else
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        W_tmpX = W_mac_32_16( 0, L_tmp2[i - 4], full_band_bpf_fx[2][4] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 3], full_band_bpf_fx[2][3] );
        W_tmpY = W_msu_32_16( 0, L_output[i - 1], full_band_bpf_fx[5][1] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 2], full_band_bpf_fx[2][2] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 2], full_band_bpf_fx[5][2] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 1], full_band_bpf_fx[2][1] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 3], full_band_bpf_fx[5][3] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i], full_band_bpf_fx[2][0] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 4], full_band_bpf_fx[5][4] );
        L_output[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) );
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
    }
#else  /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[2][4] ), 3 );                            /*Q_input_fx + 6 +Q_temp+13 -15 -3 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[2][3] ), 3 ), L_tmpX );       /*Q_input_fx + 6 +Q_temp +13 -15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 1], full_band_bpf_fx[5][1] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/

        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 2], full_band_bpf_fx[2][2] ), 3 ), L_tmpX );       /*Q_input_fx + 6 +Q_temp+13 -15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 2], full_band_bpf_fx[5][2] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/

        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[2][1] ), 3 ), L_tmpX );       /*Q_input_fx + 6 +Q_temp +13 -15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 3], full_band_bpf_fx[5][3] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/

        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i], full_band_bpf_fx[2][0] ), 3 ), L_tmpX );                /*Q_input_fx + 6 +Q_temp +13 -15 -3*/
        L_output[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 4], full_band_bpf_fx[5][4] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
    }

    L_tmpX = L_shr( Mult_32_16( memory2_fx_2[0], full_band_bpf_fx[2][4] ), 3 );                               /* *Q_input_fx+6 +Q_temp +13 -15 -3 */
    L_tmpX = L_add_sat( L_shr( Mult_32_16( memory2_fx_2[1], full_band_bpf_fx[2][3] ), 3 ), L_tmpX );          /*Q_input_fx + 6 +Q_temp+13 -15 -3*/
@@ -7505,6 +7584,50 @@ void synthesise_fb_high_band_fx(
    tmp3 = add( sub( Qout, add( sub( 1, exp ), exp_tmp ) ), 16 ); /*Qout - (1 -exp +exp_tmp) + 16 */
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
#ifdef FIX_1439_SPEEDUP_synthesise_fb_high_band_fx
        L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */
        Word32 L_tmp32;
        Word16 tmp16;

        // if (L_tmp < 0)
        if ( L_tmp < 0 )
        {
            L_tmp32 = L_negate( L_tmp );
        }
        if ( L_tmp < 0 )
        {
            L_tmp32 = L_shl_sat( L_tmp32, tmp3 );
        }
        if ( L_tmp < 0 )
        {
            tmp16 = extract_h( L_tmp32 );
        }
        if ( L_tmp < 0 )
        {
            tmp16 = negate( tmp16 );
        }

        // if (L_tmp == 0)
        if ( L_tmp == 0 )
        {
            tmp16 = 0;
            move16();
        }

        // if (L_tmp > 0)
        if ( L_tmp > 0 )
        {
            L_tmp32 = L_shl_sat( L_tmp, tmp3 );
        }
        if ( L_tmp > 0 )
        {
            tmp16 = extract_h( L_tmp32 );
        }

        output[i] = tmp16;
        move16();

#else
        L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */
        IF( L_tmp < 0 )
        {
@@ -7516,6 +7639,7 @@ void synthesise_fb_high_band_fx(
            output[i] = extract_h( L_shl_sat( L_tmp, tmp3 ) ); /*Qout*/
            move16();
        }
#endif
    }
    pop_wmops(); /*push_wmops( "SYNTHESISE_FB_HIGH_BAND PART B" );*/
    return;
+24 −0
Original line number Diff line number Diff line
@@ -794,13 +794,37 @@ void Copy_Scale_sig_16_32_no_sat(
        }
        return;
    }
#ifdef FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat
    L_tmp = L_shl_o( 1, exp0 - 1, &Overflow );

    IF( L_tmp >= 0x7FFF )
    {
        FOR( i = 0; i < lg; i++ )
        {
            // y[i] = L_mult0(x[i], L_tmp);
            y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) );
            move32(); /* Overflow can occur here */
        }
        return;
    }
    // ELSE
    {
        Word16 tmp = extract_l( L_tmp );
        FOR( i = 0; i < lg; i++ )
        {
            y[i] = L_mult( x[i], tmp );
            move32();
        }
    }
#else
    L_tmp = L_shl_o( 1, exp0 - 1, &Overflow );
    FOR( i = 0; i < lg; i++ )
    {
        // y[i] = L_mult0(x[i], L_tmp);
        y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) );
        move32(); /* Overflow can occur here */
    }
#endif
}

void Copy_Scale_sig_32_16(
+10 −0
Original line number Diff line number Diff line
@@ -904,6 +904,9 @@ void stereo_icBWE_dec_fx(
    winSlope_fx = div_s( 1, winLen_fx );                       /* Q15 */
    alpha_fx = winSlope_fx;                                    /* Q15 */
    move16();
#ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx
    Word16 winSlope_fx_ = sub( 32767 /* 1.0 in Q15*/, winSlope_fx );
#endif
    FOR( i = 0; i < winLen_fx; i++ )
    {
        L_tmp = L_mult0( alpha_fx, icbweM2Ref_fx );                                                        /* Q29 */
@@ -911,10 +914,17 @@ void stereo_icBWE_dec_fx(
        tmp = shl( round_fx( L_tmp ), 1 );                                                                 /* Q14 */
        synthRef_fx[i] = Mpy_32_16_1( synthRef_fx[i], tmp );                                               /* Qsyn - 1 */
        move32();
#ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx
        if ( LE_16( alpha_fx, winSlope_fx_ ) )
        {
            alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */
        }
#else
        IF( LE_16( alpha_fx, sub( 32767 /* 1.0 in Q15*/, winSlope_fx ) ) )
        {
            alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */
        }
#endif
    }

    FOR( ; i < NS2SA_FX2( st->output_Fs, FRAME_SIZE_NS ); i++ )
+5 −0
Original line number Diff line number Diff line
@@ -7047,12 +7047,17 @@ void ivas_swb_tbe_dec_fx(

            FOR( i = 0; i < L_FRAME16k; i++ )
            {
#ifndef FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx
                Word16 idx = 0;
                move16();
                IF( i != 0 )
                {
                    idx = idiv1616( i_mult( NUM_SHB_SUBFR, i ), L_FRAME16k );
                }
#else
                Word16 idx;
                idx = extract_h( imult3216( idx32, i ) ); /*Q0*/
#endif
                L_tmp1 = Mult_32_16( L_tmp, GainShape_fx[idx] );                           /* Q : 18 + tmp +15 -15*/
                White_exc16k_fx[i] = round_fx( Mult_32_16( L_tmp1, White_exc16k_fx[i] ) ); /* 18 + tmp +*Q_white_exc -15 -16 */
                move16();