Commit ada8a986 authored by Fabian Bauer
Browse files

Integrate the loops into both the if and else branches of the speedup conditional

parent 3de3ae52
Loading
Loading
Loading
Loading
Loading
+76 −27
Original line number Diff line number Diff line
@@ -6784,6 +6784,20 @@ void elliptic_bpf_48k_generic_fx(
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[0], full_band_bpf_fx[3][3] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[3 - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();

    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
        L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
        move32();
    }
#else
    L_tmpX = L_shr( L_mult( memory_fx[0][0], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
    L_tmpX = L_add( L_shr( L_mult( memory_fx[0][1], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
@@ -6828,9 +6842,8 @@ void elliptic_bpf_48k_generic_fx(
    L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[0], full_band_bpf_fx[3][3] ), 2 ) );       /*Q_input_fx + 11 + 13  -15 +2*/
    L_tmp[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[3 - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
    move32();
#endif

    {

    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
@@ -6844,7 +6857,7 @@ void elliptic_bpf_48k_generic_fx(
        L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
        move32();
    }
    }
#endif

    memory_fx2[0][0] = input_fx[L_FRAME48k - 4];
    memory_fx2[0][1] = input_fx[L_FRAME48k - 3];
@@ -6901,6 +6914,21 @@ void elliptic_bpf_48k_generic_fx(
    L_tmp2[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[3 - 4], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */ /*14 + Q_input_fx - shift_flag*/
    move32();
    L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[3] ) );

    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[1][4] ), 3 );                              /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[1][3] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[4][1] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[1][2] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 2], full_band_bpf_fx[4][2] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[1][1] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[4][3] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i], full_band_bpf_fx[1][0] ), 3 ), L_tmpX );             /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmp2[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) );
    }
#else
    L_tmpX = L_shr( Mult_32_16( memory2_fx[1][0], full_band_bpf_fx[1][4] ), 3 );                             /*Q_input_fx + 11 + 13 - 15 -3*/
    L_tmpX = L_add( L_shr( Mult_32_16( memory2_fx[1][1], full_band_bpf_fx[1][3] ), 3 ), L_tmpX );            /*Q_input_fx + 11 + 13 - 15 -3*/
@@ -6946,21 +6974,22 @@ void elliptic_bpf_48k_generic_fx(
    L_tmp2[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( memory2_fx[2][3], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */ /*14 + Q_input_fx - shift_flag*/
    move32();
    L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[3] ) );
#endif

    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[1][4] ), 3 );                              /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[1][3] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[4][1] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[1][2] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 2], full_band_bpf_fx[4][2] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[1][1] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[4][3] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i], full_band_bpf_fx[1][0] ), 3 ), L_tmpX );             /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmp2[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */
        L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 2], full_band_bpf_fx[0][2] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 2], full_band_bpf_fx[3][2] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 1], full_band_bpf_fx[0][1] ), 3 ), L_tmpX );        /*Q_input_fx + 13 + 1 - 3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[3][3] ), 2 ) );   /*Q_input_fx + 11 + 13  -15 +2*/
        L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i], full_band_bpf_fx[0][0] ), 3 ), L_tmpX );            /*Q_input_fx + 13 + 1 - 3*/
        L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) );
    }
#endif


    Q_temp = norm_l( L_tmpMax );
    Q_temp = sub( Q_temp, 4 );
@@ -7031,6 +7060,24 @@ void elliptic_bpf_48k_generic_fx(
    L_output[3] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[3 - 4], full_band_bpf_fx[5][4] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/
    move32();
    L_tmpMax = L_max( L_tmpMax, L_abs( L_output[3] ) );

    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[2][4] ), 3 );                            /*Q_input_fx + 6 +Q_temp+13 -15 -3 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[2][3] ), 3 ), L_tmpX );       /*Q_input_fx + 6 +Q_temp +13 -15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 1], full_band_bpf_fx[5][1] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/

        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 2], full_band_bpf_fx[2][2] ), 3 ), L_tmpX );       /*Q_input_fx + 6 +Q_temp+13 -15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 2], full_band_bpf_fx[5][2] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/

        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[2][1] ), 3 ), L_tmpX );       /*Q_input_fx + 6 +Q_temp +13 -15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 3], full_band_bpf_fx[5][3] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/

        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i], full_band_bpf_fx[2][0] ), 3 ), L_tmpX );                /*Q_input_fx + 6 +Q_temp +13 -15 -3*/
        L_output[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 4], full_band_bpf_fx[5][4] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
    }
#else
    L_tmpX = L_shr( Mult_32_16( memory2_fx_2[0], full_band_bpf_fx[2][4] ), 3 );                               /* *Q_input_fx+6 +Q_temp +13 -15 -3 */
    L_tmpX = L_add_sat( L_shr( Mult_32_16( memory2_fx_2[1], full_band_bpf_fx[2][3] ), 3 ), L_tmpX );          /*Q_input_fx + 6 +Q_temp+13 -15 -3*/
@@ -7080,8 +7127,6 @@ void elliptic_bpf_48k_generic_fx(
    move32();
    L_tmpMax = L_max( L_tmpMax, L_abs( L_output[3] ) );

#endif

    FOR( i = 4; i < L_FRAME48k; i++ )
    {
        L_tmpX = L_shr( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[2][4] ), 3 );                            /*Q_input_fx + 6 +Q_temp+13 -15 -3 */
@@ -7099,6 +7144,10 @@ void elliptic_bpf_48k_generic_fx(
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
    }

#endif


    memory_fx2[2][0] = L_tmp2[L_FRAME48k - 4];
    memory_fx2[2][1] = L_tmp2[L_FRAME48k - 3];
    memory_fx2[2][2] = L_tmp2[L_FRAME48k - 2];