Commit cdc6a0c2 authored by Fabian Bauer's avatar Fabian Bauer
Browse files

move stage2 code into loops

parent 21a7f761
Loading
Loading
Loading
Loading
+70 −45
Original line number Diff line number Diff line
@@ -6785,6 +6785,46 @@ void elliptic_bpf_48k_generic_fx(
    {
        FOR( i = 0; i < L_FRAME48k; )
        {
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
            // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] );
            W_tmpX = W_mac_16_16( 0, input_fx[i - 3], full_band_bpf_fx[0][3] );
            W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] );
            W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] );
            L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/
            move32();
            i++;

            W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] );
            W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 2], full_band_bpf_fx[0][2] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] );
            W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] );
            L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/
            move32();
            i++;

            // W_tmpX = W_mac_16_16( 0, input_fx[i - 4], full_band_bpf_fx[0][4] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 3], full_band_bpf_fx[0][3] );
            W_tmpY = W_msu_32_16( 0, L_tmp[i - 1], full_band_bpf_fx[3][1] );
            W_tmpX = W_mac_16_16( 0, input_fx[i - 2], full_band_bpf_fx[0][2] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 2], full_band_bpf_fx[3][2] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i - 1], full_band_bpf_fx[0][1] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 3], full_band_bpf_fx[3][3] );
            // W_tmpX = W_mac_16_16( W_tmpX, input_fx[i], full_band_bpf_fx[0][0] );
            W_tmpY = W_msu_32_16( W_tmpY, L_tmp[i - 4], full_band_bpf_fx[3][4] );
            L_tmp[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 ) ), 3 ) ); /*Q_input_fx + 11*/
            move32();
            i++;
#else
            // L_tmpX = L_shr( L_mult( input_fx[i - 4], full_band_bpf_fx[0][4] ), 3 );                             /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_add_sat( L_shr( L_mult( input_fx[i - 3], full_band_bpf_fx[0][3] ), 3 ), 0 );           /*Q_input_fx + 13 + 1 - 3*/
            L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 1], full_band_bpf_fx[3][1] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
@@ -6820,6 +6860,7 @@ void elliptic_bpf_48k_generic_fx(
            L_tmp[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[3][4] ), 2 ) ); /*Q_input_fx + 11 + 13  -15 +2*/
            move32();
            i++;
#endif /*#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/
        }
    }
#else  /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic*/
@@ -6894,11 +6935,9 @@ void elliptic_bpf_48k_generic_fx(
    move32();

#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        Word64 W_tmpX;
        Word64 W_tmpY;
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
        W_tmpX = W_mac_32_16( 0, L_tmp[i - 4], full_band_bpf_fx[1][4] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i - 3], full_band_bpf_fx[1][3] );
        W_tmpY = W_msu_32_16( 0, L_tmp2[i - 1], full_band_bpf_fx[4][1] );
@@ -6908,14 +6947,10 @@ void elliptic_bpf_48k_generic_fx(
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 3], full_band_bpf_fx[4][3] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp[i], full_band_bpf_fx[1][0] );
        W_tmpY = W_msu_32_16( W_tmpY, L_tmp2[i - 4], full_band_bpf_fx[4][4] );
        // L_tmp2[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 + 16 ), W_shl( W_tmpY, 2 - 16 ) ) );
        L_tmp2[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) );
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) );
    }
#else
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
#else  /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/
        L_tmpX = L_shr( Mult_32_16( L_tmp[i - 4], full_band_bpf_fx[1][4] ), 3 );                              /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp[i - 3], full_band_bpf_fx[1][3] ), 3 ), L_tmpX );         /*Q_input_fx + 11 + 13 - 15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 1], full_band_bpf_fx[4][1] ), 2 ) );    /*Q_input_fx + 6 +13 -15 +2 */
@@ -6927,8 +6962,8 @@ void elliptic_bpf_48k_generic_fx(
        L_tmp2[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[4][4] ), 2 ) ); /*Q_input_fx + 6 +13 -15 +2 */
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_tmp2[i] ) );
#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/
    }
#endif
#else
    L_tmpX = L_shr( Mult_32_16( memory2_fx[1][0], full_band_bpf_fx[1][4] ), 3 );                             /*Q_input_fx + 11 + 13 - 15 -3*/
    L_tmpX = L_add( L_shr( Mult_32_16( memory2_fx[1][1], full_band_bpf_fx[1][3] ), 3 ), L_tmpX );            /*Q_input_fx + 11 + 13 - 15 -3*/
@@ -7013,32 +7048,22 @@ void elliptic_bpf_48k_generic_fx(
        move32();
    }
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic
#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
    FOR( 0 = 4; i < L_FRAME48k; i++ )
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
        Word64 W_tmpX;
        Word64 W_tmpY;

#ifdef FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2
        W_tmpX = W_mac_32_16( 0, L_tmp2[i - 4], full_band_bpf_fx[2][4] );
        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 3], full_band_bpf_fx[2][3] );
        W_tmpY = W_msu_32_16( 0, L_output[i - 1], full_band_bpf_fx[5][1] );

        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 2], full_band_bpf_fx[2][2] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 2], full_band_bpf_fx[5][2] );

        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i - 1], full_band_bpf_fx[2][1] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 3], full_band_bpf_fx[5][3] );

        W_tmpX = W_mac_32_16( W_tmpX, L_tmp2[i], full_band_bpf_fx[2][0] );
        W_tmpY = W_msu_32_16( W_tmpY, L_output[i - 4], full_band_bpf_fx[5][4] );
        // L_output[i] = W_sat_l( W_add( W_shr( W_tmpX, 3 + 16 ), W_shl( W_tmpY, 2 - 16 ) ) );
        L_output[i] = W_sat_l( W_shr( W_add( W_tmpX, W_shl( W_tmpY, 2 - 16 + 3 + 16 ) ), 3 + 16 ) );
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
    }
#else
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
#else  /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/
        L_tmpX = L_shr( Mult_32_16( L_tmp2[i - 4], full_band_bpf_fx[2][4] ), 3 );                            /*Q_input_fx + 6 +Q_temp+13 -15 -3 */
        L_tmpX = L_add_sat( L_shr( Mult_32_16( L_tmp2[i - 3], full_band_bpf_fx[2][3] ), 3 ), L_tmpX );       /*Q_input_fx + 6 +Q_temp +13 -15 -3*/
        L_tmpX = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 1], full_band_bpf_fx[5][1] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/
@@ -7053,8 +7078,8 @@ void elliptic_bpf_48k_generic_fx(
        L_output[i] = L_sub_sat( L_tmpX, L_shl_sat( Mult_32_16( L_output[i - 4], full_band_bpf_fx[5][4] ), 2 ) ); /*Q_input_fx + 1 +Q_temp+13 -15 + 2*/
        move32();
        L_tmpMax = L_max( L_tmpMax, L_abs( L_output[i] ) );
#endif /*FIX_1439_SPEEDUP_SIMPLIFY_elliptic_bpf_48k_generic_STAGE2*/
    }
#endif
#else
    L_tmpX = L_shr( Mult_32_16( memory2_fx_2[0], full_band_bpf_fx[2][4] ), 3 );                               /* *Q_input_fx+6 +Q_temp +13 -15 -3 */
    L_tmpX = L_add_sat( L_shr( Mult_32_16( memory2_fx_2[1], full_band_bpf_fx[2][3] ), 3 ), L_tmpX );          /*Q_input_fx + 6 +Q_temp+13 -15 -3*/