Commit aa9c0272 authored by Fabian Bauer's avatar Fabian Bauer
Browse files

add macro SPEEDUP_1_hp20_fx_32 and activate - R1, R2 calculation changed and...

add macro SPEEDUP_1_hp20_fx_32 and activate - R1, R2 calculation changed and shortened, should be equal, best case BE, but at least no regression
parent 1816aab3
Loading
Loading
Loading
Loading
Loading

lib_com/hp50_fx.c

100644 → 100755
+47 −5
Original line number Diff line number Diff line
@@ -514,6 +514,7 @@ void hp20_fx_32(
    return;
}
#else
#define SPEEDUP_1_hp20_fx_32
void hp20_fx_32(
    Word32 signal_fx[],
    const Word16 lg,
@@ -581,6 +582,10 @@ void hp20_fx_32(
    x0_fx64 = W_deposit32_l( mem_fx[2] );
    x1_fx64 = W_deposit32_l( mem_fx[3] );

#ifdef SPEEDUP_1_hp20_fx_32
    Word16 Qy1_new, Qy2_new;
#endif

    FOR( i = 0; i < lg; i++ )
    {
        x2_fx64 = x1_fx64;
@@ -596,9 +601,14 @@ void hp20_fx_32(
            move16();
        }
        Qy1 = sub( Qy1, 34 );

#ifdef SPEEDUP_1_hp20_fx_32
        Qy1_new = add( Qy1, Qprev_y1 );

#else
        R1 = W_mult0_32_32( W_shl_sat_l( y1_fx64, Qy1 ), a1_fx );
        Qy1 = add( Qy1, Qprev_y1 );

#endif
        Qy2 = W_norm( y2_fx64 );
        if ( y2_fx64 == 0 )
        {
@@ -606,9 +616,14 @@ void hp20_fx_32(
            move16();
        }
        Qy2 = sub( Qy2, 34 );

#ifdef SPEEDUP_1_hp20_fx_32
        Qy2_new = add( Qy2, Qprev_y2 );

#else
        R2 = W_mult0_32_32( W_shl_sat_l( y2_fx64, Qy2 ), a2_fx );
        Qy2 = add( Qy2, Qprev_y2 );

#endif
        Qx0 = W_norm( x0_fx64 );
        if ( x0_fx64 == 0 )
        {
@@ -616,8 +631,10 @@ void hp20_fx_32(
            move16();
        }
        Qx0 = sub( Qx0, 34 );
        R3 = W_mult0_32_32( W_shl_sat_l( x0_fx64, Qx0 ), b2_fx );

#ifndef SPEEDUP_1_hp20_fx_32
        R3 = W_mult0_32_32( W_shl_sat_l( x0_fx64, Qx0 ), b2_fx );
#endif
        Qx1 = W_norm( x1_fx64 );
        if ( x1_fx64 == 0 )
        {
@@ -625,8 +642,9 @@ void hp20_fx_32(
            move16();
        }
        Qx1 = sub( Qx1, 34 );
#ifndef SPEEDUP_1_hp20_fx_32
        R4 = W_mult0_32_32( W_shl_sat_l( x1_fx64, Qx1 ), b1_fx );

#endif
        Qx2 = W_norm( x2_fx64 );
        if ( x2_fx64 == 0 )
        {
@@ -634,18 +652,42 @@ void hp20_fx_32(
            move16();
        }
        Qx2 = sub( Qx2, 34 );

        R5 = W_mult0_32_32( W_shl_sat_l( x2_fx64, Qx2 ), b2_fx );

#ifdef SPEEDUP_1_hp20_fx_32

        Qmin = s_min( Qy1_new, Qy2_new );

        R1 = W_mult0_32_32( W_shr( W_shl( y1_fx64, Qy1 ), sub( Qy1_new, Qmin ) ), a1_fx );
        R2 = W_mult0_32_32( W_shr( W_shl( y2_fx64, Qy2 ), sub( Qy2_new, Qmin ) ), a2_fx );
        R3 = W_mult0_32_32( W_shl( x0_fx64, Qx0 ), b2_fx );
        R4 = W_mult0_32_32( W_shl( x1_fx64, Qx1 ), b1_fx );

        y0_fx64 = W_add( R1, R2 );


        Qmin = s_min( Qmin, Qx0 );
        Qmin = s_min( Qmin, Qx1 );
        Qmin = s_min( Qmin, Qx2 );

        R3 = W_shr( R3, sub( Qx0, Qmin ) );
        R4 = W_shr( R4, sub( Qx1, Qmin ) );
        R5 = W_shr( R5, sub( Qx2, Qmin ) );

        y0_fx64 = W_add( W_shr( y0_fx64, sub( s_min( Qy1_new, Qy2_new ), Qmin ) ), W_add( R3, W_add( R4, R5 ) ) );
#else
        Qmin = s_min( Qy1, Qy2 );

        y0_fx64 = W_add( W_shr( R1, sub( Qy1, Qmin ) ), W_shr( R2, sub( Qy2, Qmin ) ) );


        Qmin = s_min( Qmin, Qx0 );
        Qmin = s_min( Qmin, Qx1 );
        Qmin = s_min( Qmin, Qx2 );

        y0_fx64 = W_add( W_shr( y0_fx64, sub( s_min( Qy1, Qy2 ), Qmin ) ), W_add( W_shr( R3, sub( Qx0, Qmin ) ), W_add( W_shr( R4, sub( Qx1, Qmin ) ), W_shr( R5, sub( Qx2, Qmin ) ) ) ) );

#endif
        y0_fx64 = W_shr( y0_fx64, 29 );

        signal_fx[i] = W_extract_l( W_shr( y0_fx64, Qmin ) );