Commit 29b8a3f7 authored by Fabian Bauer's avatar Fabian Bauer Committed by Manuel Jander
Browse files

Activated speedup 15, 16 to test

parent da0a2a18
Loading
Loading
Loading
Loading
+6 −112
Original line number Diff line number Diff line
@@ -53,18 +53,14 @@
//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt                // 3.5 WMOPS //Quite bad diffs  --> DONT USE
//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
//#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS   pipe 48851 fails --> DONTUSEYET
//#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                   --> DONTUSE
#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  --> USE? (pipe tbd)
//#define FIX_1326_SPEEDUP_14 // test whether any of these paths is really necessary, then assert   --> DONTUSE (pipes red, asserts!)
//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)
//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt           //   2.9 WMOPS                  --> USE

#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)


/* Four-entry lookup table. The values equal 2^15 / n for n = 1..4, truncated,
 * with the first entry capped at 32767 -- presumably the reciprocals
 * 1/1, 1/2, 1/3, 1/4 in Q15; TODO confirm against the users of slot_fx. */
Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
@@ -3224,15 +3220,8 @@ static void eig2x2_fx(
    a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
    pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
    add_fx = 0.5f * (e1 + e2)*/

#ifdef FIX_1326_SPEEDUP_14
    static int tstcnt = 0;
#endif
    IF( L_and( c_re == 0, c_im == 0 ) )
    {
#ifdef FIX_1326_SPEEDUP_14
        tstcnt++;
#endif
        /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0
        a_fx = (E1 - E2)^2
        pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
@@ -3250,9 +3239,6 @@ static void eig2x2_fx(
        q_crossSquare = sub( add( q_c, q_c ), 31 );
        IF( EQ_32( e1, e2 ) )
        {
#ifdef FIX_1326_SPEEDUP_14
            tstcnt++;
#endif
            /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx
            pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) = sqrt(max(0, crossSquare_fx))*/
            test();
@@ -3286,9 +3272,6 @@ static void eig2x2_fx(

            IF( GT_16( sub( q_c, q_e ), Q15 ) )
            {
#ifdef FIX_1326_SPEEDUP_14
                tstcnt++;
#endif
                pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 );
                q_tmp2 = q_e;
                move16();
@@ -3312,10 +3295,6 @@ static void eig2x2_fx(
            }
        }
    }
#ifdef FIX_1326_SPEEDUP_14
    if ( tstcnt > 10000 )
        assert( 0 );
#endif
    // add_fx = 0.5 * (e1 + e2)
    add_fx = L_shr( L_add( e1, e2 ), 1 );
    q_tmp1 = q_e;
@@ -4287,33 +4266,6 @@ static void formulate2x2MixingMatrix_fx(
        }
    }
    ELSE
#ifdef FIX_1326_SPEEDUP_05
    {
        Word16 shift = norm_l( temp );
#if 1 // oldcode
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
#else

        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
        exp_temp = sub( 30, q_ein );
        if ( temp == 0 )
        {
            exp_temp = EPSILON_EXP;
            move32();
        }
        if ( temp == 0 )
        {
            temp = EPSILON_MANT;
            move32();
        }
#endif
        temp = ISqrt32( temp, &exp_temp );
        shift = sub( 31, q_eout );
        Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp );
        move32();
        exp = add( shift, exp_temp );
    }
#else
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
        push_wmops( "formulate2x2MixingMatrix Division" );
@@ -4322,7 +4274,7 @@ static void formulate2x2MixingMatrix_fx(
        exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    }
#endif

#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
    Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
#endif
@@ -4346,39 +4298,6 @@ static void formulate2x2MixingMatrix_fx(
        }
    }
    ELSE
#ifdef FIX_1326_SPEEDUP_06
    {
        Word16 shift = norm_l( temp );
#if 0 // oldcode
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
#else
        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
        exp_temp = sub( 31 - 1, q_ein );
        if ( temp == 0 )
        {
            exp_temp = add( 0, EPSILON_EXP );
        }
        if ( temp == 0 )
        {
            temp = L_add( 0, EPSILON_MANT );
        }
#endif
#if 1 // oldcode - new code introduces too much noise
        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
#endif
#else
        temp = ISqrt32( temp, &exp_temp );
        shift = sub( 31, q_eout );
        Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) );
        exp1 = add( shift, exp_temp );
#endif
    }
#else
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
        push_wmops( "formulate2x2MixingMatrix Division" );
@@ -4387,7 +4306,7 @@ static void formulate2x2MixingMatrix_fx(
        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
    }
#endif

#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
    Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
#endif
@@ -4512,30 +4431,6 @@ static void formulate2x2MixingMatrix_fx(
#endif
    pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/

#ifdef FIX_1326_SPEEDUP_07
    IF( D_fx[0] == 0 )
    {
        // temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
        // exp = ONE_DIV_EPSILON_EXP;
        div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp
        exp = add( 0, 20 );
    }
    ELSE
    {
#if 1 // old code
        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
        exp = sub( exp, sub( Q30, q_D ) );
        pop_wmops();                      /*push_wmops( "formulate2x2MixingMatrix Division" )*/
        div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
        move32();
#else
        exp = sub( 31, q_D );
        div_fx[0] = ISqrt32_2( D_fx[0], &exp );
        move32();
#endif
    }
#else
    IF( D_fx[0] == 0 )
    {
        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
@@ -4552,7 +4447,6 @@ static void formulate2x2MixingMatrix_fx(
    }
    div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    move32();
#endif

#ifdef FIX_1326_SPEEDUP_08
    // This is just a shortcut to already existing optimizations  (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster