Commit 50fbdde0 authored by Fabian Bauer's avatar Fabian Bauer Committed by Sandesh Venkatesh
Browse files

added and activated FIX_1326_SPEEDUP_00 - 07

parent 679ca05f
Loading
Loading
Loading
Loading
+182 −8
Original line number Diff line number Diff line
@@ -46,8 +46,13 @@
#include "wmc_auto.h"

/* Compile-time switches for the FIX_1326 speed-up changes; each one selects an
   alternative code path through an #ifdef in eig2x2_fx() or
   formulate2x2MixingMatrix_fx(). The trailing figure on each active switch is the
   WMOPS value noted by the author (presumably the complexity saving - confirm). */
//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurrence in current bitstream
//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : unroll the loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefit

#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS
#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS
#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS
/* NOTE(review): the entries look like 1/n in Q15 for n = 1..4 (1/1 saturated to 32767) - confirm against the users of slot_fx. */
Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };

/*-------------------------------------------------------------------------
@@ -3280,6 +3285,19 @@ static void eig2x2_fx(

    /* Numeric case, when input is practically zeros */
    // IF( D_fx[0] < EPSILON_FX )
#ifdef FIX_1326_SPEEDUP_02
    IF ( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) )
    {
        Ure_fx[0][0] = ONE_IN_Q31;
        move32();
        Ure_fx[1][1] = ONE_IN_Q31;
        move32();
        *q_U = Q31;
        move16();

        return;
    }
#else
    IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( D_fx[0], *q_D, EPSILON_MANT, EPSILON_EXP ), -1 ) )
    {
        Ure_fx[0][0] = ONE_IN_Q31;
@@ -3291,8 +3309,24 @@ static void eig2x2_fx(

        return;
    }
#endif

    /* Numeric case, when input is near an identity matrix with a gain */
#ifdef FIX_1326_SPEEDUP_03 //178.932
    tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31

        IF( LT_32( pm_fx, L_shl_sat(tmp1, sub(q_tmp1,q_tmp2) ) ) )
        {
            Ure_fx[0][0] = ONE_IN_Q30;
            move32();
            Ure_fx[1][1] = ONE_IN_Q30;
            move32();
            *q_U = Q30;
            move16();

            return;
        }
#else
    tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31

    IF( LT_16( q_tmp1, q_tmp2 ) )
@@ -3323,6 +3357,7 @@ static void eig2x2_fx(
            return;
        }
    }
#endif

    q_U_1 = 0;
    q_U_2 = 0;
@@ -3431,10 +3466,22 @@ static void eig2x2_fx(
            tmp2 = Mpy_32_32( s_fx, s_fx );
            q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );


#ifdef FIX_1326_SPEEDUP_04
            Word16 exp_tmp2;
            Word32 eps_tmp;
            
            tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 );
            eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) );

            tmp3 = L_add( L_shr ( tmp2,1), L_shr(eps_tmp,1) ); // Add Epsilon if relevant

            exp_tmp3 = add(exp_tmp2 , 1);
#else
            tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
            q_tmp2 = sub( 31, q_tmp2 );

            tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
#endif

#if 1
            tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
@@ -4468,6 +4515,28 @@ static void formulate2x2MixingMatrix_fx(
#endif
    }
    ELSE
#ifdef FIX_1326_SPEEDUP_05
    {
        Word16 shift = norm_l( temp );
        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
        exp_temp = sub( 31, q_ein );
        if ( temp == 0 )
        {
            exp_temp = EPSILON_EXP;
            move32();
        }
        if (temp == 0)
        {
            temp = EPSILON_MANT;
            move32();
        }
        temp = ISqrt32( temp , &exp_temp);
        shift = sub( 31, q_eout );
        Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp );
        move32();
        exp = add( shift, exp_temp );
   }
#else
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
        push_wmops( "formulate2x2MixingMatrix Division" );
@@ -4478,6 +4547,7 @@ static void formulate2x2MixingMatrix_fx(
        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
#endif
    }
#endif
#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
    Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
#endif
@@ -4506,6 +4576,25 @@ static void formulate2x2MixingMatrix_fx(
#endif
    }
    ELSE
#ifdef FIX_1326_SPEEDUP_06
    {
        Word16 shift = norm_l( temp );
        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
        exp_temp = sub(31, q_ein);
        if ( temp == 0 )
        {
            exp_temp = add( 0, EPSILON_EXP );
        }
        if (temp == 0)
        {
            temp = L_add( 0, EPSILON_MANT );
        }
        temp = ISqrt32( temp, &exp_temp );
        shift = sub( 31, q_eout );
        Ghat_fx[1] = Mpy_32_32( temp, ISqrt32( E_out2, &shift ) );
        exp_temp = add( shift, exp_temp );
    }
#else
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
        push_wmops( "formulate2x2MixingMatrix Division" );
@@ -4516,6 +4605,7 @@ static void formulate2x2MixingMatrix_fx(
        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
#endif
    }
#endif
#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
    Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
#endif
@@ -4569,7 +4659,72 @@ static void formulate2x2MixingMatrix_fx(
       For matrix A that is P = A(A'A)^0.5 */
    push_wmops( "oPtoA MT1M" );
#ifdef FIX_1326_SPEEDUP_01
    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
    // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );

    {
        Word16 chA, chB;
            {
            chA = 0, chB = 0;
                tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ),
                                                                         Are_fx[1][0], Are_fx[1][0] ),
                                                             Aim_fx[0][0], Aim_fx[0][0] ),
                                                 Aim_fx[1][0], Aim_fx[1][0] );
                move32();
            }
            {
                chA = 0, chB = 1;
                tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ),
                                                                         Are_fx[1][1], Are_fx[1][0] ),
                                                             Aim_fx[0][1], Aim_fx[0][0] ),
                                                 Aim_fx[1][1], Aim_fx[1][0] );
                move32();
                tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ),
                                                                         Are_fx[1][1], Aim_fx[1][0] ),
                                                             Aim_fx[0][1], Are_fx[0][0] ),
                                                 Aim_fx[1][1], Are_fx[1][0] );
                move32();
            }
            {
                chA = 1, chB = 0;
                tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ),
                                                                         Are_fx[1][1], Are_fx[1][1] ),
                                                             Aim_fx[0][1], Aim_fx[0][1] ),
                                                 Aim_fx[1][1], Aim_fx[1][1] );
                move32();
            }
            {
                chA = 1, chB = 1;
            }

        q_temp = sub( add( q_A, q_A ), 31 );

        move16();
        Word16 ZeroState = add( 1, 0 );
        if (tmpRe_fx[0][0] != 0)
        {
            ZeroState = add(0, 0);
        }
        if ( tmpRe_fx[1][1] != 0 )
        {
            ZeroState = add( 0, 0 );
        }
        if ( tmpRe_fx[1][0] != 0 )
        {
            ZeroState = add( 0, 0 );
        }
        if ( tmpIm_fx[1][0] != 0 )
        {
            ZeroState = add( 0, 0 );
        }

        if ( sub(ZeroState,1) == 0 )
        {
          q_temp = Q31;
            move16();
        }

    }


    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
#else
@@ -4579,6 +4734,24 @@ static void formulate2x2MixingMatrix_fx(
#endif
    pop_wmops();/*push_wmops( "oPtoA MT1M" );*/

#ifdef FIX_1326_SPEEDUP_07
    IF( D_fx[0] == 0 )
    {
        //temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
        //exp = ONE_DIV_EPSILON_EXP;
        div_fx[0] = L_add(0,2047986068); //Sqrt32( temp, &exp ); // Q = 31 - exp
        exp = add(0,20);
    }
    ELSE
    {
        exp = sub( 31, q_D );
        div_fx[0] = ISqrt32( D_fx[0], &exp );
        //temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
        //exp = sub( exp, sub( Q30, q_D ) );
        //div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
        move32();
    }
#else
    IF( D_fx[0] == 0 )
    {
#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4598,6 +4771,7 @@ static void formulate2x2MixingMatrix_fx(
    }
    div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    move32();
#endif

    IF( D_fx[1] == 0 )
    {