Commit a6154d64 authored by ber's avatar ber
Browse files

check speedupChol2x2_0L2L

parent 63bdb05e
Loading
Loading
Loading
Loading
Loading
+92 −41
Original line number Diff line number Diff line
@@ -50,7 +50,20 @@
//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_simple
//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_reduceDivs // orange tests

//#define FIX1072_SPEEDUP_chol2x2_fx // 6Mhz

// chol2x2_fx speed-up configuration pushed for the pipeline check: "0L2L"
// (presumably: part 0 = light, part 1 = off, part 2 = light -- confirm naming).
//
// The master switch below enables the sub-switches; each sub-switch selects one
// of the #if/#elif branches inside chol2x2_fx():
//   _0       : outRe[1][0]/outIm[1][0] via ISqrt32( outRe[0][0] ) squared
//   _0_light : one BASOP_Util_Divide3232_Scale_cadence( 0x7fffffff, outRe[0][0] )
//              reciprocal, then Mpy_32_32 with c_re and c_im
//   _1       : temp / e1 via ISqrt32( e1 ) squared (no light variant)
//   _2       : temp / e2 via ISqrt32( e2 ) squared
//   _2_light : one BASOP_Util_Divide3232_Scale_cadence( 0x7fffffff, e2 ) reciprocal
// When none of the sub-switches for a part is defined, the original division
// code in the corresponding #else branch is compiled.
#define FIX1072_SPEEDUP_chol2x2_fx // 6 MHz with _0 + _1 + _2
#ifdef FIX1072_SPEEDUP_chol2x2_fx
// Part 0: uncomment to use the full variant; otherwise the light variant is selected.
//#define FIX1072_SPEEDUP_chol2x2_fx_0
#ifndef FIX1072_SPEEDUP_chol2x2_fx_0
#define FIX1072_SPEEDUP_chol2x2_fx_0_light
#endif
// Part 1: disabled in this configuration; there is no light fallback.
//#define FIX1072_SPEEDUP_chol2x2_fx_1
// Part 2: uncomment to use the full variant; otherwise the light variant is selected.
//#define FIX1072_SPEEDUP_chol2x2_fx_2
#ifndef FIX1072_SPEEDUP_chol2x2_fx_2
#define FIX1072_SPEEDUP_chol2x2_fx_2_light
#endif
#endif

// SPEEDUP_matrix 012345: ACCEPT REGRESSIONS                  // NULL: 306.459 MHz --> difference to SPEEDUP_matrix 012345: 14 MHz
//#define FIX_1072_SPEEDUP_matrixMul_fx                       //SPEEDUP_matrix_0
@@ -82,7 +95,7 @@
//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_5
#endif

#define FIX_1072_REDUCE_DIVS
//#define FIX_1072_REDUCE_DIVS // accept regressions!


#ifndef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
@@ -4422,13 +4435,14 @@ static void chol2x2_fx(
        }
        ELSE
        {
#ifdef FIX1072_SPEEDUP_chol2x2_fx
#if defined( FIX1072_SPEEDUP_chol2x2_fx ) && defined( FIX1072_SPEEDUP_chol2x2_fx_0 )
            {
                /*2,3 Mhz*/
                // outRe[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[0][0], &exp );
                // q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) );
                Word32 tmp32;
                Word16 exp_c = sub( 31, q_c );
            exp = sub( 31, q_re1 );
                Word16 exp = sub( 31, q_re1 );
                tmp32 = ISqrt32( outRe[0][0], &exp );
                move32();
                tmp32 = Mpy_32_32( tmp32, tmp32 );
@@ -4448,6 +4462,23 @@ static void chol2x2_fx(
                move32();
                exp = add( exp, exp_c );
                q_im = sub( 31, exp );
            }
#elif defined( FIX1072_SPEEDUP_chol2x2_fx ) && defined( FIX1072_SPEEDUP_chol2x2_fx_0_light )
            {
                Word32 tmp32 = BASOP_Util_Divide3232_Scale_cadence( 0x7fffffff, outRe[0][0], &exp );
                Word16 exp_re = sub( 31, q_re1 );
                Word16 exp_c = sub( 31, q_c );
                exp = sub( exp, exp_re );
                
                outRe[1][0] = Mpy_32_32( tmp32, c_re );
                q_re2 = sub( 31, add( exp, exp_c ) );
                move32();

                outIm[1][0] = Mpy_32_32( tmp32, c_im );
                q_im = q_re2;
                move32();
            }

#else
            outRe[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[0][0], &exp );
            move32();
@@ -4480,17 +4511,19 @@ static void chol2x2_fx(
        }
        ELSE
        {
#ifdef FIX1072_SPEEDUP_chol2x2_fx
#if defined( FIX1072_SPEEDUP_chol2x2_fx ) && defined( FIX1072_SPEEDUP_chol2x2_fx_1 )
            {
                /*2Mhz*/
                // temp = BASOP_Util_Divide3232_Scale_cadence( temp, e1, &exp );
                // q_tmp = add( sub( 31, exp ), sub( q_tmp, q_e ) );
            exp = sub( 31, q_e );
                Word16 exp = sub( 31, q_e );
                Word32 tmp32 = ISqrt32( e1, &exp );
                tmp32 = Mpy_32_32( tmp32, tmp32 );
                exp = imult1616( 2, exp );
                temp = Mpy_32_32( temp, tmp32 );
                exp = add( exp, sub( 31, q_tmp ) );
                q_tmp = sub( 31, exp );
            }

#else
            temp = BASOP_Util_Divide3232_Scale_cadence( temp, e1, &exp );
@@ -4603,14 +4636,14 @@ static void chol2x2_fx(
        temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im );
        q_tmp = sub( add( q_c, q_c ), 31 );

#ifdef FIX1072_SPEEDUP_chol2x2_fx
#if defined( FIX1072_SPEEDUP_chol2x2_fx ) && defined( FIX1072_SPEEDUP_chol2x2_fx_2  )
        // 4611686 = 1e-12 in Q62
        IF( e2 == 0 )
        {
            // temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
            // q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
            Word32 tmp32 = 1953125005; /* 1/4611686 Q62 */
            exp = 9;
            Word16 exp = 9;
            temp = Mpy_32_32( temp, tmp32 );
            exp = add( exp, sub( 31, q_tmp ) );
            q_tmp = sub( 31, exp );
@@ -4619,7 +4652,7 @@ static void chol2x2_fx(
        {
            // temp = BASOP_Util_Divide3232_Scale_cadence( temp, e2, &exp );
            // q_tmp = add( sub( 31, exp ), sub( q_tmp, q_e ) );
            exp = sub( 31, q_e );
            Word16 exp = sub( 31, q_e );
            Word32 tmp32 = ISqrt32( e2, &exp );
            tmp32 = Mpy_32_32( tmp32, tmp32 );
            exp = imult1616( 2, exp );
@@ -4627,6 +4660,24 @@ static void chol2x2_fx(
            exp = add( exp, sub( 31, q_tmp ) );
            q_tmp = sub( 31, exp );
        }
#elif defined( FIX1072_SPEEDUP_chol2x2_fx ) && defined( FIX1072_SPEEDUP_chol2x2_fx_2_light )
        // 4611686 = 1e-12 in Q62
        Word32 tmp32;
        //Word16 exp;
        {
            tmp32 = 1953125005; /* 1/4611686 Q62 */
            exp = 9;
            move32();
            move32();
        }
        IF( e2 != 0 )
        {
            tmp32 = BASOP_Util_Divide3232_Scale_cadence( 0x7fffffff, e2, &exp );
            exp = add(sub( 0, sub(31, q_e)), exp);
        }
        temp = Mpy_32_32( temp, tmp32 );
        exp = add( exp, sub( 31, q_tmp ) );
        q_tmp = sub( 31, exp );
#else
        // 4611686 = 1e-12 in Q62
        IF( e2 == 0 )