Commit 1d70ad19 authored by ber's avatar ber
Browse files

activate matrixFunctionsSpeedups only

parent e066ae25
Loading
Loading
Loading
Loading
Loading
+50 −12
Original line number Diff line number Diff line
@@ -48,14 +48,16 @@
#include "wmc_auto.h"

// Compile-time switches for the issue 1072 speed-up variants in this file.
// A switch is active when its #define line is not commented out.
// NOTE(review): this listing is a rendered diff without +/- markers, so the
// removed and the added version of a line can both appear (e.g. the
// ..._reduceDivs switch is shown once active and once commented out);
// confirm the effective state against the checked-out file.
//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_simple
#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_reduceDivs // orange tests - GOOD
//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_reduceDivs // orange tests

//#define FIX1072_SPEEDUP_chol2x2_fx

#define FIX_1072_SPEEDUP_matrixMul_fx
//#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx
//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest
#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx
#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest
// Selects the #ifdef branches in matrixTransp1Mul_fx / matrixTransp2Mul_fx that
// call L_negate() on the imaginary operand once, keep the result in a local,
// and derive the sign flags from the un-negated operand with the roles of the
// LT/GE flags swapped.
// NOTE(review): for an operand equal to 0 the flags handed to matrixMul_func1
// appear to differ between the two branches (GE_32( x, 0 ) is true where
// LT_32( L_negate( x ), 0 ) is false) — confirm matrixMul_func1 is insensitive
// to that case, since the product is 0 there.
#define FIX_1072_SPEEDUP_matrixFunctions_negateTuning


// issue 1072
// numbers fa2a72: 
@@ -3820,9 +3822,15 @@ static void matrixTransp1Mul_fx(
            Word32 Bre_fx_1_chB_GE_0 = GE_32( Bre_fx[1][chB], 0 );
            Word32 Are_fx_1_chA_LT_0 = LT_32( Are_fx[1][chA], 0 );
            Word32 Bre_fx_1_chB_LT_0 = LT_32( Bre_fx[1][chB], 0 );
#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            Word32 Aim_fx_0_chA_LT_0 = LT_32( Aim_fx[0][chA], 0 );
            Word32 Aim_fx_0_chA_GE_0 = GE_32( Aim_fx[0][chA], 0 );
            Word32 LNeg_Aim_fx_0_chA = L_negate( Aim_fx[0][chA] );
#else
            Word32 LNeg_Aim_fx_0_chA_GE_0 = GE_32( L_negate( Aim_fx[0][chA] ), 0 );
            Word32 Bim_fx_0_chB_GE_0 = GE_32( Bim_fx[0][chB], 0 );
            Word32 LNeg_Aim_fx_0_chA_LT_0 = LT_32( L_negate( Aim_fx[0][chA] ), 0 );
#endif
            Word32 Bim_fx_0_chB_GE_0 = GE_32( Bim_fx[0][chB], 0 );
            Word32 Bim_fx_0_chB_LT_0 = LT_32( Bim_fx[0][chB], 0 );
            Word32 Aim_fx_1_chA_GE_0 = GE_32( Aim_fx[1][chA], 0 );
            Word32 Bim_fx_1_chB_GE_0 = GE_32( Bim_fx[1][chB], 0 );
@@ -3839,8 +3847,11 @@ static void matrixTransp1Mul_fx(
            move32();
            tmp3 = L_add( tmp1, tmp2 );


#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            tmp1 = matrixMul_func1( Aim_fx_0_chA_GE_0 || Bim_fx_0_chB_LT_0, Aim_fx_0_chA_LT_0 || Bim_fx_0_chB_GE_0, Mpy_32_32( LNeg_Aim_fx_0_chA, Bim_fx[0][chB] ) );
#else
            tmp1 = matrixMul_func1( LNeg_Aim_fx_0_chA_LT_0 || Bim_fx_0_chB_LT_0, LNeg_Aim_fx_0_chA_GE_0 || Bim_fx_0_chB_GE_0, Mpy_32_32( L_negate( Aim_fx[0][chA] ), Bim_fx[0][chB] ) );
#endif
            move32();
            move32();
            tmp2 = matrixMul_func1( Aim_fx_1_chA_LT_0 || Bim_fx_1_chB_LT_0, Aim_fx_1_chA_GE_0 || Bim_fx_1_chB_GE_0, Mpy_32_32( Aim_fx[1][chA], Bim_fx[1][chB] ) );
@@ -3849,7 +3860,11 @@ static void matrixTransp1Mul_fx(
            outRe_fx[chA][chB] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
            move32();

#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            tmp1 = matrixMul_func1( Aim_fx_0_chA_GE_0 || Bre_fx_0_chB_LT_0, Aim_fx_0_chA_LT_0 || Bre_fx_0_chB_GE_0, Mpy_32_32( LNeg_Aim_fx_0_chA, Bre_fx[0][chB] ) );
#else
            tmp1 = matrixMul_func1( LNeg_Aim_fx_0_chA_LT_0 || Bre_fx_0_chB_LT_0, LNeg_Aim_fx_0_chA_GE_0 || Bre_fx_0_chB_GE_0, Mpy_32_32( L_negate( Aim_fx[0][chA] ), Bre_fx[0][chB] ) );
#endif
            move32();
            move32();
            tmp2 = matrixMul_func1( Aim_fx_1_chA_LT_0 || Bre_fx_1_chB_LT_0, Aim_fx_1_chA_GE_0 || Bre_fx_1_chB_GE_0, Mpy_32_32( Aim_fx[1][chA], Bre_fx[1][chB] ) );
@@ -4016,14 +4031,23 @@ static void matrixTransp2Mul_fx(
            Word32 Are_fx_chA_1_LT_0 = LT_32( Are_fx[chA][1], 0 );
            Word32 Bre_fx_chB_1_LT_0 = LT_32( Bre_fx[chB][1], 0 );
            Word32 Aim_fx_chA_0_GE_0 = GE_32( Aim_fx[chA][0], 0 );
            Word32 Lneg_Bim_fx_chB_0_GE_0 = GE_32( L_negate( Bim_fx[chB][0] ), 0 );
            Word32 Aim_fx_chA_0_LT_0 = LT_32( Aim_fx[chA][0], 0 );
            Word32 Lneg_Bim_fx_chB_0_LT_0 = LT_32( L_negate( Bim_fx[chB][0] ), 0 );
            Word32 Aim_fx_chA_1_GE_0 = GE_32( Aim_fx[chA][1], 0 );
            Word32 Lneg_Bim_fx_chB_1_GE_0 = GE_32( L_negate( Bim_fx[chB][1] ), 0 );
            Word32 Aim_fx_chA_1_LT_0 = LT_32( Aim_fx[chA][1], 0 );
            Word32 Lneg_Bim_fx_chB_1_LT_0 = LT_32( L_negate( Bim_fx[chB][1] ), 0 );
            Word32 tmp3;
#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            Word32 Bim_fx_chB_0_LT_0 = LT_32( Bim_fx[chB][0], 0 );
            Word32 Bim_fx_chB_0_GE_0 = GE_32( Bim_fx[chB][0], 0 );
            Word32 Bim_fx_chB_1_LT_0 = LT_32( Bim_fx[chB][1], 0 );
            Word32 Bim_fx_chB_1_GE_0 = GE_32( Bim_fx[chB][1], 0 );
            Word32 LNeg_Bim_fx_chB_0 = L_negate( Bim_fx[chB][0] );
            Word32 LNeg_Bim_fx_chB_1 = L_negate( Bim_fx[chB][1] );
#else
            Word32 Lneg_Bim_fx_chB_0_GE_0 = GE_32( L_negate( Bim_fx[chB][0] ), 0 );
            Word32 Lneg_Bim_fx_chB_0_LT_0 = LT_32( L_negate( Bim_fx[chB][0] ), 0 );
            Word32 Lneg_Bim_fx_chB_1_GE_0 = GE_32( L_negate( Bim_fx[chB][1] ), 0 );
            Word32 Lneg_Bim_fx_chB_1_LT_0 = LT_32( L_negate( Bim_fx[chB][1] ), 0 );
#endif

            tmp1 = matrixMul_func1( Are_fx_chA_0_LT_0 || Bre_fx_chB_0_LT_0, Are_fx_chA_0_GE_0 || Bre_fx_chB_0_GE_0, Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ) );
            move32();
@@ -4032,11 +4056,18 @@ static void matrixTransp2Mul_fx(
            move32();
            move32();
            tmp3 = L_add( tmp1, tmp2 );

#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            tmp1 = matrixMul_func1( Aim_fx_chA_0_LT_0 || Bim_fx_chB_0_GE_0, Aim_fx_chA_0_GE_0 || Bim_fx_chB_0_LT_0, Mpy_32_32( Aim_fx[chA][0], LNeg_Bim_fx_chB_0 ) );
#else
            tmp1 = matrixMul_func1( Aim_fx_chA_0_LT_0 || Lneg_Bim_fx_chB_0_LT_0, Aim_fx_chA_0_GE_0 || Lneg_Bim_fx_chB_0_GE_0, Mpy_32_32( Aim_fx[chA][0], L_negate( Bim_fx[chB][0] ) ) );
#endif
            move32();
            move32();
#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            tmp2 = matrixMul_func1( Aim_fx_chA_1_LT_0 || Bim_fx_chB_1_GE_0, Aim_fx_chA_1_GE_0 || Bim_fx_chB_1_LT_0, Mpy_32_32( Aim_fx[chA][1], LNeg_Bim_fx_chB_1 ) );
#else
            tmp2 = matrixMul_func1( Aim_fx_chA_1_LT_0 || Lneg_Bim_fx_chB_1_LT_0, Aim_fx_chA_1_GE_0 || Lneg_Bim_fx_chB_1_GE_0, Mpy_32_32( Aim_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) );
#endif
            move32();
            move32();
            outRe_fx[chA][chB] = L_sub( tmp3, L_add( tmp1, tmp2 ) );
@@ -4051,11 +4082,18 @@ static void matrixTransp2Mul_fx(
            tmp3 = L_add( tmp1, tmp2 );
            move32();
            move32();

#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            tmp1 = matrixMul_func1( Are_fx_chA_0_LT_0 || Bim_fx_chB_0_GE_0, Are_fx_chA_0_GE_0 || Bim_fx_chB_0_LT_0, Mpy_32_32( Are_fx[chA][0], LNeg_Bim_fx_chB_0 ) );
#else
            tmp1 = matrixMul_func1( Are_fx_chA_0_LT_0 || Lneg_Bim_fx_chB_0_LT_0, Are_fx_chA_0_GE_0 || Lneg_Bim_fx_chB_0_GE_0, Mpy_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ) );
#endif
            move32();
            move32();
#ifdef FIX_1072_SPEEDUP_matrixFunctions_negateTuning
            tmp2 = matrixMul_func1( Are_fx_chA_1_LT_0 || Bim_fx_chB_1_GE_0, Are_fx_chA_1_GE_0 || Bim_fx_chB_1_LT_0, Mpy_32_32( Are_fx[chA][1], LNeg_Bim_fx_chB_1 ) );
#else
            tmp2 = matrixMul_func1( Are_fx_chA_1_LT_0 || Lneg_Bim_fx_chB_1_LT_0, Are_fx_chA_1_GE_0 || Lneg_Bim_fx_chB_1_GE_0, Mpy_32_32( Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) );
#endif
            move32();
            move32();
            outIm_fx[chA][chB] = L_add( tmp3, L_add( tmp1, tmp2 ) );