Commit 0d8f56c9 authored by ber's avatar ber
Browse files

activate chol2x2 macro to test pipeline

parent 1d70ad19
Loading
Loading
Loading
Loading
Loading
+209 −15
Original line number Diff line number Diff line
@@ -50,16 +50,20 @@
//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_simple
//#define FIX1072_SPEEDUP_formulate2x2MixingMatrix_fx_reduceDivs // orange tests

//#define FIX1072_SPEEDUP_chol2x2_fx

#define FIX_1072_SPEEDUP_matrixMul_fx
#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx
#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest
#define FIX_1072_SPEEDUP_matrixFunctions_negateTuning

#define FIX1072_SPEEDUP_chol2x2_fx // 6Mhz

// SPEEDUP_matrix 012345: ACCEPT REGRESSIONS                  // NULL: 306.459 MHz --> difference to SPEEDUP_matrix 012345: 14 MHz
//#define FIX_1072_SPEEDUP_matrixMul_fx                       //SPEEDUP_matrix_0
//#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx                //SPEEDUP_matrix_1
//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch    //SPEEDUP_matrix_2
//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest     //SPEEDUP_matrix_3
//#define FIX_1072_SPEEDUP_matrixFunctions_negateTuning       //SPEEDUP_matrix_4
#if BINAURAL_CHANNELS==2
//#define FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2   //SPEEDUP_matrix_5 //293.773 , 292.468
#endif

// issue 1072
//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
#ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
// numbers fa2a72:
//                  default : 266.984
//                  no-opt  : 290.663 --> 23.5 MHz gain
@@ -68,8 +72,6 @@
//                  1       : 282.651 --> 8 MHz gain
//                  0       : 282.704 --> 8 MHz gain

//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
#ifdef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
#include <stdio.h>

//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_0  //8Mhz with 1072 issue stream
@@ -3575,6 +3577,7 @@ static inline Word32 matrixMul_func1( Word32 cond1, Word32 cond2, Word32 prod )
        tmp = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) );
    */
}

#endif


@@ -3962,6 +3965,181 @@ static void matrixTransp1Mul_fx(
    return;
}

#ifdef FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2
/*
 * matrixTransp1Mul_fx_in1isin2()
 *
 * Hand-unrolled 2x2 variant of matrixTransp1Mul_fx for the case where both
 * input matrices are the same complex matrix A (real part Are_fx, imaginary
 * part Aim_fx, both in Q(q_A)).
 *
 * For every output element [chA][chB] the code accumulates, over the two rows
 * i = 0, 1 (ignoring rounding/saturation of the basic operators):
 *
 *   outRe[chA][chB] = sum_i ( Are[i][chA]*Are[i][chB] + Aim[i][chA]*Aim[i][chB] )
 *   outIm[chA][chB] = sum_i ( Are[i][chA]*Aim[i][chB] - Aim[i][chA]*Are[i][chB] )
 *
 * i.e. the product of the conjugate transpose of A with A.
 *
 * Outputs:
 *   outRe_fx / outIm_fx : real / imaginary part of the result, in Q(*q_out)
 *   q_out               : set to 2*q_A - 31, or to Q31 when the all-zero
 *                         check on both output arrays succeeds.
 *
 * The indices 0 and 1 are hard-coded, so the function is only meaningful for
 * BINAURAL_CHANNELS == 2.
 *
 * NOTE(review): matrixMul_func1( cond1, cond2, prod ) is defined elsewhere in
 * the file; it presumably returns prod with a sign-dependent correction
 * selected by the two flags -- confirm against its definition.
 * NOTE(review): the move32()/move16() calls look like complexity-counting
 * instrumentation; their number and position are left untouched on purpose.
 */
static void matrixTransp1Mul_fx_in1isin2(
    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* i  : real part of A, q_A      */
    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* i  : imaginary part of A, q_A */
    Word16 q_A,
    Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* o  : real part of result, q_out      */
    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /* o  : imaginary part of result, q_out */
    Word16 *q_out )
{
    /* Total number of elements in one 2-D output array; used for the zero check at the end. */
    Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
    Word32 tmp1, tmp2;

    /* Sign flags (value >= 0 / value < 0) of every element of column 0, computed
       once and combined below into the cond1/cond2 arguments of matrixMul_func1.
       LNeg_Aim_fx_0_0 is the negated imaginary part of A[0][0]. */
    Word32 Are_fx_0_0_GE_0 = GE_32( Are_fx[0][0], 0 );
    Word32 Are_fx_0_0_LT_0 = LT_32( Are_fx[0][0], 0 );
    Word32 Are_fx_1_0_GE_0 = GE_32( Are_fx[1][0], 0 );
    Word32 Are_fx_1_0_LT_0 = LT_32( Are_fx[1][0], 0 );
    Word32 Aim_fx_0_0_GE_0 = GE_32( Aim_fx[0][0], 0 );
    Word32 Aim_fx_0_0_LT_0 = LT_32( Aim_fx[0][0], 0 );
    Word32 Aim_fx_1_0_GE_0 = GE_32( Aim_fx[1][0], 0 );
    Word32 Aim_fx_1_0_LT_0 = LT_32( Aim_fx[1][0], 0 );
    Word32 LNeg_Aim_fx_0_0 = L_negate( Aim_fx[0][0] );

    /* Same sign flags for column 1, plus the negated imaginary part of A[0][1]. */
    Word32 Are_fx_0_1_GE_0 = GE_32( Are_fx[0][1], 0 );
    Word32 Are_fx_0_1_LT_0 = LT_32( Are_fx[0][1], 0 );
    Word32 Are_fx_1_1_GE_0 = GE_32( Are_fx[1][1], 0 );
    Word32 Are_fx_1_1_LT_0 = LT_32( Are_fx[1][1], 0 );
    Word32 Aim_fx_0_1_GE_0 = GE_32( Aim_fx[0][1], 0 );
    Word32 Aim_fx_0_1_LT_0 = LT_32( Aim_fx[0][1], 0 );
    Word32 Aim_fx_1_1_GE_0 = GE_32( Aim_fx[1][1], 0 );
    Word32 Aim_fx_1_1_LT_0 = LT_32( Aim_fx[1][1], 0 );
    Word32 LNeg_Aim_fx_0_1 = L_negate( Aim_fx[0][1] );


    /* Holds the first half of each accumulation while tmp1/tmp2 are reused. */
    Word32 tmp3;

    /* Conventions used in every element below:
       - Only the row-0 imaginary operand is used in negated form (LNeg_*); the
         row-1 term is multiplied un-negated and subtracted instead, so
         tmp3 - ( tmp1 - tmp2 ) and L_sub( tmp1, tmp2 ) yield the sums given in
         the function header.
       - For a product of X and Y the flags passed are
         ( X<0 || Y<0 , X>=0 || Y>=0 ); when X is a negated operand the GE/LT
         flags of the un-negated value are swapped accordingly. */
    { /*UNROLL FOR BINAURAL CHANNELS==2*/
        /*CHA=0, CHB=0*/
        /* Real part, diagonal element: squared terms are multiplied directly,
           without matrixMul_func1. */
        tmp1 = Mpy_32_32( Are_fx[0][0], Are_fx[0][0] );
        tmp2 = Mpy_32_32( Are_fx[1][0], Are_fx[1][0] );
        tmp3 = L_add( tmp1, tmp2 );

        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][0] );
        tmp2 = Mpy_32_32( Aim_fx[1][0], Aim_fx[1][0] );
        outRe_fx[0][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();

        /* Imaginary part: -( Aim*Are ) terms first, then + ( Are*Aim ) terms. */
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );

        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][0] ) );
        move32();
        move32();
        outIm_fx[0][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();

        /*CHA=0, CHB=1*/
        /* Real part: Are*Are terms, then the Aim*Aim terms. */
        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Are_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Are_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_add( tmp1, tmp2 );

        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Aim_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Aim_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Aim_fx[1][1] ) );
        move32();
        move32();
        outRe_fx[0][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();

        /* Imaginary part: -( Aim[i][0]*Are[i][1] ) terms, then + ( Are[i][0]*Aim[i][1] ) terms. */
        tmp1 = matrixMul_func1( Aim_fx_0_0_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_0_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_0, Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_0_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_0_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][0], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );

        tmp1 = matrixMul_func1( Are_fx_0_0_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_0_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][0], Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_0_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_0_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][0], Aim_fx[1][1] ) );
        move32();
        move32();
        outIm_fx[0][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();

        /*CHA=1, CHB=0*/
        /* Same structure as CHA=0, CHB=1 with the roles of columns 0 and 1 exchanged. */
        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Are_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Are_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_add( tmp1, tmp2 );

        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Aim_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Aim_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Aim_fx[1][0] ) );
        move32();
        move32();
        outRe_fx[1][0] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();

        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_0_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_0_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_0_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_0_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][0] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );

        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_0_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_0_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_0_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_0_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][0] ) );
        move32();
        move32();
        outIm_fx[1][0] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();

        /*CHA=1, CHB=1*/
        /* Real part, diagonal element: squared terms are multiplied directly,
           without matrixMul_func1 (same as CHA=0, CHB=0 but on column 1). */
        tmp1 = Mpy_32_32( Are_fx[0][1], Are_fx[0][1] );
        tmp2 = Mpy_32_32( Are_fx[1][1], Are_fx[1][1] );
        tmp3 = L_add( tmp1, tmp2 );

        tmp1 = Mpy_32_32( LNeg_Aim_fx_0_1, Aim_fx[0][1] );
        tmp2 = Mpy_32_32( Aim_fx[1][1], Aim_fx[1][1] );
        outRe_fx[1][1] = L_sub( tmp3, L_sub( tmp1, tmp2 ) );
        move32();

        /* Imaginary part of the diagonal element, computed with the same four
           products as for element [0][0]. */
        tmp1 = matrixMul_func1( Aim_fx_0_1_GE_0 || Are_fx_0_1_LT_0, Aim_fx_0_1_LT_0 || Are_fx_0_1_GE_0, Mpy_32_32( LNeg_Aim_fx_0_1, Are_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Aim_fx_1_1_LT_0 || Are_fx_1_1_LT_0, Aim_fx_1_1_GE_0 || Are_fx_1_1_GE_0, Mpy_32_32( Aim_fx[1][1], Are_fx[1][1] ) );
        move32();
        move32();
        tmp3 = L_sub( tmp1, tmp2 );

        tmp1 = matrixMul_func1( Are_fx_0_1_LT_0 || Aim_fx_0_1_LT_0, Are_fx_0_1_GE_0 || Aim_fx_0_1_GE_0, Mpy_32_32( Are_fx[0][1], Aim_fx[0][1] ) );
        move32();
        move32();
        tmp2 = matrixMul_func1( Are_fx_1_1_LT_0 || Aim_fx_1_1_LT_0, Are_fx_1_1_GE_0 || Aim_fx_1_1_GE_0, Mpy_32_32( Are_fx[1][1], Aim_fx[1][1] ) );
        move32();
        move32();
        outIm_fx[1][1] = L_add( tmp3, L_add( tmp1, tmp2 ) );
        move32();
    }
    /* Q of a product of two Q(q_A) values formed with Mpy_32_32: q_A + q_A - 31. */
    *q_out = sub( add( q_A, q_A ), 31 );

    move16();
    /* Both output arrays are scanned as flat arrays of `size` elements starting at
       row 0; if the check succeeds for both, the result is reported as Q31.
       (is_zero_arr is defined elsewhere -- presumably returns non-zero when every
       element is zero; confirm.) */
    if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
    {
        *q_out = Q31;
        move16();
    }
    return;
}
#endif /*FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2*/

static void matrixTransp2Mul_fx(
    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
    Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -4664,6 +4842,7 @@ static void formulate2x2MixingMatrix_fx(
    pop_wmops(); //( "IDDB_2x2Matrix_Part1" );
    push_wmops( "IDDB_2x2Matrix_Part2" );

    push_wmops( "IDDB_2x2Matrix_Part2.1" );
    /* Cholesky decomposition of target / output covariance matrix */
    chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );

@@ -4695,6 +4874,8 @@ static void formulate2x2MixingMatrix_fx(
    temp = Mpy_32_32( E_in2, 2147484 ); // 2147484 = 0.001f in Q31
    temp = L_max( temp, E_in1 );

    pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.1" );
    push_wmops( "IDDB_2x2Matrix_Part2.2" );
#if defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT ) && defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT_2 )
    /*IF (E_out1 == 0)*/
    {
@@ -4818,6 +4999,8 @@ static void formulate2x2MixingMatrix_fx(
    move32();
    Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat
    move32();
    pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.2" );
    push_wmops( "IDDB_2x2Matrix_Part2.3" );

    /* Matrix multiplication, tmp = Ky' * G_hat * Q */
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -4848,12 +5031,19 @@ static void formulate2x2MixingMatrix_fx(

    q_temp = sub( add( q_ky, q_GhatQ ), 31 );

    pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.3" );
    push_wmops( "IDDB_2x2Matrix_Part2.4" );

    /* A = Ky' * G_hat * Q * Kx (see publication) */
    matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );

    /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
       For matrix A that is P = A(A'A)^0.5 */
#ifdef FIX_1072_SPEEDUP_matrixTransp1Mul_fx_IN1EQIN2_BINCH2
    matrixTransp1Mul_fx_in1isin2( Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
#else
    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
#endif

    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );

@@ -4914,7 +5104,8 @@ static void formulate2x2MixingMatrix_fx(
    div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
    move32();
#endif /*FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT*/

    pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.4" );
    push_wmops( "IDDB_2x2Matrix_Part2.5" );
    q_div = sub( 31, s_max( exp, exp1 ) );

    div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div
@@ -4946,6 +5137,8 @@ static void formulate2x2MixingMatrix_fx(
    scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
    q_div = add( q_div, exp );

    pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.5" );
    push_wmops( "IDDB_2x2Matrix_Part2.6" );
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
@@ -5015,7 +5208,8 @@ static void formulate2x2MixingMatrix_fx(
            }
        }
    }

    pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.6" );
    push_wmops( "IDDB_2x2Matrix_Part2.7" );
    minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
    q_temp = exp;
    move16();
@@ -5039,7 +5233,7 @@ static void formulate2x2MixingMatrix_fx(
                         0 /*int Bscale*/,
#endif
                         Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */

    pop_wmops(); //push_wmops( "IDDB_2x2Matrix_Part2.7" );
    pop_wmops(); //( "IDDB_2x2Matrix_Part2" );
    push_wmops( "IDDB_2x2Matrix_Part3" );