Commit bfde9510 authored by ber's avatar ber
Browse files

Some more fixes: FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch,...

Some more fixes: FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch, FIX_1072_SPEEDUP_matrixMul_fx, FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest, FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs - all currently inactive (their #define lines are commented out); they need further testing before being enabled
parent 11307a29
Loading
Loading
Loading
Loading
Loading
+518 −22
Original line number Diff line number Diff line
@@ -47,7 +47,11 @@

#include "wmc_auto.h"

#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
//#define FIX_1072_SPEEDUP_matrixMul_fx
//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest
//#define FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
//#define FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT

#ifndef FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT
//#define FIX_1072_SET_DIV3232_RETURN_VAL /*only meaningful if FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT is not defined*/
@@ -134,7 +138,20 @@ static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_f

static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );

static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
/*
 * matrixTransp2Mul_fx() - forward declaration.
 *
 * Fixed-point product of two complex BINAURAL_CHANNELS x BINAURAL_CHANNELS
 * matrices, each given as separate real and imaginary arrays.
 *
 *   Are, Aim : real / imaginary part of the first operand, Q-format *q_A
 *   Bre, Bim : real / imaginary part of the second operand, Q-format *q_B
 *   q_A, q_B : passed by pointer; the visible parts of the definition rescale
 *              the operands in place (scale_sig32) and add the applied shift
 *              to *q_A / *q_B, so callers must not assume the inputs or their
 *              Q-formats are unchanged after the call
 *   Ascale, Bscale : only present when
 *              FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch is defined;
 *              in the visible part of the definition the in-place rescaling of
 *              A (resp. B) is executed only when the flag equals 1
 *   outRe, outIm : real / imaginary part of the result, Q-format *q_out
 *   q_out    : output Q-format
 *
 * NOTE(review): the visible fragments of the definition read B as B[chB][k]
 * with a negated imaginary part, which looks like A multiplied by the
 * conjugate transpose of B - TODO confirm against the complete definition.
 */
static void matrixTransp2Mul_fx( 
  Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, 
  Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, 
  Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, 
  Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, 
  Word16 *q_B, 
#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
    /* Optional switches controlling whether A / B are rescaled inside the function (1 = rescale). */
    int Ascale,
    int Bscale,
#endif
  Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, 
  Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, 
  Word16 *q_out 
);

/*-------------------------------------------------------------------------
 * ivas_dirac_dec_init_binaural_data()
@@ -1961,7 +1978,13 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(

        /* Make matrix multiplication M*Cx*M' to determine resulting covariance matrix of processing input with M */
        matrixMul_fx( Mre_fx, Mim_fx, &q_M, CxRe_fx, CxIm_fx, &q_Cx, tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp );
        matrixTransp2Mul_fx( tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, resultMtxRe_fx, resultMtxIm_fx, &q_res );
        matrixTransp2Mul_fx( 
          tmpMtxRe_fx, tmpMtxIm_fx, &q_tmp, Mre_fx, Mim_fx, &q_M, 
#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
            1 /*int Ascale*/,
            0 /*int Bscale*/,
#endif
            resultMtxRe_fx, resultMtxIm_fx, &q_res );

        /* When below the frequency limit where decorrelation is applied, we inject the decorrelated
         * residual (or missing) signal component. The procedure is active when there are not enough independent
@@ -2071,7 +2094,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                                         Q31, CrEneL_fx, CrEneR_fx, q_CrEne,
                                         CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
                                         prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec, 3277 ); // 3277 = 0.2 in Q14
            pop_wmops();
            pop_wmops(); /*IDDB_detProcMat_bigLoop1_bigBranch1*/
        }
        ELSE
        {
@@ -2298,10 +2321,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                    move16();
                }
            }
            pop_wmops();
            pop_wmops(); /*IDDB_detProcMat_bigLoop1_bigBranch2*/
        }
    }
    pop_wmops();
    pop_wmops(); /*IDDB_detProcMat_bigLoop1*/

    /* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */
    minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx );
@@ -2322,7 +2345,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
    minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
    minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );

    push_wmops( "IDDB_detProcMat_bigLoop2" );
    FOR( bin = 0; bin < nBins; bin++ )
    {
        FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -2362,7 +2384,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
            }
        }
    }
    pop_wmops();

    return;
}
@@ -3522,6 +3543,243 @@ static void matrixMul_fx(
            move32();
            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], Bim_fx[0][chB] ), Are_fx[chA][1], Bim_fx[1][chB] ) ) );
            move32();
#else
#ifdef FIX_1072_SPEEDUP_matrixMul_fx
            /* Create testVariables*/
            Word32 Are_fx_chA_0_GE_0 = GE_32( Are_fx[chA][0], 0 );
            Word32 Bre_fx_0_chB_GE_0 = GE_32( Bre_fx[0][chB], 0 );
            Word32 Are_fx_chA_0_LT_0 = LT_32( Are_fx[chA][0], 0 );
            Word32 Bre_fx_0_chB_LT_0 = LT_32( Bre_fx[0][chB], 0 );
            Word32 Are_fx_chA_1_GE_0 = GE_32( Are_fx[chA][1], 0 );
            Word32 Bre_fx_1_chB_GE_0 = GE_32( Bre_fx[1][chB], 0 );
            Word32 Are_fx_chA_1_LT_0 = LT_32( Are_fx[chA][1], 0 );
            Word32 Bre_fx_1_chB_LT_0 = LT_32( Bre_fx[1][chB], 0 );
            Word32 Aim_fx_chA_0_GE_0 = GE_32( Aim_fx[chA][0], 0 );
            Word32 Bim_fx_0_chB_GE_0 = GE_32( Bim_fx[0][chB], 0 );
            Word32 Aim_fx_chA_0_LT_0 = LT_32( Aim_fx[chA][0], 0 );
            Word32 Bim_fx_0_chB_LT_0 = LT_32( Bim_fx[0][chB], 0 );
            Word32 Aim_fx_chA_1_GE_0 = GE_32( Aim_fx[chA][1], 0 );
            Word32 Bim_fx_1_chB_GE_0 = GE_32( Bim_fx[1][chB], 0 );
            Word32 Aim_fx_chA_1_LT_0 = LT_32( Aim_fx[chA][1], 0 );
            Word32 Bim_fx_1_chB_LT_0 = LT_32( Bim_fx[1][chB], 0 );
            Word32 cond1, cond2, cond3;

#if 1
            tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB]) ;
            cond2 = Are_fx_chA_0_GE_0 || Bre_fx_0_chB_GE_0;
            move32();
            cond1 = Are_fx_chA_0_LT_0 || Bre_fx_0_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else
            IF( ( Are_fx_chA_0_GE_0 && Bre_fx_0_chB_GE_0 ) || ( Are_fx_chA_0_LT_0 && Bre_fx_0_chB_LT_0 ) )
            {
                tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] );
            }
            ELSE
            {
                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) );
            }
#endif

#if 1

            tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[1][chB] );
            cond2 = Are_fx_chA_1_GE_0 || Bre_fx_1_chB_GE_0;
            move32();
            cond1 = Are_fx_chA_1_LT_0 || Bre_fx_1_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );
#else
            IF( ( Are_fx_chA_1_GE_0 && Bre_fx_1_chB_GE_0 ) || ( Are_fx_chA_1_LT_0 && Bre_fx_1_chB_LT_0 ) )
            {
                tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[1][chB] );
            }
            ELSE
            {
                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) );
            }
#endif
            outRe_fx[chA][chB] = L_add( tmp1, tmp2 );
            move32();

#if 1
            tmp1 = Mpy_32_32( Aim_fx[chA][0], Bim_fx[0][chB] );
            cond2 = Aim_fx_chA_0_GE_0 || Bim_fx_0_chB_GE_0;
            move32();
            cond1 = Aim_fx_chA_0_LT_0 || Bim_fx_0_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else
            IF( ( Aim_fx_chA_0_GE_0 && Bim_fx_0_chB_GE_0 ) || ( Aim_fx_chA_0_LT_0 && Bim_fx_0_chB_LT_0 ) )
            {
                tmp1 = Mpy_32_32( Aim_fx[chA][0], Bim_fx[0][chB] );
            }
            ELSE
            {
                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) );
            }
#endif

#if 1

            tmp2 = Mpy_32_32( Aim_fx[chA][1], Bim_fx[1][chB] );
            cond2 = Aim_fx_chA_1_GE_0 || Bim_fx_1_chB_GE_0;
            move32();
            cond1 = Aim_fx_chA_1_LT_0 || Bim_fx_1_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );
#else
            IF( ( Aim_fx_chA_1_GE_0 && Bim_fx_1_chB_GE_0 ) || ( Aim_fx_chA_1_LT_0 && Bim_fx_1_chB_LT_0 ) )
            {
                tmp2 = Mpy_32_32( Aim_fx[chA][1], Bim_fx[1][chB] );
            }
            ELSE
            {
                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) );
            }
#endif

            outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) );
            move32();

#if 1
            tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] );
            cond2 = Aim_fx_chA_0_GE_0 || Bre_fx_0_chB_GE_0;
            move32();
            cond1 = Aim_fx_chA_0_LT_0 || Bre_fx_0_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else

            IF( ( Aim_fx_chA_0_GE_0 && Bre_fx_0_chB_GE_0 ) || ( Aim_fx_chA_0_LT_0 && Bre_fx_0_chB_LT_0 ) )
            {
                tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] );
            }
            ELSE
            {
                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) );
            }
#endif

#if 1

            tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[1][chB] );
            cond2 = Aim_fx_chA_1_GE_0 || Bre_fx_1_chB_GE_0;
            move32();
            cond1 = Aim_fx_chA_1_LT_0 || Bre_fx_1_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );
#else
            IF( ( Aim_fx_chA_1_GE_0 && Bre_fx_1_chB_GE_0 ) || ( Aim_fx_chA_1_LT_0 && Bre_fx_1_chB_LT_0 ) )
            {
                tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[1][chB] );
            }
            ELSE
            {
                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) );
            }
#endif


            outIm_fx[chA][chB] = L_add( tmp1, tmp2 );
            move32();

#if 1
            tmp1 = Mpy_32_32( Are_fx[chA][0], Bim_fx[0][chB] );
            cond2 = Are_fx_chA_0_GE_0 || Bim_fx_0_chB_GE_0;
            move32();
            cond1 = Are_fx_chA_0_LT_0 || Bim_fx_0_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else
            IF( ( Are_fx_chA_0_GE_0 && Bim_fx_0_chB_GE_0 ) || ( Are_fx_chA_0_LT_0 && Bim_fx_0_chB_LT_0 ) )
            {
                tmp1 = Mpy_32_32( Are_fx[chA][0], Bim_fx[0][chB] );
            }
            ELSE
            {
                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) );
            }
#endif

#if 1
            tmp2 = Mpy_32_32( Are_fx[chA][1], Bim_fx[1][chB] );
            cond2 = Are_fx_chA_1_GE_0 || Bim_fx_1_chB_GE_0;
            move32();
            cond1 = Are_fx_chA_1_LT_0 || Bim_fx_1_chB_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );
#else
            IF( ( Are_fx_chA_1_GE_0 && Bim_fx_1_chB_GE_0 ) || ( Are_fx_chA_1_LT_0 && Bim_fx_1_chB_LT_0 ) )
            {
                tmp2 = Mpy_32_32( Are_fx[chA][1], Bim_fx[1][chB] );
            }
            ELSE
            {
                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) );
            }
#endif

            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
            move32();

#else
            test();
            test();
@@ -3621,6 +3879,9 @@ static void matrixMul_fx(
            }
            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
            move32();


#endif
#endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
        }
    }
@@ -3742,6 +4003,10 @@ static void matrixTransp2Mul_fx(
    Word32 Bre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/
    Word32 Bim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_B*/
    Word16 *q_B,
#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
    int Ascale,
    int Bscale,
#endif
    Word32 outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
    Word16 *q_out )
@@ -3753,16 +4018,27 @@ static void matrixTransp2Mul_fx(
    Word32 tmp1, tmp2;
#endif

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
    IF (Ascale == 1)
#endif
    {
        min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
        scale_sig32( Are_fx[0], size, min_q_shift );
        scale_sig32( Aim_fx[0], size, min_q_shift );
        *q_A = add( *q_A, min_q_shift );
        move16();
    }

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
    IF (Bscale == 1)
#endif
    {
        min_q_shift = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 );
        scale_sig32( Bre_fx[0], size, min_q_shift );
        scale_sig32( Bim_fx[0], size, min_q_shift );
        *q_B = add( *q_B, min_q_shift );
        move16();
    }

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
@@ -3778,6 +4054,216 @@ static void matrixTransp2Mul_fx(
            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) );
            move32();
#else
#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest
            /* Create testVariables*/
            Word32 Are_fx_chA_0_GE_0 = GE_32( Are_fx[chA][0], 0 );
            Word32 Bre_fx_chB_0_GE_0 = GE_32( Bre_fx[chB][0], 0 );
            Word32 Are_fx_chA_0_LT_0 = LT_32( Are_fx[chA][0], 0 );
            Word32 Bre_fx_chB_0_LT_0 = LT_32( Bre_fx[chB][0], 0 );
            Word32 Are_fx_chA_1_GE_0 = GE_32( Are_fx[chA][1], 0 );
            Word32 Bre_fx_chB_1_GE_0 = GE_32( Bre_fx[chB][1], 0 );
            Word32 Are_fx_chA_1_LT_0 = LT_32( Are_fx[chA][1], 0 );
            Word32 Bre_fx_chB_1_LT_0 = LT_32( Bre_fx[chB][1], 0 );
            Word32 Aim_fx_chA_0_GE_0 = GE_32( Aim_fx[chA][0], 0 );
            Word32 Lneg_Bim_fx_chB_0_GE_0 = GE_32( L_negate( Bim_fx[chB][0] ), 0 );
            Word32 Aim_fx_chA_0_LT_0 = LT_32( Aim_fx[chA][0], 0 );
            Word32 Lneg_Bim_fx_chB_0_LT_0 = LT_32( L_negate( Bim_fx[chB][0] ), 0 );
            Word32 Aim_fx_chA_1_GE_0 = GE_32( Aim_fx[chA][1], 0 );
            Word32 Lneg_Bim_fx_chB_1_GE_0 = GE_32( L_negate( Bim_fx[chB][1] ), 0 );
            Word32 Aim_fx_chA_1_LT_0 = LT_32( Aim_fx[chA][1], 0 );
            Word32 Lneg_Bim_fx_chB_1_LT_0 = LT_32( L_negate( Bim_fx[chB][1] ), 0 );
            Word32 cond1, cond2, cond3;
            
#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
            tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] );
            cond2 = Are_fx_chA_0_GE_0 || Bre_fx_chB_0_GE_0;
            move32();
            cond1 = Are_fx_chA_0_LT_0 || Bre_fx_chB_0_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else
            IF( ( Are_fx_chA_0_GE_0 && Bre_fx_chB_0_GE_0 ) || ( Are_fx_chA_0_LT_0 && Bre_fx_chB_0_LT_0 ) )
            tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] );
            ELSE
                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs

            tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[chB][1] );
            cond2 = Are_fx_chA_1_GE_0 || Bre_fx_chB_1_GE_0;
            move32();
            cond1 = Are_fx_chA_1_LT_0 || Bre_fx_chB_1_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );
#else
            IF( ( Are_fx_chA_1_GE_0 && Bre_fx_chB_1_GE_0 ) || ( Are_fx_chA_1_LT_0 && Bre_fx_chB_1_LT_0 ) )
            tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[chB][1] );
            ELSE
                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/


            outRe_fx[chA][chB] = L_add( tmp1, tmp2 );
            move32();

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
            tmp1 = Mpy_32_32( Aim_fx[chA][0], L_negate( Bim_fx[chB][0] ) );
            cond2 = Aim_fx_chA_0_GE_0 || Lneg_Bim_fx_chB_0_GE_0;
            move32();
            cond1 = Aim_fx_chA_0_LT_0 || Lneg_Bim_fx_chB_0_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else

            IF( ( Aim_fx_chA_0_GE_0 && Lneg_Bim_fx_chB_0_GE_0 ) || ( Aim_fx_chA_0_LT_0 && Lneg_Bim_fx_chB_0_LT_0 ) )
            tmp1 = Mpy_32_32( Aim_fx[chA][0], -Bim_fx[chB][0] );
            ELSE
                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bim_fx[chB][0] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/


#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
            tmp2 = Mpy_32_32( Aim_fx[chA][1], L_negate( Bim_fx[chB][1] ) );
            cond2 = Aim_fx_chA_1_GE_0 || Lneg_Bim_fx_chB_1_GE_0;
            move32();
            cond1 = Aim_fx_chA_1_LT_0 || Lneg_Bim_fx_chB_1_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );

#else

            IF( ( Aim_fx_chA_1_GE_0 && Lneg_Bim_fx_chB_1_GE_0 ) || ( Aim_fx_chA_1_LT_0 && Lneg_Bim_fx_chB_1_LT_0 ) )
            tmp2 = Mpy_32_32( Aim_fx[chA][1], -Bim_fx[chB][1] );
            ELSE
                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/

            outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) );
            move32();

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
            tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] );
            cond2 = Aim_fx_chA_0_GE_0 || Bre_fx_chB_0_GE_0;
            move32();
            cond1 = Aim_fx_chA_0_LT_0 || Bre_fx_chB_0_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else

            IF( ( Aim_fx_chA_0_GE_0 && Bre_fx_chB_0_GE_0 ) || ( Aim_fx_chA_0_LT_0 && Bre_fx_chB_0_LT_0 ) )
            tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] );
            ELSE
                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
            tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[chB][1] );
            cond2 = Aim_fx_chA_1_GE_0 || Bre_fx_chB_1_GE_0;
            move32();
            cond1 = Aim_fx_chA_1_LT_0 || Bre_fx_chB_1_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );
#else
            IF( ( Aim_fx_chA_1_GE_0 && Bre_fx_chB_1_GE_0 ) || ( Aim_fx_chA_1_LT_0 && Bre_fx_chB_1_LT_0 ) )
            tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[chB][1] );
            ELSE
                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/

            outIm_fx[chA][chB] = L_add( tmp1, tmp2 );
            move32();

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
            tmp1 = Mpy_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) );
            cond2 = Are_fx_chA_0_GE_0 || Lneg_Bim_fx_chB_0_GE_0;
            move32();
            cond1 = Are_fx_chA_0_LT_0 || Lneg_Bim_fx_chB_0_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp1 = ( L_abs( tmp1 ) );

            if ( cond3 )
                tmp1 = L_negate( tmp1 );
#else

            IF( ( Are_fx_chA_0_GE_0 && Lneg_Bim_fx_chB_0_GE_0 ) || ( Are_fx_chA_0_LT_0 && Lneg_Bim_fx_chB_0_LT_0 ) )
            tmp1 = Mpy_32_32( Are_fx[chA][0], -Bim_fx[chB][0] );
            ELSE
                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( -Bim_fx[chB][0] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/

#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs
            tmp2 = Mpy_32_32( Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) );
            cond2 = Are_fx_chA_1_GE_0 || Lneg_Bim_fx_chB_1_GE_0;
            move32();
            cond1 = Are_fx_chA_1_LT_0 || Lneg_Bim_fx_chB_1_LT_0;
            move32();
            cond3 = cond1 && cond2;
            move32();

            if ( cond3 )
                tmp2 = ( L_abs( tmp2 ) );

            if ( cond3 )
                tmp2 = L_negate( tmp2 );
#else
            IF( ( Are_fx_chA_1_GE_0 && Lneg_Bim_fx_chB_1_GE_0 ) || ( Are_fx_chA_1_LT_0 && Lneg_Bim_fx_chB_1_LT_0 ) )
            tmp2 = Mpy_32_32( Are_fx[chA][1], -Bim_fx[chB][1] );
            ELSE
                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) );
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest_reduceLabs*/

            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
            move32();
#else /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest*/


            test();
            test();
            test();
@@ -3845,6 +4331,8 @@ static void matrixTransp2Mul_fx(
                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) );
            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
            move32();
#endif /*FIX_1072_SPEEDUP_matrixTransp2Mul_fx_reducetest*/

#endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
        }
    }
@@ -3994,7 +4482,6 @@ static void chol2x2_fx(
        // 4611686 = Q62
        IF( outRe[1][1] == 0 )
        {
            push_wmops( "DIV3232_0" );
#if !defined( FIX_1072_REPLACE_DIV_SQRT_BY_ISQRT )
            outRe[0][1] = BASOP_Util_Divide3232_Scale_cadence( c_re, 4611686, &exp );
            move32();
@@ -4018,7 +4505,6 @@ static void chol2x2_fx(
            move32();
            q_im = add( sub( 31, exp ), sub( q_c, 62 ) );
#endif
            pop_wmops();
        }
        ELSE
        {
@@ -4575,7 +5061,12 @@ static void formulate2x2MixingMatrix_fx(
        }
    }

    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, 
#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
                         1 /*int Ascale*/,
                         0 /*int Bscale*/,
#endif
      Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */

    /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -4622,7 +5113,12 @@ static void formulate2x2MixingMatrix_fx(

    matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp );

    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M );
    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, 
#ifdef FIX_1072_SPEEDUP_matrixTransp2Mul_fx_scaleSwitch
                         1 /*int Ascale*/,
                         0 /*int Bscale*/,
#endif
      Mre_fx, Mim_fx, q_M );

    pop_wmops();
    return;