Commit 94da89c8 authored by mave2802's avatar mave2802
Browse files

added dedicated version of formulate2x2MixingMatrix() for the case if cross terms are zero

parent 96ae0d1c
Loading
Loading
Loading
Loading
+56 −3
Original line number Diff line number Diff line
@@ -80,9 +80,62 @@
/* ################### Start BE switches ################################# */
/* only BE switches wrt wrt. TS 26.251 V3.0 */

/* ################### Start FIXES switches ########################### */
/*#define FIX_I4_OL_PITCH*/                                  /* fix open-loop pitch used for EVS core switching */
#define REMOVE_BASOP_Util_Divide3232_Scale_cadence           /* remove this division variant */
#define FIX_1990_SANITIZER_IN_REVERB_LOAD                    /* Nokia: Fix issue part of issue 1990 by introducing missing free of structure - keep until #2059 is addressed */
#define FIX_1999_TEMPORARY_DISABLE_DIST_ATT_CHECK            /* Eri: Issue 1999: Range check on float values of distance attenuation, while the float values are not propagated to this function. The test is not correct, but configurable distance attenuation is not used in Characterization.*/
#define TEMP_FIX_2088_MSAN_INIT_ERROR                        /* Eri: Temporary fix for Issue 2088 - MSAN error. Will come with later port of JBM+Split rendering update */
#define FIX_2092_ASSERT_IN_OMASA_RENDER                      /* FhG, Nokia: Fix LTV crash due to overflow in OMASA EXT output */
#define FIX_2084_FLOATING_POINT_LEFTOVERS                    /* FhG: convert floating-point leftovers in IVAS_ENC_FeedObjectMetadata() */
#define FIX_2141_ASSERT_IN_OMASA_BITRATE_SWITSCHING          /* FhG: Replace L_shl with L_shl_sat to prevent overflow when calculating scale factors for very small numbers in the logarithmic domain */
#define FIX_APA_EXECS_SCALING                                /* VA: fix scaling of JBM APA buffer */
#define FIX_2164_ASSERT_IN_OMASA_PREPROC_FOR_EDIT            /* Nokia: Issue 2164: Prevent overflow when calculating equalization coefficient for editing before clamping to safe range */
#define FIX_BASOP_ASSERT_IN_TONAL_MDCT_PLC                   /* FhG: fix for issue 2165 - using saturating addition in tonal MDCT PLC function */
#define OPT_2146_BASOP_UTIL_ADD_MANT32EXP                    /* Dlb: optimized version of BASOP_Util_Add_Mant32Exp() */
#define FIX_2166_ASSERT_OSBA_PLC_STEREO_OUT                  /* FhG: fix for issue 2166 - add missing averaging factor 0.5 in for the sum of energies in function stereo_dft_dmx_swb_nrg_fx()*/
#define FIX_2086_ENABLE_HP20_OPT_FOR_ENC                     /* FhG: Enable hp20_fx_32_opt() for Encoder */
#define FIX_1793_DEC_MC_TO_MONO_SCALING_ISSUE                /* FhG: Use dynamic Q factor for synth_fx and synthFB_fx to prevent overflow */
#define FIX_2170_ASSERT_IN_FFT3                              /* Eri: Assert in fft3_fx from EVS, adding _sat */
#define FIX_2082_FP_LEFTOVERS_OMASA_DEC                      /* Nokia: fix for issue 2082, cleaning remaining floating point code */
#define FIX_2174_JBM_BASOP_ALIGNMENT                         /* VoiceAge, Nokia: Fixes to JBM BASOP implementation and alignment to float */
#define FIX_GAIN_EDIT_LIMITS                                 /* Harmonize gain edit limits for all opertation points. For all modes, limit to max +12dB. For parametric modes, limit to min -24dB. */

#define FIX_2176_ASSERT_DEC_MAP_PARAMS_DIRAC2STEREO          /* FhG: Reduce hStereoDft->q_smooth_buf_fx by one to prevent overflow in the subframe_band_nrg[][] calculation */
#define FIX_2015_PREMPH_SAT_ALT                              /* VA: saturation can happen during preemphasis filtering due to a too aggressive scaling factor, allows preemphis to get 1 more bit headroom */
#define FIX_2178_FL_TO_FX_WITH_OBJ_EDIT_FILE_INTERFACE       /* Nokia: Fixes float  to fx conversion in decoder app with object edit file interface */
#define FIX_2070_JBM_TC_CHANNEL_RESCALING_ISSUE              /* Eri/Orange: scale_sig32 problem on p_tc_fx[] */

#define FIX_2173_UBSAN_IN_JBM_PCMDSP_APA                     /* FhG: Fix UBSAN problems in jbm_pcmdsp_apa_fx.c */
#define FIX_1947_DEC_HIGH_MLD_FOR_STEREO2MONO                /* FhG: Make Q-factor of synth_16_fx and output_16_fx dynamic to prevent overflow in HQ_CORE mode */

#define FIX_2184_EVS_STEREO_DMX_CHANNEL_DISAPPEARING         /* Orange: Fix for issue 2184 - to prevent one channel from becoming inaudible in the mono downmix output */
#define FIX_2148_OBJ_EDIT_ISSUE_WITH_OSBA                    /* Nokia: Add missing code to solve issue */
#define FIX_2200_ISAR_PLC_CRASH                              /* Dolby: Fix for ISAR PLC crash observed with newly added BASOP tests */
#define FIX_2210_ASSERT_IN_BW_DETEC_FX_FOR_OMASA             /* FhG: Resolve overflow by swapping the order of addition and multiplication */
#define FIX_2217_ASSERT_IN_IVAS_CORE_DECODER_WITH_MC         /* FhG: Adjust Q_real to prevent overflow in st->cldfbSyn->cldfb_state_fx scaling */
#define FIX_2211_ASSERT_IN_REND_CREND_CONVOLER               /* FhG: Add headroom to p_output_fx to prevent overflow in ivas_rend_crendProcessSubframe_fx() */

#define NONBE_FIX_2205_SATURATE_ALTERNATIVE
#define NONBE_FIX_2206_SATURATE_ALTERNATIVE
#define FIX_2226_ISAR_PRE_CRASH_CLDFB_NO_CHANNELS           /* Dolby: Fix crash of ISAR pre-renderer due to an attempt of re-scaling  uninitialized values in the CLDFB filter bank */
#define RTP_SR_CODEC_FRAME_SIZE_IN_TOC_BYTE                 /* Dolby: CR for split rendering codec framesize signalling in Toc Byte */


#define NONBE_FIX_ISSUE_2232_CHECK_CLDFB_STATES             /* FhG: Adjust scaleFactor according to st->cldfbSyn->cldfb_state_fx too to avoid overflow in cldfbSynthesis_ivas_fx() */

#define NONBE_FIX_BASOP_2233_RTPDUMP_DIFFERING_BITSTREAMS /* Nokia: fix basop issue 2233: Fix differing rtpdump streams */

#define NONBE_FIX_2237_ZERO_CURR_NOISE_PROBLEM              /* FhG: Modify sum of hTonalMDCTConc->curr_noise_nrg to avoid inaccurate zero */

#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT                /* Dlb: use dedicated formulate2x2MixingMatrix() function if cross terms are zero */
/* ################### End FIXES switches ########################### */

/* #################### Start BASOP porting switches ############################ */

#define NONBE_1244_FIX_SWB_BWE_MEMORY                   /* VA: issue 1244: fix to SWB BWE memory in case of switching from FB coding - pending a review by Huawei */
#define FIX_1053_REVERB_RECONFIGURATION
#define FIX_1119_SPLIT_RENDERING_VOIP                   /* FhG: Add split rendering support to decoder in VoIP mode */
#define TMP_1342_WORKAROUND_DEC_FLUSH_BROKEN_IN_SR      /* FhG: Temporary workaround for incorrect implementation of decoder flush with split rendering */
#define NONBE_1122_KEEP_EVS_MODE_UNCHANGED              /* FhG: Disables fix for issue 1122 in EVS mode to keep BE tests green. This switch should be removed once the 1122 fix is added to EVS via a CR.  */
#define FIX_1435_MOVE_STEREO_PANNING                    /* VA: issue 1435: do the EVS stereo panning in the renderer */
+358 −5
Original line number Diff line number Diff line
@@ -116,11 +116,9 @@ static void getDirectPartGains_fx( const Word16 bin, Word16 aziDeg, Word16 eleDe
static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaExtRend, COMBINED_ORIENTATION_HANDLE hCombinedOrientationData, Word32 *output_fx[] /*Q11*/, const Word16 subframe, const SPLIT_REND_WRAPPER *hSplitRendWrapper, Word32 Cldfb_Out_Real[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], Word32 Cldfb_Out_Imag[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] );

static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ );

#ifdef OPT_2182_MATRIX_SCALE_OPS
static void matrixScale_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A );
#endif

#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
static void formulate2x2MixingMatrixNoCross_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16* q_M );
#endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );

#ifdef OPT_2182_MATRIX_SCALE_OPS
@@ -2355,12 +2353,22 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
            CrCrossIm_fx = Mpy_32_32( CrCrossIm_fx, decorrelationReductionFactor_fx );
            q_CrCross = sub( add( q_CrCross, q_decorrelationReductionFactor ), 31 );

#ifndef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
            formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin],
                                         hDiracDecBin->q_ChEne,
                                         0, 0, /* Decorrelated signal has ideally no cross-terms */
                                         Q31, CrEneL_fx, CrEneR_fx, q_CrEne,
                                         CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
                                         prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec, 3277 ); // 3277 = 0.2 in Q14
#else                                                                                            /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
            /* Determine a residual mixing matrix Mdec for processing the decorrelated signal to obtain
             * the residual signal (that has the residual covariance matrix)
             * Decorrelated signal has ideally no cross-terms */
            formulate2x2MixingMatrixNoCross_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne,
                                                CrEneL_fx, CrEneR_fx, q_CrEne,
                                                CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
                                                prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec );
#endif                                                                                           /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
        }
        ELSE
        {
@@ -4837,6 +4845,351 @@ static void chol2x2_fx(

    return;
}

#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
static void formulate2x2MixingMatrixNoCross_fx(
    Word32 Ein1_fx /*q_Ein*/,
    Word32 Ein2_fx /*q_Ein*/,
    Word16 q_Ein,
    Word32 Eout1_fx /*q_Eout*/,
    Word32 Eout2_fx /*q_Eout*/,
    Word16 q_Eout,
    Word32 CoutRe_fx /*q_Cout*/,
    Word32 CoutIm_fx /*q_Cout*/,
    Word16 q_Cout,
    Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/,
    Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/,
    Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/,
    Word16 *q_M )
{
    /*
     This function implements a 2x2 solution for an optimized spatial audio rendering algorithm, based on
     Vilkamo, J., Bäckström, T. and Kuntz, A., 2013.
     "Optimized covariance domain framework for time–frequency processing of spatial audio."
     Journal of the Audio Engineering Society, 61(6), pp.403-411.
     but optimized for decorrelated signals

     The result of the formulas below are the same as those in the publication, however, some
     derivation details differ for as simple as possible 2x2 formulation
     */
    Word16 chA, chB;
    Word32 maxEne_fx, tmp, maxEneDiv_fx;
    Word16 q_maxEne, q_maxEneDiv, exp, exp1;
    Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word32 D_fx[BINAURAL_CHANNELS];
    Word32 div_fx[BINAURAL_CHANNELS];
    Word32 Ghat_fx[BINAURAL_CHANNELS];
    Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word16 q_ky, q_A, q_U, q_D, q_P;
    Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im;
    Word16 q_ein, q_eout, q_cout, q_Ghat, q_GhatQ, q_temp, q_div, exp_temp;
    Word32 temp;
    Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
    set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
    set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
    set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
    set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );

    q_ky = 0;
    move16();

    exp = sub( get_min_scalefactor( Ein1_fx, Ein2_fx ), 1 );
    E_in1 = L_shl( Ein1_fx, exp );
    E_in2 = L_shl( Ein2_fx, exp );
    q_ein = add( q_Ein, exp );

    exp = sub( get_min_scalefactor( Eout1_fx, Eout2_fx ), 1 );
    E_out1 = L_shl( Eout1_fx, exp );
    E_out2 = L_shl( Eout2_fx, exp );
    q_eout = add( q_Eout, exp );

    exp = sub( get_min_scalefactor( CoutRe_fx, CoutIm_fx ), 1 );
    Cout_re = L_shl( CoutRe_fx, exp );
    Cout_im = L_shl( CoutIm_fx, exp );
    q_cout = add( q_Cout, exp );

    /* Normalize energy values */
    maxEne_fx = L_max( E_in1, E_in2 );
    q_maxEne = q_ein;
    move16();

    tmp = L_max( E_out1, E_out2 );
    IF( LT_16( q_maxEne, q_eout ) )
    {
        maxEne_fx = L_max( maxEne_fx, L_shr( tmp, sub( q_eout, q_maxEne ) ) ); // q_maxEne
    }
    ELSE
    {
        maxEne_fx = L_max( L_shr( maxEne_fx, sub( q_maxEne, q_eout ) ), tmp ); // q_maxEne
        q_maxEne = q_eout;
        move16();
    }

    // 4611686 = Q62
    IF( maxEne_fx == 0 )
    {
        maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
        move32();
        q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP;
        move16();
    }
    ELSE
    {
        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, maxEne_fx, &exp );
        q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
    }
    exp = norm_l( maxEneDiv_fx );
    maxEneDiv_fx = L_shl( maxEneDiv_fx, exp );
    q_maxEneDiv = add( q_maxEneDiv, exp );

    E_in1 = Mpy_32_32( E_in1, maxEneDiv_fx );
    E_in2 = Mpy_32_32( E_in2, maxEneDiv_fx );
    q_ein = sub( add( q_ein, q_maxEneDiv ), 31 );

    E_out1 = Mpy_32_32( E_out1, maxEneDiv_fx );
    E_out2 = Mpy_32_32( E_out2, maxEneDiv_fx );
    q_eout = sub( add( q_eout, q_maxEneDiv ), 31 );

    Cout_re = Mpy_32_32( Cout_re, maxEneDiv_fx );
    Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
    q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );

    /* Cholesky decomposition of target / output covariance matrix */
    chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );

    /* If there are no cross-terms, the Eigendecomposition of input covariance matrix
       can be skipped. Uxre is a unit matrix, Uxim is a zero matrix and Sx is (1, 1)
       Further on, also Kxre is a unit matrix and Kxim is a zero matrix
       Multiplication with these matrices / scalars can be skipped
    */

    temp = Mpy_32_32( E_in2, INV_1000_Q31 );
    temp = L_max( temp, E_in1 );

    IF( temp == 0 )
    {
        IF( E_out1 == 0 )
        {
            Ghat_fx[0] = 0;
            exp = -19;
            move32();
            move16();
        }
        ELSE
        {
            temp = BASOP_Util_Divide3232_Scale_newton( E_out1, 4611686, &exp ); // 4611686 = Q62
            exp = sub( exp, sub( q_eout, 62 ) );
            Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
        }
    }
    ELSE
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );

        temp = BASOP_Util_Divide3232_Scale_newton( E_out1, temp, &exp );
        exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    }
    move32();

    temp = Mpy_32_32( E_in1, 2147484 );
    temp = L_max( temp, E_in2 ); // q_ein
    IF( temp == 0 )
    {
        IF( E_out2 == 0 )
        { /* We can set hard-coded results */
            Ghat_fx[1] = 0;
            exp1 = -19;
            move16();
        }
        ELSE
        {
            temp = BASOP_Util_Divide3232_Scale_newton( E_out2, 4611686, &exp1 ); // 4611686 = Q62
            exp1 = sub( exp1, sub( q_eout, 62 ) );
            Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
        }
    }
    ELSE
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );

        temp = BASOP_Util_Divide3232_Scale_newton( E_out2, temp, &exp1 );
        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
    }
    move32();

    q_Ghat = sub( 31, s_max( exp, exp1 ) );
    Word16 q_diff = sub( 31, q_Ghat );
    Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_diff, exp ) ); // q_Ghat
    move32();
    Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_diff, exp1 ) ); // q_Ghat
    move32();

    /* Matrix multiplication, A = Ky' * G_hat * Q */
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        GhatQ_fx[chA][0] = Mpy_32_32( Q_fx[chA][0], Ghat_fx[chA] );
        GhatQ_fx[chA][1] = Mpy_32_32( Q_fx[chA][1], Ghat_fx[chA] );
        move32();
        move32();
    }
    q_GhatQ = sub( add( Q31, q_Ghat ), 31 );

    exp = sub( s_min( L_norm_arr( KyRe_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ), L_norm_arr( KyIm_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ) ), 1 );
    scale_sig32( KyRe_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp );
    scale_sig32( KyIm_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp );
    q_ky = add( q_ky, exp );

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
        {
            Are_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] );
            Aim_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] );
            move32();
            move32();
        }
    }

    q_A = sub( add( q_ky, q_GhatQ ), 31 ); // TODO SMM: Check if this is correct!!!
    move16();

    /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
       For matrix A that is P = A(A'A)^0.5 */
    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );

    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );

    IF( D_fx[0] != 0 && D_fx[1] == 0 ) // Due to an eig2x2 error, sometimes D_fx[1] becomes zero, which implies that the input matrix should be singular (i.e., determinant = 0).
    {
        Word32 det_fx = L_sub_sat( Mult_32_32( tmpRe_fx[0][0], tmpRe_fx[1][1] ),
                                   L_add_sat( Mult_32_32( tmpRe_fx[1][0], tmpRe_fx[1][0] ),
                                              Mult_32_32( tmpIm_fx[1][0], tmpIm_fx[1][0] ) ) );
        if ( det_fx != 0 )
        {
            D_fx[1] = SMALL_EIGENVALUE; // Setting D_fx[1] to epsilon has no effect, as the value is too small to affect the output.
            move32();
        }
    }

    IF( D_fx[0] == 0 )
    {
        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
        move32();
        exp = ONE_DIV_EPSILON_EXP;
        move16();
    }
    ELSE
    {
        temp = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, D_fx[0], &exp );
        exp = sub( exp, sub( Q30, q_D ) );
    }
    div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    move32();

    // Sqrt(1)
    div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
    exp1 = add( 0, 20 );

    IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt
    {
        exp1 = sub( 31, q_D );
        div_fx[1] = ISqrt32( D_fx[1], &exp1 );
        move32();
    }
    q_div = sub( 31, s_max( exp, exp1 ) );

    div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div
    move32();
    div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div
    move32();

    // 1310720000 = 10,000.0f in Q17
    Word32 thresh = L_shl_sat( 1310720000, sub( q_div, Q17 ) ); // q_div
    div_fx[0] = L_min( div_fx[0], thresh );                     // q_div
    div_fx[1] = L_min( div_fx[1], thresh );                     // q_div

    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );

    exp = L_norm_arr( div_fx, BINAURAL_CHANNELS );
    scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
    q_div = add( q_div, exp );

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
        {
            Word64 W_tmp;

            W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
            IF( W_tmp != 0 )
            {
                Word16 hdrm = sub( W_norm( W_tmp ), 32 );
                tmpRe_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm );
                move32();
                hdrm_re[chA][chB] = add( add( q_temp, q_div ), hdrm );
                move16();
            }
            ELSE
            {
                tmpRe_fx[chA][chB] = 0;
                move32();
            }

            W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] );
            IF( W_tmp != 0 )
            {
                Word16 hdrm = sub( W_norm( W_tmp ), 32 );
                move16();
                tmpIm_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm );
                move32();
                hdrm_im[chA][chB] = add( add( q_temp, q_div ), hdrm );
                move16();
            }
            ELSE
            {
                tmpIm_fx[chA][chB] = 0;
                move32();
            }
        }
    }

    minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
    q_temp = exp;
    move16();
    minimum_s( hdrm_im[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, &exp );
    q_temp = s_min( q_temp, exp );
    q_temp = sub( q_temp, 1 );

    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
        {
            tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], sub( hdrm_re[chA][chB], q_temp ) );
            tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], sub( hdrm_im[chA][chB], q_temp ) );
            move32();
            move32();
        }
    }

    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U,
                         0 /*int Ascale*/,
                         0 /*int Bscale*/,
                         Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */

    matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, Mre_fx, Mim_fx, q_M );

    return;
}
#endif /* FORM2x2MIXMAT_IMPROVEMENT */


static void formulate2x2MixingMatrix_fx(
    Word32 Ein1_fx, /*q_Ein*/
    Word32 Ein2_fx, /*q_Ein*/