From 4976cfb8dcbcc0a7f90035a14a47e0b9ff7388bc Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 30 Oct 2025 11:31:44 +0100
Subject: [PATCH 1/8] added dedicated version of formulate2x2MixingMatrix() for
 the case where cross terms are zero

---
 lib_com/options.h                             |  12 +-
 .../ivas_dirac_dec_binaural_functions_fx.c    | 359 +++++++++++++++++-
 2 files changed, 359 insertions(+), 12 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index f792698a9..bc61b0892 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -87,17 +87,7 @@
 #define FIX_2166_ASSERT_OSBA_PLC_STEREO_OUT                  /* FhG: fix for issue 2166 - add missing averaging factor 0.5 in for the sum of energies in function stereo_dft_dmx_swb_nrg_fx()*/
 #define FIX_2086_ENABLE_HP20_OPT_FOR_ENC                     /* FhG: Enable hp20_fx_32_opt() for Encoder */
 #define FIX_1793_DEC_MC_TO_MONO_SCALING_ISSUE                /* FhG: Use dynamic Q factor for synth_fx and synthFB_fx to prevent overflow */
-#define FIX_2170_ASSERT_IN_FFT3                              /* Eri: Assert in fft3_fx from EVS, adding _sat */
-#define FIX_2082_FP_LEFTOVERS_OMASA_DEC                      /* Nokia: fix for issue 2082, cleaning remaining floating point code */
-#define FIX_2174_JBM_BASOP_ALIGNMENT                         /* VoiceAge, Nokia: Fixes to JBM BASOP implementation and alignment to float */
-
-#define FIX_2176_ASSERT_DEC_MAP_PARAMS_DIRAC2STEREO          /* FhG: Reduce hStereoDft->q_smooth_buf_fx by one to prevent overflow in the subframe_band_nrg[][] calculation */
-#define FIX_2015_PREMPH_SAT_ALT                              /* VA: saturation can happen during preemphasis filtering due to a too aggressive scaling factor, allows preemphis to get 1 more bit headroom */
-#define FIX_2178_FL_TO_FX_WITH_OBJ_EDIT_FILE_INTERFACE       /* Nokia: Fixes float  to fx conversion in decoder app with object edit file interface */
-#define FIX_2070_JBM_TC_CHANNEL_RESCALING_ISSUE              /* Eri/Orange: scale_sig32 problem on p_tc_fx[] */
-
-#define FIX_2173_UBSAN_IN_JBM_PCMDSP_APA                     /* FhG: Fix UBSAN problems in jbm_pcmdsp_apa_fx.c */
-#define FIX_1947_DEC_HIGH_MLD_FOR_STEREO2MONO                /* FhG: Make Q-factor of synth_16_fx and output_16_fx dynamic to prevent overflow in HQ_CORE mode */
+#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT                /* Dlb: use dedicated formulate2x2MixingMatrix() function if cross terms are zero */
 /* ################### End FIXES switches ########################### */
 
 /* #################### Start BASOP porting switches ############################ */
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 3211b0e46..6b4476273 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -117,7 +117,9 @@ static void getDirectPartGains_fx( const Word16 bin, Word16 aziDeg, Word16 eleDe
 static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaExtRend, COMBINED_ORIENTATION_HANDLE hCombinedOrientationData, Word32 *output_fx[] /*Q11*/, const Word16 subframe, const SPLIT_REND_WRAPPER *hSplitRendWrapper, Word32 Cldfb_Out_Real[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], Word32 Cldfb_Out_Imag[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] );
 
 static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ );
-
+#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
+static void formulate2x2MixingMatrixNoCross_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16* q_M );
+#endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
 static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
 
 static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
@@ -2392,12 +2394,22 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             CrCrossIm_fx = Mpy_32_32( CrCrossIm_fx, decorrelationReductionFactor_fx );
             q_CrCross = sub( add( q_CrCross, q_decorrelationReductionFactor ), 31 );
 
+#ifndef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
             formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin],
                                          hDiracDecBin->q_ChEne,
                                          0, 0, /* Decorrelated signal has ideally no cross-terms */
                                          Q31, CrEneL_fx, CrEneR_fx, q_CrEne,
                                          CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
                                          prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec, 3277 ); // 3277 = 0.2 in Q14
+#else                                                                                            /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
+            /* Determine a residual mixing matrix Mdec for processing the decorrelated signal to obtain
+             * the residual signal (that has the residual covariance matrix)
+             * Decorrelated signal has ideally no cross-terms */
+            formulate2x2MixingMatrixNoCross_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne,
+                                                CrEneL_fx, CrEneR_fx, q_CrEne,
+                                                CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
+                                                prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec );
+#endif                                                                                           /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
         }
         ELSE
         {
@@ -4394,6 +4406,351 @@ static void chol2x2_fx(
 
     return;
 }
+
+#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
+static void formulate2x2MixingMatrixNoCross_fx(
+    Word32 Ein1_fx /*q_Ein*/,
+    Word32 Ein2_fx /*q_Ein*/,
+    Word16 q_Ein,
+    Word32 Eout1_fx /*q_Eout*/,
+    Word32 Eout2_fx /*q_Eout*/,
+    Word16 q_Eout,
+    Word32 CoutRe_fx /*q_Cout*/,
+    Word32 CoutIm_fx /*q_Cout*/,
+    Word16 q_Cout,
+    Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/,
+    Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/,
+    Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/,
+    Word16 *q_M )
+{
+    /*
+     This function implements a 2x2 solution for an optimized spatial audio rendering algorithm, based on
+     Vilkamo, J., Bäckström, T. and Kuntz, A., 2013.
+     "Optimized covariance domain framework for time–frequency processing of spatial audio."
+     Journal of the Audio Engineering Society, 61(6), pp.403-411.
+     but optimized for decorrelated signals
+
+     The result of the formulas below are the same as those in the publication, however, some
+     derivation details differ for as simple as possible 2x2 formulation
+     */
+    Word16 chA, chB;
+    Word32 maxEne_fx, tmp, maxEneDiv_fx;
+    Word16 q_maxEne, q_maxEneDiv, exp, exp1;
+    Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 D_fx[BINAURAL_CHANNELS];
+    Word32 div_fx[BINAURAL_CHANNELS];
+    Word32 Ghat_fx[BINAURAL_CHANNELS];
+    Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word16 q_ky, q_A, q_U, q_D, q_P;
+    Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im;
+    Word16 q_ein, q_eout, q_cout, q_Ghat, q_GhatQ, q_temp, q_div, exp_temp;
+    Word32 temp;
+    Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+    set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+    set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+    set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
+
+    q_ky = 0;
+    move16();
+
+    exp = sub( get_min_scalefactor( Ein1_fx, Ein2_fx ), 1 );
+    E_in1 = L_shl( Ein1_fx, exp );
+    E_in2 = L_shl( Ein2_fx, exp );
+    q_ein = add( q_Ein, exp );
+
+    exp = sub( get_min_scalefactor( Eout1_fx, Eout2_fx ), 1 );
+    E_out1 = L_shl( Eout1_fx, exp );
+    E_out2 = L_shl( Eout2_fx, exp );
+    q_eout = add( q_Eout, exp );
+
+    exp = sub( get_min_scalefactor( CoutRe_fx, CoutIm_fx ), 1 );
+    Cout_re = L_shl( CoutRe_fx, exp );
+    Cout_im = L_shl( CoutIm_fx, exp );
+    q_cout = add( q_Cout, exp );
+
+    /* Normalize energy values */
+    maxEne_fx = L_max( E_in1, E_in2 );
+    q_maxEne = q_ein;
+    move16();
+
+    tmp = L_max( E_out1, E_out2 );
+    IF( LT_16( q_maxEne, q_eout ) )
+    {
+        maxEne_fx = L_max( maxEne_fx, L_shr( tmp, sub( q_eout, q_maxEne ) ) ); // q_maxEne
+    }
+    ELSE
+    {
+        maxEne_fx = L_max( L_shr( maxEne_fx, sub( q_maxEne, q_eout ) ), tmp ); // q_maxEne
+        q_maxEne = q_eout;
+        move16();
+    }
+
+    // 4611686 = Q62
+    IF( maxEne_fx == 0 )
+    {
+        maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
+        move32();
+        q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP;
+        move16();
+    }
+    ELSE
+    {
+        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, maxEne_fx, &exp );
+        q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
+    }
+    exp = norm_l( maxEneDiv_fx );
+    maxEneDiv_fx = L_shl( maxEneDiv_fx, exp );
+    q_maxEneDiv = add( q_maxEneDiv, exp );
+
+    E_in1 = Mpy_32_32( E_in1, maxEneDiv_fx );
+    E_in2 = Mpy_32_32( E_in2, maxEneDiv_fx );
+    q_ein = sub( add( q_ein, q_maxEneDiv ), 31 );
+
+    E_out1 = Mpy_32_32( E_out1, maxEneDiv_fx );
+    E_out2 = Mpy_32_32( E_out2, maxEneDiv_fx );
+    q_eout = sub( add( q_eout, q_maxEneDiv ), 31 );
+
+    Cout_re = Mpy_32_32( Cout_re, maxEneDiv_fx );
+    Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
+    q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );
+
+    /* Cholesky decomposition of target / output covariance matrix */
+    chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
+
+    /* If there are no cross-terms, the Eigendecomposition of input covariance matrix
+       can be skipped. Uxre is a unit matrix, Uxim is a zero matrix and Sx is (1, 1)
+       Further on, also Kxre is a unit matrix and Kxim is a zero matrix
+       Multiplication with these matrices / scalars can be skipped
+    */
+
+    temp = Mpy_32_32( E_in2, INV_1000_Q31 );
+    temp = L_max( temp, E_in1 );
+
+    IF( temp == 0 )
+    {
+        IF( E_out1 == 0 )
+        {
+            Ghat_fx[0] = 0;
+            exp = -19;
+            move32();
+            move16();
+        }
+        ELSE
+        {
+            temp = BASOP_Util_Divide3232_Scale_newton( E_out1, 4611686, &exp ); // 4611686 = Q62
+            exp = sub( exp, sub( q_eout, 62 ) );
+            Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+        }
+    }
+    ELSE
+    {
+        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+
+        temp = BASOP_Util_Divide3232_Scale_newton( E_out1, temp, &exp );
+        exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
+        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+    }
+    move32();
+
+    temp = Mpy_32_32( E_in1, 2147484 );
+    temp = L_max( temp, E_in2 ); // q_ein
+    IF( temp == 0 )
+    {
+        IF( E_out2 == 0 )
+        { /* We can set hard-coded results */
+            Ghat_fx[1] = 0;
+            exp1 = -19;
+            move16();
+        }
+        ELSE
+        {
+            temp = BASOP_Util_Divide3232_Scale_newton( E_out2, 4611686, &exp1 ); // 4611686 = Q62
+            exp1 = sub( exp1, sub( q_eout, 62 ) );
+            Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
+        }
+    }
+    ELSE
+    {
+        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+
+        temp = BASOP_Util_Divide3232_Scale_newton( E_out2, temp, &exp1 );
+        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
+        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
+    }
+    move32();
+
+    q_Ghat = sub( 31, s_max( exp, exp1 ) );
+    Word16 q_diff = sub( 31, q_Ghat );
+    Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_diff, exp ) ); // q_Ghat
+    move32();
+    Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_diff, exp1 ) ); // q_Ghat
+    move32();
+
+    /* Matrix multiplication, A = Ky' * G_hat * Q */
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        GhatQ_fx[chA][0] = Mpy_32_32( Q_fx[chA][0], Ghat_fx[chA] );
+        GhatQ_fx[chA][1] = Mpy_32_32( Q_fx[chA][1], Ghat_fx[chA] );
+        move32();
+        move32();
+    }
+    q_GhatQ = sub( add( Q31, q_Ghat ), 31 );
+
+    exp = sub( s_min( L_norm_arr( KyRe_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ), L_norm_arr( KyIm_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ) ), 1 );
+    scale_sig32( KyRe_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp );
+    scale_sig32( KyIm_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp );
+    q_ky = add( q_ky, exp );
+
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            Are_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] );
+            Aim_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] );
+            move32();
+            move32();
+        }
+    }
+
+    q_A = sub( add( q_ky, q_GhatQ ), 31 ); // TODO SMM: Check if this is correct!!!
+    move16();
+
+    /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
+       For matrix A that is P = A(A'A)^0.5 */
+    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
+
+    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
+
+    IF( D_fx[0] != 0 && D_fx[1] == 0 ) // Due to an eig2x2 error, sometimes D_fx[1] becomes zero, which implies that the input matrix should be singular (i.e., determinant = 0).
+    {
+        Word32 det_fx = L_sub_sat( Mult_32_32( tmpRe_fx[0][0], tmpRe_fx[1][1] ),
+                                   L_add_sat( Mult_32_32( tmpRe_fx[1][0], tmpRe_fx[1][0] ),
+                                              Mult_32_32( tmpIm_fx[1][0], tmpIm_fx[1][0] ) ) );
+        if ( det_fx != 0 )
+        {
+            D_fx[1] = SMALL_EIGENVALUE; // Setting D_fx[1] to epsilon has no effect, as the value is too small to affect the output.
+            move32();
+        }
+    }
+
+    IF( D_fx[0] == 0 )
+    {
+        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
+        move32();
+        exp = ONE_DIV_EPSILON_EXP;
+        move16();
+    }
+    ELSE
+    {
+        temp = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, D_fx[0], &exp );
+        exp = sub( exp, sub( Q30, q_D ) );
+    }
+    div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+    move32();
+
+    // Sqrt(1)
+    div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
+    exp1 = add( 0, 20 );
+
+    IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt
+    {
+        exp1 = sub( 31, q_D );
+        div_fx[1] = ISqrt32( D_fx[1], &exp1 );
+        move32();
+    }
+    q_div = sub( 31, s_max( exp, exp1 ) );
+
+    div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div
+    move32();
+    div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div
+    move32();
+
+    // 1310720000 = 10,000.0f in Q17
+    Word32 thresh = L_shl_sat( 1310720000, sub( q_div, Q17 ) ); // q_div
+    div_fx[0] = L_min( div_fx[0], thresh );                     // q_div
+    div_fx[1] = L_min( div_fx[1], thresh );                     // q_div
+
+    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
+
+    exp = L_norm_arr( div_fx, BINAURAL_CHANNELS );
+    scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
+    q_div = add( q_div, exp );
+
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            Word64 W_tmp;
+
+            W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
+            IF( W_tmp != 0 )
+            {
+                Word16 hdrm = sub( W_norm( W_tmp ), 32 );
+                tmpRe_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm );
+                move32();
+                hdrm_re[chA][chB] = add( add( q_temp, q_div ), hdrm );
+                move16();
+            }
+            ELSE
+            {
+                tmpRe_fx[chA][chB] = 0;
+                move32();
+            }
+
+            W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] );
+            IF( W_tmp != 0 )
+            {
+                Word16 hdrm = sub( W_norm( W_tmp ), 32 );
+                move16();
+                tmpIm_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm );
+                move32();
+                hdrm_im[chA][chB] = add( add( q_temp, q_div ), hdrm );
+                move16();
+            }
+            ELSE
+            {
+                tmpIm_fx[chA][chB] = 0;
+                move32();
+            }
+        }
+    }
+
+    minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
+    q_temp = exp;
+    move16();
+    minimum_s( hdrm_im[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, &exp );
+    q_temp = s_min( q_temp, exp );
+    q_temp = sub( q_temp, 1 );
+
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], sub( hdrm_re[chA][chB], q_temp ) );
+            tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], sub( hdrm_im[chA][chB], q_temp ) );
+            move32();
+            move32();
+        }
+    }
+
+    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U,
+                         0 /*int Ascale*/,
+                         0 /*int Bscale*/,
+                         Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
+
+    matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, Mre_fx, Mim_fx, q_M );
+
+    return;
+}
+#endif /* FORM2x2MIXMAT_IMPROVEMENT */
+
+
 static void formulate2x2MixingMatrix_fx(
     Word32 Ein1_fx, /*q_Ein*/
     Word32 Ein2_fx, /*q_Ein*/
-- 
GitLab


From 608650ba528f2393ba1d21f2ce310bcdad726f35 Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 30 Oct 2025 11:37:48 +0100
Subject: [PATCH 2/8] clang format

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 6b4476273..176f32964 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -118,7 +118,7 @@ static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaE
 
 static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ );
 #ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
-static void formulate2x2MixingMatrixNoCross_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16* q_M );
+static void formulate2x2MixingMatrixNoCross_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M );
 #endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
 static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
 
-- 
GitLab


From 08dfae98715cfb799ea0a889d1cef95c64ee33c1 Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 6 Nov 2025 19:31:19 +0100
Subject: [PATCH 3/8] added optimized chol2x2() function, refactored pre
 scaling

---
 lib_com/options.h                             |   2 +-
 .../ivas_dirac_dec_binaural_functions_fx.c    | 569 +++++++-----------
 2 files changed, 207 insertions(+), 364 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index bc61b0892..525f60e04 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -87,7 +87,7 @@
 #define FIX_2166_ASSERT_OSBA_PLC_STEREO_OUT                  /* FhG: fix for issue 2166 - add missing averaging factor 0.5 in for the sum of energies in function stereo_dft_dmx_swb_nrg_fx()*/
 #define FIX_2086_ENABLE_HP20_OPT_FOR_ENC                     /* FhG: Enable hp20_fx_32_opt() for Encoder */
 #define FIX_1793_DEC_MC_TO_MONO_SCALING_ISSUE                /* FhG: Use dynamic Q factor for synth_fx and synthFB_fx to prevent overflow */
-#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT                /* Dlb: use dedicated formulate2x2MixingMatrix() function if cross terms are zero */
+#define NONBE_2169_BINAURAL_MIXING_MATRIX_OPT                /* Dlb: use optimized formulate2x2MixingMatrix() function and subfunctiions */
 /* ################### End FIXES switches ########################### */
 
 /* #################### Start BASOP porting switches ############################ */
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 176f32964..eea415804 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -117,9 +117,7 @@ static void getDirectPartGains_fx( const Word16 bin, Word16 aziDeg, Word16 eleDe
 static void ivas_masa_ext_rend_parambin_internal_fx( MASA_EXT_REND_HANDLE hMasaExtRend, COMBINED_ORIENTATION_HANDLE hCombinedOrientationData, Word32 *output_fx[] /*Q11*/, const Word16 subframe, const SPLIT_REND_WRAPPER *hSplitRendWrapper, Word32 Cldfb_Out_Real[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], Word32 Cldfb_Out_Imag[][CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX] );
 
 static void formulate2x2MixingMatrix_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 CinRe_fx /*q_Cin*/, Word32 CinIm_fx /*q_Cin*/, Word16 q_Cin, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M, const Word16 regularizationFactor_fx /*Q14*/ );
-#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
-static void formulate2x2MixingMatrixNoCross_fx( Word32 Ein1_fx /*q_Ein*/, Word32 Ein2_fx /*q_Ein*/, Word16 q_Ein, Word32 Eout1_fx /*q_Eout*/, Word32 Eout2_fx /*q_Eout*/, Word16 q_Eout, Word32 CoutRe_fx /*q_Cout*/, Word32 CoutIm_fx /*q_Cout*/, Word16 q_Cout, Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/, Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/, Word16 *q_M );
-#endif /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
+
 static void matrixMul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
 
 static void matrixTransp2Mul_fx( Word32 Are[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word32 Aim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_A*/, Word16 *q_A, Word32 Bre[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word32 Bim[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_B*/, Word16 *q_B, Word32 Ascale, Word32 Bscale, Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_out*/, Word16 *q_out );
@@ -2394,22 +2392,12 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             CrCrossIm_fx = Mpy_32_32( CrCrossIm_fx, decorrelationReductionFactor_fx );
             q_CrCross = sub( add( q_CrCross, q_decorrelationReductionFactor ), 31 );
 
-#ifndef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
             formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin],
                                          hDiracDecBin->q_ChEne,
                                          0, 0, /* Decorrelated signal has ideally no cross-terms */
                                          Q31, CrEneL_fx, CrEneR_fx, q_CrEne,
                                          CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
                                          prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec, 3277 ); // 3277 = 0.2 in Q14
-#else                                                                                            /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
-            /* Determine a residual mixing matrix Mdec for processing the decorrelated signal to obtain
-             * the residual signal (that has the residual covariance matrix)
-             * Decorrelated signal has ideally no cross-terms */
-            formulate2x2MixingMatrixNoCross_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin], hDiracDecBin->q_ChEne,
-                                                CrEneL_fx, CrEneR_fx, q_CrEne,
-                                                CrCrossRe_fx, CrCrossIm_fx, q_CrCross,
-                                                prototypeMtx_fx, MdecRe_fx, MdecIm_fx, &q_Mdec );
-#endif                                                                                           /* NONBE_2169_BINAURAL_MIXING_MATRIX_OPT */
         }
         ELSE
         {
@@ -4161,7 +4149,131 @@ static void matrixTransp2Mul_fx(
     return;
 }
 
+#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
+static void chol2x2_fx(
+  const  Word32 E1,                                   /*q_E*/
+  const  Word32 E2,                                   /*q_E*/
+  Word16 q_E,
+  const Word32 Cre,                                    /*q_C*/
+  const Word32 Cim,                                    /*q_C*/
+  Word16 q_C,
+  Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS],  /*q_out*/
+  Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS],   /*q_out */
+  Word16 *q_out)
+
+{
+  Word16 chA, chB;
+  Word32 sqrtVal;
+  Word16 q_sqrtVal, q_min;
+  Word16 exp = 0;
+  Word16 q_out_re_0_0, q_out_re_0_1, q_out_re_1_0, q_out_re_1_1;
+  Word16 q_out_im_0_1, q_out_im_1_0;
+
+  FOR(chA = 0; chA < BINAURAL_CHANNELS; chA++)
+  {
+    FOR(chB = 0; chB < BINAURAL_CHANNELS; chB++)
+    {
+      outRe[chA][chB] = 0;
+      move32();
+      outIm[chA][chB] = 0;
+      move32();
+    }
+  }
+
+  IF(GT_32(E1, E2)) /* Perform Cholesky decomposition according to louder channel first */
+  {
+    exp = sub(31, q_E);
+    Word32 tmp = ISqrt32(E1, &exp);
+    Word16 q_tmp = sub(31, exp);
+
+    outRe[0][0] = Mpy_32_32(E1, tmp);
+    q_out_re_0_0 = sub(q_E, sub(Q31, q_tmp));
+
+    q_out_re_1_0 = Q31;
+    IF(Cre != 0) {
+      outRe[1][0] = Mpy_32_32(Cre, tmp);
+      q_out_re_1_0 = sub(q_C, sub(Q31, q_tmp));
+    }
+    q_out_im_1_0 = Q31;
+    if (Cim != 0) {
+      outIm[1][0] = Mpy_32_32(Cim, tmp);
+      q_out_im_1_0 = sub(q_C, sub(Q31, q_tmp));
+    }
+
+    sqrtVal = L_add(L_shr(Mpy_32_32(Cre, Cre), 1), L_shr(Mpy_32_32(Cim, Cim), 1));
+    sqrtVal = BASOP_Util_Divide3232_Scale_newton(sqrtVal, E1, &exp); // q_sqrtVal = 2 * q_C - q_E - exp - 1;
+    q_sqrtVal = sub(add(q_C, q_C), add(add(q_E, exp), 1));
+
+    // normalize to min
+    q_min = s_min(q_E, q_sqrtVal);
+    sqrtVal = L_sub(L_shl(E2, q_min - q_E), L_shl(sqrtVal, q_min - q_sqrtVal));
+    q_sqrtVal = q_min;
+
+    exp = sub(31, q_sqrtVal);
+    outRe[1][1] = Sqrt32(L_max(sqrtVal, 0), &exp);
+    q_out_re_1_1 = sub(31, exp);
+
+    *q_out = s_min(s_min(q_out_re_0_0, q_out_re_1_0), s_min(q_out_im_1_0, q_out_re_1_1));
+    move16();
+    outRe[1][1] = L_shr(outRe[1][1], sub(q_out_re_1_1, *q_out));
+    move32();
+    outRe[1][0] = L_shr(outRe[1][0], sub(q_out_re_1_0, *q_out));
+    move32();
+    outIm[1][0] = L_shr(outIm[1][0], sub(q_out_im_1_0, *q_out));
+    move32();
+    outRe[0][0] = L_shr(outRe[0][0], sub(q_out_re_0_0, *q_out));
+    move32();
+  }
+  ELSE
+  {
+      exp = sub(31, q_E);
+      Word32 tmp = ISqrt32(E2, &exp);
+      Word16 q_tmp = sub(31, exp);
+
+      outRe[1][1] = Mpy_32_32(E2, tmp);
+      q_out_re_1_1 = sub(q_E, sub(Q31, q_tmp));
+
+      q_out_re_0_1 = Q31;
+      IF(Cre != 0) {
+        outRe[0][1] = Mpy_32_32(Cre, tmp);
+        q_out_re_0_1 = sub(q_C, sub(Q31, q_tmp));
+      }
+
+      q_out_im_0_1 = Q31;
+      IF(Cim != 0) {
+        outIm[0][1] = L_negate(Mpy_32_32(Cim, tmp));
+        q_out_im_0_1 = sub(q_C, sub(Q31, q_tmp));
+      }
+
+      sqrtVal = L_add(L_shr(Mpy_32_32(Cre, Cre),1), L_shr(Mpy_32_32(Cim, Cim),1));
+      sqrtVal = BASOP_Util_Divide3232_Scale_newton(sqrtVal, E2, &exp);
+
+      q_sqrtVal = sub(add(q_C, q_C), add(add(q_E, exp), 1));  // q_sqrtVal = 2 * q_C - q_E - exp - 1;
+
+      // normalize to min
+      q_min = s_min(q_E, q_sqrtVal);
+      sqrtVal = L_sub(L_shl(E1, q_min - q_E), L_shl(sqrtVal, q_min - q_sqrtVal));
+      q_sqrtVal = q_min;
+
+      exp = sub(31, q_sqrtVal);
+      outRe[0][0] = Sqrt32(L_max(sqrtVal, 0), &exp);
+      q_out_re_0_0 = sub(31, exp);
+
+      *q_out = s_min(s_min(q_out_re_0_0, q_out_re_0_1), s_min(q_out_im_0_1, q_out_re_1_1));
+      move16();
+      outRe[1][1] = L_shr(outRe[1][1], sub(q_out_re_1_1, *q_out));
+      move32();
+      outRe[0][1] = L_shr(outRe[0][1], sub(q_out_re_0_1, *q_out));
+      move32();
+      outIm[0][1] = L_shr(outIm[0][1], sub(q_out_im_0_1, *q_out));
+      move32();
+      outRe[0][0] = L_shr(outRe[0][0], sub(q_out_re_0_0, *q_out));
+      move32();
+  }
+  return;
+}
 
+#else
 static void chol2x2_fx(
     const Word32 E1, /*q_E*/
     const Word32 E2, /*q_E*/
@@ -4406,350 +4518,7 @@ static void chol2x2_fx(
 
     return;
 }
-
-#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
-static void formulate2x2MixingMatrixNoCross_fx(
-    Word32 Ein1_fx /*q_Ein*/,
-    Word32 Ein2_fx /*q_Ein*/,
-    Word16 q_Ein,
-    Word32 Eout1_fx /*q_Eout*/,
-    Word32 Eout2_fx /*q_Eout*/,
-    Word16 q_Eout,
-    Word32 CoutRe_fx /*q_Cout*/,
-    Word32 CoutIm_fx /*q_Cout*/,
-    Word16 q_Cout,
-    Word32 Q_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*Q31*/,
-    Word32 Mre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/,
-    Word32 Mim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS] /*q_M*/,
-    Word16 *q_M )
-{
-    /*
-     This function implements a 2x2 solution for an optimized spatial audio rendering algorithm, based on
-     Vilkamo, J., Bäckström, T. and Kuntz, A., 2013.
-     "Optimized covariance domain framework for time–frequency processing of spatial audio."
-     Journal of the Audio Engineering Society, 61(6), pp.403-411.
-     but optimized for decorrelated signals
-
-     The result of the formulas below are the same as those in the publication, however, some
-     derivation details differ for as simple as possible 2x2 formulation
-     */
-    Word16 chA, chB;
-    Word32 maxEne_fx, tmp, maxEneDiv_fx;
-    Word16 q_maxEne, q_maxEneDiv, exp, exp1;
-    Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word32 tmpRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word32 Ure_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word32 D_fx[BINAURAL_CHANNELS];
-    Word32 div_fx[BINAURAL_CHANNELS];
-    Word32 Ghat_fx[BINAURAL_CHANNELS];
-    Word32 GhatQ_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word32 Pre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Pim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word16 q_ky, q_A, q_U, q_D, q_P;
-    Word32 E_in1, E_in2, E_out1, E_out2, Cout_re, Cout_im;
-    Word16 q_ein, q_eout, q_cout, q_Ghat, q_GhatQ, q_temp, q_div, exp_temp;
-    Word32 temp;
-    Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
-    set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
-    set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
-    set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
-    set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
-
-    q_ky = 0;
-    move16();
-
-    exp = sub( get_min_scalefactor( Ein1_fx, Ein2_fx ), 1 );
-    E_in1 = L_shl( Ein1_fx, exp );
-    E_in2 = L_shl( Ein2_fx, exp );
-    q_ein = add( q_Ein, exp );
-
-    exp = sub( get_min_scalefactor( Eout1_fx, Eout2_fx ), 1 );
-    E_out1 = L_shl( Eout1_fx, exp );
-    E_out2 = L_shl( Eout2_fx, exp );
-    q_eout = add( q_Eout, exp );
-
-    exp = sub( get_min_scalefactor( CoutRe_fx, CoutIm_fx ), 1 );
-    Cout_re = L_shl( CoutRe_fx, exp );
-    Cout_im = L_shl( CoutIm_fx, exp );
-    q_cout = add( q_Cout, exp );
-
-    /* Normalize energy values */
-    maxEne_fx = L_max( E_in1, E_in2 );
-    q_maxEne = q_ein;
-    move16();
-
-    tmp = L_max( E_out1, E_out2 );
-    IF( LT_16( q_maxEne, q_eout ) )
-    {
-        maxEne_fx = L_max( maxEne_fx, L_shr( tmp, sub( q_eout, q_maxEne ) ) ); // q_maxEne
-    }
-    ELSE
-    {
-        maxEne_fx = L_max( L_shr( maxEne_fx, sub( q_maxEne, q_eout ) ), tmp ); // q_maxEne
-        q_maxEne = q_eout;
-        move16();
-    }
-
-    // 4611686 = Q62
-    IF( maxEne_fx == 0 )
-    {
-        maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
-        move32();
-        q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP;
-        move16();
-    }
-    ELSE
-    {
-        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, maxEne_fx, &exp );
-        q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
-    }
-    exp = norm_l( maxEneDiv_fx );
-    maxEneDiv_fx = L_shl( maxEneDiv_fx, exp );
-    q_maxEneDiv = add( q_maxEneDiv, exp );
-
-    E_in1 = Mpy_32_32( E_in1, maxEneDiv_fx );
-    E_in2 = Mpy_32_32( E_in2, maxEneDiv_fx );
-    q_ein = sub( add( q_ein, q_maxEneDiv ), 31 );
-
-    E_out1 = Mpy_32_32( E_out1, maxEneDiv_fx );
-    E_out2 = Mpy_32_32( E_out2, maxEneDiv_fx );
-    q_eout = sub( add( q_eout, q_maxEneDiv ), 31 );
-
-    Cout_re = Mpy_32_32( Cout_re, maxEneDiv_fx );
-    Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
-    q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );
-
-    /* Cholesky decomposition of target / output covariance matrix */
-    chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
-
-    /* If there are no cross-terms, the Eigendecomposition of input covariance matrix
-       can be skipped. Uxre is a unit matrix, Uxim is a zero matrix and Sx is (1, 1)
-       Further on, also Kxre is a unit matrix and Kxim is a zero matrix
-       Multiplication with these matrices / scalars can be skipped
-    */
-
-    temp = Mpy_32_32( E_in2, INV_1000_Q31 );
-    temp = L_max( temp, E_in1 );
-
-    IF( temp == 0 )
-    {
-        IF( E_out1 == 0 )
-        {
-            Ghat_fx[0] = 0;
-            exp = -19;
-            move32();
-            move16();
-        }
-        ELSE
-        {
-            temp = BASOP_Util_Divide3232_Scale_newton( E_out1, 4611686, &exp ); // 4611686 = Q62
-            exp = sub( exp, sub( q_eout, 62 ) );
-            Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
-        }
-    }
-    ELSE
-    {
-        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-
-        temp = BASOP_Util_Divide3232_Scale_newton( E_out1, temp, &exp );
-        exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
-        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
-    }
-    move32();
-
-    temp = Mpy_32_32( E_in1, 2147484 );
-    temp = L_max( temp, E_in2 ); // q_ein
-    IF( temp == 0 )
-    {
-        IF( E_out2 == 0 )
-        { /* We can set hard-coded results */
-            Ghat_fx[1] = 0;
-            exp1 = -19;
-            move16();
-        }
-        ELSE
-        {
-            temp = BASOP_Util_Divide3232_Scale_newton( E_out2, 4611686, &exp1 ); // 4611686 = Q62
-            exp1 = sub( exp1, sub( q_eout, 62 ) );
-            Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
-        }
-    }
-    ELSE
-    {
-        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-
-        temp = BASOP_Util_Divide3232_Scale_newton( E_out2, temp, &exp1 );
-        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
-        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
-    }
-    move32();
-
-    q_Ghat = sub( 31, s_max( exp, exp1 ) );
-    Word16 q_diff = sub( 31, q_Ghat );
-    Ghat_fx[0] = L_shr( Ghat_fx[0], sub( q_diff, exp ) ); // q_Ghat
-    move32();
-    Ghat_fx[1] = L_shr( Ghat_fx[1], sub( q_diff, exp1 ) ); // q_Ghat
-    move32();
-
-    /* Matrix multiplication, A = Ky' * G_hat * Q */
-    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
-    {
-        GhatQ_fx[chA][0] = Mpy_32_32( Q_fx[chA][0], Ghat_fx[chA] );
-        GhatQ_fx[chA][1] = Mpy_32_32( Q_fx[chA][1], Ghat_fx[chA] );
-        move32();
-        move32();
-    }
-    q_GhatQ = sub( add( Q31, q_Ghat ), 31 );
-
-    exp = sub( s_min( L_norm_arr( KyRe_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ), L_norm_arr( KyIm_fx[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) ) ), 1 );
-    scale_sig32( KyRe_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp );
-    scale_sig32( KyIm_fx[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, exp );
-    q_ky = add( q_ky, exp );
-
-    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
-    {
-        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
-        {
-            Are_fx[chA][chB] = Madd_32_32( Mpy_32_32( KyRe_fx[0][chA], GhatQ_fx[0][chB] ), KyRe_fx[1][chA], GhatQ_fx[1][chB] );
-            Aim_fx[chA][chB] = Msub_32_32( Mpy_32_32( L_negate( KyIm_fx[0][chA] ), GhatQ_fx[0][chB] ), KyIm_fx[1][chA], GhatQ_fx[1][chB] );
-            move32();
-            move32();
-        }
-    }
-
-    q_A = sub( add( q_ky, q_GhatQ ), 31 ); // TODO SMM: Check if this is correct!!!
-    move16();
-
-    /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
-       For matrix A that is P = A(A'A)^0.5 */
-    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
-
-    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
-
-    IF( D_fx[0] != 0 && D_fx[1] == 0 ) // Due to an eig2x2 error, sometimes D_fx[1] becomes zero, which implies that the input matrix should be singular (i.e., determinant = 0).
-    {
-        Word32 det_fx = L_sub_sat( Mult_32_32( tmpRe_fx[0][0], tmpRe_fx[1][1] ),
-                                   L_add_sat( Mult_32_32( tmpRe_fx[1][0], tmpRe_fx[1][0] ),
-                                              Mult_32_32( tmpIm_fx[1][0], tmpIm_fx[1][0] ) ) );
-        if ( det_fx != 0 )
-        {
-            D_fx[1] = SMALL_EIGENVALUE; // Setting D_fx[1] to epsilon has no effect, as the value is too small to affect the output.
-            move32();
-        }
-    }
-
-    IF( D_fx[0] == 0 )
-    {
-        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
-        move32();
-        exp = ONE_DIV_EPSILON_EXP;
-        move16();
-    }
-    ELSE
-    {
-        temp = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, D_fx[0], &exp );
-        exp = sub( exp, sub( Q30, q_D ) );
-    }
-    div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
-    move32();
-
-    // Sqrt(1)
-    div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
-    exp1 = add( 0, 20 );
-
-    IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt
-    {
-        exp1 = sub( 31, q_D );
-        div_fx[1] = ISqrt32( D_fx[1], &exp1 );
-        move32();
-    }
-    q_div = sub( 31, s_max( exp, exp1 ) );
-
-    div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div
-    move32();
-    div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div
-    move32();
-
-    // 1310720000 = 10,000.0f in Q17
-    Word32 thresh = L_shl_sat( 1310720000, sub( q_div, Q17 ) ); // q_div
-    div_fx[0] = L_min( div_fx[0], thresh );                     // q_div
-    div_fx[1] = L_min( div_fx[1], thresh );                     // q_div
-
-    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
-
-    exp = L_norm_arr( div_fx, BINAURAL_CHANNELS );
-    scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
-    q_div = add( q_div, exp );
-
-    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
-    {
-        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
-        {
-            Word64 W_tmp;
-
-            W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
-            IF( W_tmp != 0 )
-            {
-                Word16 hdrm = sub( W_norm( W_tmp ), 32 );
-                tmpRe_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm );
-                move32();
-                hdrm_re[chA][chB] = add( add( q_temp, q_div ), hdrm );
-                move16();
-            }
-            ELSE
-            {
-                tmpRe_fx[chA][chB] = 0;
-                move32();
-            }
-
-            W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] );
-            IF( W_tmp != 0 )
-            {
-                Word16 hdrm = sub( W_norm( W_tmp ), 32 );
-                move16();
-                tmpIm_fx[chA][chB] = W_shl_sat_l( W_tmp, hdrm );
-                move32();
-                hdrm_im[chA][chB] = add( add( q_temp, q_div ), hdrm );
-                move16();
-            }
-            ELSE
-            {
-                tmpIm_fx[chA][chB] = 0;
-                move32();
-            }
-        }
-    }
-
-    minimum_s( hdrm_re[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
-    q_temp = exp;
-    move16();
-    minimum_s( hdrm_im[0], BINAURAL_CHANNELS * BINAURAL_CHANNELS, &exp );
-    q_temp = s_min( q_temp, exp );
-    q_temp = sub( q_temp, 1 );
-
-    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
-    {
-        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
-        {
-            tmpRe_fx[chA][chB] = L_shr( tmpRe_fx[chA][chB], sub( hdrm_re[chA][chB], q_temp ) );
-            tmpIm_fx[chA][chB] = L_shr( tmpIm_fx[chA][chB], sub( hdrm_im[chA][chB], q_temp ) );
-            move32();
-            move32();
-        }
-    }
-
-    matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U,
-                         0 /*int Ascale*/,
-                         0 /*int Bscale*/,
-                         Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
-
-    matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, Mre_fx, Mim_fx, q_M );
-
-    return;
-}
-#endif /* FORM2x2MIXMAT_IMPROVEMENT */
-
+#endif
 
 static void formulate2x2MixingMatrix_fx(
     Word32 Ein1_fx, /*q_Ein*/
@@ -4780,7 +4549,7 @@ static void formulate2x2MixingMatrix_fx(
      derivation details differ for as simple as possible 2x2 formulattion
      */
     Word16 chA, chB;
-    Word32 maxEne_fx, tmp, maxEneDiv_fx;
+    Word32 maxEneIn_fx, maxEneOut_fx, maxEne_fx, maxEneDiv_fx;
     Word16 q_maxEne, q_maxEneDiv, exp, exp1;
     Word32 KyRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], KyIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
     Word32 Uxre_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], Uxim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
@@ -4803,7 +4572,6 @@ static void formulate2x2MixingMatrix_fx(
     set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
     set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
     set16_fx( q_Pim[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
-
     q_ky = 0;
     move16();
     q_Sx = 0;
@@ -4813,6 +4581,77 @@ static void formulate2x2MixingMatrix_fx(
     q_Kx = 0;
     move16();
 
+#ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
+    q_ein = q_Ein;
+    move16();
+    q_eout = q_Eout;
+    move16();
+    q_cin = q_Cin;
+    move16();
+    q_cout = q_Cout;
+    move16();
+
+    /* Normalize energy values */
+    maxEneIn_fx = L_max( Ein1_fx, Ein2_fx );
+    move16();
+
+    maxEneOut_fx = L_max( Eout1_fx, Eout2_fx );
+    move16();
+
+    test();
+    if ( maxEneIn_fx == 0 || maxEneOut_fx == 0 )
+    {
+        Mre_fx[0][0] = Mre_fx[0][1] = Mre_fx[1][0] = Mre_fx[1][1] = 0;
+        move32();
+        move32();
+        move32();
+        move32();
+        Mim_fx[0][0] = Mim_fx[0][1] = Mim_fx[1][0] = Mim_fx[1][1] = 0;
+        move32();
+        move32();
+        move32();
+        move32();
+        *q_M = Q31;
+        move16();
+        return;
+    }
+
+    IF( LT_16( q_eout, q_ein ) )
+    {
+        maxEne_fx = L_max( maxEneOut_fx, L_shr( maxEneIn_fx, sub( q_ein, q_eout ) ) );
+        q_maxEne = q_eout;
+        move16();
+    }
+    ELSE
+    {
+        maxEne_fx = L_max( maxEneIn_fx, L_shr( maxEneOut_fx, sub( q_eout, q_ein ) ) );
+        q_maxEne = q_ein;
+        move16();
+    }
+
+    maxEneDiv_fx = BASOP_Util_Divide3232_Scale_newton( ONE_IN_Q30, maxEne_fx, &exp );
+    q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
+
+    exp = norm_l( maxEneDiv_fx );
+    maxEneDiv_fx = L_shl( maxEneDiv_fx, exp );
+    q_maxEneDiv = add( q_maxEneDiv, exp );
+
+    E_in1 = Mpy_32_32( Ein1_fx, maxEneDiv_fx );
+    E_in2 = Mpy_32_32( Ein2_fx, maxEneDiv_fx );
+    q_ein = sub( add( q_ein, q_maxEneDiv ), 31 );
+
+    Cin_re = Mpy_32_32( CinRe_fx, maxEneDiv_fx );
+    Cin_im = Mpy_32_32( CinIm_fx, maxEneDiv_fx );
+    q_cin = sub( add( q_cin, q_maxEneDiv ), 31 );
+
+    E_out1 = Mpy_32_32( Eout1_fx, maxEneDiv_fx );
+    E_out2 = Mpy_32_32( Eout2_fx, maxEneDiv_fx );
+    q_eout = sub( add( q_eout, q_maxEneDiv ), 31 );
+
+    Cout_re = Mpy_32_32( CoutRe_fx, maxEneDiv_fx );
+    Cout_im = Mpy_32_32( CoutIm_fx, maxEneDiv_fx );
+    q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );
+#else
     exp = sub( get_min_scalefactor( Ein1_fx, Ein2_fx ), 1 );
     E_in1 = L_shl( Ein1_fx, exp );
     E_in2 = L_shl( Ein2_fx, exp );
@@ -4838,14 +4677,14 @@ static void formulate2x2MixingMatrix_fx(
     q_maxEne = q_ein;
     move16();
 
-    tmp = L_max( E_out1, E_out2 );
+    temp = L_max( E_out1, E_out2 );
     IF( LT_16( q_maxEne, q_eout ) )
     {
-        maxEne_fx = L_max( maxEne_fx, L_shr( tmp, sub( q_eout, q_maxEne ) ) ); // q_maxEne
+        maxEne_fx = L_max( maxEne_fx, L_shr( temp, sub( q_eout, q_maxEne ) ) ); // q_maxEne
     }
     ELSE
     {
-        maxEne_fx = L_max( L_shr( maxEne_fx, sub( q_maxEne, q_eout ) ), tmp ); // q_maxEne
+        maxEne_fx = L_max( L_shr( maxEne_fx, sub( q_maxEne, q_eout ) ), temp ); // q_maxEne
         q_maxEne = q_eout;
         move16();
     }
@@ -4883,6 +4722,9 @@ static void formulate2x2MixingMatrix_fx(
     Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
     q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );
 
+#endif
+
+
     /* Cholesky decomposition of target / output covariance matrix */
     chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
 
@@ -5129,7 +4971,7 @@ static void formulate2x2MixingMatrix_fx(
                          0 /*int Bscale*/,
                          Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
 
-    /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
+/* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
 #if ( BINAURAL_CHANNELS != 2 )
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
@@ -5268,6 +5110,7 @@ static void formulate2x2MixingMatrix_fx(
 }
 
 
+
 static void getDirectPartGains_fx(
     const Word16 bin,
     Word16 aziDeg,
-- 
GitLab


From 08be2a947c0eceb5ded414968f759d18b7424261 Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 6 Nov 2025 19:34:55 +0100
Subject: [PATCH 4/8] clang format

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 241 +++++++++---------
 1 file changed, 122 insertions(+), 119 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index eea415804..528df258c 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4151,126 +4151,130 @@ static void matrixTransp2Mul_fx(
 
 #ifdef NONBE_2169_BINAURAL_MIXING_MATRIX_OPT
 static void chol2x2_fx(
-  const  Word32 E1,                                   /*q_E*/
-  const  Word32 E2,                                   /*q_E*/
-  Word16 q_E,
-  const Word32 Cre,                                    /*q_C*/
-  const Word32 Cim,                                    /*q_C*/
-  Word16 q_C,
-  Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS],  /*q_out*/
-  Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS],   /*q_out */
-  Word16 *q_out)
+    const Word32 E1, /*q_E*/
+    const Word32 E2, /*q_E*/
+    Word16 q_E,
+    const Word32 Cre, /*q_C*/
+    const Word32 Cim, /*q_C*/
+    Word16 q_C,
+    Word32 outRe[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/
+    Word32 outIm[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out */
+    Word16 *q_out )
 
 {
-  Word16 chA, chB;
-  Word32 sqrtVal;
-  Word16 q_sqrtVal, q_min;
-  Word16 exp = 0;
-  Word16 q_out_re_0_0, q_out_re_0_1, q_out_re_1_0, q_out_re_1_1;
-  Word16 q_out_im_0_1, q_out_im_1_0;
-
-  FOR(chA = 0; chA < BINAURAL_CHANNELS; chA++)
-  {
-    FOR(chB = 0; chB < BINAURAL_CHANNELS; chB++)
-    {
-      outRe[chA][chB] = 0;
-      move32();
-      outIm[chA][chB] = 0;
-      move32();
-    }
-  }
-
-  IF(GT_32(E1, E2)) /* Perform Cholesky decomposition according to louder channel first */
-  {
-    exp = sub(31, q_E);
-    Word32 tmp = ISqrt32(E1, &exp);
-    Word16 q_tmp = sub(31, exp);
-
-    outRe[0][0] = Mpy_32_32(E1, tmp);
-    q_out_re_0_0 = sub(q_E, sub(Q31, q_tmp));
-
-    q_out_re_1_0 = Q31;
-    IF(Cre != 0) {
-      outRe[1][0] = Mpy_32_32(Cre, tmp);
-      q_out_re_1_0 = sub(q_C, sub(Q31, q_tmp));
-    }
-    q_out_im_1_0 = Q31;
-    if (Cim != 0) {
-      outIm[1][0] = Mpy_32_32(Cim, tmp);
-      q_out_im_1_0 = sub(q_C, sub(Q31, q_tmp));
-    }
-
-    sqrtVal = L_add(L_shr(Mpy_32_32(Cre, Cre), 1), L_shr(Mpy_32_32(Cim, Cim), 1));
-    sqrtVal = BASOP_Util_Divide3232_Scale_newton(sqrtVal, E1, &exp); // q_sqrtVal = 2 * q_C - q_E - exp - 1;
-    q_sqrtVal = sub(add(q_C, q_C), add(add(q_E, exp), 1));
-
-    // normalize to min
-    q_min = s_min(q_E, q_sqrtVal);
-    sqrtVal = L_sub(L_shl(E2, q_min - q_E), L_shl(sqrtVal, q_min - q_sqrtVal));
-    q_sqrtVal = q_min;
-
-    exp = sub(31, q_sqrtVal);
-    outRe[1][1] = Sqrt32(L_max(sqrtVal, 0), &exp);
-    q_out_re_1_1 = sub(31, exp);
-
-    *q_out = s_min(s_min(q_out_re_0_0, q_out_re_1_0), s_min(q_out_im_1_0, q_out_re_1_1));
-    move16();
-    outRe[1][1] = L_shr(outRe[1][1], sub(q_out_re_1_1, *q_out));
-    move32();
-    outRe[1][0] = L_shr(outRe[1][0], sub(q_out_re_1_0, *q_out));
-    move32();
-    outIm[1][0] = L_shr(outIm[1][0], sub(q_out_im_1_0, *q_out));
-    move32();
-    outRe[0][0] = L_shr(outRe[0][0], sub(q_out_re_0_0, *q_out));
-    move32();
-  }
-  ELSE
-  {
-      exp = sub(31, q_E);
-      Word32 tmp = ISqrt32(E2, &exp);
-      Word16 q_tmp = sub(31, exp);
-
-      outRe[1][1] = Mpy_32_32(E2, tmp);
-      q_out_re_1_1 = sub(q_E, sub(Q31, q_tmp));
-
-      q_out_re_0_1 = Q31;
-      IF(Cre != 0) {
-        outRe[0][1] = Mpy_32_32(Cre, tmp);
-        q_out_re_0_1 = sub(q_C, sub(Q31, q_tmp));
-      }
-
-      q_out_im_0_1 = Q31;
-      IF(Cim != 0) {
-        outIm[0][1] = L_negate(Mpy_32_32(Cim, tmp));
-        q_out_im_0_1 = sub(q_C, sub(Q31, q_tmp));
-      }
-
-      sqrtVal = L_add(L_shr(Mpy_32_32(Cre, Cre),1), L_shr(Mpy_32_32(Cim, Cim),1));
-      sqrtVal = BASOP_Util_Divide3232_Scale_newton(sqrtVal, E2, &exp);
-
-      q_sqrtVal = sub(add(q_C, q_C), add(add(q_E, exp), 1));  // q_sqrtVal = 2 * q_C - q_E - exp - 1;
-
-      // normalize to min
-      q_min = s_min(q_E, q_sqrtVal);
-      sqrtVal = L_sub(L_shl(E1, q_min - q_E), L_shl(sqrtVal, q_min - q_sqrtVal));
-      q_sqrtVal = q_min;
-
-      exp = sub(31, q_sqrtVal);
-      outRe[0][0] = Sqrt32(L_max(sqrtVal, 0), &exp);
-      q_out_re_0_0 = sub(31, exp);
-
-      *q_out = s_min(s_min(q_out_re_0_0, q_out_re_0_1), s_min(q_out_im_0_1, q_out_re_1_1));
-      move16();
-      outRe[1][1] = L_shr(outRe[1][1], sub(q_out_re_1_1, *q_out));
-      move32();
-      outRe[0][1] = L_shr(outRe[0][1], sub(q_out_re_0_1, *q_out));
-      move32();
-      outIm[0][1] = L_shr(outIm[0][1], sub(q_out_im_0_1, *q_out));
-      move32();
-      outRe[0][0] = L_shr(outRe[0][0], sub(q_out_re_0_0, *q_out));
-      move32();
-  }
-  return;
+    Word16 chA, chB;
+    Word32 sqrtVal;
+    Word16 q_sqrtVal, q_min;
+    Word16 exp = 0;
+    Word16 q_out_re_0_0, q_out_re_0_1, q_out_re_1_0, q_out_re_1_1;
+    Word16 q_out_im_0_1, q_out_im_1_0;
+
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            outRe[chA][chB] = 0;
+            move32();
+            outIm[chA][chB] = 0;
+            move32();
+        }
+    }
+
+    IF( GT_32( E1, E2 ) ) /* Perform Cholesky decomposition according to louder channel first */
+    {
+        exp = sub( 31, q_E );
+        Word32 tmp = ISqrt32( E1, &exp );
+        Word16 q_tmp = sub( 31, exp );
+
+        outRe[0][0] = Mpy_32_32( E1, tmp );
+        q_out_re_0_0 = sub( q_E, sub( Q31, q_tmp ) );
+
+        q_out_re_1_0 = Q31;
+        IF( Cre != 0 )
+        {
+            outRe[1][0] = Mpy_32_32( Cre, tmp );
+            q_out_re_1_0 = sub( q_C, sub( Q31, q_tmp ) );
+        }
+        q_out_im_1_0 = Q31;
+        if ( Cim != 0 )
+        {
+            outIm[1][0] = Mpy_32_32( Cim, tmp );
+            q_out_im_1_0 = sub( q_C, sub( Q31, q_tmp ) );
+        }
+
+        sqrtVal = L_add( L_shr( Mpy_32_32( Cre, Cre ), 1 ), L_shr( Mpy_32_32( Cim, Cim ), 1 ) );
+        sqrtVal = BASOP_Util_Divide3232_Scale_newton( sqrtVal, E1, &exp ); // q_sqrtVal = 2 * q_C - q_E - exp - 1;
+        q_sqrtVal = sub( add( q_C, q_C ), add( add( q_E, exp ), 1 ) );
+
+        // normalize to min
+        q_min = s_min( q_E, q_sqrtVal );
+        sqrtVal = L_sub( L_shl( E2, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
+        q_sqrtVal = q_min;
+
+        exp = sub( 31, q_sqrtVal );
+        outRe[1][1] = Sqrt32( L_max( sqrtVal, 0 ), &exp );
+        q_out_re_1_1 = sub( 31, exp );
+
+        *q_out = s_min( s_min( q_out_re_0_0, q_out_re_1_0 ), s_min( q_out_im_1_0, q_out_re_1_1 ) );
+        move16();
+        outRe[1][1] = L_shr( outRe[1][1], sub( q_out_re_1_1, *q_out ) );
+        move32();
+        outRe[1][0] = L_shr( outRe[1][0], sub( q_out_re_1_0, *q_out ) );
+        move32();
+        outIm[1][0] = L_shr( outIm[1][0], sub( q_out_im_1_0, *q_out ) );
+        move32();
+        outRe[0][0] = L_shr( outRe[0][0], sub( q_out_re_0_0, *q_out ) );
+        move32();
+    }
+    ELSE
+    {
+        exp = sub( 31, q_E );
+        Word32 tmp = ISqrt32( E2, &exp );
+        Word16 q_tmp = sub( 31, exp );
+
+        outRe[1][1] = Mpy_32_32( E2, tmp );
+        q_out_re_1_1 = sub( q_E, sub( Q31, q_tmp ) );
+
+        q_out_re_0_1 = Q31;
+        IF( Cre != 0 )
+        {
+            outRe[0][1] = Mpy_32_32( Cre, tmp );
+            q_out_re_0_1 = sub( q_C, sub( Q31, q_tmp ) );
+        }
+
+        q_out_im_0_1 = Q31;
+        IF( Cim != 0 )
+        {
+            outIm[0][1] = L_negate( Mpy_32_32( Cim, tmp ) );
+            q_out_im_0_1 = sub( q_C, sub( Q31, q_tmp ) );
+        }
+
+        sqrtVal = L_add( L_shr( Mpy_32_32( Cre, Cre ), 1 ), L_shr( Mpy_32_32( Cim, Cim ), 1 ) );
+        sqrtVal = BASOP_Util_Divide3232_Scale_newton( sqrtVal, E2, &exp );
+
+        q_sqrtVal = sub( add( q_C, q_C ), add( add( q_E, exp ), 1 ) ); // q_sqrtVal = 2 * q_C - q_E - exp - 1;
+
+        // normalize to min
+        q_min = s_min( q_E, q_sqrtVal );
+        sqrtVal = L_sub( L_shl( E1, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
+        q_sqrtVal = q_min;
+
+        exp = sub( 31, q_sqrtVal );
+        outRe[0][0] = Sqrt32( L_max( sqrtVal, 0 ), &exp );
+        q_out_re_0_0 = sub( 31, exp );
+
+        *q_out = s_min( s_min( q_out_re_0_0, q_out_re_0_1 ), s_min( q_out_im_0_1, q_out_re_1_1 ) );
+        move16();
+        outRe[1][1] = L_shr( outRe[1][1], sub( q_out_re_1_1, *q_out ) );
+        move32();
+        outRe[0][1] = L_shr( outRe[0][1], sub( q_out_re_0_1, *q_out ) );
+        move32();
+        outIm[0][1] = L_shr( outIm[0][1], sub( q_out_im_0_1, *q_out ) );
+        move32();
+        outRe[0][0] = L_shr( outRe[0][0], sub( q_out_re_0_0, *q_out ) );
+        move32();
+    }
+    return;
 }
 
 #else
@@ -5110,7 +5114,6 @@ static void formulate2x2MixingMatrix_fx(
 }
 
 
-
 static void getDirectPartGains_fx(
     const Word16 bin,
     Word16 aziDeg,
-- 
GitLab


From ca9a39329eac9b1f65983b6f7f3439e8a4e81042 Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 6 Nov 2025 19:43:31 +0100
Subject: [PATCH 5/8] chol2x2_fx: fixed comment style (// to /* */) and placement

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 528df258c..653c92e67 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4203,10 +4203,10 @@ static void chol2x2_fx(
         }
 
         sqrtVal = L_add( L_shr( Mpy_32_32( Cre, Cre ), 1 ), L_shr( Mpy_32_32( Cim, Cim ), 1 ) );
-        sqrtVal = BASOP_Util_Divide3232_Scale_newton( sqrtVal, E1, &exp ); // q_sqrtVal = 2 * q_C - q_E - exp - 1;
-        q_sqrtVal = sub( add( q_C, q_C ), add( add( q_E, exp ), 1 ) );
+        sqrtVal = BASOP_Util_Divide3232_Scale_newton( sqrtVal, E1, &exp );
+        q_sqrtVal = sub( add( q_C, q_C ), add( add( q_E, exp ), 1 ) ); /*  q_sqrtVal = 2 * q_C - q_E - exp - 1 */
 
-        // normalize to min
+        /* normalize to min q */
         q_min = s_min( q_E, q_sqrtVal );
         sqrtVal = L_sub( L_shl( E2, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
         q_sqrtVal = q_min;
@@ -4252,9 +4252,9 @@ static void chol2x2_fx(
         sqrtVal = L_add( L_shr( Mpy_32_32( Cre, Cre ), 1 ), L_shr( Mpy_32_32( Cim, Cim ), 1 ) );
         sqrtVal = BASOP_Util_Divide3232_Scale_newton( sqrtVal, E2, &exp );
 
-        q_sqrtVal = sub( add( q_C, q_C ), add( add( q_E, exp ), 1 ) ); // q_sqrtVal = 2 * q_C - q_E - exp - 1;
+        q_sqrtVal = sub( add( q_C, q_C ), add( add( q_E, exp ), 1 ) ); /* q_sqrtVal = 2 * q_C - q_E - exp - 1 */
 
-        // normalize to min
+        /* normalize to min */
         q_min = s_min( q_E, q_sqrtVal );
         sqrtVal = L_sub( L_shl( E1, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
         q_sqrtVal = q_min;
-- 
GitLab


From 3998c9ce31b50251e0af2a319fe24e909c982c26 Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 6 Nov 2025 19:47:19 +0100
Subject: [PATCH 6/8] replaced plain if with IF macro, removed test() call

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 653c92e67..cee3e1342 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4196,7 +4196,7 @@ static void chol2x2_fx(
             q_out_re_1_0 = sub( q_C, sub( Q31, q_tmp ) );
         }
         q_out_im_1_0 = Q31;
-        if ( Cim != 0 )
+        IF ( Cim != 0 )
         {
             outIm[1][0] = Mpy_32_32( Cim, tmp );
             q_out_im_1_0 = sub( q_C, sub( Q31, q_tmp ) );
@@ -4602,8 +4602,7 @@ static void formulate2x2MixingMatrix_fx(
     maxEneOut_fx = L_max( Eout1_fx, Eout2_fx );
     move16();
 
-    test();
-    if ( maxEneIn_fx == 0 || maxEneOut_fx == 0 )
+    IF ( maxEneIn_fx == 0 || maxEneOut_fx == 0 )
     {
         Mre_fx[0][0] = Mre_fx[0][1] = Mre_fx[1][0] = Mre_fx[1][1] = 0;
         move32();
-- 
GitLab


From d3ab85bedcfecd9d4d3a36cfc67ecdcaeee6241c Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 6 Nov 2025 21:50:48 +0100
Subject: [PATCH 7/8] added missing move16() ops, removed redundant q_sqrtVal update

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index cee3e1342..57bda2ac3 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4190,12 +4190,14 @@ static void chol2x2_fx(
         q_out_re_0_0 = sub( q_E, sub( Q31, q_tmp ) );
 
         q_out_re_1_0 = Q31;
+        move16();
         IF( Cre != 0 )
         {
             outRe[1][0] = Mpy_32_32( Cre, tmp );
             q_out_re_1_0 = sub( q_C, sub( Q31, q_tmp ) );
         }
         q_out_im_1_0 = Q31;
+        move16();
         IF ( Cim != 0 )
         {
             outIm[1][0] = Mpy_32_32( Cim, tmp );
@@ -4209,9 +4211,8 @@ static void chol2x2_fx(
         /* normalize to min q */
         q_min = s_min( q_E, q_sqrtVal );
         sqrtVal = L_sub( L_shl( E2, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
-        q_sqrtVal = q_min;
 
-        exp = sub( 31, q_sqrtVal );
+        exp = sub( 31, q_min);
         outRe[1][1] = Sqrt32( L_max( sqrtVal, 0 ), &exp );
         q_out_re_1_1 = sub( 31, exp );
 
@@ -4236,6 +4237,7 @@ static void chol2x2_fx(
         q_out_re_1_1 = sub( q_E, sub( Q31, q_tmp ) );
 
         q_out_re_0_1 = Q31;
+        move16();
         IF( Cre != 0 )
         {
             outRe[0][1] = Mpy_32_32( Cre, tmp );
@@ -4243,6 +4245,7 @@ static void chol2x2_fx(
         }
 
         q_out_im_0_1 = Q31;
+        move16();
         IF( Cim != 0 )
         {
             outIm[0][1] = L_negate( Mpy_32_32( Cim, tmp ) );
@@ -4257,9 +4260,8 @@ static void chol2x2_fx(
         /* normalize to min */
         q_min = s_min( q_E, q_sqrtVal );
         sqrtVal = L_sub( L_shl( E1, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
-        q_sqrtVal = q_min;
 
-        exp = sub( 31, q_sqrtVal );
+        exp = sub( 31, q_min);
         outRe[0][0] = Sqrt32( L_max( sqrtVal, 0 ), &exp );
         q_out_re_0_0 = sub( 31, exp );
 
-- 
GitLab


From 07aff640efd01e10fe5192fbb9c2f6e69a711bc4 Mon Sep 17 00:00:00 2001
From: mave2802 <59919483+mave2802@users.noreply.github.com>
Date: Thu, 6 Nov 2025 21:53:31 +0100
Subject: [PATCH 8/8] clang format

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 57bda2ac3..53fed39c2 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4198,7 +4198,7 @@ static void chol2x2_fx(
         }
         q_out_im_1_0 = Q31;
         move16();
-        IF ( Cim != 0 )
+        IF( Cim != 0 )
         {
             outIm[1][0] = Mpy_32_32( Cim, tmp );
             q_out_im_1_0 = sub( q_C, sub( Q31, q_tmp ) );
@@ -4212,7 +4212,7 @@ static void chol2x2_fx(
         q_min = s_min( q_E, q_sqrtVal );
         sqrtVal = L_sub( L_shl( E2, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
 
-        exp = sub( 31, q_min);
+        exp = sub( 31, q_min );
         outRe[1][1] = Sqrt32( L_max( sqrtVal, 0 ), &exp );
         q_out_re_1_1 = sub( 31, exp );
 
@@ -4261,7 +4261,7 @@ static void chol2x2_fx(
         q_min = s_min( q_E, q_sqrtVal );
         sqrtVal = L_sub( L_shl( E1, q_min - q_E ), L_shl( sqrtVal, q_min - q_sqrtVal ) );
 
-        exp = sub( 31, q_min);
+        exp = sub( 31, q_min );
         outRe[0][0] = Sqrt32( L_max( sqrtVal, 0 ), &exp );
         q_out_re_0_0 = sub( 31, exp );
 
@@ -4604,7 +4604,7 @@ static void formulate2x2MixingMatrix_fx(
     maxEneOut_fx = L_max( Eout1_fx, Eout2_fx );
     move16();
 
-    IF ( maxEneIn_fx == 0 || maxEneOut_fx == 0 )
+    IF( maxEneIn_fx == 0 || maxEneOut_fx == 0 )
     {
         Mre_fx[0][0] = Mre_fx[0][1] = Mre_fx[1][0] = Mre_fx[1][1] = 0;
         move32();
-- 
GitLab