From 469ab4619d538f0df6037890fd6abfa8e8cadb48 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Thu, 27 Feb 2025 10:39:39 +0100
Subject: [PATCH 01/41] Add wmops push/pop instrumentation; add
 FIX_xxxx_SPEEDUP_00 (not implemented, no occurrence in current bitstream)
 and FIX_xxxx_SPEEDUP_01 (not implemented yet)

---
 lib_com/basop_util.c                          |  6 +-
 lib_dec/ivas_jbm_dec_fx.c                     |  2 +-
 .../ivas_dirac_dec_binaural_functions_fx.c    | 72 +++++++++++++++++--
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index b7ee35ab3..fa8d097df 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1010,6 +1010,7 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
+
 Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 {
     Word32 z;
@@ -1017,6 +1018,8 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     Word16 sy;
     Word32 sign;
 
+    //push_wmops( "BASOP_Util_Divide3232_Scale_cadence" );
+
     /* assert (x >= (Word32)0); */
     assert( y != (Word32) 0 );
 
@@ -1038,6 +1041,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     IF( x == (Word32) 0 )
     {
         *s = 0;
+        //pop_wmops();
         return ( (Word32) 0 );
     }
 
@@ -1058,7 +1062,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     {
         z = L_negate( z );
     }
-
+    //pop_wmops();
     return z;
 }
 
diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c
index 1918ce3f3..ce60c0d65 100644
--- a/lib_dec/ivas_jbm_dec_fx.c
+++ b/lib_dec/ivas_jbm_dec_fx.c
@@ -1875,7 +1875,7 @@ ivas_error ivas_jbm_dec_render_fx(
     move16();
     SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom;
 
-    push_wmops( "ivas_dec_render" );
+    push_wmops( "ivas_dec_render (IDR)" );
     /*----------------------------------------------------------------*
      * Initialization of local vars after struct has been set
      *----------------------------------------------------------------*/
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index b5dd1f8b9..8153775b7 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -45,6 +45,9 @@
 
 #include "wmc_auto.h"
 
+//#define FIX_xxxx_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream
+//#define FIX_xxxx_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui
+
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -504,8 +507,9 @@ void ivas_dirac_dec_binaural_render_fx(
     FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ )
     {
         Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] );
+        push_wmops( "IDR binaural internal (IDRBI)" );
         ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx );
-
+        pop_wmops();/*push_wmops( "IDR binaural internal (IDRBI)" );*/
         FOR( ch = 0; ch < nchan_out; ch++ )
         {
             output_fx_local[ch] += n_samples_sf;
@@ -708,6 +712,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
         }
     }
     /* CLDFB Analysis of input */
+    push_wmops( "IDRBI CLDFB ANALYSYS" );
     FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
     {
         FOR( ch = 0; ch < numInChannels; ch++ )
@@ -857,6 +862,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
             }
         }
     }
+    pop_wmops(); /*push_wmops( "IDRBI CLDFB ANALYSYS" );*/
 
     test();
     IF( EQ_32( config_data.ivas_format, SBA_FORMAT ) || EQ_32( config_data.ivas_format, SBA_ISM_FORMAT ) )
@@ -921,7 +927,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
     }
 
     test();
+    push_wmops( "IDRBI cov matrices" );
     ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
+    pop_wmops();/*push_wmops( "IDRBI cov matrices" );*/
 
     IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
     {
@@ -959,7 +967,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
         move16();
     }
 
+    push_wmops( "IDRBI proc matrices (IRDBI pm)" );
     ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, max_band_decorr, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, nchanSeparateChannels, st_ivas->hMasaIsmData );
+    pop_wmops(); /*push_wmops( "IDRBI proc matrices (IRDBI pm)" );*/
 
     q_inp = Q6;
     move16();
@@ -1005,8 +1015,10 @@ static void ivas_dirac_dec_binaural_internal_fx(
     hDiracDecBin->q_processMtxDecPrev = q_mat;
     move16();
 
+    push_wmops( "IDRBI processOutput" );
     ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat );
-
+    pop_wmops(); /*push_wmops( "IDRBI processOutput" );
+    */
     hDiracDecBin->hDiffuseDist = NULL;
 
     hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] );
@@ -1843,6 +1855,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         move16();
     }
 
+    push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );
     FOR( bin = 0; bin < nBins; bin++ )
     {
         Word32 tmpMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], gain_fx;
@@ -1866,6 +1879,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         q_CrEne = Q31;
         move16();
 
+        push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );
         IF( GT_16( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) )
         {
             hDiracDecBin->ChEne_fx[1][bin] = L_shr( hDiracDecBin->ChEne_fx[1][bin], sub( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) );
@@ -1935,7 +1949,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                                      hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossImOut_fx[bin],
                                      hDiracDecBin->q_ChCrossOut,
                                      prototypeMtx_fx, Mre_fx, Mim_fx, &q_M, hDiracDecBin->reqularizationFactor_fx );
+        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );*/
 
+        push_wmops( "IDRBI pm LOOP1 sec B" );
         IF( LT_16( hDiracDecBin->q_ChEne, hDiracDecBin->q_ChCross ) )
         {
             CxRe_fx[0][0] = hDiracDecBin->ChEne_fx[0][bin];
@@ -1989,9 +2005,13 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
 #endif
             resultMtxRe_fx, resultMtxIm_fx, &q_res );
 
+        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec B" );*/
+
         /* When below the frequency limit where decorrelation is applied, we inject the decorrelated
          * residual (or missing) signal component. The procedure is active when there are not enough independent
          * signal energy to synthesize a signal with the target covariance matrix from the non-decorrelated signals */
+
+        push_wmops( "IDRBI pm LOOP1 sec C" );
         IF( LT_16( bin, max_band_decorr ) )
         {
             Word32 decorrelationReductionFactor_fx;
@@ -2107,7 +2127,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             q_Mdec = Q31;
             move16();
         }
+        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec C" );*/
 
+        push_wmops( "IDRBI pm LOOP1 sec D" );
         /* The regularizations at determining mixing matrices cause signal energy to be lost to some degree, which is compensated for here */
         tmp1 = L_add( CrEneL_fx, CrEneR_fx );
         exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
@@ -2198,6 +2220,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         q_processMtxDec_bin = q_processMtxDec[bin];
         move16();
         move16();
+
         /* Store processing matrices */
         FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
         {
@@ -2232,7 +2255,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         move16();
         q_processMtxDec[bin] = sub( q_Mdec, 16 );
         move16();
+        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec D" );*/
 
+
+        push_wmops( "IDRBI pm LOOP1 sec E" );
         IF( separateCenterChannelRendering )
         {
             /* The rendering of the separate center channel in masa + mono mode.
@@ -2322,7 +2348,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                 }
             }
         }
+        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec E" );*/
     }
+    pop_wmops(); /*push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );*/
+
     /* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */
     minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx );
     minimum_s( q_processMtxPrev, nBins, &hDiracDecBin->q_processMtxPrev );
@@ -2342,6 +2371,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
     minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
     minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );
 
+    push_wmops( "IRDBI pm LOOP2" );
     FOR( bin = 0; bin < nBins; bin++ )
     {
         FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -2381,6 +2411,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             }
         }
     }
+    pop_wmops(); /*push_wmops( "IRDBI pm LOOP2" );*/
 
     return;
 }
@@ -4354,7 +4385,9 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
+        push_wmops( "formulate2x2MixingMatrix Division" );
         maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp );
+        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
         q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
     }
     exp = norm_l( maxEneDiv_fx );
@@ -4377,9 +4410,12 @@ static void formulate2x2MixingMatrix_fx(
     Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
     q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );
 
+    push_wmops( "formulate2x2MixingMatrix cholesky" );
     /* Cholesky decomposition of target / output covariance matrix */
     chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
+    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix cholesky" );*/
 
+    push_wmops( "formulate2x2MixingMatrix Eigendecomp" );
     /* Eigendecomposition of input covariance matrix */
     eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx );
 
@@ -4397,7 +4433,9 @@ static void formulate2x2MixingMatrix_fx(
     move32();
 
     matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx );
+    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Eigendecomp" );*/
 
+    push_wmops( "formulate2x2MixingMatrix RegSMInv" );
     /* Regularize the diagonal Sx for matrix inversion */
     Sx_fx[0] = L_max( L_shr( Sx_fx[0], 1 ), Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ) );
     Sx_fx[1] = L_max( L_shr( Sx_fx[1], 1 ), L_shl( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) );
@@ -4432,8 +4470,9 @@ static void formulate2x2MixingMatrix_fx(
     ELSE
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-
+        push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
+        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
 #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
@@ -4469,8 +4508,9 @@ static void formulate2x2MixingMatrix_fx(
     ELSE
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-
+        push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
+        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
 #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4487,7 +4527,9 @@ static void formulate2x2MixingMatrix_fx(
     move32();
     Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat
     move32();
+    pop_wmops();
 
+    push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );
     /* Matrix multiplication, tmp = Ky' * G_hat * Q */
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
@@ -4513,17 +4555,29 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
     }
+    pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/
 
     q_temp = sub( add( q_ky, q_GhatQ ), 31 );
 
+    push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );
     /* A = Ky' * G_hat * Q * Kx (see publication) */
     matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );
+    pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/
 
+    push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );
     /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
        For matrix A that is P = A(A'A)^0.5 */
+    push_wmops( "oPtoA MT1M" );
+#ifdef FIX_xxxx_SPEEDUP_01
+    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
+
+    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
+#else
     matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
+#endif
+    pop_wmops();/*push_wmops( "oPtoA MT1M" );*/
 
     IF( D_fx[0] == 0 )
     {
@@ -4537,8 +4591,10 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
+        push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
         exp = sub( exp, sub( Q30, q_D ) );
+        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
     }
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
@@ -4555,7 +4611,9 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
+        push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 );
+        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp1 = sub( exp1, sub( Q30, q_D ) );
     }
     div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4657,7 +4715,9 @@ static void formulate2x2MixingMatrix_fx(
                          0 /*int Bscale*/,
 #endif
                          Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
+    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/
 
+    push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
 #if ( BINAURAL_CHANNELS != 2 )
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -4740,7 +4800,9 @@ static void formulate2x2MixingMatrix_fx(
         {
             Word16 Pre_shift, Pim_shift;
             temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+            push_wmops( "formulate2x2MixingMatrix Division" );
             temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
+            pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
             q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
 
             Pre_shift = norm_l( Pre_fx[0][chB] );
@@ -4811,6 +4873,8 @@ static void formulate2x2MixingMatrix_fx(
                          0 /*int Bscale*/,
 #endif
                          Mre_fx, Mim_fx, q_M );
+    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );*/
+
     return;
 }
 
-- 
GitLab


From 679ca05f9e94d39c8df4ff0a4b107b9fd89782bc Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Thu, 27 Feb 2025 10:41:26 +0100
Subject: [PATCH 02/41] change names of macros from xxxx to 1326

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 8153775b7..716b16262 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -45,8 +45,8 @@
 
 #include "wmc_auto.h"
 
-//#define FIX_xxxx_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream
-//#define FIX_xxxx_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui
+//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream
+//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
@@ -4568,7 +4568,7 @@ static void formulate2x2MixingMatrix_fx(
     /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
        For matrix A that is P = A(A'A)^0.5 */
     push_wmops( "oPtoA MT1M" );
-#ifdef FIX_xxxx_SPEEDUP_01
+#ifdef FIX_1326_SPEEDUP_01
     matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
-- 
GitLab


From 50fbdde0ea69c9e131293ad7a4abecf64498c847 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Thu, 27 Feb 2025 16:56:36 +0100
Subject: [PATCH 03/41] implemented and activated FIX_1326_SPEEDUP_01 - 07; FIX_1326_SPEEDUP_00 stays disabled

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 190 +++++++++++++++++-
 1 file changed, 182 insertions(+), 8 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 716b16262..d1fb6c23c 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -46,8 +46,13 @@
 #include "wmc_auto.h"
 
 //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream
-//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : rollout loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefitcui
-
+#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
+#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
+#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
+#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
+#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS
+#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS
+#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -3280,6 +3285,19 @@ static void eig2x2_fx(
 
     /* Numeric case, when input is practically zeros */
     // IF( D_fx[0] < EPSILON_FX )
+#ifdef FIX_1326_SPEEDUP_02
+    IF ( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) )
+    {
+        Ure_fx[0][0] = ONE_IN_Q31;
+        move32();
+        Ure_fx[1][1] = ONE_IN_Q31;
+        move32();
+        *q_U = Q31;
+        move16();
+
+        return;
+    }
+#else
     IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( D_fx[0], *q_D, EPSILON_MANT, EPSILON_EXP ), -1 ) )
     {
         Ure_fx[0][0] = ONE_IN_Q31;
@@ -3291,8 +3309,24 @@ static void eig2x2_fx(
 
         return;
     }
+#endif
 
     /* Numeric case, when input is near an identity matrix with a gain */
+#ifdef FIX_1326_SPEEDUP_03 //178.932
+    tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
+
+        IF( LT_32( pm_fx, L_shl_sat(tmp1, sub(q_tmp1,q_tmp2) ) ) )
+        {
+            Ure_fx[0][0] = ONE_IN_Q30;
+            move32();
+            Ure_fx[1][1] = ONE_IN_Q30;
+            move32();
+            *q_U = Q30;
+            move16();
+
+            return;
+        }
+#else
     tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
     IF( LT_16( q_tmp1, q_tmp2 ) )
@@ -3323,6 +3357,7 @@ static void eig2x2_fx(
             return;
         }
     }
+#endif
 
     q_U_1 = 0;
     q_U_2 = 0;
@@ -3431,10 +3466,22 @@ static void eig2x2_fx(
             tmp2 = Mpy_32_32( s_fx, s_fx );
             q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
 
+
+#ifdef FIX_1326_SPEEDUP_04
+            Word16 exp_tmp2;
+            Word32 eps_tmp;
+            
+            tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 );
+            eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) );
+
+            tmp3 = L_add( L_shr ( tmp2,1), L_shr(eps_tmp,1) ); // Add Epsilon if relevant
+
+            exp_tmp3 = add(exp_tmp2 , 1);
+#else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
-
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
+#endif
 
 #if 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
@@ -4387,7 +4434,7 @@ static void formulate2x2MixingMatrix_fx(
     {
         push_wmops( "formulate2x2MixingMatrix Division" );
         maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp );
-        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
+        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
     }
     exp = norm_l( maxEneDiv_fx );
@@ -4468,6 +4515,28 @@ static void formulate2x2MixingMatrix_fx(
 #endif
     }
     ELSE
+#ifdef FIX_1326_SPEEDUP_05
+    {
+        Word16 shift = norm_l( temp );
+        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
+        exp_temp = sub( 31, q_ein );
+        if ( temp == 0 )
+        {
+            exp_temp = EPSILON_EXP;
+            move32();
+        }
+        if (temp == 0)
+        {
+            temp = EPSILON_MANT;
+            move32();
+        }
+        temp = ISqrt32( temp , &exp_temp);
+        shift = sub( 31, q_eout );
+        Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp );
+        move32();
+        exp = add( shift, exp_temp );
+   }
+#else
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
         push_wmops( "formulate2x2MixingMatrix Division" );
@@ -4478,6 +4547,7 @@ static void formulate2x2MixingMatrix_fx(
         Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
     }
+#endif
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
@@ -4506,16 +4576,36 @@ static void formulate2x2MixingMatrix_fx(
 #endif
     }
     ELSE
+#ifdef FIX_1326_SPEEDUP_06
+    {
+        Word16 shift = norm_l( temp );
+        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
+        exp_temp = sub(31, q_ein);
+        if ( temp == 0 )
+        {
+            exp_temp = add( 0, EPSILON_EXP );
+        }
+        if (temp == 0)
+        {
+            temp = L_add( 0, EPSILON_MANT );
+        }
+        temp = ISqrt32( temp, &exp_temp );
+        shift = sub( 31, q_eout );
+        Ghat_fx[1] = Mpy_32_32( temp, ISqrt32( E_out2, &shift ) );
+        exp_temp = add( shift, exp_temp );
+    }
+#else
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
         push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
-        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
+        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
 #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
     }
+#endif
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
@@ -4555,21 +4645,86 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
     }
-    pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/
+    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/
 
     q_temp = sub( add( q_ky, q_GhatQ ), 31 );
 
     push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );
     /* A = Ky' * G_hat * Q * Kx (see publication) */
     matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );
-    pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/
+    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/
 
     push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );
     /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
        For matrix A that is P = A(A'A)^0.5 */
     push_wmops( "oPtoA MT1M" );
 #ifdef FIX_1326_SPEEDUP_01
-    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
+    // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
+
+    {
+        Word16 chA, chB;
+            {
+            chA = 0, chB = 0;
+                tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ),
+                                                                         Are_fx[1][0], Are_fx[1][0] ),
+                                                             Aim_fx[0][0], Aim_fx[0][0] ),
+                                                 Aim_fx[1][0], Aim_fx[1][0] );
+                move32();
+            }
+            {
+                chA = 0, chB = 1;
+                tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ),
+                                                                         Are_fx[1][1], Are_fx[1][0] ),
+                                                             Aim_fx[0][1], Aim_fx[0][0] ),
+                                                 Aim_fx[1][1], Aim_fx[1][0] );
+                move32();
+                tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ),
+                                                                         Are_fx[1][1], Aim_fx[1][0] ),
+                                                             Aim_fx[0][1], Are_fx[0][0] ),
+                                                 Aim_fx[1][1], Are_fx[1][0] );
+                move32();
+            }
+            {
+                chA = 1, chB = 0;
+                tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ),
+                                                                         Are_fx[1][1], Are_fx[1][1] ),
+                                                             Aim_fx[0][1], Aim_fx[0][1] ),
+                                                 Aim_fx[1][1], Aim_fx[1][1] );
+                move32();
+            }
+            {
+                chA = 1, chB = 1;
+            }
+
+        q_temp = sub( add( q_A, q_A ), 31 );
+
+        move16();
+        Word16 ZeroState = add( 1, 0 );
+        if (tmpRe_fx[0][0] != 0)
+        {
+            ZeroState = add(0, 0);
+        }
+        if ( tmpRe_fx[1][1] != 0 )
+        {
+            ZeroState = add( 0, 0 );
+        }
+        if ( tmpRe_fx[1][0] != 0 )
+        {
+            ZeroState = add( 0, 0 );
+        }
+        if ( tmpIm_fx[1][0] != 0 )
+        {
+            ZeroState = add( 0, 0 );
+        }
+
+        if ( sub(ZeroState,1) == 0 )
+        {
+          q_temp = Q31;
+            move16();
+        }
+
+    }
+
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
 #else
@@ -4579,6 +4734,24 @@ static void formulate2x2MixingMatrix_fx(
 #endif
     pop_wmops();/*push_wmops( "oPtoA MT1M" );*/
 
+#ifdef FIX_1326_SPEEDUP_07
+    IF( D_fx[0] == 0 )
+    {
+        //temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
+        //exp = ONE_DIV_EPSILON_EXP;
+        div_fx[0] = L_add(0,2047986068); //Sqrt32( temp, &exp ); // Q = 31 - exp
+        exp = add(0,20);
+    }
+    ELSE
+    {
+        exp = sub( 31, q_D );
+        div_fx[0] = ISqrt32( D_fx[0], &exp );
+        //temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
+        //exp = sub( exp, sub( Q30, q_D ) );
+        //div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+        move32();
+    }
+#else
     IF( D_fx[0] == 0 )
     {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4598,6 +4771,7 @@ static void formulate2x2MixingMatrix_fx(
     }
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
+#endif
 
     IF( D_fx[1] == 0 )
     {
-- 
GitLab


From d12ec4121777398516e9a8bab2665f62e16c43c0 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Tue, 4 Mar 2025 16:32:41 +0100
Subject: [PATCH 04/41] apply clang-format patch

---
 lib_com/basop_util.c                          |   6 +-
 .../ivas_dirac_dec_binaural_functions_fx.c    | 139 +++++++++---------
 2 files changed, 72 insertions(+), 73 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index fa8d097df..609ca234d 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1018,7 +1018,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     Word16 sy;
     Word32 sign;
 
-    //push_wmops( "BASOP_Util_Divide3232_Scale_cadence" );
+    // push_wmops( "BASOP_Util_Divide3232_Scale_cadence" );
 
     /* assert (x >= (Word32)0); */
     assert( y != (Word32) 0 );
@@ -1041,7 +1041,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     IF( x == (Word32) 0 )
     {
         *s = 0;
-        //pop_wmops();
+        // pop_wmops();
         return ( (Word32) 0 );
     }
 
@@ -1062,7 +1062,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     {
         z = L_negate( z );
     }
-    //pop_wmops();
+    // pop_wmops();
     return z;
 }
 
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index d1fb6c23c..577ee62f7 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -514,7 +514,7 @@ void ivas_dirac_dec_binaural_render_fx(
         Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] );
         push_wmops( "IDR binaural internal (IDRBI)" );
         ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx );
-        pop_wmops();/*push_wmops( "IDR binaural internal (IDRBI)" );*/
+        pop_wmops(); /*push_wmops( "IDR binaural internal (IDRBI)" );*/
         FOR( ch = 0; ch < nchan_out; ch++ )
         {
             output_fx_local[ch] += n_samples_sf;
@@ -934,7 +934,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
     test();
     push_wmops( "IDRBI cov matrices" );
     ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
-    pop_wmops();/*push_wmops( "IDRBI cov matrices" );*/
+    pop_wmops(); /*push_wmops( "IDRBI cov matrices" );*/
 
     IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
     {
@@ -1023,7 +1023,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
     push_wmops( "IDRBI processOutput" );
     ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat );
     pop_wmops(); /*push_wmops( "IDRBI processOutput" );
-    */
+                  */
     hDiracDecBin->hDiffuseDist = NULL;
 
     hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] );
@@ -3286,7 +3286,7 @@ static void eig2x2_fx(
     /* Numeric case, when input is practically zeros */
     // IF( D_fx[0] < EPSILON_FX )
 #ifdef FIX_1326_SPEEDUP_02
-    IF ( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) )
+    IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) )
     {
         Ure_fx[0][0] = ONE_IN_Q31;
         move32();
@@ -3312,20 +3312,20 @@ static void eig2x2_fx(
 #endif
 
     /* Numeric case, when input is near an identity matrix with a gain */
-#ifdef FIX_1326_SPEEDUP_03 //178.932
+#ifdef FIX_1326_SPEEDUP_03               // 178.932
     tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
-        IF( LT_32( pm_fx, L_shl_sat(tmp1, sub(q_tmp1,q_tmp2) ) ) )
-        {
-            Ure_fx[0][0] = ONE_IN_Q30;
-            move32();
-            Ure_fx[1][1] = ONE_IN_Q30;
-            move32();
-            *q_U = Q30;
-            move16();
+    IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) )
+    {
+        Ure_fx[0][0] = ONE_IN_Q30;
+        move32();
+        Ure_fx[1][1] = ONE_IN_Q30;
+        move32();
+        *q_U = Q30;
+        move16();
 
-            return;
-        }
+        return;
+    }
 #else
     tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
@@ -3470,13 +3470,13 @@ static void eig2x2_fx(
 #ifdef FIX_1326_SPEEDUP_04
             Word16 exp_tmp2;
             Word32 eps_tmp;
-            
+
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 );
             eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) );
 
-            tmp3 = L_add( L_shr ( tmp2,1), L_shr(eps_tmp,1) ); // Add Epsilon if relevant
+            tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); // Add Epsilon if relevant
 
-            exp_tmp3 = add(exp_tmp2 , 1);
+            exp_tmp3 = add( exp_tmp2, 1 );
 #else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
@@ -4525,17 +4525,17 @@ static void formulate2x2MixingMatrix_fx(
             exp_temp = EPSILON_EXP;
             move32();
         }
-        if (temp == 0)
+        if ( temp == 0 )
         {
             temp = EPSILON_MANT;
             move32();
         }
-        temp = ISqrt32( temp , &exp_temp);
+        temp = ISqrt32( temp, &exp_temp );
         shift = sub( 31, q_eout );
         Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp );
         move32();
         exp = add( shift, exp_temp );
-   }
+    }
 #else
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
@@ -4580,12 +4580,12 @@ static void formulate2x2MixingMatrix_fx(
     {
         Word16 shift = norm_l( temp );
         temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
-        exp_temp = sub(31, q_ein);
+        exp_temp = sub( 31, q_ein );
         if ( temp == 0 )
         {
             exp_temp = add( 0, EPSILON_EXP );
         }
-        if (temp == 0)
+        if ( temp == 0 )
         {
             temp = L_add( 0, EPSILON_MANT );
         }
@@ -4663,46 +4663,46 @@ static void formulate2x2MixingMatrix_fx(
 
     {
         Word16 chA, chB;
-            {
+        {
             chA = 0, chB = 0;
-                tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ),
-                                                                         Are_fx[1][0], Are_fx[1][0] ),
-                                                             Aim_fx[0][0], Aim_fx[0][0] ),
-                                                 Aim_fx[1][0], Aim_fx[1][0] );
-                move32();
-            }
-            {
-                chA = 0, chB = 1;
-                tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ),
-                                                                         Are_fx[1][1], Are_fx[1][0] ),
-                                                             Aim_fx[0][1], Aim_fx[0][0] ),
-                                                 Aim_fx[1][1], Aim_fx[1][0] );
-                move32();
-                tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ),
-                                                                         Are_fx[1][1], Aim_fx[1][0] ),
-                                                             Aim_fx[0][1], Are_fx[0][0] ),
-                                                 Aim_fx[1][1], Are_fx[1][0] );
-                move32();
-            }
-            {
-                chA = 1, chB = 0;
-                tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ),
-                                                                         Are_fx[1][1], Are_fx[1][1] ),
-                                                             Aim_fx[0][1], Aim_fx[0][1] ),
-                                                 Aim_fx[1][1], Aim_fx[1][1] );
-                move32();
-            }
-            {
-                chA = 1, chB = 1;
-            }
+            tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ),
+                                                                 Are_fx[1][0], Are_fx[1][0] ),
+                                                     Aim_fx[0][0], Aim_fx[0][0] ),
+                                         Aim_fx[1][0], Aim_fx[1][0] );
+            move32();
+        }
+        {
+            chA = 0, chB = 1;
+            tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ),
+                                                                 Are_fx[1][1], Are_fx[1][0] ),
+                                                     Aim_fx[0][1], Aim_fx[0][0] ),
+                                         Aim_fx[1][1], Aim_fx[1][0] );
+            move32();
+            tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ),
+                                                                 Are_fx[1][1], Aim_fx[1][0] ),
+                                                     Aim_fx[0][1], Are_fx[0][0] ),
+                                         Aim_fx[1][1], Are_fx[1][0] );
+            move32();
+        }
+        {
+            chA = 1, chB = 0;
+            tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ),
+                                                                 Are_fx[1][1], Are_fx[1][1] ),
+                                                     Aim_fx[0][1], Aim_fx[0][1] ),
+                                         Aim_fx[1][1], Aim_fx[1][1] );
+            move32();
+        }
+        {
+            chA = 1, chB = 1;
+        }
 
         q_temp = sub( add( q_A, q_A ), 31 );
 
         move16();
         Word16 ZeroState = add( 1, 0 );
-        if (tmpRe_fx[0][0] != 0)
+        if ( tmpRe_fx[0][0] != 0 )
         {
-            ZeroState = add(0, 0);
+            ZeroState = add( 0, 0 );
         }
         if ( tmpRe_fx[1][1] != 0 )
         {
@@ -4717,12 +4717,11 @@ static void formulate2x2MixingMatrix_fx(
             ZeroState = add( 0, 0 );
         }
 
-        if ( sub(ZeroState,1) == 0 )
+        if ( sub( ZeroState, 1 ) == 0 )
         {
-          q_temp = Q31;
+            q_temp = Q31;
             move16();
         }
-
     }
 
 
@@ -4732,23 +4731,23 @@ static void formulate2x2MixingMatrix_fx(
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
 #endif
-    pop_wmops();/*push_wmops( "oPtoA MT1M" );*/
+    pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/
 
 #ifdef FIX_1326_SPEEDUP_07
     IF( D_fx[0] == 0 )
     {
-        //temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
-        //exp = ONE_DIV_EPSILON_EXP;
-        div_fx[0] = L_add(0,2047986068); //Sqrt32( temp, &exp ); // Q = 31 - exp
-        exp = add(0,20);
+        // temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
+        // exp = ONE_DIV_EPSILON_EXP;
+        div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp
+        exp = add( 0, 20 );
     }
     ELSE
     {
         exp = sub( 31, q_D );
         div_fx[0] = ISqrt32( D_fx[0], &exp );
-        //temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
-        //exp = sub( exp, sub( Q30, q_D ) );
-        //div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+        // temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
+        // exp = sub( exp, sub( Q30, q_D ) );
+        // div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
         move32();
     }
 #else
@@ -4787,7 +4786,7 @@ static void formulate2x2MixingMatrix_fx(
     {
         push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 );
-        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
+        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp1 = sub( exp1, sub( Q30, q_D ) );
     }
     div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4889,7 +4888,7 @@ static void formulate2x2MixingMatrix_fx(
                          0 /*int Bscale*/,
 #endif
                          Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
-    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/
+    pop_wmops();                                 /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/
 
     push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
@@ -4976,7 +4975,7 @@ static void formulate2x2MixingMatrix_fx(
             temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
             push_wmops( "formulate2x2MixingMatrix Division" );
             temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
-            pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
+            pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
             q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
 
             Pre_shift = norm_l( Pre_fx[0][chB] );
-- 
GitLab


From f12a124db1300a8f344276a4b674747fa464a264 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Tue, 4 Mar 2025 17:04:07 +0100
Subject: [PATCH 05/41] added FIX_1326_SPEEDUP_08

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 577ee62f7..2dac2b867 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -53,6 +53,7 @@
 #define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS
 #define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS
 #define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS
+#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -4772,6 +4773,20 @@ static void formulate2x2MixingMatrix_fx(
     move32();
 #endif
 
+#ifdef FIX_1326_SPEEDUP_08
+    // This is just a shortcut to already existing optimizations  (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster
+    {
+        div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
+        exp1 = add( 0, 20 );                // move32();
+    }
+
+    IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt
+    {
+        exp1 = sub( 31, q_D );
+        div_fx[1] = ISqrt32( D_fx[1], &exp1 );
+        move32();
+    }
+#else
     IF( D_fx[1] == 0 )
     {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4791,7 +4806,7 @@ static void formulate2x2MixingMatrix_fx(
     }
     div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
     move32();
-
+#endif
     q_div = sub( 31, s_max( exp, exp1 ) );
 
     div_fx[0] = L_shr( div_fx[0], sub( sub( 31, exp ), q_div ) ); // q_div
-- 
GitLab


From 56305ddde36fee4fe6ecfb9e50494117be5a6c74 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Tue, 4 Mar 2025 17:07:26 +0100
Subject: [PATCH 06/41] fixed warning

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 2dac2b867..7c243d7d8 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -3770,6 +3770,7 @@ static void matrixMul_fx(
     return;
 }
 
+#ifndef FIX_1326_SPEEDUP_01
 static void matrixTransp1Mul_fx(
     Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
     Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -3883,6 +3884,7 @@ static void matrixTransp1Mul_fx(
 
     return;
 }
+#endif /*FIX_1326_SPEEDUP_01*/
 
 static void matrixTransp2Mul_fx(
     Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -4663,7 +4665,7 @@ static void formulate2x2MixingMatrix_fx(
     // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     {
-        Word16 chA, chB;
+        //Word16 chA, chB;
         {
             chA = 0, chB = 0;
             tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ),
@@ -4673,7 +4675,7 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
         {
-            chA = 0, chB = 1;
+            //chA = 0, chB = 1;
             tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ),
                                                                  Are_fx[1][1], Are_fx[1][0] ),
                                                      Aim_fx[0][1], Aim_fx[0][0] ),
@@ -4686,7 +4688,7 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
         {
-            chA = 1, chB = 0;
+            //chA = 1, chB = 0;
             tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ),
                                                                  Are_fx[1][1], Are_fx[1][1] ),
                                                      Aim_fx[0][1], Aim_fx[0][1] ),
@@ -4694,7 +4696,7 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
         {
-            chA = 1, chB = 1;
+            //chA = 1, chB = 1;
         }
 
         q_temp = sub( add( q_A, q_A ), 31 );
-- 
GitLab


From 4b66449d1b749bc14b31ea0e1ef9a1715ef29613 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Tue, 4 Mar 2025 17:11:02 +0100
Subject: [PATCH 07/41] apply clang patch

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 7c243d7d8..1544b4f1a 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4665,7 +4665,7 @@ static void formulate2x2MixingMatrix_fx(
     // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     {
-        //Word16 chA, chB;
+        // Word16 chA, chB;
         {
             chA = 0, chB = 0;
             tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ),
@@ -4675,7 +4675,7 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
         {
-            //chA = 0, chB = 1;
+            // chA = 0, chB = 1;
             tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ),
                                                                  Are_fx[1][1], Are_fx[1][0] ),
                                                      Aim_fx[0][1], Aim_fx[0][0] ),
@@ -4688,7 +4688,7 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
         {
-            //chA = 1, chB = 0;
+            // chA = 1, chB = 0;
             tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ),
                                                                  Are_fx[1][1], Are_fx[1][1] ),
                                                      Aim_fx[0][1], Aim_fx[0][1] ),
@@ -4696,7 +4696,7 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
         {
-            //chA = 1, chB = 1;
+            // chA = 1, chB = 1;
         }
 
         q_temp = sub( add( q_A, q_A ), 31 );
-- 
GitLab


From 03b77f5ce70483c1024a794f419882ab4a15a1bb Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 09:26:29 +0100
Subject: [PATCH 08/41] activate only FIX_1326_SPEEDUP_08 (disable SPEEDUP_01..07)

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 1544b4f1a..f024f53ba 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -46,14 +46,14 @@
 #include "wmc_auto.h"
 
 //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream
-#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
-#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
-#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
-#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
-#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS
-#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS
-#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS
-#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS
+//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
+//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
+//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
+//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
+//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs
+//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS  //Quite bad diffs
+//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs
+#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs!
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
-- 
GitLab


From 86520445e6af996f9ead2625174d24694c6d8ae8 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 11:13:06 +0100
Subject: [PATCH 09/41] activate SPEEDUP 07 small version

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 20 ++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index f024f53ba..c452437f1 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -52,8 +52,8 @@
 //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
 //#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs
 //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS  //Quite bad diffs
-//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs
-#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs!
+#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs , no replacement of divSqrt
+#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs, PIPELINE GREEN!
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -4746,12 +4746,18 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
-        exp = sub( 31, q_D );
-        div_fx[0] = ISqrt32( D_fx[0], &exp );
-        // temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
-        // exp = sub( exp, sub( Q30, q_D ) );
-        // div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+#if 1 //old code
+        push_wmops( "formulate2x2MixingMatrix Division" );
+        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
+        exp = sub( exp, sub( Q30, q_D ) );
+        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
+        div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+        move32();
+#else
+        exp = sub(31, q_D);
+        div_fx[0] = ISqrt32_2( D_fx[0], &exp );
         move32();
+#endif 
     }
 #else
     IF( D_fx[0] == 0 )
-- 
GitLab


From 12809f5b6359704415a114ddde609e193c899283 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 11:18:14 +0100
Subject: [PATCH 10/41] apply clang format patch

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index c452437f1..1e37860c9 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4746,18 +4746,18 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
-#if 1 //old code
+#if 1 // old code
         push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
         exp = sub( exp, sub( Q30, q_D ) );
-        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
+        pop_wmops();                      /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
         move32();
 #else
-        exp = sub(31, q_D);
+        exp = sub( 31, q_D );
         div_fx[0] = ISqrt32_2( D_fx[0], &exp );
         move32();
-#endif 
+#endif
     }
 #else
     IF( D_fx[0] == 0 )
-- 
GitLab


From 9028c88021ef93cc2cfadb421f1db6be435a788c Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 12:22:09 +0100
Subject: [PATCH 11/41] activated speedup 01 02 03 04

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 44 ++++++++++++++-----
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 1e37860c9..06b5be3ee 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -46,14 +46,14 @@
 #include "wmc_auto.h"
 
 //#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream
-//#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
-//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
-//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
-//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
-//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs
-//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS  //Quite bad diffs
-#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt // 2.8 WMOPS //Big DIffs , no replacement of divSqrt
-#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs, PIPELINE GREEN!
+#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
+#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
+#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
+#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
+//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE
+//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS  //Quite bad diffs -- > DONT USE
+#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt //  ?  WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE
+#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -4521,8 +4521,12 @@ static void formulate2x2MixingMatrix_fx(
 #ifdef FIX_1326_SPEEDUP_05
     {
         Word16 shift = norm_l( temp );
+#if 1 // oldcode
+        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+#else
+
         temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
-        exp_temp = sub( 31, q_ein );
+        exp_temp = sub( 30, q_ein );
         if ( temp == 0 )
         {
             exp_temp = EPSILON_EXP;
@@ -4533,6 +4537,7 @@ static void formulate2x2MixingMatrix_fx(
             temp = EPSILON_MANT;
             move32();
         }
+#endif
         temp = ISqrt32( temp, &exp_temp );
         shift = sub( 31, q_eout );
         Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp );
@@ -4582,8 +4587,11 @@ static void formulate2x2MixingMatrix_fx(
 #ifdef FIX_1326_SPEEDUP_06
     {
         Word16 shift = norm_l( temp );
+#if 0 //oldcode
+        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+#else
         temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
-        exp_temp = sub( 31, q_ein );
+        exp_temp = sub( 31 - 1, q_ein );
         if ( temp == 0 )
         {
             exp_temp = add( 0, EPSILON_EXP );
@@ -4592,10 +4600,22 @@ static void formulate2x2MixingMatrix_fx(
         {
             temp = L_add( 0, EPSILON_MANT );
         }
+#endif
+#if 1 //oldcode - new code introduces too much noise
+        push_wmops( "formulate2x2MixingMatrix Division" );
+        temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
+        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
+        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
+#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
+        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
+#endif
+#else
         temp = ISqrt32( temp, &exp_temp );
         shift = sub( 31, q_eout );
-        Ghat_fx[1] = Mpy_32_32( temp, ISqrt32( E_out2, &shift ) );
-        exp_temp = add( shift, exp_temp );
+        Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) );
+        exp1 = add( shift, exp_temp );
+#endif 
+
     }
 #else
     {
-- 
GitLab


From 90d2563bf00e217b994ad64a27421f37adb0b628 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 12:35:34 +0100
Subject: [PATCH 12/41] apply clang format patch

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 06b5be3ee..5fcc8a2d1 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -54,6 +54,7 @@
 //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS  //Quite bad diffs -- > DONT USE
 #define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt //  ?  WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE
 #define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE
+#define FIX_1326_SPEEDUP_09 // Relocate matrixMul
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -4587,7 +4588,7 @@ static void formulate2x2MixingMatrix_fx(
 #ifdef FIX_1326_SPEEDUP_06
     {
         Word16 shift = norm_l( temp );
-#if 0 //oldcode
+#if 0 // oldcode
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
 #else
         temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
@@ -4601,7 +4602,7 @@ static void formulate2x2MixingMatrix_fx(
             temp = L_add( 0, EPSILON_MANT );
         }
 #endif
-#if 1 //oldcode - new code introduces too much noise
+#if 1 // oldcode - new code introduces too much noise
         push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
         pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
@@ -4614,8 +4615,7 @@ static void formulate2x2MixingMatrix_fx(
         shift = sub( 31, q_eout );
         Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) );
         exp1 = add( shift, exp_temp );
-#endif 
-
+#endif
     }
 #else
     {
@@ -4749,10 +4749,17 @@ static void formulate2x2MixingMatrix_fx(
 
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
+#ifdef FIX_1326_SPEEDUP_09
+    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
+#endif
 #else
     matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
+
+#ifdef FIX_1326_SPEEDUP_09
+    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
+#endif
 #endif
     pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/
 
@@ -4860,7 +4867,9 @@ static void formulate2x2MixingMatrix_fx(
         move16();
     }
 
+#ifndef FIX_1326_SPEEDUP_09
     matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
+#endif
 
     exp = L_norm_arr( div_fx, BINAURAL_CHANNELS );
     scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
-- 
GitLab


From 33a98171ada436d22c6f538c5ad3dbefd93bebb0 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 14:30:57 +0100
Subject: [PATCH 13/41] activate speedup 09 10 11 for testing

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 80 ++++++++++++++-----
 1 file changed, 60 insertions(+), 20 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 5fcc8a2d1..e99cb6b34 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -45,16 +45,25 @@
 
 #include "wmc_auto.h"
 
-//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurence in current bitstream
-#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
-#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
-#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
-#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
-//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt // 3.5 WMOPS //Quite bad diffs --> DONT USE
-//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt // 3.0 WMOPS  //Quite bad diffs -- > DONT USE
-#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt //  ?  WMOPS //Big DIffs , no replacement of divSqrt , PIPELINE GREEN, --> USE
-#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE
-#define FIX_1326_SPEEDUP_09 // Relocate matrixMul
+// MHZ NUMBERS:
+// NULL: 179.292
+
+
+//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams     //no occurence        --> DONT USE
+//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                    --> USE
+//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                    --> USE
+//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                    --> USE
+//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                    --> USE
+//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt                // 3.5 WMOPS //Quite bad diffs  --> DONT USE
+//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
+//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
+//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
+#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS                    
+#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS 
+#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS
+//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                 -->DONTUSE
+//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS
+//#define FIX_1326_SPEEDUP_14 // 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -2142,6 +2151,14 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
         tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) );
         q_tmp2 = add( q_res, exp );
+#ifdef FIX_1326_SPEEDUP_11
+        {
+            Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) );
+            Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) );
+            realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) );
+            q_realizedOutputEne = s_min( q_CrEne, q_tmp2 );
+        }
+#else
         IF( LT_16( q_CrEne, q_tmp2 ) )
         {
             realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) );
@@ -2154,7 +2171,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             q_realizedOutputEne = q_tmp2;
             move16();
         }
-
+#endif
         exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 );
         targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) );
         q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp );
@@ -2177,9 +2194,17 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             q_missingOutputEne = q_targetOutputEne;
             move16();
         }
-
         tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 );
 
+#ifdef FIX_1326_SPEEDUP_13
+        {
+            Word16 exp_temp;
+            tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+            tmp2 = ISqrt32( tmp2, &exp_temp );
+            gain_fx = Mpy_32_32(tmp2, Sqrt32(tmp1, &exp1));
+            q_gain = sub( 31, add( exp_temp, exp1 ) );
+        }
+#else
         {
             Word16 exp_temp;
             tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
@@ -2188,6 +2213,8 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         }
         gain_fx = Sqrt32( tmp2, &exp2 );
         q_gain = sub( 31, exp2 );
+#endif
+     
 
         // 1073741824 = 4 in Q28
         IF( LT_16( q_gain, Q28 ) )
@@ -4749,17 +4776,11 @@ static void formulate2x2MixingMatrix_fx(
 
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
-#ifdef FIX_1326_SPEEDUP_09
-    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
-#endif
 #else
     matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
 
-#ifdef FIX_1326_SPEEDUP_09
-    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
-#endif
 #endif
     pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/
 
@@ -4849,7 +4870,19 @@ static void formulate2x2MixingMatrix_fx(
     div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div
     move32();
 
+
     // 1310720000 = 10,000.0f in Q17
+#ifdef FIX_1326_SPEEDUP_09
+    {
+        Word16 shift1 = s_max( sub( Q17, q_div ), 0 );
+        Word16 shift2 = s_max( sub( q_div, Q17 ), 0 );
+
+        div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div
+        move32();
+        div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div
+        move32();
+    }
+#else
     IF( LT_16( q_div, Q17 ) )
     {
         div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div
@@ -4866,10 +4899,9 @@ static void formulate2x2MixingMatrix_fx(
         q_div = Q17;
         move16();
     }
+#endif
 
-#ifndef FIX_1326_SPEEDUP_09
     matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
-#endif
 
     exp = L_norm_arr( div_fx, BINAURAL_CHANNELS );
     scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
@@ -4884,7 +4916,11 @@ static void formulate2x2MixingMatrix_fx(
             W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
             IF( W_tmp != 0 )
             {
+#ifdef FIX_1326_SPEEDUP_10
+                hdrm_re[chA][chB] = W_norm( W_tmp );
+#else
                 hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 );
+#endif
                 move16();
                 W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] );
                 tmpRe_fx[chA][chB] = W_extract_h( W_tmp );
@@ -4901,7 +4937,11 @@ static void formulate2x2MixingMatrix_fx(
             W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] );
             IF( W_tmp != 0 )
             {
+#ifdef FIX_1326_SPEEDUP_10
+                hdrm_im[chA][chB] = W_norm( W_tmp );
+#else
                 hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 );
+#endif
                 move16();
                 W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] );
                 tmpIm_fx[chA][chB] = W_extract_h( W_tmp );
-- 
GitLab


From 1a23f8d8deb4ce90aeac714fd274f3fa6c380f16 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 14:56:15 +0100
Subject: [PATCH 14/41] apply clang format patch

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index e99cb6b34..2336151e0 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -58,12 +58,12 @@
 //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
 //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
 //#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
-#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS                    
-#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS 
-#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS
+#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS
+#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS
+#define FIX_1326_SPEEDUP_11 // tiny speedup                    //  .1 WMOPS
 //#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                 -->DONTUSE
 //#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS
-//#define FIX_1326_SPEEDUP_14 // 
+//#define FIX_1326_SPEEDUP_14 //
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -2201,7 +2201,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             Word16 exp_temp;
             tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
             tmp2 = ISqrt32( tmp2, &exp_temp );
-            gain_fx = Mpy_32_32(tmp2, Sqrt32(tmp1, &exp1));
+            gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) );
             q_gain = sub( 31, add( exp_temp, exp1 ) );
         }
 #else
@@ -2214,7 +2214,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         gain_fx = Sqrt32( tmp2, &exp2 );
         q_gain = sub( 31, exp2 );
 #endif
-     
+
 
         // 1073741824 = 4 in Q28
         IF( LT_16( q_gain, Q28 ) )
-- 
GitLab


From c8151a8382939a33af975c6c5bf7e192030fcc10 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 15:33:23 +0100
Subject: [PATCH 15/41] added assert testing

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 34 +++++++++++++------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 2336151e0..cc6d977f0 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -58,12 +58,12 @@
 //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
 //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
 //#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
-#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS
-#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS
-#define FIX_1326_SPEEDUP_11 // tiny speedup                    //  .1 WMOPS
-//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                 -->DONTUSE
-//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS
-//#define FIX_1326_SPEEDUP_14 //
+//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
+//#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
+//#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
+//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                   -->DONTUSE
+//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  -->USE? (pipe coming)
+#define FIX_1326_SPEEDUP_14 // test
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -3213,8 +3213,14 @@ static void eig2x2_fx(
     pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
     add_fx = 0.5f * (e1 + e2)*/
 
+    #ifdef FIX_1326_SPEEDUP_14
+    static int tstcnt = 0;
+    #endif
     IF( L_and( c_re == 0, c_im == 0 ) )
     {
+#ifdef FIX_1326_SPEEDUP_14
+        tstcnt ++;
+#endif
         /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0
         a_fx = (E1 - E2)^2
         pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
@@ -3232,6 +3238,9 @@ static void eig2x2_fx(
         q_crossSquare = sub( add( q_c, q_c ), 31 );
         IF( EQ_32( e1, e2 ) )
         {
+#ifdef FIX_1326_SPEEDUP_14
+            tstcnt++;
+#endif
             /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx
             pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) =  sqrt(0, crossSquare_fx)*/
             test();
@@ -3265,6 +3274,9 @@ static void eig2x2_fx(
 
             IF( GT_16( sub( q_c, q_e ), Q15 ) )
             {
+#ifdef FIX_1326_SPEEDUP_14
+                tstcnt++;
+#endif
                 pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 );
                 q_tmp2 = q_e;
                 move16();
@@ -3288,6 +3300,10 @@ static void eig2x2_fx(
             }
         }
     }
+#ifdef FIX_1326_SPEEDUP_14
+    if (tstcnt>10000)
+      assert(0);
+#endif
     // add_fx = 0.5 * (e1 + e2)
     add_fx = L_shr( L_add( e1, e2 ), 1 );
     q_tmp1 = q_e;
@@ -4669,7 +4685,6 @@ static void formulate2x2MixingMatrix_fx(
     move32();
     pop_wmops();
 
-    push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );
     /* Matrix multiplication, tmp = Ky' * G_hat * Q */
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
@@ -4695,14 +4710,11 @@ static void formulate2x2MixingMatrix_fx(
             move32();
         }
     }
-    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/
 
     q_temp = sub( add( q_ky, q_GhatQ ), 31 );
 
-    push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );
     /* A = Ky' * G_hat * Q * Kx (see publication) */
     matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );
-    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/
 
     push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );
     /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
@@ -4980,7 +4992,7 @@ static void formulate2x2MixingMatrix_fx(
                          0 /*int Bscale*/,
 #endif
                          Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
-    pop_wmops();                                 /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/
+    pop_wmops();                                 /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );*/
 
     push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
-- 
GitLab


From c6d0d7dab2acc799534d4a284cf603826d37460c Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 5 Mar 2025 15:35:49 +0100
Subject: [PATCH 16/41] apply clang format patch

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index cc6d977f0..c5b3500b2 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -3213,13 +3213,13 @@ static void eig2x2_fx(
     pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
     add_fx = 0.5f * (e1 + e2)*/
 
-    #ifdef FIX_1326_SPEEDUP_14
+#ifdef FIX_1326_SPEEDUP_14
     static int tstcnt = 0;
-    #endif
+#endif
     IF( L_and( c_re == 0, c_im == 0 ) )
     {
 #ifdef FIX_1326_SPEEDUP_14
-        tstcnt ++;
+        tstcnt++;
 #endif
         /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0
         a_fx = (E1 - E2)^2
@@ -3301,8 +3301,8 @@ static void eig2x2_fx(
         }
     }
 #ifdef FIX_1326_SPEEDUP_14
-    if (tstcnt>10000)
-      assert(0);
+    if ( tstcnt > 10000 )
+        assert( 0 );
 #endif
     // add_fx = 0.5 * (e1 + e2)
     add_fx = L_shr( L_add( e1, e2 ), 1 );
-- 
GitLab


From 2986c80dd253c39ee60868f08bd0e89d2b64a683 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Fri, 7 Mar 2025 09:52:48 +0100
Subject: [PATCH 17/41] deactivate Speedup 14, activate Speedup 13 for testing

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 75 +++++++++++++++----
 1 file changed, 61 insertions(+), 14 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index c5b3500b2..ad9769583 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -49,7 +49,6 @@
 // NULL: 179.292
 
 
-//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams     //no occurence        --> DONT USE
 //#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                    --> USE
 //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                    --> USE
 //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                    --> USE
@@ -58,12 +57,18 @@
 //#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
 //#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
 //#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
-//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
-//#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
-//#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
-//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                   -->DONTUSE
-//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  -->USE? (pipe coming)
-#define FIX_1326_SPEEDUP_14 // test
+//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
+//#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS   pipe 48851 fails --> DONTUSEYET
+//#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
+//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                   --> DONTUSE
+//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  --> USE? (pipe tbd)
+//#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert   --> DONTUSE (pipes red, asserts!)
+//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
+//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)
+
+
+
+
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -943,9 +948,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
     }
 
     test();
-    push_wmops( "IDRBI cov matrices" );
+    push_wmops( "IDRBI cov matrices (IDRBCM)" );
     ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
-    pop_wmops(); /*push_wmops( "IDRBI cov matrices" );*/
+    pop_wmops(); /*push_wmops( "IDRBI cov matrices (IDRBCM)" );*/
 
     IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
     {
@@ -1168,7 +1173,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
     nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */
     move16();
-
+    push_wmops( "IDRBCM inits" );
     q_earlyPartEneCorrection = s_min( Q31, add( getScaleFactor32( hDiracDecBin->earlyPartEneCorrection_fx, nBins ), hDiracDecBin->q_earlyPartEneCorrection ) );
     scale_sig32( hDiracDecBin->earlyPartEneCorrection_fx, nBins, sub( q_earlyPartEneCorrection, hDiracDecBin->q_earlyPartEneCorrection ) );
     hDiracDecBin->q_earlyPartEneCorrection = q_earlyPartEneCorrection;
@@ -1202,6 +1207,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */
         move16();
     }
+    pop_wmops(); /*push_wmops( "IDRBCM inits" );*/
 
     /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */
     applyLowBitRateEQ = 0;
@@ -1214,11 +1220,13 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         move16();
         IF( EQ_32( ivas_total_brate, IVAS_16k4 ) )
         {
+            push_wmops( "IDRBCM Determine EQ_low_rates" );
             FOR( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
             {
                 lowBitRateEQ_fx[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = L_add( L_shr( lowBitRateBinauralEQ_fx[bin], 1 ), ONE_IN_Q30 ); // Q31
                 move32();
             }
+            pop_wmops(); /*push_wmops( "IDRBCM Determine EQ_low_rates" );*/
         }
         ELSE
         {
@@ -1237,6 +1245,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
     exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below
 
+    push_wmops( "IDRBCM input Matrix" );
     /* Calculate input covariance matrix */
     FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
     {
@@ -1271,7 +1280,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
         }
     }
+    pop_wmops(); /*push_wmops( "IDRBCM input Matrix" );*/
 
+    push_wmops( "IDRBCM apply EQ_low" );
     /* Apply EQ at low bit rates */
     IF( applyLowBitRateEQ != 0 )
     {
@@ -1324,7 +1335,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
         }
     }
+    pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/
 
+    push_wmops( "IDRBCM target matrix" );
     /* Determine target covariance matrix containing target binaural properties */
     FOR( bin = 0; bin < nBins; bin++ )
     {
@@ -1484,12 +1497,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 Word32 hrtfEneCenter_fx, hrtfEneSides_fx, hrtfEneRealized_fx;
                 Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e;
                 Word16 w1_fx, w2_fx, w3_fx, eq_fx;
-
+#ifdef FIX_1326_SPEEDUP_15
+                hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); //Q25
+#else
                 hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                     // Q25
                                           L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),              // Q25
                                                  L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),       // Q25
                                                         Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25
-
+#endif
                 /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
                  * The following formulas determine the gains for these sources.
                  * spreadCoh = 0: Only panning
@@ -1518,11 +1533,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
                 /* Apply the gain for the left source of the three coherent sources */
                 getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked );
-
+#ifdef FIX_1326_SPEEDUP_15
+                hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25
+#else
                 hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                     // Q25
                                          L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),              // Q25
                                                 L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),       // Q25
                                                        Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25
+#endif
                 lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) );               // Q25
                 lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) );               // Q25
                 rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) );               // Q25
@@ -1610,12 +1628,21 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 move16();
             }
 
+#ifdef FIX_1326_SPEEDUP_15
+            hrtfEne_fx[0] = Madd_32_32( Mpy_32_32( lRealp_fx, lRealp_fx ), lImagp_fx, lImagp_fx ); // Q( 2*q_lr - 31 )
+            hrtfEne_fx[1] = Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
+            move32();
+            move32();
+            hrtfCrossRe_fx = Madd_32_32( Mpy_32_32( lRealp_fx, rRealp_fx ), lImagp_fx, rImagp_fx );  // Q( 2*q_lr - 31 )
+            hrtfCrossIm_fx = Madd_32_32( Mpy_32_32( -lImagp_fx, rRealp_fx ), lRealp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
+#else
             hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 )
             hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
             move32();
             move32();
             hrtfCrossRe_fx = L_add( Mpy_32_32( lRealp_fx, rRealp_fx ), Mpy_32_32( lImagp_fx, rImagp_fx ) );  // Q( 2*q_lr - 31 )
             hrtfCrossIm_fx = L_add( Mpy_32_32( -lImagp_fx, rRealp_fx ), Mpy_32_32( lRealp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
+#endif
 
             /* Add direct part (1 or 2) covariance matrix */
             dirEne_fx = Mpy_32_32( ratio_fx, meanEnePerCh_fx ); // Q(q_meanEnePerCh - 1)
@@ -1690,7 +1717,11 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
             ELSE
             {
+#ifdef FIX_1326_SPEEDUP_15
+                hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( Madd_32_16( L_shl( surCoh_fx, 16 ), hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
+#else
                 hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( L_add( Mpy_32_16_1( hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), L_shl( surCoh_fx, 16 ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
+#endif
             }
             move32();
         }
@@ -1706,6 +1737,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
         move32();
     }
+    pop_wmops();/*push_wmops( "IDRBCM target matrix" );*/
 
     test();
     /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
@@ -3435,10 +3467,25 @@ static void eig2x2_fx(
             tmp2 = Mpy_32_32( s_fx, s_fx );
             q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
 
+
+#ifdef FIX_1326_SPEEDUP_16
+
+            {
+                Word16 tmp2_exp;
+                Word32 eps_tmp;
+                tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp );
+
+                //Add epsilon if relevant
+                eps_tmp = L_shl_sat( epsilon_mant, sub(epsilon_exp, tmp2_exp ));
+                tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) );
+
+                exp_tmp3 = add( tmp2_exp, 1 );
+            } 
+#else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
-
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
+#endif
 
 #if 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
-- 
GitLab


From fb3e05c804ec9194ab97c64ce3769c51424ae600 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Fri, 7 Mar 2025 09:54:39 +0100
Subject: [PATCH 18/41] deactivate Speedup 14, activate Speedup 13 for testing

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index ad9769583..620b4e323 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -61,7 +61,7 @@
 //#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS   pipe 48851 fails --> DONTUSEYET
 //#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
 //#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                   --> DONTUSE
-//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  --> USE? (pipe tbd)
+#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  --> USE? (pipe tbd)
 //#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert   --> DONTUSE (pipes red, asserts!)
 //#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
 //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)
-- 
GitLab


From 52740b20377af49c2a78be634f9adf9112b934fa Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Fri, 7 Mar 2025 10:09:06 +0100
Subject: [PATCH 19/41] apply clang format patch

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 36 +++++++++----------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 620b4e323..7721c3eaa 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -67,8 +67,6 @@
 //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)
 
 
-
-
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -1498,12 +1496,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e;
                 Word16 w1_fx, w2_fx, w3_fx, eq_fx;
 #ifdef FIX_1326_SPEEDUP_15
-                hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); //Q25
+                hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); // Q25
 #else
-                hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                     // Q25
-                                          L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),              // Q25
-                                                 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),       // Q25
-                                                        Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25
+                hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                               // Q25
+                                          L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),                        // Q25
+                                                 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),                 // Q25
+                                                        Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) );           // Q25
 #endif
                 /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
                  * The following formulas determine the gains for these sources.
@@ -1536,15 +1534,15 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 #ifdef FIX_1326_SPEEDUP_15
                 hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25
 #else
-                hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                     // Q25
-                                         L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),              // Q25
-                                                L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),       // Q25
-                                                       Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25
+                hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                          // Q25
+                                         L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),                   // Q25
+                                                L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),            // Q25
+                                                       Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) );      // Q25
 #endif
-                lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) );               // Q25
-                lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) );               // Q25
-                rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) );               // Q25
-                rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) );               // Q25
+                lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25
+                lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25
+                rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25
+                rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) ); // Q25
 
                 /* Apply the gain for the right source of the three coherent sources.
                  * -30 degrees to 330 wrapping due to internal functions. */
@@ -1737,7 +1735,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
         move32();
     }
-    pop_wmops();/*push_wmops( "IDRBCM target matrix" );*/
+    pop_wmops(); /*push_wmops( "IDRBCM target matrix" );*/
 
     test();
     /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
@@ -3475,12 +3473,12 @@ static void eig2x2_fx(
                 Word32 eps_tmp;
                 tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp );
 
-                //Add epsilon if relevant
-                eps_tmp = L_shl_sat( epsilon_mant, sub(epsilon_exp, tmp2_exp ));
+                // Add epsilon if relevant
+                eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, tmp2_exp ) );
                 tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) );
 
                 exp_tmp3 = add( tmp2_exp, 1 );
-            } 
+            }
 #else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
-- 
GitLab


From deae6b08447d0260424ce2f4b4d8455a94172ed5 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Fri, 7 Mar 2025 10:15:25 +0100
Subject: [PATCH 20/41] fix build warning

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 7721c3eaa..4db8980e1 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -1916,7 +1916,11 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         move16();
         Word32 tmp1, tmp2, res1, res2;
         Word16 q_tmp1, q_tmp2, q_realizedOutputEne, q_targetOutputEne, q_missingOutputEne, q_gain;
+#ifdef FIX_1326_SPEEDUP_13
+        Word16 exp1, q_processMtx_bin, q_processMtxDec_bin;
+#else
         Word16 exp1, exp2, q_processMtx_bin, q_processMtxDec_bin;
+#endif
 
         CrEneL_fx = 0;
         move32();
-- 
GitLab


From 2248f4d1a256b06ffc8e850d8f3c368a1be827e7 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Fri, 7 Mar 2025 11:08:02 +0100
Subject: [PATCH 21/41] activated speedup 15, 16 to test

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 118 +-----------------
 1 file changed, 6 insertions(+), 112 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 4db8980e1..45e1d2bed 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -53,18 +53,14 @@
 //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                    --> USE
 //#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                    --> USE
 //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                    --> USE
-//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt                // 3.5 WMOPS //Quite bad diffs  --> DONT USE
-//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
-//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
 //#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
 //#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
 //#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS   pipe 48851 fails --> DONTUSEYET
 //#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
-//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                   --> DONTUSE
-#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  --> USE? (pipe tbd)
-//#define FIX_1326_SPEEDUP_14 // test wether any of these paths is realy necessary, then assert   --> DONTUSE (pipes red, asserts!)
-//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
-//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)
+//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt           //   2.9 WMOPS                  --> USE
+
+#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
+#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)
 
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
@@ -3246,15 +3242,8 @@ static void eig2x2_fx(
     a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
     pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
     add_fx = 0.5f * (e1 + e2)*/
-
-#ifdef FIX_1326_SPEEDUP_14
-    static int tstcnt = 0;
-#endif
     IF( L_and( c_re == 0, c_im == 0 ) )
     {
-#ifdef FIX_1326_SPEEDUP_14
-        tstcnt++;
-#endif
         /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0
         a_fx = (E1 - E2)^2
         pm_fx = 0.5 * sqrt(max(0, a_fx)) = 0.5 * max(0, (e1 - e2)) */
@@ -3272,9 +3261,6 @@ static void eig2x2_fx(
         q_crossSquare = sub( add( q_c, q_c ), 31 );
         IF( EQ_32( e1, e2 ) )
         {
-#ifdef FIX_1326_SPEEDUP_14
-            tstcnt++;
-#endif
             /* if e1 - e2 = 0, then a_fx = 4 * crossSquare_fx
             pm_fx = 0.5 * sqrt(max(0, 4 * crossSquare_fx)) =  sqrt(0, crossSquare_fx)*/
             test();
@@ -3308,9 +3294,6 @@ static void eig2x2_fx(
 
             IF( GT_16( sub( q_c, q_e ), Q15 ) )
             {
-#ifdef FIX_1326_SPEEDUP_14
-                tstcnt++;
-#endif
                 pm_fx = L_shr( L_max( 0, L_abs( L_sub( e1, e2 ) ) ), 1 );
                 q_tmp2 = q_e;
                 move16();
@@ -3334,10 +3317,6 @@ static void eig2x2_fx(
             }
         }
     }
-#ifdef FIX_1326_SPEEDUP_14
-    if ( tstcnt > 10000 )
-        assert( 0 );
-#endif
     // add_fx = 0.5 * (e1 + e2)
     add_fx = L_shr( L_add( e1, e2 ), 1 );
     q_tmp1 = q_e;
@@ -4611,33 +4590,6 @@ static void formulate2x2MixingMatrix_fx(
 #endif
     }
     ELSE
-#ifdef FIX_1326_SPEEDUP_05
-    {
-        Word16 shift = norm_l( temp );
-#if 1 // oldcode
-        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-#else
-
-        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
-        exp_temp = sub( 30, q_ein );
-        if ( temp == 0 )
-        {
-            exp_temp = EPSILON_EXP;
-            move32();
-        }
-        if ( temp == 0 )
-        {
-            temp = EPSILON_MANT;
-            move32();
-        }
-#endif
-        temp = ISqrt32( temp, &exp_temp );
-        shift = sub( 31, q_eout );
-        Ghat_fx[0] = Mpy_32_32( Sqrt32( E_out1, &shift ), temp );
-        move32();
-        exp = add( shift, exp_temp );
-    }
-#else
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
         push_wmops( "formulate2x2MixingMatrix Division" );
@@ -4648,7 +4600,7 @@ static void formulate2x2MixingMatrix_fx(
         Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
     }
-#endif
+
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
@@ -4677,39 +4629,6 @@ static void formulate2x2MixingMatrix_fx(
 #endif
     }
     ELSE
-#ifdef FIX_1326_SPEEDUP_06
-    {
-        Word16 shift = norm_l( temp );
-#if 0 // oldcode
-        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-#else
-        temp = L_add( L_shl( temp, sub( shift, 1 ) ), L_shl_sat( EPSILON_MANT, sub( sub( EPSILON_EXP, shift ), 1 ) ) );
-        exp_temp = sub( 31 - 1, q_ein );
-        if ( temp == 0 )
-        {
-            exp_temp = add( 0, EPSILON_EXP );
-        }
-        if ( temp == 0 )
-        {
-            temp = L_add( 0, EPSILON_MANT );
-        }
-#endif
-#if 1 // oldcode - new code introduces too much noise
-        push_wmops( "formulate2x2MixingMatrix Division" );
-        temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
-        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
-        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
-#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
-        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
-#endif
-#else
-        temp = ISqrt32( temp, &exp_temp );
-        shift = sub( 31, q_eout );
-        Ghat_fx[1] = Mpy_32_32( temp, Sqrt32( E_out2, &shift ) );
-        exp1 = add( shift, exp_temp );
-#endif
-    }
-#else
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
         push_wmops( "formulate2x2MixingMatrix Division" );
@@ -4720,7 +4639,7 @@ static void formulate2x2MixingMatrix_fx(
         Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
     }
-#endif
+
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
@@ -4845,30 +4764,6 @@ static void formulate2x2MixingMatrix_fx(
 #endif
     pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/
 
-#ifdef FIX_1326_SPEEDUP_07
-    IF( D_fx[0] == 0 )
-    {
-        // temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
-        // exp = ONE_DIV_EPSILON_EXP;
-        div_fx[0] = L_add( 0, 2047986068 ); // Sqrt32( temp, &exp ); // Q = 31 - exp
-        exp = add( 0, 20 );
-    }
-    ELSE
-    {
-#if 1 // old code
-        push_wmops( "formulate2x2MixingMatrix Division" );
-        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
-        exp = sub( exp, sub( Q30, q_D ) );
-        pop_wmops();                      /*push_wmops( "formulate2x2MixingMatrix Division" )*/
-        div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
-        move32();
-#else
-        exp = sub( 31, q_D );
-        div_fx[0] = ISqrt32_2( D_fx[0], &exp );
-        move32();
-#endif
-    }
-#else
     IF( D_fx[0] == 0 )
     {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4888,7 +4783,6 @@ static void formulate2x2MixingMatrix_fx(
     }
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
-#endif
 
 #ifdef FIX_1326_SPEEDUP_08
     // This is just a shortcut to already existing optimizations  (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster
-- 
GitLab


From 7263a7eebb093028a1980e52b0b2db57ea6182fb Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 08:26:44 +0100
Subject: [PATCH 22/41] add SPEEDUP 17, 18, inactive

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 73 +++++++++++++++----
 1 file changed, 59 insertions(+), 14 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 45e1d2bed..3a10590f6 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -49,19 +49,20 @@
 // NULL: 179.292
 
 
-//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                    --> USE
-//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                    --> USE
-//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                    --> USE
-//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                    --> USE
-//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
-//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
-//#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS   pipe 48851 fails --> DONTUSEYET
-//#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
-//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt           //   2.9 WMOPS                  --> USE
-
-#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
-#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)
-
+//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
+//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
+//#define FIX_1326_SPEEDUP_10 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
+//#define FIX_1326_SPEEDUP_11 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
+//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
+
+//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd        //  .1 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_17 // use 1/x                            //  1  WMOPS                      --> USE?
+//#define FIX_1326_SPEEDUP_18 // structural speedup                   // 1 WMOPS                      --> USE?
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
@@ -3343,6 +3344,7 @@ static void eig2x2_fx(
 
     /* Numeric case, when input is practically zeros */
     // IF( D_fx[0] < EPSILON_FX )
+
 #ifdef FIX_1326_SPEEDUP_02
     IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) )
     {
@@ -3370,7 +3372,7 @@ static void eig2x2_fx(
 #endif
 
     /* Numeric case, when input is near an identity matrix with a gain */
-#ifdef FIX_1326_SPEEDUP_03               // 178.932
+#ifdef FIX_1326_SPEEDUP_03 
     tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
     IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) )
@@ -3469,7 +3471,11 @@ static void eig2x2_fx(
 #endif
 
 #if 1
+#ifdef FIX_1326_SPEEDUP_17
+            tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp );
+#else
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
+#endif
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
@@ -3557,7 +3563,11 @@ static void eig2x2_fx(
 #endif
 
 #if 1
+#ifdef FIX_1326_SPEEDUP_17
+            tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp );
+#else
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
+#endif
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
@@ -3619,7 +3629,19 @@ static void eig2x2_fx(
             move16();
         }
     }
+#ifdef FIX_1326_SPEEDUP_18
+    if( q_U_1 != 0 )
+    {
+        *q_U = q_U_1;
+        move16();
+    }
 
+    if (q_U_1 == 0)
+    {
+        *q_U = q_U_2;
+        move16();
+    }
+#else
     IF( q_U_1 != 0 )
     {
         *q_U = q_U_1;
@@ -3629,6 +3651,7 @@ static void eig2x2_fx(
         *q_U = q_U_2;
     }
     move16();
+#endif
 
     return;
 }
@@ -4508,7 +4531,11 @@ static void formulate2x2MixingMatrix_fx(
     ELSE
     {
         push_wmops( "formulate2x2MixingMatrix Division" );
+#ifdef FIX_1326_SPEEDUP_17
+        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, maxEne_fx, &exp );
+#else
         maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp );
+#endif
         pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
     }
@@ -4630,6 +4657,11 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
+        if ( E_out2 == 0 )
+        {
+            static int a = 0;
+            a++;
+        }
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
         push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
@@ -4777,7 +4809,12 @@ static void formulate2x2MixingMatrix_fx(
     ELSE
     {
         push_wmops( "formulate2x2MixingMatrix Division" );
+#ifdef FIX_1326_SPEEDUP_17
+        temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[0], &exp );
+#else
+      
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
+#endif
         exp = sub( exp, sub( Q30, q_D ) );
         pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
     }
@@ -4811,7 +4848,11 @@ static void formulate2x2MixingMatrix_fx(
     ELSE
     {
         push_wmops( "formulate2x2MixingMatrix Division" );
+#ifdef FIX_1326_SPEEDUP_17
+        temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[1], &exp1 );
+#else
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 );
+#endif
         pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp1 = sub( exp1, sub( Q30, q_D ) );
     }
@@ -5021,7 +5062,11 @@ static void formulate2x2MixingMatrix_fx(
             Word16 Pre_shift, Pim_shift;
             temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
             push_wmops( "formulate2x2MixingMatrix Division" );
+#ifdef FIX_1326_SPEEDUP_17
+            temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, temp, &exp );
+#else
             temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
+#endif
             pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
             q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
 
-- 
GitLab


From f2a018b19a1fe121adfedb8c38abe8f8bf8c3604 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 08:41:59 +0100
Subject: [PATCH 23/41] add modified version of division '1/x'

---
 lib_com/basop_util.c | 35 +++++++++++++++++++++++++++++++++++
 lib_com/basop_util.h |  4 ++++
 2 files changed, 39 insertions(+)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 609ca234d..0449ff125 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1066,6 +1066,41 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     return z;
 }
 
+/*1bit HR in x > 0*/
+Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s )
+{
+    Word32 z;
+    //Word16 sx;
+    Word16 sy;
+    Word32 sign;
+
+    /* assert (x >= (Word32)0); */
+    assert( y != (Word32) 0 );
+
+    sign = 0;
+    move16();
+
+    IF( y < 0 )
+    {
+        y = L_negate( y );
+        sign = L_xor( sign, 1 );
+    }
+
+    sy = norm_l( y );
+    y = L_shl( y, sy );
+    move16();
+    *s = add( 0, sy );
+    move16();
+
+    z = div_w( x, y );
+
+    if ( sign != 0 )
+    {
+        z = L_negate( z );
+    }
+    return z;
+
+}
 Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s )
 {
     Word16 z;
diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h
index a6db7dc8d..697f0b9c6 100644
--- a/lib_com/basop_util.h
+++ b/lib_com/basop_util.h
@@ -332,6 +332,10 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x,    /*!< i  : Numerator*/
                                             Word32 y,    /*!< i  : Denominator*/
                                             Word16 *s ); /*!< o  : Additional scalefactor difference*/
 
+Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x,    /*!< i  : Numerator*/
+                                            Word32 y,    /*!< i  : Denominator*/
+
+                                            Word16 *s ); /*!< o  : Additional scalefactor difference*/
 
 /************************************************************************/
 /*!
-- 
GitLab


From bdab4c96801c4c19d454e6d280ccc0cbf73752b1 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 08:46:15 +0100
Subject: [PATCH 24/41] applied clang format patch

---
 lib_com/basop_util.c                            | 3 +--
 lib_com/basop_util.h                            | 6 +++---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 8 ++++----
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 0449ff125..5eee369f8 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1070,7 +1070,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s )
 {
     Word32 z;
-    //Word16 sx;
+    // Word16 sx;
     Word16 sy;
     Word32 sign;
 
@@ -1099,7 +1099,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s )
         z = L_negate( z );
     }
     return z;
-
 }
 Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s )
 {
diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h
index 697f0b9c6..1ef2cd8e7 100644
--- a/lib_com/basop_util.h
+++ b/lib_com/basop_util.h
@@ -332,10 +332,10 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x,    /*!< i  : Numerator*/
                                             Word32 y,    /*!< i  : Denominator*/
                                             Word16 *s ); /*!< o  : Additional scalefactor difference*/
 
-Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x,    /*!< i  : Numerator*/
-                                            Word32 y,    /*!< i  : Denominator*/
+Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, /*!< i  : Numerator*/
+                                              Word32 y, /*!< i  : Denominator*/
 
-                                            Word16 *s ); /*!< o  : Additional scalefactor difference*/
+                                              Word16 *s ); /*!< o  : Additional scalefactor difference*/
 
 /************************************************************************/
 /*!
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 3a10590f6..399ac50ca 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -3372,7 +3372,7 @@ static void eig2x2_fx(
 #endif
 
     /* Numeric case, when input is near an identity matrix with a gain */
-#ifdef FIX_1326_SPEEDUP_03 
+#ifdef FIX_1326_SPEEDUP_03
     tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
     IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) )
@@ -3630,13 +3630,13 @@ static void eig2x2_fx(
         }
     }
 #ifdef FIX_1326_SPEEDUP_18
-    if( q_U_1 != 0 )
+    if ( q_U_1 != 0 )
     {
         *q_U = q_U_1;
         move16();
     }
 
-    if (q_U_1 == 0)
+    if ( q_U_1 == 0 )
     {
         *q_U = q_U_2;
         move16();
@@ -4812,7 +4812,7 @@ static void formulate2x2MixingMatrix_fx(
 #ifdef FIX_1326_SPEEDUP_17
         temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[0], &exp );
 #else
-      
+
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
 #endif
         exp = sub( exp, sub( Q30, q_D ) );
-- 
GitLab


From 0fb2d30b8285abdbc76468fc43848939f863a03c Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 08:47:08 +0100
Subject: [PATCH 25/41] activated SPEEDUP 17, 18, for test

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 399ac50ca..682a881cd 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -61,8 +61,8 @@
 
 //#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd        //  .1 WMOPS                      --> USE
 //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_17 // use 1/x                            //  1  WMOPS                      --> USE?
-//#define FIX_1326_SPEEDUP_18 // structural speedup                   // 1 WMOPS                      --> USE?
+#define FIX_1326_SPEEDUP_17 // use 1/x                            //  1  WMOPS                      --> USE?
+#define FIX_1326_SPEEDUP_18 // structural speedup                   // 1 WMOPS                      --> USE?
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
-- 
GitLab


From 73f2a5078c6fd02f318924c41ebb28d74ab3abf7 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 09:35:17 +0100
Subject: [PATCH 26/41] Activate all SPEEDUP macros available and change
 division 1/x a bit

---
 lib_com/basop_util.c                          |  8 +++--
 .../ivas_dirac_dec_binaural_functions_fx.c    | 35 ++++++++++---------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 5eee369f8..60174c3c9 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1080,12 +1080,16 @@ Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s )
     sign = 0;
     move16();
 
-    IF( y < 0 )
+    if( y < 0 )
     {
-        y = L_negate( y );
         sign = L_xor( sign, 1 );
     }
 
+    if ( y < 0 )
+    {
+        y = L_negate( y );
+    }
+
     sy = norm_l( y );
     y = L_shl( y, sy );
     move16();
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 682a881cd..f3e6b54d6 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -46,23 +46,24 @@
 #include "wmc_auto.h"
 
 // MHZ NUMBERS:
-// NULL: 179.292
-
-
-//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
-//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
-//#define FIX_1326_SPEEDUP_10 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
-//#define FIX_1326_SPEEDUP_11 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
-//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
-
-//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd        //  .1 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_17 // use 1/x                            //  1  WMOPS                      --> USE?
-#define FIX_1326_SPEEDUP_18 // structural speedup                   // 1 WMOPS                      --> USE?
+// NULL: 178.407
+// ALL: 169.499
+
+
+#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
+#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
+#define FIX_1326_SPEEDUP_10 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
+#define FIX_1326_SPEEDUP_11 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
+#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
+
+#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd        //  .1 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_17 // use 1/x                          // 1.25WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
-- 
GitLab


From f2b3f155d5d77c922553381a7659f1452230a77f Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 10:33:50 +0100
Subject: [PATCH 27/41] deactivate 1/x macro, activate all others

---
 lib_com/basop_util.c                            | 2 +-
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 60174c3c9..273667255 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1080,7 +1080,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s )
     sign = 0;
     move16();
 
-    if( y < 0 )
+    if ( y < 0 )
     {
         sign = L_xor( sign, 1 );
     }
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index f3e6b54d6..333e7603a 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -62,7 +62,7 @@
 
 #define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd        //  .1 WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_17 // use 1/x                          // 1.25WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_17 // use 1/x                          // 1.25WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
-- 
GitLab


From d4f5a9acea1e0583a3f4f4ffd2b6e4d2af7a7dec Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 09:46:33 +0000
Subject: [PATCH 28/41] revert division variation

---
 lib_com/basop_util.c | 44 +-------------------------------------------
 1 file changed, 1 insertion(+), 43 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 273667255..b7ee35ab3 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1010,7 +1010,6 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
-
 Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 {
     Word32 z;
@@ -1018,8 +1017,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     Word16 sy;
     Word32 sign;
 
-    // push_wmops( "BASOP_Util_Divide3232_Scale_cadence" );
-
     /* assert (x >= (Word32)0); */
     assert( y != (Word32) 0 );
 
@@ -1041,7 +1038,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     IF( x == (Word32) 0 )
     {
         *s = 0;
-        // pop_wmops();
         return ( (Word32) 0 );
     }
 
@@ -1062,48 +1058,10 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     {
         z = L_negate( z );
     }
-    // pop_wmops();
-    return z;
-}
-
-/*1bit HR in x > 0*/
-Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, Word32 y, Word16 *s )
-{
-    Word32 z;
-    // Word16 sx;
-    Word16 sy;
-    Word32 sign;
 
-    /* assert (x >= (Word32)0); */
-    assert( y != (Word32) 0 );
-
-    sign = 0;
-    move16();
-
-    if ( y < 0 )
-    {
-        sign = L_xor( sign, 1 );
-    }
-
-    if ( y < 0 )
-    {
-        y = L_negate( y );
-    }
-
-    sy = norm_l( y );
-    y = L_shl( y, sy );
-    move16();
-    *s = add( 0, sy );
-    move16();
-
-    z = div_w( x, y );
-
-    if ( sign != 0 )
-    {
-        z = L_negate( z );
-    }
     return z;
 }
+
 Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s )
 {
     Word16 z;
-- 
GitLab


From cb8c3aa9793f09e28933e9ca9de5da91447c425b Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 09:47:44 +0000
Subject: [PATCH 29/41] more revert division variation

---
 lib_com/basop_util.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h
index 1ef2cd8e7..a6db7dc8d 100644
--- a/lib_com/basop_util.h
+++ b/lib_com/basop_util.h
@@ -332,10 +332,6 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x,    /*!< i  : Numerator*/
                                             Word32 y,    /*!< i  : Denominator*/
                                             Word16 *s ); /*!< o  : Additional scalefactor difference*/
 
-Word32 BASOP_Util_Divide3232_Scale_cadence_1( Word32 x, /*!< i  : Numerator*/
-                                              Word32 y, /*!< i  : Denominator*/
-
-                                              Word16 *s ); /*!< o  : Additional scalefactor difference*/
 
 /************************************************************************/
 /*!
-- 
GitLab


From a0f0eac658a432e0bba4ec28c6db3350bdde6f7b Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 11:51:31 +0100
Subject: [PATCH 30/41] cleanup useless speedup macros

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 123 +++---------------
 1 file changed, 15 insertions(+), 108 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 333e7603a..8da4f82e7 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -47,22 +47,16 @@
 
 // MHZ NUMBERS:
 // NULL: 178.407
-// ALL: 169.499
+// ALL: 169.499 77 (170.650 wo 17)
 
 
 #define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
-#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
-#define FIX_1326_SPEEDUP_10 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
-#define FIX_1326_SPEEDUP_11 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
 #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
 
-#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd        //  .1 WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_17 // use 1/x                          // 1.25WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
@@ -1333,7 +1327,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
     }
     pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/
 
-    push_wmops( "IDRBCM target matrix" );
+    push_wmops( "IDRBCM target matrix (IDRBCMtm)" );
     /* Determine target covariance matrix containing target binaural properties */
     FOR( bin = 0; bin < nBins; bin++ )
     {
@@ -1359,6 +1353,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         meanEnePerCh_fx = Mpy_32_32( hDiracDecBin->earlyPartEneCorrection_fx[bin], subFrameTotalEne_fx[bin] ); // Q( q_meanEnePerCh )
         q_meanEnePerCh = add( sub( q_earlyPartEneCorrection, subFrameTotalEne_e[bin] ), 1 );                   // q_earlyPartEneCorrection + 31 - subFrameTotalEne_e[bin] - 31 + Q1(0.5f)
         /* Determine direct part target covariance matrix (for 1 or 2 directions) */
+        push_wmops( "IDRBCMtm LOOP1" );
         FOR( dirIndex = 0; dirIndex < hSpatParamRendCom->numSimultaneousDirections; dirIndex++ )
         {
             Word16 aziDeg, eleDeg;
@@ -1437,6 +1432,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 diffuseness_fx = 0;
                 move32();
             }
+
             IF( isIsmDirection )
             {
                 /* Objects cause lesser decorrelation reduction, to avoid removing all decorrelation when only objects are present */
@@ -1446,7 +1442,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             {
                 diffusenessValForDecorrelationReduction_fx = L_sub( diffusenessValForDecorrelationReduction_fx, ratio_fx ); /*Q30*/
             }
-
             IF( separateCenterChannelRendering )
             {
                 /* In masa + mono rendering mode, the center directions originate from phantom sources, so the
@@ -1493,14 +1488,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 Word32 hrtfEneCenter_fx, hrtfEneSides_fx, hrtfEneRealized_fx;
                 Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e;
                 Word16 w1_fx, w2_fx, w3_fx, eq_fx;
-#ifdef FIX_1326_SPEEDUP_15
-                hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); // Q25
-#else
+
                 hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                               // Q25
                                           L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),                        // Q25
                                                  L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),                 // Q25
                                                         Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) );           // Q25
-#endif
+
                 /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
                  * The following formulas determine the gains for these sources.
                  * spreadCoh = 0: Only panning
@@ -1529,14 +1522,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
                 /* Apply the gain for the left source of the three coherent sources */
                 getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked );
-#ifdef FIX_1326_SPEEDUP_15
-                hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25
-#else
+
                 hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                          // Q25
                                          L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),                   // Q25
                                                 L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),            // Q25
                                                        Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) );      // Q25
-#endif
+
                 lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25
                 lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25
                 rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25
@@ -1624,21 +1615,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 move16();
             }
 
-#ifdef FIX_1326_SPEEDUP_15
-            hrtfEne_fx[0] = Madd_32_32( Mpy_32_32( lRealp_fx, lRealp_fx ), lImagp_fx, lImagp_fx ); // Q( 2*q_lr - 31 )
-            hrtfEne_fx[1] = Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
-            move32();
-            move32();
-            hrtfCrossRe_fx = Madd_32_32( Mpy_32_32( lRealp_fx, rRealp_fx ), lImagp_fx, rImagp_fx );  // Q( 2*q_lr - 31 )
-            hrtfCrossIm_fx = Madd_32_32( Mpy_32_32( -lImagp_fx, rRealp_fx ), lRealp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
-#else
             hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 )
             hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
             move32();
             move32();
             hrtfCrossRe_fx = L_add( Mpy_32_32( lRealp_fx, rRealp_fx ), Mpy_32_32( lImagp_fx, rImagp_fx ) );  // Q( 2*q_lr - 31 )
             hrtfCrossIm_fx = L_add( Mpy_32_32( -lImagp_fx, rRealp_fx ), Mpy_32_32( lRealp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
-#endif
 
             /* Add direct part (1 or 2) covariance matrix */
             dirEne_fx = Mpy_32_32( ratio_fx, meanEnePerCh_fx ); // Q(q_meanEnePerCh - 1)
@@ -1655,6 +1637,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
             move32();
         }
+        pop_wmops(); //push_wmops( "IDRBCMtm LOOP1" );
 
         /* Add diffuse / ambient part covariance matrix */
         diffuseness_fx = L_max( 0, diffuseness_fx );               // Q30
@@ -1713,11 +1696,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
             ELSE
             {
-#ifdef FIX_1326_SPEEDUP_15
-                hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( Madd_32_16( L_shl( surCoh_fx, 16 ), hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
-#else
                 hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( L_add( Mpy_32_16_1( hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), L_shl( surCoh_fx, 16 ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
-#endif
             }
             move32();
         }
@@ -1733,7 +1712,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
         move32();
     }
-    pop_wmops(); /*push_wmops( "IDRBCM target matrix" );*/
+    pop_wmops(); /*push_wmops( "IDRBCM target matrix (IDRBCMtm)" );;*/
 
     test();
     /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
@@ -2183,14 +2162,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
         tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) );
         q_tmp2 = add( q_res, exp );
-#ifdef FIX_1326_SPEEDUP_11
-        {
-            Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) );
-            Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) );
-            realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) );
-            q_realizedOutputEne = s_min( q_CrEne, q_tmp2 );
-        }
-#else
+
         IF( LT_16( q_CrEne, q_tmp2 ) )
         {
             realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) );
@@ -2203,7 +2175,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             q_realizedOutputEne = q_tmp2;
             move16();
         }
-#endif
+
         exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 );
         targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) );
         q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp );
@@ -3373,21 +3345,7 @@ static void eig2x2_fx(
 #endif
 
     /* Numeric case, when input is near an identity matrix with a gain */
-#ifdef FIX_1326_SPEEDUP_03
-    tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
-    IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) )
-    {
-        Ure_fx[0][0] = ONE_IN_Q30;
-        move32();
-        Ure_fx[1][1] = ONE_IN_Q30;
-        move32();
-        *q_U = Q30;
-        move16();
-
-        return;
-    }
-#else
     tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
     IF( LT_16( q_tmp1, q_tmp2 ) )
@@ -3418,7 +3376,6 @@ static void eig2x2_fx(
             return;
         }
     }
-#endif
 
     q_U_1 = 0;
     q_U_2 = 0;
@@ -3472,11 +3429,7 @@ static void eig2x2_fx(
 #endif
 
 #if 1
-#ifdef FIX_1326_SPEEDUP_17
-            tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp );
-#else
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
-#endif
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
@@ -3564,11 +3517,7 @@ static void eig2x2_fx(
 #endif
 
 #if 1
-#ifdef FIX_1326_SPEEDUP_17
-            tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp );
-#else
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
-#endif
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
@@ -4531,13 +4480,7 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
-        push_wmops( "formulate2x2MixingMatrix Division" );
-#ifdef FIX_1326_SPEEDUP_17
-        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, maxEne_fx, &exp );
-#else
         maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp );
-#endif
-        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
     }
     exp = norm_l( maxEneDiv_fx );
@@ -4620,9 +4563,7 @@ static void formulate2x2MixingMatrix_fx(
     ELSE
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-        push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
-        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
 #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
@@ -4664,9 +4605,7 @@ static void formulate2x2MixingMatrix_fx(
             a++;
         }
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-        push_wmops( "formulate2x2MixingMatrix Division" );
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
-        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
 #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4797,6 +4736,7 @@ static void formulate2x2MixingMatrix_fx(
 #endif
     pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/
 
+
     IF( D_fx[0] == 0 )
     {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4804,20 +4744,15 @@ static void formulate2x2MixingMatrix_fx(
         exp = sub( exp, sub( Q30, 62 ) );
 #else
         temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
+        move32();
         exp = ONE_DIV_EPSILON_EXP;
+        move16();
 #endif
     }
     ELSE
     {
-        push_wmops( "formulate2x2MixingMatrix Division" );
-#ifdef FIX_1326_SPEEDUP_17
-        temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[0], &exp );
-#else
-
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
-#endif
         exp = sub( exp, sub( Q30, q_D ) );
-        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
     }
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
@@ -4848,13 +4783,7 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
-        push_wmops( "formulate2x2MixingMatrix Division" );
-#ifdef FIX_1326_SPEEDUP_17
-        temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[1], &exp1 );
-#else
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 );
-#endif
-        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
         exp1 = sub( exp1, sub( Q30, q_D ) );
     }
     div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4869,17 +4798,6 @@ static void formulate2x2MixingMatrix_fx(
 
 
     // 1310720000 = 10,000.0f in Q17
-#ifdef FIX_1326_SPEEDUP_09
-    {
-        Word16 shift1 = s_max( sub( Q17, q_div ), 0 );
-        Word16 shift2 = s_max( sub( q_div, Q17 ), 0 );
-
-        div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div
-        move32();
-        div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div
-        move32();
-    }
-#else
     IF( LT_16( q_div, Q17 ) )
     {
         div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div
@@ -4896,7 +4814,6 @@ static void formulate2x2MixingMatrix_fx(
         q_div = Q17;
         move16();
     }
-#endif
 
     matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
 
@@ -4913,11 +4830,7 @@ static void formulate2x2MixingMatrix_fx(
             W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
             IF( W_tmp != 0 )
             {
-#ifdef FIX_1326_SPEEDUP_10
-                hdrm_re[chA][chB] = W_norm( W_tmp );
-#else
                 hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 );
-#endif
                 move16();
                 W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] );
                 tmpRe_fx[chA][chB] = W_extract_h( W_tmp );
@@ -5062,13 +4975,7 @@ static void formulate2x2MixingMatrix_fx(
         {
             Word16 Pre_shift, Pim_shift;
             temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-            push_wmops( "formulate2x2MixingMatrix Division" );
-#ifdef FIX_1326_SPEEDUP_17
-            temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, temp, &exp );
-#else
             temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
-#endif
-            pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
             q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
 
             Pre_shift = norm_l( Pre_fx[0][chB] );
-- 
GitLab


From f97ec39f828d4e605dcca2ed09a3bf3d3cce0f44 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 11:54:30 +0100
Subject: [PATCH 31/41] apply clang format patch

---
 .../ivas_dirac_dec_binaural_functions_fx.c     | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 8da4f82e7..101f76a37 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -1489,10 +1489,10 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e;
                 Word16 w1_fx, w2_fx, w3_fx, eq_fx;
 
-                hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                               // Q25
-                                          L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),                        // Q25
-                                                 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),                 // Q25
-                                                        Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) );           // Q25
+                hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                     // Q25
+                                          L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),              // Q25
+                                                 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),       // Q25
+                                                        Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25
 
                 /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
                  * The following formulas determine the gains for these sources.
@@ -1523,10 +1523,10 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 /* Apply the gain for the left source of the three coherent sources */
                 getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked );
 
-                hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                          // Q25
-                                         L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),                   // Q25
-                                                L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),            // Q25
-                                                       Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) );      // Q25
+                hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                     // Q25
+                                         L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),              // Q25
+                                                L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),       // Q25
+                                                       Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25
 
                 lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25
                 lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25
@@ -1637,7 +1637,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
             move32();
         }
-        pop_wmops(); //push_wmops( "IDRBCMtm LOOP1" );
+        pop_wmops(); // push_wmops( "IDRBCMtm LOOP1" );
 
         /* Add diffuse / ambient part covariance matrix */
         diffuseness_fx = L_max( 0, diffuseness_fx );               // Q30
-- 
GitLab


From 1d475785f8351fd7f3fa6f8021878273a945563e Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 12:00:04 +0100
Subject: [PATCH 32/41] deactivate all speedups

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 101f76a37..4a55a37cc 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -50,14 +50,14 @@
 // ALL: 169.499 77 (170.650 wo 17)
 
 
-#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
-#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
-
-#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
+//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
+
+//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
+//#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
-- 
GitLab


From cc08c7185e22852fd126643e5ba085e19571e393 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 12:16:25 +0000
Subject: [PATCH 33/41] clean up a bit

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 28 ++++++++-----------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 4a55a37cc..ce36e0ae3 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -1432,7 +1432,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 diffuseness_fx = 0;
                 move32();
             }
-
             IF( isIsmDirection )
             {
                 /* Objects cause lesser decorrelation reduction, to avoid removing all decorrelation when only objects are present */
@@ -1442,6 +1441,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             {
                 diffusenessValForDecorrelationReduction_fx = L_sub( diffusenessValForDecorrelationReduction_fx, ratio_fx ); /*Q30*/
             }
+
             IF( separateCenterChannelRendering )
             {
                 /* In masa + mono rendering mode, the center directions originate from phantom sources, so the
@@ -1527,11 +1527,10 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                                          L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),              // Q25
                                                 L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),       // Q25
                                                        Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25
-
-                lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25
-                lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25
-                rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25
-                rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) ); // Q25
+                lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) );               // Q25
+                lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) );               // Q25
+                rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) );               // Q25
+                rImagp_fx = L_add( rImagp_fx, Mpy_32_32( sidesMul_fx, rImagpTmp_fx ) );               // Q25
 
                 /* Apply the gain for the right source of the three coherent sources.
                  * -30 degrees to 330 wrapping due to internal functions. */
@@ -2162,7 +2161,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
         tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) );
         q_tmp2 = add( q_res, exp );
-
         IF( LT_16( q_CrEne, q_tmp2 ) )
         {
             realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) );
@@ -2198,6 +2196,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             q_missingOutputEne = q_targetOutputEne;
             move16();
         }
+
         tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 );
 
 #ifdef FIX_1326_SPEEDUP_13
@@ -2258,7 +2257,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         q_processMtxDec_bin = q_processMtxDec[bin];
         move16();
         move16();
-
         /* Store processing matrices */
         FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
         {
@@ -3216,6 +3214,7 @@ static void eig2x2_fx(
     a_fx = (e1 + e2) * (e1 + e2) - 4.0f * ((e1 * e2) - crossSquare_fx) = (e1 - e2)^2 + 4 * crossSquare_fx
     pm_fx = 0.5f * sqrtf(max(0.0f, a_fx))
     add_fx = 0.5f * (e1 + e2)*/
+
     IF( L_and( c_re == 0, c_im == 0 ) )
     {
         /* if c_re = 0 and c_im = 0, then crossSquare_fx = (c_re * c_re) + (c_im * c_im) = 0
@@ -3345,7 +3344,6 @@ static void eig2x2_fx(
 #endif
 
     /* Numeric case, when input is near an identity matrix with a gain */
-
     tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31
 
     IF( LT_16( q_tmp1, q_tmp2 ) )
@@ -3425,6 +3423,7 @@ static void eig2x2_fx(
 #else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
+
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
 #endif
 
@@ -3513,6 +3512,7 @@ static void eig2x2_fx(
 #else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
+
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
 #endif
 
@@ -4563,13 +4563,13 @@ static void formulate2x2MixingMatrix_fx(
     ELSE
     {
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
         exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
 #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
     }
-
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
 #endif
@@ -4599,19 +4599,14 @@ static void formulate2x2MixingMatrix_fx(
     }
     ELSE
     {
-        if ( E_out2 == 0 )
-        {
-            static int a = 0;
-            a++;
-        }
         temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
         exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
 #ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
     }
-
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
     Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
 #endif
@@ -4796,7 +4791,6 @@ static void formulate2x2MixingMatrix_fx(
     div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div
     move32();
 
-
     // 1310720000 = 10,000.0f in Q17
     IF( LT_16( q_div, Q17 ) )
     {
-- 
GitLab


From 4f33c171f1892b375ef96274ab4ada09801f2509 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 13:54:05 +0100
Subject: [PATCH 34/41] activate the big chunks - SPEEDUP 8, 13

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index ce36e0ae3..f63f27eb0 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -53,8 +53,8 @@
 //#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
 //#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
 //#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
-//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
+#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
 
 //#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
 //#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
-- 
GitLab


From 4c1d9288ffd0180ca25a2b0f083d3adf4770accb Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 14:32:17 +0100
Subject: [PATCH 35/41] activated speedups 1, 2, 4, 16, 18

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index f63f27eb0..7dee99218 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -50,14 +50,14 @@
 // ALL: 169.499 77 (170.650 wo 17)
 
 
-//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
 #define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
 #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
 
-//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
-//#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
+#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
@@ -4618,7 +4618,7 @@ static void formulate2x2MixingMatrix_fx(
     move32();
     Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat
     move32();
-    pop_wmops();
+    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix RegSMInv" );*/
 
     /* Matrix multiplication, tmp = Ky' * G_hat * Q */
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
-- 
GitLab


From 9c07d08c9a8733f22592fb9146b9563e089f45c9 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 15:56:56 +0100
Subject: [PATCH 36/41] rename optimization macros, move macros to options.h

---
 lib_com/options.h                             |   3 +
 .../ivas_dirac_dec_binaural_functions_fx.c    | 140 +-----------------
 2 files changed, 11 insertions(+), 132 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index ccaeca46a..ab7efb8fd 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -176,4 +176,7 @@
 #define FIX_ISSUE_1376                          /* VA: Fix for issue 1376 (issue with GSC excitation) */
 #define OPT_SBA_AVOID_SPAR_RESCALE              /* Optimization made to spar decoder and IGF */
 #define NONBE_FIX_1386_STEREO_DMX_EVS_PHA       /* Orange: Fix for stereo DMX / PHA mode : Change the filter taps resolution (Q31->Q30), improve precision for the IR window, for the ILD & IPD smoothing in sub-bands, for the ISD counters and for ICCr. */
+#define FIX_1326_SUBSTITUTE_CMPMANT32EXP        /* FhG: Minor WMOPS tuning*/
+#define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT     /* FhG: WMOPS tuning */
+#define FIX_1326_SPEEDUP_eig2x2_fx              /* FhG: Minor WMOPS tuning*/
 #endif
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 7dee99218..7f2c6b4d4 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -46,18 +46,6 @@
 #include "wmc_auto.h"
 
 // MHZ NUMBERS:
-// NULL: 178.407
-// ALL: 169.499 77 (170.650 wo 17)
-
-
-#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
-#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE
-
-#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
-#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE
 
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
@@ -1892,11 +1880,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         move16();
         Word32 tmp1, tmp2, res1, res2;
         Word16 q_tmp1, q_tmp2, q_realizedOutputEne, q_targetOutputEne, q_missingOutputEne, q_gain;
-#ifdef FIX_1326_SPEEDUP_13
-        Word16 exp1, q_processMtx_bin, q_processMtxDec_bin;
-#else
         Word16 exp1, exp2, q_processMtx_bin, q_processMtxDec_bin;
-#endif
 
         CrEneL_fx = 0;
         move32();
@@ -2199,13 +2183,12 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
 
         tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 );
 
-#ifdef FIX_1326_SPEEDUP_13
+#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT
         {
-            Word16 exp_temp;
-            tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-            tmp2 = ISqrt32( tmp2, &exp_temp );
+            tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp2 );
+            tmp2 = ISqrt32( tmp2, &exp2 );
             gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) );
-            q_gain = sub( 31, add( exp_temp, exp1 ) );
+            q_gain = sub( 31, add( exp2, exp1 ) );
         }
 #else
         {
@@ -3317,7 +3300,7 @@ static void eig2x2_fx(
     /* Numeric case, when input is practically zeros */
     // IF( D_fx[0] < EPSILON_FX )
 
-#ifdef FIX_1326_SPEEDUP_02
+#ifdef FIX_1326_SUBSTITUTE_CMPMANT32EXP
     IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) )
     {
         Ure_fx[0][0] = ONE_IN_Q31;
@@ -3406,27 +3389,10 @@ static void eig2x2_fx(
             tmp2 = Mpy_32_32( s_fx, s_fx );
             q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
 
-
-#ifdef FIX_1326_SPEEDUP_16
-
-            {
-                Word16 tmp2_exp;
-                Word32 eps_tmp;
-                tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp );
-
-                // Add epsilon if relevant
-                eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, tmp2_exp ) );
-                tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) );
-
-                exp_tmp3 = add( tmp2_exp, 1 );
-            }
-#else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
-#endif
-
 #if 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
@@ -3499,22 +3465,10 @@ static void eig2x2_fx(
             q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
 
 
-#ifdef FIX_1326_SPEEDUP_04
-            Word16 exp_tmp2;
-            Word32 eps_tmp;
-
-            tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 );
-            eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) );
-
-            tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); // Add Epsilon if relevant
-
-            exp_tmp3 = add( exp_tmp2, 1 );
-#else
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
-#endif
 
 #if 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
@@ -3815,7 +3769,6 @@ static void matrixMul_fx(
     return;
 }
 
-#ifndef FIX_1326_SPEEDUP_01
 static void matrixTransp1Mul_fx(
     Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
     Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -3929,7 +3882,6 @@ static void matrixTransp1Mul_fx(
 
     return;
 }
-#endif /*FIX_1326_SPEEDUP_01*/
 
 static void matrixTransp2Mul_fx(
     Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/
@@ -4655,80 +4607,10 @@ static void formulate2x2MixingMatrix_fx(
     /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
        For matrix A that is P = A(A'A)^0.5 */
     push_wmops( "oPtoA MT1M" );
-#ifdef FIX_1326_SPEEDUP_01
-    // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
-
-    {
-        // Word16 chA, chB;
-        {
-            chA = 0, chB = 0;
-            tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ),
-                                                                 Are_fx[1][0], Are_fx[1][0] ),
-                                                     Aim_fx[0][0], Aim_fx[0][0] ),
-                                         Aim_fx[1][0], Aim_fx[1][0] );
-            move32();
-        }
-        {
-            // chA = 0, chB = 1;
-            tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ),
-                                                                 Are_fx[1][1], Are_fx[1][0] ),
-                                                     Aim_fx[0][1], Aim_fx[0][0] ),
-                                         Aim_fx[1][1], Aim_fx[1][0] );
-            move32();
-            tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ),
-                                                                 Are_fx[1][1], Aim_fx[1][0] ),
-                                                     Aim_fx[0][1], Are_fx[0][0] ),
-                                         Aim_fx[1][1], Are_fx[1][0] );
-            move32();
-        }
-        {
-            // chA = 1, chB = 0;
-            tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ),
-                                                                 Are_fx[1][1], Are_fx[1][1] ),
-                                                     Aim_fx[0][1], Aim_fx[0][1] ),
-                                         Aim_fx[1][1], Aim_fx[1][1] );
-            move32();
-        }
-        {
-            // chA = 1, chB = 1;
-        }
-
-        q_temp = sub( add( q_A, q_A ), 31 );
-
-        move16();
-        Word16 ZeroState = add( 1, 0 );
-        if ( tmpRe_fx[0][0] != 0 )
-        {
-            ZeroState = add( 0, 0 );
-        }
-        if ( tmpRe_fx[1][1] != 0 )
-        {
-            ZeroState = add( 0, 0 );
-        }
-        if ( tmpRe_fx[1][0] != 0 )
-        {
-            ZeroState = add( 0, 0 );
-        }
-        if ( tmpIm_fx[1][0] != 0 )
-        {
-            ZeroState = add( 0, 0 );
-        }
-
-        if ( sub( ZeroState, 1 ) == 0 )
-        {
-            q_temp = Q31;
-            move16();
-        }
-    }
-
-
-    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
-#else
     matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
 
-#endif
     pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/
 
 
@@ -4753,11 +4635,9 @@ static void formulate2x2MixingMatrix_fx(
     move32();
 
 #ifdef FIX_1326_SPEEDUP_08
-    // This is just a shortcut to already existing optimizations  (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster
-    {
-        div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
-        exp1 = add( 0, 20 );                // move32();
-    }
+    //Sqrt(1)
+    div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
+    exp1 = add( 0, 20 ); 
 
     IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt
     {
@@ -4841,11 +4721,7 @@ static void formulate2x2MixingMatrix_fx(
             W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] );
             IF( W_tmp != 0 )
             {
-#ifdef FIX_1326_SPEEDUP_10
-                hdrm_im[chA][chB] = W_norm( W_tmp );
-#else
                 hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 );
-#endif
                 move16();
                 W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] );
                 tmpIm_fx[chA][chB] = W_extract_h( W_tmp );
-- 
GitLab


From 596a724fac86c406dc66f1a2353bddeaff67a42f Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 15:06:07 +0000
Subject: [PATCH 37/41] fix: rename some macros. Cleanup: push/pop wmops

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 67 ++-----------------
 1 file changed, 6 insertions(+), 61 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 7f2c6b4d4..e8052b08c 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -45,8 +45,6 @@
 
 #include "wmc_auto.h"
 
-// MHZ NUMBERS:
-
 Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 
 /*-------------------------------------------------------------------------
@@ -506,9 +504,8 @@ void ivas_dirac_dec_binaural_render_fx(
     FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ )
     {
         Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] );
-        push_wmops( "IDR binaural internal (IDRBI)" );
         ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx );
-        pop_wmops(); /*push_wmops( "IDR binaural internal (IDRBI)" );*/
+
         FOR( ch = 0; ch < nchan_out; ch++ )
         {
             output_fx_local[ch] += n_samples_sf;
@@ -711,7 +708,6 @@ static void ivas_dirac_dec_binaural_internal_fx(
         }
     }
     /* CLDFB Analysis of input */
-    push_wmops( "IDRBI CLDFB ANALYSYS" );
     FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
     {
         FOR( ch = 0; ch < numInChannels; ch++ )
@@ -861,7 +857,6 @@ static void ivas_dirac_dec_binaural_internal_fx(
             }
         }
     }
-    pop_wmops(); /*push_wmops( "IDRBI CLDFB ANALYSYS" );*/
 
     test();
     IF( EQ_32( config_data.ivas_format, SBA_FORMAT ) || EQ_32( config_data.ivas_format, SBA_ISM_FORMAT ) )
@@ -926,9 +921,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
     }
 
     test();
-    push_wmops( "IDRBI cov matrices (IDRBCM)" );
     ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
-    pop_wmops(); /*push_wmops( "IDRBI cov matrices (IDRBCM)" );*/
 
     IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
     {
@@ -966,9 +959,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
         move16();
     }
 
-    push_wmops( "IDRBI proc matrices (IRDBI pm)" );
     ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, max_band_decorr, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, nchanSeparateChannels, st_ivas->hMasaIsmData );
-    pop_wmops(); /*push_wmops( "IDRBI proc matrices (IRDBI pm)" );*/
 
     q_inp = Q6;
     move16();
@@ -1014,10 +1005,8 @@ static void ivas_dirac_dec_binaural_internal_fx(
     hDiracDecBin->q_processMtxDecPrev = q_mat;
     move16();
 
-    push_wmops( "IDRBI processOutput" );
     ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat );
-    pop_wmops(); /*push_wmops( "IDRBI processOutput" );
-                  */
+
     hDiracDecBin->hDiffuseDist = NULL;
 
     hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] );
@@ -1151,7 +1140,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
     nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */
     move16();
-    push_wmops( "IDRBCM inits" );
+
     q_earlyPartEneCorrection = s_min( Q31, add( getScaleFactor32( hDiracDecBin->earlyPartEneCorrection_fx, nBins ), hDiracDecBin->q_earlyPartEneCorrection ) );
     scale_sig32( hDiracDecBin->earlyPartEneCorrection_fx, nBins, sub( q_earlyPartEneCorrection, hDiracDecBin->q_earlyPartEneCorrection ) );
     hDiracDecBin->q_earlyPartEneCorrection = q_earlyPartEneCorrection;
@@ -1185,7 +1174,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */
         move16();
     }
-    pop_wmops(); /*push_wmops( "IDRBCM inits" );*/
 
     /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */
     applyLowBitRateEQ = 0;
@@ -1198,13 +1186,11 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         move16();
         IF( EQ_32( ivas_total_brate, IVAS_16k4 ) )
         {
-            push_wmops( "IDRBCM Determine EQ_low_rates" );
             FOR( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
             {
                 lowBitRateEQ_fx[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = L_add( L_shr( lowBitRateBinauralEQ_fx[bin], 1 ), ONE_IN_Q30 ); // Q31
                 move32();
             }
-            pop_wmops(); /*push_wmops( "IDRBCM Determine EQ_low_rates" );*/
         }
         ELSE
         {
@@ -1223,7 +1209,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
     exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below
 
-    push_wmops( "IDRBCM input Matrix" );
     /* Calculate input covariance matrix */
     FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
     {
@@ -1258,9 +1243,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
         }
     }
-    pop_wmops(); /*push_wmops( "IDRBCM input Matrix" );*/
 
-    push_wmops( "IDRBCM apply EQ_low" );
     /* Apply EQ at low bit rates */
     IF( applyLowBitRateEQ != 0 )
     {
@@ -1313,9 +1296,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
         }
     }
-    pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/
 
-    push_wmops( "IDRBCM target matrix (IDRBCMtm)" );
     /* Determine target covariance matrix containing target binaural properties */
     FOR( bin = 0; bin < nBins; bin++ )
     {
@@ -1341,7 +1322,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         meanEnePerCh_fx = Mpy_32_32( hDiracDecBin->earlyPartEneCorrection_fx[bin], subFrameTotalEne_fx[bin] ); // Q( q_meanEnePerCh )
         q_meanEnePerCh = add( sub( q_earlyPartEneCorrection, subFrameTotalEne_e[bin] ), 1 );                   // q_earlyPartEneCorrection + 31 - subFrameTotalEne_e[bin] - 31 + Q1(0.5f)
         /* Determine direct part target covariance matrix (for 1 or 2 directions) */
-        push_wmops( "IDRBCMtm LOOP1" );
         FOR( dirIndex = 0; dirIndex < hSpatParamRendCom->numSimultaneousDirections; dirIndex++ )
         {
             Word16 aziDeg, eleDeg;
@@ -1624,7 +1604,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
             move32();
         }
-        pop_wmops(); // push_wmops( "IDRBCMtm LOOP1" );
 
         /* Add diffuse / ambient part covariance matrix */
         diffuseness_fx = L_max( 0, diffuseness_fx );               // Q30
@@ -1699,7 +1678,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
         move32();
     }
-    pop_wmops(); /*push_wmops( "IDRBCM target matrix (IDRBCMtm)" );;*/
 
     test();
     /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
@@ -1865,7 +1843,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         move16();
     }
 
-    push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );
     FOR( bin = 0; bin < nBins; bin++ )
     {
         Word32 tmpMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], gain_fx;
@@ -1889,7 +1866,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         q_CrEne = Q31;
         move16();
 
-        push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );
         IF( GT_16( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) )
         {
             hDiracDecBin->ChEne_fx[1][bin] = L_shr( hDiracDecBin->ChEne_fx[1][bin], sub( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) );
@@ -1959,9 +1935,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                                      hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossImOut_fx[bin],
                                      hDiracDecBin->q_ChCrossOut,
                                      prototypeMtx_fx, Mre_fx, Mim_fx, &q_M, hDiracDecBin->reqularizationFactor_fx );
-        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );*/
 
-        push_wmops( "IDRBI pm LOOP1 sec B" );
         IF( LT_16( hDiracDecBin->q_ChEne, hDiracDecBin->q_ChCross ) )
         {
             CxRe_fx[0][0] = hDiracDecBin->ChEne_fx[0][bin];
@@ -2015,13 +1989,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
 #endif
             resultMtxRe_fx, resultMtxIm_fx, &q_res );
 
-        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec B" );*/
-
         /* When below the frequency limit where decorrelation is applied, we inject the decorrelated
          * residual (or missing) signal component. The procedure is active when there are not enough independent
          * signal energy to synthesize a signal with the target covariance matrix from the non-decorrelated signals */
-
-        push_wmops( "IDRBI pm LOOP1 sec C" );
         IF( LT_16( bin, max_band_decorr ) )
         {
             Word32 decorrelationReductionFactor_fx;
@@ -2137,9 +2107,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             q_Mdec = Q31;
             move16();
         }
-        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec C" );*/
 
-        push_wmops( "IDRBI pm LOOP1 sec D" );
         /* The regularizations at determining mixing matrices cause signal energy to be lost to some degree, which is compensated for here */
         tmp1 = L_add( CrEneL_fx, CrEneR_fx );
         exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
@@ -2274,10 +2242,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         move16();
         q_processMtxDec[bin] = sub( q_Mdec, 16 );
         move16();
-        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec D" );*/
-
 
-        push_wmops( "IDRBI pm LOOP1 sec E" );
         IF( separateCenterChannelRendering )
         {
             /* The rendering of the separate center channel in masa + mono mode.
@@ -2367,10 +2332,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                 }
             }
         }
-        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec E" );*/
     }
-    pop_wmops(); /*push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );*/
-
     /* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */
     minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx );
     minimum_s( q_processMtxPrev, nBins, &hDiracDecBin->q_processMtxPrev );
@@ -2390,7 +2352,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
     minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
     minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );
 
-    push_wmops( "IRDBI pm LOOP2" );
     FOR( bin = 0; bin < nBins; bin++ )
     {
         FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -2430,7 +2391,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             }
         }
     }
-    pop_wmops(); /*push_wmops( "IRDBI pm LOOP2" );*/
 
     return;
 }
@@ -3393,6 +3353,7 @@ static void eig2x2_fx(
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
+
 #if 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
@@ -3464,7 +3425,6 @@ static void eig2x2_fx(
             tmp2 = Mpy_32_32( s_fx, s_fx );
             q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );
 
-
             tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
             q_tmp2 = sub( 31, q_tmp2 );
 
@@ -3533,7 +3493,7 @@ static void eig2x2_fx(
             move16();
         }
     }
-#ifdef FIX_1326_SPEEDUP_18
+#ifdef FIX_1326_SPEEDUP_eig2x2_fx
     if ( q_U_1 != 0 )
     {
         *q_U = q_U_1;
@@ -4455,12 +4415,9 @@ static void formulate2x2MixingMatrix_fx(
     Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
     q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );
 
-    push_wmops( "formulate2x2MixingMatrix cholesky" );
     /* Cholesky decomposition of target / output covariance matrix */
     chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
-    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix cholesky" );*/
 
-    push_wmops( "formulate2x2MixingMatrix Eigendecomp" );
     /* Eigendecomposition of input covariance matrix */
     eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx );
 
@@ -4478,9 +4435,7 @@ static void formulate2x2MixingMatrix_fx(
     move32();
 
     matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx );
-    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Eigendecomp" );*/
 
-    push_wmops( "formulate2x2MixingMatrix RegSMInv" );
     /* Regularize the diagonal Sx for matrix inversion */
     Sx_fx[0] = L_max( L_shr( Sx_fx[0], 1 ), Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ) );
     Sx_fx[1] = L_max( L_shr( Sx_fx[1], 1 ), L_shl( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) );
@@ -4570,7 +4525,6 @@ static void formulate2x2MixingMatrix_fx(
     move32();
     Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat
     move32();
-    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix RegSMInv" );*/
 
     /* Matrix multiplication, tmp = Ky' * G_hat * Q */
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -4603,17 +4557,12 @@ static void formulate2x2MixingMatrix_fx(
     /* A = Ky' * G_hat * Q * Kx (see publication) */
     matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );
 
-    push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );
     /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
        For matrix A that is P = A(A'A)^0.5 */
-    push_wmops( "oPtoA MT1M" );
     matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
 
-    pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/
-
-
     IF( D_fx[0] == 0 )
     {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4634,7 +4583,7 @@ static void formulate2x2MixingMatrix_fx(
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
 
-#ifdef FIX_1326_SPEEDUP_08
+#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT
     //Sqrt(1)
     div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
     exp1 = add( 0, 20 ); 
@@ -4760,9 +4709,7 @@ static void formulate2x2MixingMatrix_fx(
                          0 /*int Bscale*/,
 #endif
                          Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
-    pop_wmops();                                 /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );*/
 
-    push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
 #if ( BINAURAL_CHANNELS != 2 )
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -4916,8 +4863,6 @@ static void formulate2x2MixingMatrix_fx(
                          0 /*int Bscale*/,
 #endif
                          Mre_fx, Mim_fx, q_M );
-    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );*/
-
     return;
 }
 
-- 
GitLab


From cfab49b3830826388292f9fe62787a1c773e620c Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Wed, 19 Mar 2025 16:44:16 +0100
Subject: [PATCH 38/41] apply clang format patch

---
 lib_rend/ivas_dirac_dec_binaural_functions_fx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index e8052b08c..be86281ef 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -4584,9 +4584,9 @@ static void formulate2x2MixingMatrix_fx(
     move32();
 
 #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT
-    //Sqrt(1)
+    // Sqrt(1)
     div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1
-    exp1 = add( 0, 20 ); 
+    exp1 = add( 0, 20 );
 
     IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt
     {
-- 
GitLab


From 5f45790784cd617c8a35309386167a8b4ff6a65b Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Thu, 20 Mar 2025 07:55:08 +0000
Subject: [PATCH 39/41] revert: push_wmops: renamed label

---
 lib_dec/ivas_jbm_dec_fx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_jbm_dec_fx.c b/lib_dec/ivas_jbm_dec_fx.c
index ce60c0d65..1918ce3f3 100644
--- a/lib_dec/ivas_jbm_dec_fx.c
+++ b/lib_dec/ivas_jbm_dec_fx.c
@@ -1875,7 +1875,7 @@ ivas_error ivas_jbm_dec_render_fx(
     move16();
     SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom;
 
-    push_wmops( "ivas_dec_render (IDR)" );
+    push_wmops( "ivas_dec_render" );
     /*----------------------------------------------------------------*
      * Initialization of local vars after struct has been set
      *----------------------------------------------------------------*/
-- 
GitLab


From 7ab17f0711234491d62a812cbb9755d5152bfc71 Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Thu, 20 Mar 2025 09:52:45 +0100
Subject: [PATCH 40/41] introduce FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2

---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 25 ++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index be86281ef..141fe8c4b 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -29,7 +29,7 @@
     the United Nations Convention on Contracts on the International Sales of Goods.
 
 *******************************************************************************************************/
-
+#define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2
 #include <stdint.h>
 #include "options.h"
 #include <assert.h>
@@ -4563,6 +4563,28 @@ static void formulate2x2MixingMatrix_fx(
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
 
+
+#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2
+    IF( D_fx[0] == 0 )
+    {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
+        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62
+        exp = sub( exp, sub( Q30, 62 ) );
+#else
+        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
+        move32();
+        exp = ONE_DIV_EPSILON_EXP;
+        move16();
+#endif
+        div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+        move32();
+    }
+    ELSE
+    {
+        exp = sub( 31, q_D );
+        div_fx[0] = ISqrt32( D_fx[0], &exp );
+    }
+#else
     IF( D_fx[0] == 0 )
     {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4582,6 +4604,7 @@ static void formulate2x2MixingMatrix_fx(
     }
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
+#endif /*FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2*/
 
 #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT
     // Sqrt(1)
-- 
GitLab


From 49363e47e693b6ea13b06be69fae0947b1c0d33c Mon Sep 17 00:00:00 2001
From: Fabian Bauer <fabian.bauer@iis-extern.fraunhofer.de>
Date: Thu, 20 Mar 2025 10:40:35 +0100
Subject: [PATCH 41/41] Revert "introduce
 FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2"

This reverts commit 18d10e60341ba2cd76c8161c396c02a8e7293290.
---
 .../ivas_dirac_dec_binaural_functions_fx.c    | 25 +------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 141fe8c4b..be86281ef 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -29,7 +29,7 @@
     the United Nations Convention on Contracts on the International Sales of Goods.
 
 *******************************************************************************************************/
-#define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2
+
 #include <stdint.h>
 #include "options.h"
 #include <assert.h>
@@ -4563,28 +4563,6 @@ static void formulate2x2MixingMatrix_fx(
 
     eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
 
-
-#ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2
-    IF( D_fx[0] == 0 )
-    {
-#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
-        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62
-        exp = sub( exp, sub( Q30, 62 ) );
-#else
-        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
-        move32();
-        exp = ONE_DIV_EPSILON_EXP;
-        move16();
-#endif
-        div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
-        move32();
-    }
-    ELSE
-    {
-        exp = sub( 31, q_D );
-        div_fx[0] = ISqrt32( D_fx[0], &exp );
-    }
-#else
     IF( D_fx[0] == 0 )
     {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
@@ -4604,7 +4582,6 @@ static void formulate2x2MixingMatrix_fx(
     }
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
-#endif /*FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT_2*/
 
 #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT
     // Sqrt(1)
-- 
GitLab