From bb3eca924e0e62a576dc0c0131891e70a5cc2434 Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Fri, 29 Nov 2024 16:41:54 +0100
Subject: [PATCH 01/14] Changes for IVAS-BASOP Ticket 1009: Complexity: High
 Complexity Overhead for ParamISM decoding to binaural

lib_com/basop_util.c:
Tuned instrumentation of the 32-bit division routine (cadence version);
it now computes 24 bits of accuracy instead of 32

lib_com/fft_fx.c:
Tuned instrumentation of small helper functions get_min_scalefactor and
L_norm_arr

lib_com/options.h:
Defined a macro for this fix (1009). It is only used in the binaural
renderer to simplify divisions (32x32).

lib_com/tools.c:
Tuned instrumentation of small helper functions minimum_s etc.

lib_rend/ivas_dirac_dec_binaural_functions.c:
Defined precalculated values for EPSILON with full precision
Replaced divisions by constants with multiplications
Replaced the square root of a division (division + sqrt) with ISqrt32 + Mul
Simplified all matrix multiplication functions

Best regards
Arthur Tritthart, Fraunhofer IIS, 29-NOV-2024
---
 lib_com/basop_util.c                         |  69 +++
 lib_com/fft_fx.c                             |  22 +
 lib_com/options.h                            |   6 +-
 lib_com/tools.c                              |  15 +-
 lib_rend/ivas_dirac_dec_binaural_functions.c | 506 +++++++++----------
 5 files changed, 329 insertions(+), 289 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index c465428fc..79d57198f 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1038,8 +1038,76 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
+Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits)
+{
+    Word32 z;           /* quotient accumulator */
+    Word16 sx;          /* normalisation shift applied to x */
+    Word16 sy;          /* normalisation shift applied to y */
+    Word32 sign;        /* non-zero when the quotient is negative */
+    Word16 iteration;
+    Flag   Carry;       /* carries each quotient bit from the x update into z */
+    Word16 s_val;       /* exponent written to *s: 1 - sx + sy */
+    /* Signed x / y by shift-and-subtract with 'bits' quotient bits; y must be non-zero. */
+    unset_carry(&Carry);
+    /* NOTE(review): result appears to read as x / y ~= ( return * 2^-31 ) * 2^( *s ) - confirm against callers. */
+    /* No assert on x >= 0: a negative x is handled through 'sign' below. */
+    assert( y != (Word32) 0 );
+
+    IF( x == (Word32) 0 )
+    {
+        *s = -31;
+        move16();
+        return ( (Word32) 0 );
+    }
+    /* 'sign' is non-zero exactly when x and y differ in their sign bit. */
+    sign = L_shr(L_xor(x,y), 31);
+    /* Normalise x, shift it right by one, then continue with its magnitude. */
+    sx = norm_l( x );
+    x = L_shl( x, sx );
+    x = L_shr( x, 1 );
+    s_val = sub( 1, sx );
+    if( x < 0 )
+    {
+        x = L_negate( x );
+    }
+    /* Same for y, but keep it negative so that L_add( x, y ) acts as x - |y|. */
+    sy = norm_l( y );
+    y = L_shl( y, sy );
+    y = L_shr( y, 1 );
+    s_val = add(s_val, sy );
+    if( y >= 0 )
+    {
+        y = L_negate( y );
+    }
+
+    *s = s_val;
+    move16();
+
+    z = L_sub(x, x);   // z = 0: the quotient accumulator starts empty
+    /* One quotient bit per pass; the first bit produced ends up most significant. */
+    for ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ )
+    {
+        if ( L_add(x, y) >= 0 )
+        {
+            x = DEPR_L_add_c(x, y, &Carry);  // x = x - |y|; always leaves Carry = 1 (quotient bit 1)
+        }
+        z = DEPR_L_add_c( z, z, &Carry );    // z = 2*z + Carry; always leaves Carry = 0 afterwards
+        x = L_add(x, x);                     // double the remainder for the next bit
+    }
+
+    if ( sign != 0 )
+    {
+        z = L_negate( z );
+    }
+    return L_shl(z, sub(31, bits));  // scale the 'bits'-bit quotient up; NOTE(review): assumes bits <= 31 - confirm
+}
+
+
 Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 {
+#if 1
+    return BASOP_Util_Divide3232_Scale_FhG(x,y,s,24);
+#else
     Word32 z;
     Word16 sx;
     Word16 sy;
@@ -1088,6 +1156,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
     }
 
     return z;
+#endif
 }
 
 Word16 BASOP_Util_Divide3232_Scale( Word32 x, Word32 y, Word16 *s )
diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c
index 6b8b49bd2..3e664fb56 100644
--- a/lib_com/fft_fx.c
+++ b/lib_com/fft_fx.c
@@ -7299,15 +7299,26 @@ Word16 L_norm_arr( Word32 *arr, Word16 size )
     Word16 q = 31;
     move16();
     FOR( Word16 i = 0; i < size; i++ )
+#if 0
     IF( arr[i] != 0 )
     {
         q = s_min( q, norm_l( arr[i] ) );
     }
+#else
+    {
+        Word16 q_tst;
+        q_tst = norm_l(arr[i]);
+        if (arr[i] != 0)
+           q = s_min(q, q_tst);
+    }
+    
+#endif
     return q;
 }
 
 Word16 get_min_scalefactor( Word32 x, Word32 y )
 {
+#if 0
     Word16 scf = Q31;
     move16();
     test();
@@ -7324,6 +7335,16 @@ Word16 get_min_scalefactor( Word32 x, Word32 y )
         scf = s_min( scf, norm_l( y ) );
     }
     return scf;
+#else
+    Word16 scf = Q31;
+    Word16 scf_y;
+    if (x != 0)
+        scf = norm_l( x );
+    scf_y = norm_l( y );
+    if (y != 0)
+        scf = s_min(scf_y, scf);
+    return scf;
+#endif
 }
 
 Flag is_zero_arr( Word32 *arr, Word16 size )
@@ -7335,4 +7356,5 @@ Flag is_zero_arr( Word32 *arr, Word16 size )
     }
 
     return 1;
+
 }
diff --git a/lib_com/options.h b/lib_com/options.h
index 095c183e8..9c09f9cbf 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -54,7 +54,7 @@
 
 #define SUPPORT_JBM_TRACEFILE                   /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */
 
-/*#define WMOPS*/                                   /* Activate complexity and memory counters */
+#define WMOPS                                   /* Activate complexity and memory counters */
 #ifdef WMOPS
 /*#define WMOPS_PER_FRAME*/                     /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */
 /*#define MEM_COUNT_DETAILS*/                   /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */
@@ -196,6 +196,10 @@
 #define FIX_953_WRONG_ENERGY_RATIO_MASA_EXT     /* Nok: Fix 953 wrong energy ratio value after shift and cast to Word8 */
 #define FIX_982_WRONG_DECODED_ENERGY_RATIO      /* Nokia: Fix 982 wrong energy in EXT mode and in second direction when present */
 #define FIX_999_WRONG_ISM_EXTENDED_METADATA     /* VA: fix 999: fix ISM extended metadata decoding */
+
+#define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC   /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */
+                                                /*      Replace computations with constants by setting of constants */
+                                                /*      Simplify matrix multiplications and some external helper routines */
 /* ################## End DEVELOPMENT switches ######################### */
 
 /* clang-format on */
diff --git a/lib_com/tools.c b/lib_com/tools.c
index 072cfa767..e4d5914e6 100644
--- a/lib_com/tools.c
+++ b/lib_com/tools.c
@@ -917,30 +917,23 @@ Word16 minimum_s(
     Word16 *min_val    /* o  : minimum value in the input vector */
 )
 {
-    Word16 i, ind, tmp;
-
+    Word16 i, ind;
     ind = 0;
     move16();
-    tmp = vec[0];
-    move16();
 
     FOR( i = 1; i < lvec; i++ )
     {
-        IF( LT_16( vec[i], tmp ) )
+        if( LT_16( vec[i], vec[ind] ) )
         {
-            ind = i;
-            move16();
-            tmp = vec[i];
-            move16();
+            ind = add(i, 0);
         }
     }
 
     if ( min_val != NULL )
     {
-        *min_val = tmp;
+        *min_val = vec[ind];
         move16();
     }
-
     return ind;
 }
 #else
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 6ff9685bf..32c266c6f 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -79,8 +79,11 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 #define LOG_10_BASE_2_Q29          1783446528 // Q29
 #define TAN_30_FX                  17157      // Q15
 #define INV_TAN30_FX               28377      // Q14
-#define EPSILON_MANT               1180591621 /* 1e-12 in Q70 */
+#define EPSILON_MANT               1180591621 /* 1e-12 = 0,5497558*(2^-39) in Q70 */
 #define EPSILON_EXP                ( -39 )
+#define ONE_DIV_EPSILON_MANT       1953125000 /* 1e+12 = 0,9094947*(2^40) */
+#define ONE_DIV_EPSILON_EXP        ( 40 )
+
 #endif
 #define ADAPT_HTPROTO_ROT_LIM_1 0.8f
 
@@ -866,10 +869,12 @@ void ivas_dirac_dec_binaural_render_fx(
     }
 
     output_length = 0;
+
     move16();
     FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ )
     {
         Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] );
+
         ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx );
 
         FOR( ch = 0; ch < nchan_out; ch++ )
@@ -1635,7 +1640,6 @@ static void ivas_dirac_dec_binaural_internal_fx(
         st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11;
         move16();
     }
-
     return;
 }
 #endif
@@ -2385,7 +2389,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
         }
     }
-
     /* Apply EQ at low bit rates */
     IF( applyLowBitRateEQ != 0 )
     {
@@ -2402,7 +2405,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
         }
     }
-
     test();
     test();
     IF( ( EQ_32( ivas_format, SBA_FORMAT ) || EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) )
@@ -2438,7 +2440,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
         }
     }
-
     /* Determine target covariance matrix containing target binaural properties */
     FOR( bin = 0; bin < nBins; bin++ )
     {
@@ -2534,7 +2535,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 move16();
                 gainCacheBaseIndex = add( 6, ismDirIndex );
             }
-
             diffuseness_fx = L_sub( diffuseness_fx, ratio_fx ); /* diffuseness = 1 - ratio1 - ratio2 */
 
             if ( diffuseness_fx < 0 )
@@ -2579,9 +2579,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 altSpreadCoh_fx = sub( 32767, shl_sat( div_s( numr, denr ), sub( den_e, num_e ) ) ); // 4289 = pi/6 in Q13
                 spreadCoh_fx = s_max( spreadCoh_fx, altSpreadCoh_fx );
             }
-
             getDirectPartGains_fx( bin, aziDeg, eleDeg, &lRealp_fx, &lImagp_fx, &rRealp_fx, &rImagp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex], isHeadtracked );
-
             Word16 q_lr = Q28;
             move16();
             if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural )
@@ -2591,7 +2589,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 spreadCoh_fx = 0;
                 move32();
             }
-
             IF( spreadCoh_fx > 0 )
             {
                 Word32 centerMul_fx, sidesMul_fx;
@@ -2644,7 +2641,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
                 /* Apply the gain for the right source of the three coherent sources.
                  * -30 degrees to 330 wrapping due to internal functions. */
-
                 getDirectPartGains_fx( bin, aziDeg + 330, eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 2], isHeadtracked );
 
                 hrtfEneSides_fx = L_add( hrtfEneSides_fx,
@@ -2666,7 +2662,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 eneCorrectionFactor_fx = BASOP_Util_Divide3232_Scale( L_add( Mpy_32_32( hrtfEneSides_fx, Mpy_32_32( sidesMul_fx, sidesMul_fx ) ),
                                                                              Mpy_32_32( hrtfEneCenter_fx, Mpy_32_32( centerMul_fx, centerMul_fx ) ) ),
                                                                       L_max( 1, hrtfEneRealized_fx ), &eneCorrectionFactor_e );
-
                 /* Weighting factors to determine appropriate target spectrum for spread coherent sound */
                 IF( LT_16( spreadCoh_fx, 16384 ) )
                 {
@@ -2723,7 +2718,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 q_lr = Q23;
                 move16();
             }
-
             hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 )
             hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
             move32();
@@ -2808,7 +2802,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
             move32();
         }
-
         /* Store parameters for formulating average diffuseness over frame */
         Word32 frameMeanDiffuseness = BASOP_Util_Add_Mant32Exp( hDiracDecBin->frameMeanDiffuseness_fx[bin], 2 /*Q29*/, diffEneValForDecorrelationReduction_fx, sub( 31, q_diffEneValForDecorrelationReduction ), &exp1 ); // exp = exp1
         frameMeanDiffusenessEneWeight_fx[bin] = L_add( frameMeanDiffusenessEneWeight_fx[bin], meanEnePerCh_fx );
@@ -2820,7 +2813,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
         move32();
     }
-
     test();
     /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
     IF( EQ_32( ivas_format, MASA_FORMAT ) && LT_32( ivas_total_brate, MASA_STEREO_MIN_BITRATE ) )
@@ -2923,7 +2915,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move16();
         }
     }
-
     return;
 }
 #endif
@@ -3155,7 +3146,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices(
             }
         }
     }
-
     return;
 }
 #else
@@ -3182,6 +3172,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
     Word32 ivas_total_brate;
     Word16 nchan_transport;
     Word16 exp;
+
     Word16 q_processMtx[CLDFB_NO_CHANNELS_MAX], q_processMtxPrev[CLDFB_NO_CHANNELS_MAX];
     Word16 q_processMtx_SCCR[CLDFB_NO_CHANNELS_MAX], q_processMtxPrev_SCCR[CLDFB_NO_CHANNELS_MAX];
     Word16 q_processMtxDec[CLDFB_NO_CHANNELS_MAX], q_processMtxDecPrev[CLDFB_NO_CHANNELS_MAX];
@@ -3299,7 +3290,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         }
         move32();
         move16();
-
         formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin],
                                      hDiracDecBin->q_ChEne,
                                      hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossIm_fx[bin],
@@ -3709,7 +3699,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
     move16();
     minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
     minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );
-
     FOR( bin = 0; bin < nBins; bin++ )
     {
         FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -3749,7 +3738,6 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             }
         }
     }
-
     return;
 }
 #endif
@@ -5082,7 +5070,6 @@ static void eig2x2_fx(
         move32();
         *q_U = Q31;
         move16();
-
         return;
     }
 
@@ -5099,7 +5086,6 @@ static void eig2x2_fx(
             move32();
             *q_U = Q30;
             move16();
-
             return;
         }
     }
@@ -5113,11 +5099,9 @@ static void eig2x2_fx(
             move32();
             *q_U = Q30;
             move16();
-
             return;
         }
     }
-
     q_U_1 = 0;
     q_U_2 = 0;
     move16();
@@ -5143,6 +5127,7 @@ static void eig2x2_fx(
 
         IF( GT_32( L_abs( tmp2 ), L_abs( tmp1 ) ) )
         {
+
             s_fx = tmp2;
             move32();
             exp = sub( norm_l( s_fx ), 1 );
@@ -5153,12 +5138,15 @@ static void eig2x2_fx(
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
-
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
-
+#else
+            normVal_fx = ISqrt32(tmp3, &exp_tmp3);
+            q_tmp2 = sub(31, exp_tmp3);
+#endif
             IF( LT_16( q_tmp1, q_c ) )
             {
                 c_re = L_shr( c_re, sub( q_c, q_tmp1 ) );
@@ -5222,12 +5210,15 @@ static void eig2x2_fx(
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
-
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
-
+#else
+            normVal_fx = ISqrt32(tmp3, &exp_tmp3);
+            q_tmp2 = sub(31, exp_tmp3);
+#endif
             IF( LT_16( q_tmp1, q_c ) )
             {
                 c_re = L_shr( c_re, sub( q_c, q_tmp1 ) );
@@ -5279,7 +5270,6 @@ static void eig2x2_fx(
             move16();
         }
     }
-
     IF( q_U_1 != 0 )
     *q_U = q_U_1;
     ELSE
@@ -5388,9 +5378,6 @@ static void matrixMul_fx(
     Word16 chA, chB;
     Word16 min_q_shift1, min_q_shift2;
     Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
-#ifndef IVAS_ENH64_CADENCE_CHANGES
-    Word32 tmp1, tmp2;
-#endif
 
     min_q_shift1 = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
     min_q_shift2 = sub( s_min( L_norm_arr( Bre_fx[0], size ), L_norm_arr( Bim_fx[0], size ) ), 1 );
@@ -5419,109 +5406,22 @@ static void matrixMul_fx(
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], Bim_fx[0][chB] ), Are_fx[chA][1], Bim_fx[1][chB] ) ) );
             move32();
 #else
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][0] >= 0 && Bre_fx[0][chB] >= 0 ) || ( Are_fx[chA][0] < 0 && Bre_fx[0][chB] < 0 ) )
-            {
-                tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] );
-            }
-            ELSE
-            {
-                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) );
-            }
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][1] >= 0 && Bre_fx[1][chB] >= 0 ) || ( Are_fx[chA][1] < 0 && Bre_fx[1][chB] < 0 ) )
-            {
-                tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[1][chB] );
-            }
-            ELSE
-            {
-                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) );
-            }
-            outRe_fx[chA][chB] = L_add( tmp1, tmp2 );
-            move32();
-
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][0] >= 0 && Bim_fx[0][chB] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bim_fx[0][chB] < 0 ) )
-            {
-                tmp1 = Mpy_32_32( Aim_fx[chA][0], Bim_fx[0][chB] );
-            }
-            ELSE
-            {
-                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) );
-            }
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][1] >= 0 && Bim_fx[1][chB] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bim_fx[1][chB] < 0 ) )
-            {
-                tmp2 = Mpy_32_32( Aim_fx[chA][1], Bim_fx[1][chB] );
-            }
-            ELSE
-            {
-                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) );
-            }
-            outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) );
-            move32();
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][0] >= 0 && Bre_fx[0][chB] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bre_fx[0][chB] < 0 ) )
-            {
-                tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] );
-            }
-            ELSE
-            {
-                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[0][chB] ) ) );
-            }
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][1] >= 0 && Bre_fx[1][chB] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bre_fx[1][chB] < 0 ) )
-            {
-                tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[1][chB] );
-            }
-            ELSE
-            {
-                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[1][chB] ) ) );
-            }
-            outIm_fx[chA][chB] = L_add( tmp1, tmp2 );
+            outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ),
+                                                                                Are_fx[chA][1], Bre_fx[1][chB] ),
+                                                                                Aim_fx[chA][0], Bim_fx[0][chB] ),
+                                                                                Aim_fx[chA][1], Bim_fx[1][chB] );
             move32();
-
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][0] >= 0 && Bim_fx[0][chB] >= 0 ) || ( Are_fx[chA][0] < 0 && Bim_fx[0][chB] < 0 ) )
-            {
-                tmp1 = Mpy_32_32( Are_fx[chA][0], Bim_fx[0][chB] );
-            }
-            ELSE
-            {
-                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bim_fx[0][chB] ) ) );
-            }
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][1] >= 0 && Bim_fx[1][chB] >= 0 ) || ( Are_fx[chA][1] < 0 && Bim_fx[1][chB] < 0 ) )
-            {
-                tmp2 = Mpy_32_32( Are_fx[chA][1], Bim_fx[1][chB] );
-            }
-            ELSE
-            {
-                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bim_fx[1][chB] ) ) );
-            }
-            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
+            outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ),
+                                                                                Aim_fx[chA][1], Bre_fx[1][chB] ),
+                                                                                Are_fx[chA][0], Bim_fx[0][chB] ),
+                                                                                Are_fx[chA][1], Bim_fx[1][chB] );
             move32();
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
         }
     }
     *q_out = sub( add( *q_A, *q_B ), 31 );
 
+
     move16();
     if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
     {
@@ -5571,77 +5471,20 @@ static void matrixTransp1Mul_fx(
 {
     Word16 chA, chB;
     Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
-    Word32 tmp1, tmp2;
 
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
-            test();
-            test();
-            test();
-            IF( ( ( ( Are_fx[0][chA] >= 0 ) && ( Bre_fx[0][chB] >= 0 ) ) || ( ( Are_fx[0][chA] < 0 ) && ( Bre_fx[0][chB] < 0 ) ) ) )
-            tmp1 = Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[0][chA] ), L_abs( Bre_fx[0][chB] ) ) );
-            test();
-            test();
-            test();
-            IF( ( ( ( Are_fx[1][chA] >= 0 ) && ( Bre_fx[1][chB] >= 0 ) ) || ( ( Are_fx[1][chA] < 0 ) && ( Bre_fx[1][chB] < 0 ) ) ) )
-            tmp2 = Mpy_32_32( Are_fx[1][chA], Bre_fx[1][chB] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bre_fx[1][chB] ) ) );
-            outRe_fx[chA][chB] = L_add( tmp1, tmp2 );
+            outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ),
+                                                                                Are_fx[1][chA], Bre_fx[1][chB] ),
+                                                                                Aim_fx[0][chA], Bim_fx[0][chB] ),
+                                                                                Aim_fx[1][chA], Bim_fx[1][chB] );
             move32();
-            test();
-            test();
-            test();
-            IF( ( ( ( L_negate( Aim_fx[0][chA] ) >= 0 ) && ( Bim_fx[0][chB] >= 0 ) ) || ( ( L_negate( Aim_fx[0][chA] ) < 0 ) && ( Bim_fx[0][chB] < 0 ) ) ) )
-            tmp1 = Mpy_32_32( -Aim_fx[0][chA], Bim_fx[0][chB] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( -Aim_fx[0][chA] ), L_abs( Bim_fx[0][chB] ) ) );
-            test();
-            test();
-            test();
-            IF( ( ( ( Aim_fx[1][chA] >= 0 ) && ( Bim_fx[1][chB] >= 0 ) ) || ( ( Aim_fx[1][chA] < 0 ) && ( Bim_fx[1][chB] < 0 ) ) ) )
-            tmp2 = Mpy_32_32( Aim_fx[1][chA], Bim_fx[1][chB] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) );
-            outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_sub( tmp1, tmp2 ) );
-            move32();
-
-            test();
-            test();
-            test();
-            IF( ( ( ( L_negate( Aim_fx[0][chA] ) >= 0 ) && ( Bre_fx[0][chB] >= 0 ) ) || ( ( L_negate( Aim_fx[0][chA] ) < 0 ) && ( Bre_fx[0][chB] < 0 ) ) ) )
-            tmp1 = Mpy_32_32( -Aim_fx[0][chA], Bre_fx[0][chB] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( -Aim_fx[0][chA] ), L_abs( Bre_fx[0][chB] ) ) );
-            test();
-            test();
-            test();
-            IF( ( ( ( Aim_fx[1][chA] >= 0 ) && ( Bre_fx[1][chB] >= 0 ) ) || ( ( Aim_fx[1][chA] < 0 ) && ( Bre_fx[1][chB] < 0 ) ) ) )
-            tmp2 = Mpy_32_32( Aim_fx[1][chA], Bre_fx[1][chB] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[1][chA] ), L_abs( Bre_fx[1][chB] ) ) );
-            outIm_fx[chA][chB] = L_sub( tmp1, tmp2 );
-            move32();
-
-            test();
-            test();
-            test();
-            IF( ( ( ( Are_fx[0][chA] >= 0 ) && ( Bim_fx[0][chB] >= 0 ) ) || ( ( Are_fx[0][chA] < 0 ) && ( Bim_fx[0][chB] < 0 ) ) ) )
-            tmp1 = Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[0][chA] ), L_abs( Bim_fx[0][chB] ) ) );
-            test();
-            test();
-            test();
-            IF( ( ( ( Are_fx[1][chA] >= 0 ) && ( Bim_fx[1][chB] >= 0 ) ) || ( ( Are_fx[1][chA] < 0 ) && ( Bim_fx[1][chB] < 0 ) ) ) )
-            tmp2 = Mpy_32_32( Are_fx[1][chA], Bim_fx[1][chB] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[1][chA] ), L_abs( Bim_fx[1][chB] ) ) );
-            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
+            outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ),
+                                                                                Are_fx[1][chA], Bim_fx[1][chB] ),
+                                                                                Aim_fx[0][chA], Bre_fx[0][chB] ),
+                                                                                Aim_fx[1][chA], Bre_fx[1][chB] );
             move32();
         }
     }
@@ -5697,9 +5540,6 @@ static void matrixTransp2Mul_fx(
     Word16 chA, chB;
     Word16 min_q_shift;
     Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
-#ifndef IVAS_ENH64_CADENCE_CHANGES
-    Word32 tmp1, tmp2;
-#endif
 
     min_q_shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 );
     scale_sig32( Are_fx[0], size, min_q_shift );
@@ -5726,72 +5566,15 @@ static void matrixTransp2Mul_fx(
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) );
             move32();
 #else
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][0] >= 0 && Bre_fx[chB][0] >= 0 ) || ( Are_fx[chA][0] < 0 && Bre_fx[chB][0] < 0 ) )
-            tmp1 = Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) );
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][1] >= 0 && Bre_fx[chB][1] >= 0 ) || ( Are_fx[chA][1] < 0 && Bre_fx[chB][1] < 0 ) )
-            tmp2 = Mpy_32_32( Are_fx[chA][1], Bre_fx[chB][1] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) );
-            outRe_fx[chA][chB] = L_add( tmp1, tmp2 );
-            move32();
-
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][0] >= 0 && L_negate( Bim_fx[chB][0] ) >= 0 ) || ( Aim_fx[chA][0] < 0 && L_negate( Bim_fx[chB][0] ) < 0 ) )
-            tmp1 = Mpy_32_32( Aim_fx[chA][0], -Bim_fx[chB][0] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( -Bim_fx[chB][0] ) ) );
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][1] >= 0 && L_negate( Bim_fx[chB][1] ) >= 0 ) || ( Aim_fx[chA][1] < 0 && L_negate( Bim_fx[chB][1] ) < 0 ) )
-            tmp2 = Mpy_32_32( Aim_fx[chA][1], -Bim_fx[chB][1] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) );
-            outRe_fx[chA][chB] = L_sub( outRe_fx[chA][chB], L_add( tmp1, tmp2 ) );
-            move32();
-
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][0] >= 0 && Bre_fx[chB][0] >= 0 ) || ( Aim_fx[chA][0] < 0 && Bre_fx[chB][0] < 0 ) )
-            tmp1 = Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][0] ), L_abs( Bre_fx[chB][0] ) ) );
-            test();
-            test();
-            test();
-            IF( ( Aim_fx[chA][1] >= 0 && Bre_fx[chB][1] >= 0 ) || ( Aim_fx[chA][1] < 0 && Bre_fx[chB][1] < 0 ) )
-            tmp2 = Mpy_32_32( Aim_fx[chA][1], Bre_fx[chB][1] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Aim_fx[chA][1] ), L_abs( Bre_fx[chB][1] ) ) );
-            outIm_fx[chA][chB] = L_add( tmp1, tmp2 );
+            outRe_fx[chA][chB] = Madd_32_32(Madd_32_32(Madd_32_32(Mpy_32_32(Are_fx[chA][0],Bre_fx[chB][0]), 
+                                                                            Are_fx[chA][1],Bre_fx[chB][1]), 
+                                                                            Aim_fx[chA][0],Bim_fx[chB][0]),
+                                                                            Aim_fx[chA][1],Bim_fx[chB][1]);
             move32();
-
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][0] >= 0 && L_negate( Bim_fx[chB][0] ) >= 0 ) || ( Are_fx[chA][0] < 0 && L_negate( Bim_fx[chB][0] ) < 0 ) )
-            tmp1 = Mpy_32_32( Are_fx[chA][0], -Bim_fx[chB][0] );
-            ELSE
-                tmp1 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][0] ), L_abs( -Bim_fx[chB][0] ) ) );
-            test();
-            test();
-            test();
-            IF( ( Are_fx[chA][1] >= 0 && L_negate( Bim_fx[chB][1] ) >= 0 ) || ( Are_fx[chA][1] < 0 && L_negate( Bim_fx[chB][1] ) < 0 ) )
-            tmp2 = Mpy_32_32( Are_fx[chA][1], -Bim_fx[chB][1] );
-            ELSE
-                tmp2 = L_negate( Mpy_32_32( L_abs( Are_fx[chA][1] ), L_abs( -Bim_fx[chB][1] ) ) );
-            outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], L_add( tmp1, tmp2 ) );
+            outIm_fx[chA][chB] = Msub_32_32(Msub_32_32(Madd_32_32(Mpy_32_32(Aim_fx[chA][0],Bre_fx[chB][0]),
+                                                                            Aim_fx[chA][1],Bre_fx[chB][1]),
+                                                                            Are_fx[chA][0],Bim_fx[chB][0]),
+                                                                            Are_fx[chA][1],Bim_fx[chB][1]);
             move32();
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
         }
@@ -5869,6 +5652,7 @@ static void chol2x2_fx(
         }
         ELSE
         {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
             outRe[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_re, outRe[0][0], &exp );
             move32();
             q_re2 = add( sub( 31, exp ), sub( q_c, q_re1 ) );
@@ -5876,6 +5660,33 @@ static void chol2x2_fx(
             outIm[1][0] = BASOP_Util_Divide3232_Scale_cadence( c_im, outRe[0][0], &exp );
             move32();
             q_im = add( sub( 31, exp ), sub( q_c, q_re1 ) );
+#else
+            Word32 denom;
+            Word16 den_exp;
+            Word32 my_outRe, my_outIm;
+
+            /* Compute denom = 1.0 / outRe[0][0] */
+            denom = ISqrt32(outRe[0][0], &exp);
+            denom = Mpy_32_32(denom, denom);
+            den_exp = shl(exp, 1);
+
+            /* Normalise c_re, c_im */
+            exp = norm_l( c_re );
+            my_outRe = L_shl( c_re, exp );
+            q_re2 = add( q_c, exp );
+            exp = norm_l( c_im );
+            my_outIm = L_shl( c_im, exp );
+            q_im = add( q_c, exp );
+            
+            /* Multiply and store c_re*denom and c_im*denom */
+            outRe[1][0] = Mpy_32_32(denom, my_outRe);
+            move32();
+            q_re2 = sub(q_re2, den_exp);
+
+            outIm[1][0] = Mpy_32_32(denom, my_outIm);
+            move32();
+            q_im  = sub(q_im, den_exp);
+#endif
         }
         if ( outRe[1][0] == 0 )
         {
@@ -5891,11 +5702,23 @@ static void chol2x2_fx(
         temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im );
         q_tmp = sub( add( q_c, q_c ), 31 );
 
+       
         // 4611686 = Q62
         IF( e1 == 0 )
         {
-            temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
-            q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
+           temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
+           q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
+#else
+           Word32 my_temp;
+           Word16 my_q_tmp;
+           my_temp = temp;
+           my_q_tmp = q_tmp;
+           temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
+           q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
+           my_temp = Mpy_32_32(my_temp, ONE_DIV_EPSILON_MANT);
+           my_q_tmp = add(my_q_tmp, ONE_DIV_EPSILON_EXP);
+#endif
         }
         ELSE
         {
@@ -6241,6 +6064,8 @@ static void formulate2x2MixingMatrix_fx(
     Word32 temp;
     Word16 q_Pre[BINAURAL_CHANNELS][BINAURAL_CHANNELS], q_Pim[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
     Word16 hdrm_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS], hdrm_im[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+
+
     set16_fx( hdrm_re[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
     set16_fx( hdrm_im[0], 63, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
     set16_fx( q_Pre[0], Q31, i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ) );
@@ -6295,8 +6120,12 @@ static void formulate2x2MixingMatrix_fx(
     // 4611686 = Q62
     IF( maxEne_fx == 0 )
     {
-        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62
-        q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) );
+        // maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62
+        // q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) );
+        maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
+        move32();
+        q_maxEneDiv  = ONE_DIV_EPSILON_EXP;
+        move16();
     }
     ELSE
     {
@@ -6325,7 +6154,6 @@ static void formulate2x2MixingMatrix_fx(
 
     /* Cholesky decomposition of target / output covariance matrix */
     chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
-
     /* Eigendecomposition of input covariance matrix */
     eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx );
 
@@ -6356,8 +6184,19 @@ static void formulate2x2MixingMatrix_fx(
 
     IF( temp == 0 )
     {
-        BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62
-        exp = sub( exp, sub( q_eout, 62 ) );
+        IF (E_out1 == 0)
+        {
+            Ghat_fx[0] = 0;
+            exp = -19;
+            move32();
+            move16();
+        }
+        ELSE
+        {
+            temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, 4611686, &exp ); // 4611686 = Q62
+            exp = sub( exp, sub( q_eout, 62 ) );
+            Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
+        }
     }
     ELSE
     {
@@ -6365,16 +6204,26 @@ static void formulate2x2MixingMatrix_fx(
 
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
         exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
+        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     }
-    Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
 
     temp = Mpy_32_32( E_in1, 2147484 ); // 2147484 = 0.001f in Q31
     temp = L_max( temp, E_in2 );        // q_ein
     IF( temp == 0 )
     {
-        BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62
-        exp1 = sub( exp1, sub( q_eout, 62 ) );
+        IF (E_out2 == 0)
+        {   /* We can set hard-coded results */
+            Ghat_fx[1] = 0;
+            exp1 = -19;
+            move16();
+        }
+        ELSE
+        {
+            temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, 4611686, &exp1 ); // 4611686 = Q62
+            exp1 = sub( exp1, sub( q_eout, 62 ) );
+            Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
+        }
     }
     ELSE
     {
@@ -6382,8 +6231,8 @@ static void formulate2x2MixingMatrix_fx(
 
         temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
         exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
+        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
     }
-    Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
     move32();
 
     q_Ghat = sub( 31, s_max( exp, exp1 ) );
@@ -6432,21 +6281,32 @@ static void formulate2x2MixingMatrix_fx(
 
     IF( D_fx[0] == 0 )
     {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62
         exp = sub( exp, sub( Q30, 62 ) );
+#else
+        temp = ONE_DIV_EPSILON_MANT;   /* Result of 1.0/eps with full precision */
+        exp = ONE_DIV_EPSILON_EXP;
+#endif
     }
     ELSE
     {
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
         exp = sub( exp, sub( Q30, q_D ) );
     }
+
     div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
     move32();
 
     IF( D_fx[1] == 0 )
     {
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62
         exp1 = sub( exp1, sub( Q30, 62 ) );
+#else
+        temp = ONE_DIV_EPSILON_MANT;   /* Result of 1.0/eps with full precision */
+        exp1 = ONE_DIV_EPSILON_EXP;
+#endif
     }
     ELSE
     {
@@ -6547,27 +6407,54 @@ static void formulate2x2MixingMatrix_fx(
     }
 
     matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
-
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
+#if (BINAURAL_CHANNELS != 2)
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
             IF( Sx_fx[chB] == 0 )
             {
-                Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62
-                q_Pre[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) );
-                Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], 4611686, &exp ); // 4611686 = 1e-12 in Q62
-                q_Pim[chA][chB] = add( sub( q_P, 62 ), sub( 31, exp ) );
+                Pre_fx[chA][chB] = Mpy_32_32(Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT);
+              //q_Pre[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
+                q_Pre[chA][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
+
+
+                Pim_fx[chA][chB] = Mpy_32_32(Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT);
+              //q_Pim[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
+                q_Pim[chA][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
+
             }
             ELSE
             {
+                Word16 Pre_shift, Pim_shift;
                 temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
 
+#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
                 Pre_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pre_fx[chA][chB], temp, &exp );
                 q_Pre[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) );
                 Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], temp, &exp );
                 q_Pim[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) );
+#else
+                temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp);
+                Pre_shift = norm_l( Pre_fx[chA][chB] );
+                Pim_shift = norm_l( Pim_fx[chA][chB] );
+                Pre_fx[chA][chB] = Mpy_32_32( L_shl( Pre_fx[chA][chB], Pre_shift ), temp );
+                Pim_fx[chA][chB] = Mpy_32_32( L_shl( Pim_fx[chA][chB], Pim_shift ), temp );
+                q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp);
+                q_Pre[chA][chB] = add(q_temp, Pre_shift);
+                q_Pim[chA][chB] = add(q_temp, Pim_shift);
+#endif
+            }
+            if (Pre_fx[chA][chB] == 0)
+            {
+                q_Pre[chA][chB] = 31;
+                move16();
+            }
+            if (Pim_fx[chA][chB] == 0)
+            {
+                q_Pim[chA][chB] = 31;
+                move16();
             }
             move32();
             move32();
@@ -6575,6 +6462,72 @@ static void formulate2x2MixingMatrix_fx(
             move16();
         }
     }
+#else
+    /* BINAURAL_CHANNEL == 2 */
+    FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+    {
+        IF( Sx_fx[chB] == 0 )
+        {
+            Pre_fx[0][chB] = Mpy_32_32(Pre_fx[0][chB], ONE_DIV_EPSILON_MANT);
+            q_Pre[0][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
+            Pim_fx[0][chB] = Mpy_32_32(Pim_fx[0][chB], ONE_DIV_EPSILON_MANT);
+            q_Pim[0][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
+            Pre_fx[1][chB] = Mpy_32_32(Pre_fx[1][chB], ONE_DIV_EPSILON_MANT);
+            q_Pre[1][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
+            Pim_fx[1][chB] = Mpy_32_32(Pim_fx[1][chB], ONE_DIV_EPSILON_MANT);
+            q_Pim[1][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
+        }
+        ELSE
+        {
+            Word16 Pre_shift, Pim_shift;
+            temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
+            temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp);
+            q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp);
+
+            Pre_shift = norm_l( Pre_fx[0][chB] );
+            Pim_shift = norm_l( Pim_fx[0][chB] );
+            Pre_fx[0][chB] = Mpy_32_32( L_shl( Pre_fx[0][chB], Pre_shift ), temp );
+            Pim_fx[0][chB] = Mpy_32_32( L_shl( Pim_fx[0][chB], Pim_shift ), temp );
+            q_Pre[0][chB] = add(q_temp, Pre_shift);
+            q_Pim[0][chB] = add(q_temp, Pim_shift);
+
+            Pre_shift = norm_l( Pre_fx[1][chB] );
+            Pim_shift = norm_l( Pim_fx[1][chB] );
+            Pre_fx[1][chB] = Mpy_32_32( L_shl( Pre_fx[1][chB], Pre_shift ), temp );
+            Pim_fx[1][chB] = Mpy_32_32( L_shl( Pim_fx[1][chB], Pim_shift ), temp );
+            q_Pre[1][chB] = add(q_temp, Pre_shift);
+            q_Pim[1][chB] = add(q_temp, Pim_shift);
+        }
+        if (Pre_fx[0][chB] == 0)
+        {
+            q_Pre[0][chB] = 31;
+            move16();
+        }
+        if (Pim_fx[0][chB] == 0)
+        {
+            q_Pim[0][chB] = 31;
+            move16();
+        }
+        if (Pre_fx[1][chB] == 0)
+        {
+            q_Pre[1][chB] = 31;
+            move16();
+        }
+        if (Pim_fx[1][chB] == 0)
+        {
+            q_Pim[1][chB] = 31;
+            move16();
+        }
+        move32();
+        move32();
+        move16();
+        move16();
+        move32();
+        move32();
+        move16();
+        move16();
+    }
+#endif
     minimum_s( q_Pre[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
     q_P = s_min( q_P, exp );
     minimum_s( q_Pim[0], i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ), &exp );
@@ -6592,7 +6545,6 @@ static void formulate2x2MixingMatrix_fx(
     }
 
     matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp );
-
     matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M );
 
     return;
-- 
GitLab


From 602bf646f3ec861e5e5d59aba7f9abdec58e5a26 Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Fri, 29 Nov 2024 17:24:24 +0100
Subject: [PATCH 02/14] Fix clang-format issues

---
 lib_com/basop_util.c                         |  28 ++--
 lib_com/fft_fx.c                             |  15 +-
 lib_com/tools.c                              |   4 +-
 lib_rend/ivas_dirac_dec_binaural_functions.c | 161 +++++++++----------
 4 files changed, 103 insertions(+), 105 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 79d57198f..79f128578 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1038,17 +1038,17 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
-Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits)
+Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits )
 {
     Word32 z;
     Word16 sx;
     Word16 sy;
     Word32 sign;
     Word16 iteration;
-    Flag   Carry;
+    Flag Carry;
     Word16 s_val;
 
-    unset_carry(&Carry);
+    unset_carry( &Carry );
 
     /* assert (x >= (Word32)0); */
     assert( y != (Word32) 0 );
@@ -1060,13 +1060,13 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
         return ( (Word32) 0 );
     }
 
-    sign = L_shr(L_xor(x,y), 31);
+    sign = L_shr( L_xor( x, y ), 31 );
 
     sx = norm_l( x );
     x = L_shl( x, sx );
     x = L_shr( x, 1 );
     s_val = sub( 1, sx );
-    if( x < 0 )
+    if ( x < 0 )
     {
         x = L_negate( x );
     }
@@ -1074,8 +1074,8 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
     sy = norm_l( y );
     y = L_shl( y, sy );
     y = L_shr( y, 1 );
-    s_val = add(s_val, sy );
-    if( y >= 0 )
+    s_val = add( s_val, sy );
+    if ( y >= 0 )
     {
         y = L_negate( y );
     }
@@ -1083,30 +1083,30 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
     *s = s_val;
     move16();
 
-    z = L_sub(x, x);   // z = 0
+    z = L_sub( x, x ); // z = 0
 
     for ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ )
     {
-        if ( L_add(x, y) >= 0 )
+        if ( L_add( x, y ) >= 0 )
         {
-            x = DEPR_L_add_c(x, y, &Carry);  // sets always carry=1
+            x = DEPR_L_add_c( x, y, &Carry ); // sets always carry=1
         }
-        z = DEPR_L_add_c( z, z, &Carry );    // sets always carry=0
-        x = L_add(x, x);
+        z = DEPR_L_add_c( z, z, &Carry ); // sets always carry=0
+        x = L_add( x, x );
     }
 
     if ( sign != 0 )
     {
         z = L_negate( z );
     }
-    return L_shl(z, sub(31, bits));
+    return L_shl( z, sub( 31, bits ) );
 }
 
 
 Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
 {
 #if 1
-    return BASOP_Util_Divide3232_Scale_FhG(x,y,s,24);
+    return BASOP_Util_Divide3232_Scale_FhG( x, y, s, 24 );
 #else
     Word32 z;
     Word16 sx;
diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c
index 3e664fb56..aceffad0b 100644
--- a/lib_com/fft_fx.c
+++ b/lib_com/fft_fx.c
@@ -7307,11 +7307,11 @@ Word16 L_norm_arr( Word32 *arr, Word16 size )
 #else
     {
         Word16 q_tst;
-        q_tst = norm_l(arr[i]);
-        if (arr[i] != 0)
-           q = s_min(q, q_tst);
+        q_tst = norm_l( arr[i] );
+        if ( arr[i] != 0 )
+            q = s_min( q, q_tst );
     }
-    
+
 #endif
     return q;
 }
@@ -7338,11 +7338,11 @@ Word16 get_min_scalefactor( Word32 x, Word32 y )
 #else
     Word16 scf = Q31;
     Word16 scf_y;
-    if (x != 0)
+    if ( x != 0 )
         scf = norm_l( x );
     scf_y = norm_l( y );
-    if (y != 0)
-        scf = s_min(scf_y, scf);
+    if ( y != 0 )
+        scf = s_min( scf_y, scf );
     return scf;
 #endif
 }
@@ -7356,5 +7356,4 @@ Flag is_zero_arr( Word32 *arr, Word16 size )
     }
 
     return 1;
-
 }
diff --git a/lib_com/tools.c b/lib_com/tools.c
index e4d5914e6..cd962f2ac 100644
--- a/lib_com/tools.c
+++ b/lib_com/tools.c
@@ -923,9 +923,9 @@ Word16 minimum_s(
 
     FOR( i = 1; i < lvec; i++ )
     {
-        if( LT_16( vec[i], vec[ind] ) )
+        if ( LT_16( vec[i], vec[ind] ) )
         {
-            ind = add(i, 0);
+            ind = add( i, 0 );
         }
     }
 
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 32c266c6f..3bf7060fe 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -5144,8 +5144,8 @@ static void eig2x2_fx(
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
 #else
-            normVal_fx = ISqrt32(tmp3, &exp_tmp3);
-            q_tmp2 = sub(31, exp_tmp3);
+            normVal_fx = ISqrt32( tmp3, &exp_tmp3 );
+            q_tmp2 = sub( 31, exp_tmp3 );
 #endif
             IF( LT_16( q_tmp1, q_c ) )
             {
@@ -5216,8 +5216,8 @@ static void eig2x2_fx(
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
 #else
-            normVal_fx = ISqrt32(tmp3, &exp_tmp3);
-            q_tmp2 = sub(31, exp_tmp3);
+            normVal_fx = ISqrt32( tmp3, &exp_tmp3 );
+            q_tmp2 = sub( 31, exp_tmp3 );
 #endif
             IF( LT_16( q_tmp1, q_c ) )
             {
@@ -5407,14 +5407,14 @@ static void matrixMul_fx(
             move32();
 #else
             outRe_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[0][chB] ),
-                                                                                Are_fx[chA][1], Bre_fx[1][chB] ),
-                                                                                Aim_fx[chA][0], Bim_fx[0][chB] ),
-                                                                                Aim_fx[chA][1], Bim_fx[1][chB] );
+                                                                     Are_fx[chA][1], Bre_fx[1][chB] ),
+                                                         Aim_fx[chA][0], Bim_fx[0][chB] ),
+                                             Aim_fx[chA][1], Bim_fx[1][chB] );
             move32();
             outIm_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[0][chB] ),
-                                                                                Aim_fx[chA][1], Bre_fx[1][chB] ),
-                                                                                Are_fx[chA][0], Bim_fx[0][chB] ),
-                                                                                Are_fx[chA][1], Bim_fx[1][chB] );
+                                                                     Aim_fx[chA][1], Bre_fx[1][chB] ),
+                                                         Are_fx[chA][0], Bim_fx[0][chB] ),
+                                             Are_fx[chA][1], Bim_fx[1][chB] );
             move32();
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
         }
@@ -5477,14 +5477,14 @@ static void matrixTransp1Mul_fx(
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
             outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ),
-                                                                                Are_fx[1][chA], Bre_fx[1][chB] ),
-                                                                                Aim_fx[0][chA], Bim_fx[0][chB] ),
-                                                                                Aim_fx[1][chA], Bim_fx[1][chB] );
+                                                                     Are_fx[1][chA], Bre_fx[1][chB] ),
+                                                         Aim_fx[0][chA], Bim_fx[0][chB] ),
+                                             Aim_fx[1][chA], Bim_fx[1][chB] );
             move32();
             outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ),
-                                                                                Are_fx[1][chA], Bim_fx[1][chB] ),
-                                                                                Aim_fx[0][chA], Bre_fx[0][chB] ),
-                                                                                Aim_fx[1][chA], Bre_fx[1][chB] );
+                                                                     Are_fx[1][chA], Bim_fx[1][chB] ),
+                                                         Aim_fx[0][chA], Bre_fx[0][chB] ),
+                                             Aim_fx[1][chA], Bre_fx[1][chB] );
             move32();
         }
     }
@@ -5566,15 +5566,15 @@ static void matrixTransp2Mul_fx(
             outIm_fx[chA][chB] = L_add( outIm_fx[chA][chB], W_extract_h( W_mac_32_32( W_mult_32_32( Are_fx[chA][0], L_negate( Bim_fx[chB][0] ) ), Are_fx[chA][1], L_negate( Bim_fx[chB][1] ) ) ) );
             move32();
 #else
-            outRe_fx[chA][chB] = Madd_32_32(Madd_32_32(Madd_32_32(Mpy_32_32(Are_fx[chA][0],Bre_fx[chB][0]), 
-                                                                            Are_fx[chA][1],Bre_fx[chB][1]), 
-                                                                            Aim_fx[chA][0],Bim_fx[chB][0]),
-                                                                            Aim_fx[chA][1],Bim_fx[chB][1]);
+            outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[chA][0], Bre_fx[chB][0] ),
+                                                                     Are_fx[chA][1], Bre_fx[chB][1] ),
+                                                         Aim_fx[chA][0], Bim_fx[chB][0] ),
+                                             Aim_fx[chA][1], Bim_fx[chB][1] );
             move32();
-            outIm_fx[chA][chB] = Msub_32_32(Msub_32_32(Madd_32_32(Mpy_32_32(Aim_fx[chA][0],Bre_fx[chB][0]),
-                                                                            Aim_fx[chA][1],Bre_fx[chB][1]),
-                                                                            Are_fx[chA][0],Bim_fx[chB][0]),
-                                                                            Are_fx[chA][1],Bim_fx[chB][1]);
+            outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[chA][0], Bre_fx[chB][0] ),
+                                                                     Aim_fx[chA][1], Bre_fx[chB][1] ),
+                                                         Are_fx[chA][0], Bim_fx[chB][0] ),
+                                             Are_fx[chA][1], Bim_fx[chB][1] );
             move32();
 #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */
         }
@@ -5666,9 +5666,9 @@ static void chol2x2_fx(
             Word32 my_outRe, my_outIm;
 
             /* Compute denom = 1.0 / outRe[0][0] */
-            denom = ISqrt32(outRe[0][0], &exp);
-            denom = Mpy_32_32(denom, denom);
-            den_exp = shl(exp, 1);
+            denom = ISqrt32( outRe[0][0], &exp );
+            denom = Mpy_32_32( denom, denom );
+            den_exp = shl( exp, 1 );
 
             /* Normalise c_re, c_im */
             exp = norm_l( c_re );
@@ -5677,15 +5677,15 @@ static void chol2x2_fx(
             exp = norm_l( c_im );
             my_outIm = L_shl( c_im, exp );
             q_im = add( q_c, exp );
-            
+
             /* Multiply and store c_re*denom and c_im*denom */
-            outRe[1][0] = Mpy_32_32(denom, my_outRe);
+            outRe[1][0] = Mpy_32_32( denom, my_outRe );
             move32();
-            q_re2 = sub(q_re2, den_exp);
+            q_re2 = sub( q_re2, den_exp );
 
-            outIm[1][0] = Mpy_32_32(denom, my_outIm);
+            outIm[1][0] = Mpy_32_32( denom, my_outIm );
             move32();
-            q_im  = sub(q_im, den_exp);
+            q_im = sub( q_im, den_exp );
 #endif
         }
         if ( outRe[1][0] == 0 )
@@ -5702,22 +5702,22 @@ static void chol2x2_fx(
         temp = Madd_32_32( Mpy_32_32( c_re, c_re ), c_im, c_im );
         q_tmp = sub( add( q_c, q_c ), 31 );
 
-       
+
         // 4611686 = Q62
         IF( e1 == 0 )
         {
 #ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
-           temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
-           q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
+            temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
+            q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
 #else
-           Word32 my_temp;
-           Word16 my_q_tmp;
-           my_temp = temp;
-           my_q_tmp = q_tmp;
-           temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
-           q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
-           my_temp = Mpy_32_32(my_temp, ONE_DIV_EPSILON_MANT);
-           my_q_tmp = add(my_q_tmp, ONE_DIV_EPSILON_EXP);
+            Word32 my_temp;
+            Word16 my_q_tmp;
+            my_temp = temp;
+            my_q_tmp = q_tmp;
+            temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
+            q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
+            my_temp = Mpy_32_32( my_temp, ONE_DIV_EPSILON_MANT );
+            my_q_tmp = add( my_q_tmp, ONE_DIV_EPSILON_EXP );
 #endif
         }
         ELSE
@@ -6124,7 +6124,7 @@ static void formulate2x2MixingMatrix_fx(
         // q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) );
         maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
         move32();
-        q_maxEneDiv  = ONE_DIV_EPSILON_EXP;
+        q_maxEneDiv = ONE_DIV_EPSILON_EXP;
         move16();
     }
     ELSE
@@ -6184,7 +6184,7 @@ static void formulate2x2MixingMatrix_fx(
 
     IF( temp == 0 )
     {
-        IF (E_out1 == 0)
+        IF( E_out1 == 0 )
         {
             Ghat_fx[0] = 0;
             exp = -19;
@@ -6212,8 +6212,8 @@ static void formulate2x2MixingMatrix_fx(
     temp = L_max( temp, E_in2 );        // q_ein
     IF( temp == 0 )
     {
-        IF (E_out2 == 0)
-        {   /* We can set hard-coded results */
+        IF( E_out2 == 0 )
+        { /* We can set hard-coded results */
             Ghat_fx[1] = 0;
             exp1 = -19;
             move16();
@@ -6285,7 +6285,7 @@ static void formulate2x2MixingMatrix_fx(
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12 in Q62
         exp = sub( exp, sub( Q30, 62 ) );
 #else
-        temp = ONE_DIV_EPSILON_MANT;   /* Result of 1.0/eps with full precision */
+        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
         exp = ONE_DIV_EPSILON_EXP;
 #endif
     }
@@ -6304,7 +6304,7 @@ static void formulate2x2MixingMatrix_fx(
         temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp1 ); // 4611686 = 1e-12 in Q62
         exp1 = sub( exp1, sub( Q30, 62 ) );
 #else
-        temp = ONE_DIV_EPSILON_MANT;   /* Result of 1.0/eps with full precision */
+        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
         exp1 = ONE_DIV_EPSILON_EXP;
 #endif
     }
@@ -6408,22 +6408,21 @@ static void formulate2x2MixingMatrix_fx(
 
     matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
-#if (BINAURAL_CHANNELS != 2)
+#if ( BINAURAL_CHANNELS != 2 )
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
             IF( Sx_fx[chB] == 0 )
             {
-                Pre_fx[chA][chB] = Mpy_32_32(Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT);
-              //q_Pre[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
-                q_Pre[chA][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
-
+                Pre_fx[chA][chB] = Mpy_32_32( Pre_fx[chA][chB], ONE_DIV_EPSILON_MANT );
+                // q_Pre[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
+                q_Pre[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
 
-                Pim_fx[chA][chB] = Mpy_32_32(Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT);
-              //q_Pim[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
-                q_Pim[chA][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
 
+                Pim_fx[chA][chB] = Mpy_32_32( Pim_fx[chA][chB], ONE_DIV_EPSILON_MANT );
+                // q_Pim[chA][chB]  = add(sub(31, q_P), 31 - ONE_DIV_EPSILON_EXP);
+                q_Pim[chA][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
             }
             ELSE
             {
@@ -6436,22 +6435,22 @@ static void formulate2x2MixingMatrix_fx(
                 Pim_fx[chA][chB] = BASOP_Util_Divide3232_Scale_cadence( Pim_fx[chA][chB], temp, &exp );
                 q_Pim[chA][chB] = add( sub( q_P, sub( 31, exp_temp ) ), sub( 31, exp ) );
 #else
-                temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp);
+                temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
                 Pre_shift = norm_l( Pre_fx[chA][chB] );
                 Pim_shift = norm_l( Pim_fx[chA][chB] );
                 Pre_fx[chA][chB] = Mpy_32_32( L_shl( Pre_fx[chA][chB], Pre_shift ), temp );
                 Pim_fx[chA][chB] = Mpy_32_32( L_shl( Pim_fx[chA][chB], Pim_shift ), temp );
-                q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp);
-                q_Pre[chA][chB] = add(q_temp, Pre_shift);
-                q_Pim[chA][chB] = add(q_temp, Pim_shift);
+                q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
+                q_Pre[chA][chB] = add( q_temp, Pre_shift );
+                q_Pim[chA][chB] = add( q_temp, Pim_shift );
 #endif
             }
-            if (Pre_fx[chA][chB] == 0)
+            if ( Pre_fx[chA][chB] == 0 )
             {
                 q_Pre[chA][chB] = 31;
                 move16();
             }
-            if (Pim_fx[chA][chB] == 0)
+            if ( Pim_fx[chA][chB] == 0 )
             {
                 q_Pim[chA][chB] = 31;
                 move16();
@@ -6468,52 +6467,52 @@ static void formulate2x2MixingMatrix_fx(
     {
         IF( Sx_fx[chB] == 0 )
         {
-            Pre_fx[0][chB] = Mpy_32_32(Pre_fx[0][chB], ONE_DIV_EPSILON_MANT);
-            q_Pre[0][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
-            Pim_fx[0][chB] = Mpy_32_32(Pim_fx[0][chB], ONE_DIV_EPSILON_MANT);
-            q_Pim[0][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
-            Pre_fx[1][chB] = Mpy_32_32(Pre_fx[1][chB], ONE_DIV_EPSILON_MANT);
-            q_Pre[1][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
-            Pim_fx[1][chB] = Mpy_32_32(Pim_fx[1][chB], ONE_DIV_EPSILON_MANT);
-            q_Pim[1][chB]  = sub(62 - ONE_DIV_EPSILON_EXP, q_P);
+            Pre_fx[0][chB] = Mpy_32_32( Pre_fx[0][chB], ONE_DIV_EPSILON_MANT );
+            q_Pre[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+            Pim_fx[0][chB] = Mpy_32_32( Pim_fx[0][chB], ONE_DIV_EPSILON_MANT );
+            q_Pim[0][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+            Pre_fx[1][chB] = Mpy_32_32( Pre_fx[1][chB], ONE_DIV_EPSILON_MANT );
+            q_Pre[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
+            Pim_fx[1][chB] = Mpy_32_32( Pim_fx[1][chB], ONE_DIV_EPSILON_MANT );
+            q_Pim[1][chB] = sub( 62 - ONE_DIV_EPSILON_EXP, q_P );
         }
         ELSE
         {
             Word16 Pre_shift, Pim_shift;
             temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
-            temp = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, temp, &exp);
-            q_temp = add(sub(sub(q_P, exp), sub(31, Q30)),exp_temp);
+            temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
+            q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );
 
             Pre_shift = norm_l( Pre_fx[0][chB] );
             Pim_shift = norm_l( Pim_fx[0][chB] );
             Pre_fx[0][chB] = Mpy_32_32( L_shl( Pre_fx[0][chB], Pre_shift ), temp );
             Pim_fx[0][chB] = Mpy_32_32( L_shl( Pim_fx[0][chB], Pim_shift ), temp );
-            q_Pre[0][chB] = add(q_temp, Pre_shift);
-            q_Pim[0][chB] = add(q_temp, Pim_shift);
+            q_Pre[0][chB] = add( q_temp, Pre_shift );
+            q_Pim[0][chB] = add( q_temp, Pim_shift );
 
             Pre_shift = norm_l( Pre_fx[1][chB] );
             Pim_shift = norm_l( Pim_fx[1][chB] );
             Pre_fx[1][chB] = Mpy_32_32( L_shl( Pre_fx[1][chB], Pre_shift ), temp );
             Pim_fx[1][chB] = Mpy_32_32( L_shl( Pim_fx[1][chB], Pim_shift ), temp );
-            q_Pre[1][chB] = add(q_temp, Pre_shift);
-            q_Pim[1][chB] = add(q_temp, Pim_shift);
+            q_Pre[1][chB] = add( q_temp, Pre_shift );
+            q_Pim[1][chB] = add( q_temp, Pim_shift );
         }
-        if (Pre_fx[0][chB] == 0)
+        if ( Pre_fx[0][chB] == 0 )
         {
             q_Pre[0][chB] = 31;
             move16();
         }
-        if (Pim_fx[0][chB] == 0)
+        if ( Pim_fx[0][chB] == 0 )
         {
             q_Pim[0][chB] = 31;
             move16();
         }
-        if (Pre_fx[1][chB] == 0)
+        if ( Pre_fx[1][chB] == 0 )
         {
             q_Pre[1][chB] = 31;
             move16();
         }
-        if (Pim_fx[1][chB] == 0)
+        if ( Pim_fx[1][chB] == 0 )
         {
             q_Pim[1][chB] = 31;
             move16();
-- 
GitLab


From e86c049a22eac81fd3e63d581090203f651ba567 Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Fri, 29 Nov 2024 17:43:05 +0100
Subject: [PATCH 03/14] Fix missing prototype for local function

---
 lib_com/basop_util.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 79f128578..82bae938c 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1038,6 +1038,7 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
+Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits );
 Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits )
 {
     Word32 z;
-- 
GitLab


From ad64b2673caea5a652e63142bfeda23793314111 Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Fri, 6 Dec 2024 14:08:28 +0100
Subject: [PATCH 04/14] Author: Arthur Tritthart, FhG, 06-DEC-2024

Changes for BASOP tuning (ticket 1009):

File lib_com/basop_util.c:
--------------------------
modified imult1616 to directly use i_mult, WMOPS weights reduced: 2 -> 1

File lib_com/ivas_tools.c:
--------------------------
Added an IF-conditioned branch for the interleaved-to-linear format. This is the
way the function is currently used; WMOPS weights reduced: 5 -> 2

File lib_com/tools_fx.c:
------------------------
Simplified set32_fx function, stripped use of L_deposit_l, WMOPS weights reduced 2 -> 1

File lib_rend/ivas_dirac_decorr_dec.c:
--------------------------------------
- use of is_zero_arr, stripped constant find_guarded_bits(2)
- strip offset computation for interleaved real/imag buffer
- tune AR filter loop for WMOPS
- fix and simplify 64-bit power computation loop
- tuned energy smoothing loops for WMOPS
- skip energy scaling, if q_shift equals zero
- strip offset computation for interleaved real/imag buffer

File lib_dec/ivas_mc_param_dec.c, ivas_mct_dec_mct_fx.c:
--------------------------------------------------------
- simplify zero checks for output synthesis
- replace div(x / 1) or div(x / 2) by shift ops
- simplified shifting output

Total WMOPS saving for bitstream stv714MC48c_128kbps.192/7_1_4: 164 WMOPS
---
 lib_com/basop_util.c             |   2 +-
 lib_com/ivas_tools.c             |  17 ++++
 lib_com/tools_fx.c               |  22 ++---
 lib_dec/ivas_mc_param_dec.c      |  28 ++----
 lib_dec/ivas_mct_dec_mct_fx.c    |  16 +++-
 lib_rend/ivas_dirac_decorr_dec.c | 145 +++++++++++++++++++++++--------
 6 files changed, 151 insertions(+), 79 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 82bae938c..17ef53245 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1626,7 +1626,7 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len )
 Word16 imult1616( Word16 x, Word16 y )
 {
     assert( (int) x * (int) y < 32768 && (int) x * (int) y >= -32768 );
-    return extract_l( L_mult0( x, y ) );
+    return i_mult(x, y);
 }
 
 Word32 imult3216( Word32 x, Word16 y )
diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index bead42ab9..7d6dc9376 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -461,6 +461,23 @@ void v_add_inc_fx(
 )
 {
     Word16 i;
+
+    /* The use of this function is currently always for the interleaved input format, */
+    /* that means, the following conditions are always true and thus obsolete.        */
+    test();
+    test();
+    test();
+    test();
+    IF ((sub(x_inc, 2) == 0) && (sub(x2_inc, 2) == 0) && (sub(y_inc, 1) == 0) && (&x1[1] == &x2[0]) )
+    {
+        /* Interleaved input case, linear output */
+        FOR( i = 0; i < N; i++ )
+        {
+            y[i] = L_add( x1[2*i+0], x1[2*i+1] ); /*Qx*/
+            move32();
+        }
+        return;
+    }
     Word16 ix1 = 0;
     Word16 ix2 = 0;
     Word16 iy = 0;
diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c
index 3580e632e..eba9871fb 100644
--- a/lib_com/tools_fx.c
+++ b/lib_com/tools_fx.c
@@ -648,25 +648,13 @@ void set32_fx(
     const Word16 N  /* i  : Lenght of the vector                */
 )
 {
-    Word16 i, tmp;
-    tmp = extract_l( a );
-    IF( EQ_32( L_deposit_l( tmp ), a ) )
-    {
-        FOR( i = 0; i < N; i++ )
-        {
-            y[i] = L_deposit_l( tmp );
-            move32();
-        }
-    }
-    ELSE
+    Word16 i;
+
+    FOR( i = 0; i < N; i++ )
     {
-        FOR( i = 0; i < N; i++ )
-        {
-            y[i] = a;
-            move32();
-        }
+        y[i] = a;
+        move32();
     }
-
     return;
 }
 /*-------------------------------------------------------------------*
diff --git a/lib_dec/ivas_mc_param_dec.c b/lib_dec/ivas_mc_param_dec.c
index b9e9137a1..93cf30c05 100644
--- a/lib_dec/ivas_mc_param_dec.c
+++ b/lib_dec/ivas_mc_param_dec.c
@@ -3786,38 +3786,20 @@ void ivas_param_mc_dec_render_fx(
     slot_idx_start_cldfb_synth = 0;
     move16();
 
-    Flag is_zero = 1;
-    move32();
     FOR( j = 0; j < st_ivas->hParamMC->hMetadataPMC->nbands_coded; j++ )
     {
-        is_zero = 1;
-        move16();
-        FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_len; i++ )
+        Flag is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_len );
         {
-            IF( hParamMC->h_output_synthesis_cov_state.mixing_matrix_fx[j][i] != 0 )
+            if ( is_zero != 0 )
             {
-                is_zero = 0;
+                hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0;
                 move16();
             }
         }
-        IF( is_zero )
-        {
-            hParamMC->h_output_synthesis_cov_state.mixing_matrix_exp[j] = 0;
-            move16();
-        }
-        is_zero = 1;
-        move16();
         IF( LT_16( st_ivas->hParamMC->band_grouping[j], st_ivas->hParamMC->h_output_synthesis_params.max_band_decorr ) )
         {
-            FOR( i = 0; i < hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len; i++ )
-            {
-                IF( NE_32( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j][i], 0 ) )
-                {
-                    is_zero = 0;
-                    move16();
-                }
-            }
-            IF( is_zero )
+            is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len );
+            if( is_zero != 0)
             {
                 hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0;
                 move16();
diff --git a/lib_dec/ivas_mct_dec_mct_fx.c b/lib_dec/ivas_mct_dec_mct_fx.c
index 0f9bd98cc..a21dbb1fc 100644
--- a/lib_dec/ivas_mct_dec_mct_fx.c
+++ b/lib_dec/ivas_mct_dec_mct_fx.c
@@ -316,6 +316,7 @@ void mctStereoIGF_dec_fx(
             test();
             IF( NE_16( hMCT->hBlockData[b]->hStereoMdct->IGFStereoMode[k], SMDCT_DUAL_MONO ) || NE_16( hMCT->hBlockData[b]->hStereoMdct->mdct_stereo_mode[k], SMDCT_DUAL_MONO ) )
             {
+#if 0
                 tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxCfg->tcx_coded_lines, nSubframes, &tmp_e );
                 L_spec[0] = shr( tmp, add( 15, negate( tmp_e ) ) );
                 move16();
@@ -325,6 +326,15 @@ void mctStereoIGF_dec_fx(
 
                 tmp = BASOP_Util_Divide1616_Scale( sts[0]->hTcxDec->L_frameTCX, nSubframes, &tmp_e );
                 L_frameTCX_nSubframe = shr( tmp, add( 15, negate( tmp_e ) ) );
+#else
+                assert( nSubframes == 1 || nSubframes == 2 );
+                /* Note: nSubframes is in limited range [1, 2] for this function */
+                Word16 shr_div = sub( nSubframes, 1 ); /* 2 -> 1, 1 -> 0 */
+                L_spec[0] = shr(sts[0]->hTcxCfg->tcx_coded_lines, shr_div);
+                move16();
+                L_frame_nSubframe = shr(sts[0]->L_frame, shr_div);
+                L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX , shr_div);
+#endif
 
                 init_tcx_info_fx( sts[0], L_frame_nSubframe, L_frameTCX_nSubframe, k, bfi, &tcx_offset[0], &tcx_offsetFB[0], &L_frame[0], &L_frameTCX[0], &left_rect[0], &L_spec[0] );
 
@@ -334,14 +344,16 @@ void mctStereoIGF_dec_fx(
                 decoder_tcx_IGF_stereo_fx( sts, hMCT->hBlockData[b]->hStereoMdct, hMCT->hBlockData[b]->mask, p_x, p_x_e, p_x_len, L_frame[0], left_rect[0], k, bfi, 1 /* MCT_flag */ );
 
                 // Shifting output with variable exponent back to Q12
+                Word16 shr_k = sub( 31 - Q12, p_x_e[0][k] );
                 FOR( Word16 i = 0; i < p_x_len[0][k]; i++ )
                 {
-                    p_x[0][k][i] = L_shr( p_x[0][k][i], sub( 31 - Q12, p_x_e[0][k] ) );
+                    p_x[0][k][i] = L_shr( p_x[0][k][i], shr_k );
                     move32();
                 }
+                shr_k = sub( 31 - Q12, p_x_e[1][k] );
                 FOR( Word16 i = 0; i < p_x_len[1][k]; i++ )
                 {
-                    p_x[1][k][i] = L_shr( p_x[1][k][i], sub( 31 - Q12, p_x_e[1][k] ) );
+                    p_x[1][k][i] = L_shr( p_x[1][k][i], shr_k );
                     move32();
                 }
             }
diff --git a/lib_rend/ivas_dirac_decorr_dec.c b/lib_rend/ivas_dirac_decorr_dec.c
index dd71510d7..a5d780a11 100644
--- a/lib_rend/ivas_dirac_decorr_dec.c
+++ b/lib_rend/ivas_dirac_decorr_dec.c
@@ -57,6 +57,9 @@
 #define DIRAC_DUCK_GAMMA_FX    1610612736 /* Q30 */
 #define DIRAC_DUCK_ALPHA_FX    1717986944 /* Q31 */
 #define ONE_M_DIRAC_DUCK_ALPHA 429496736  /* Q31 */
+
+/* Maximal useful q-format, represents range of 2^-126 (float min) */
+#define MAX_Q_FX        157
 #endif
 
 /*-------------------------------------------------------------------------
@@ -1118,16 +1121,20 @@ void ivas_dirac_dec_decorr_process_fx(
 
         Word16 decorr_buff_tot_len = imult1616( imult1616( shl( decorr_buffer_len, 1 ), max_band_decorr ), num_channels );
         guarded_bits = 0;
-        FOR( Word16 i = 0; i < decorr_buff_tot_len; i++ )
+
+        Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len );
+        if (is_zero == 0)
+          guarded_bits = 3;
+
+        IF(is_zero == 0)
         {
-            IF( h_freq_domain_decorr_ap_state->decorr_buffer_fx[i] != 0 )
+            q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits );
+            IF (q_shift != 0)
             {
-                guarded_bits = s_max( find_guarded_bits_fx( 2 ), 3 );
+                Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift );
+                q_decorr_buf = add( q_decorr_buf, q_shift );
             }
         }
-        q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits );
-        Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift );
-        q_decorr_buf = add( q_decorr_buf, q_shift );
 
         q_shift = getScaleFactor32( aux_buffer_fx, imult1616( imult1616( 2, num_protos_dir ), max_band_decorr_temp ) );
 
@@ -1191,9 +1198,7 @@ void ivas_dirac_dec_decorr_process_fx(
                     FOR( l = 0; l < filter_length; l++ )
                     {
                         frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr
-                        // frame_ma_fx[2 * l] = L_shr(frame_ma_fx[2 * l],3); // scaling to q_decorr_buf
-                        frame_ma_fx[add( shl( l, 1 ), 1 )] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr
-                                                                                                                        // frame_ma_fx[2 * l + 1] = L_shr(frame_ma_fx[2 * l + 1], 3); // scaling to q_decorr_buf
+                        frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr
                         move32();
                         move32();
                     }
@@ -1207,26 +1212,28 @@ void ivas_dirac_dec_decorr_process_fx(
 
                     /*get values for AR part */
                     filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr
-                    filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_deocrr
+                    filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr
 
-                    decorr_buffer_ptr_fx += shl( decorr_buffer_step, 1 );
+                    Word16 decorr_buffer_step2x = shl(decorr_buffer_step, 1);
+
+                    decorr_buffer_ptr_fx += decorr_buffer_step2x;
+                    move16();
 
                     FOR( l = 1; l < filter_length; l++ )
                     {
                         // q adjustment needed//
-                        decorr_buffer_ptr_fx[0] = L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] );                 // q_decorr
                         Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] );               // q_decorr - 3
                         temp_1 = L_shl( temp_1, 3 );                                                                    // q_decorr
-                        decorr_buffer_ptr_fx[0] = L_sub( decorr_buffer_ptr_fx[0], temp_1 );                             // q_deocor
-                        decorr_buffer_ptr_fx[1] = L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[add( shl( l, 1 ), 1 )] ); // q_decorr
+                        decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 );// q_deocor
+                        move32();
+
                         Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] );               // q_decorr - 3
                         temp_2 = L_shl( temp_2, 3 );                                                                    // q_decorr
-                        decorr_buffer_ptr_fx[1] = L_sub( decorr_buffer_ptr_fx[1], temp_2 );                             // q_decorr
-                        decorr_buffer_ptr_fx += imult1616( 2, decorr_buffer_step );
-                        move32();
-                        move32();
-                        move32();
+                        decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 );// q_decorr
                         move32();
+
+                        decorr_buffer_ptr_fx += decorr_buffer_step2x;
+                        move16();
                     }
                 }
             }
@@ -1283,6 +1290,10 @@ void ivas_dirac_dec_decorr_process_fx(
             q_direct_energy = q_aux_buffer;
             move16();
 
+#if 0
+            /* Attention: this loop reports norm=0, whenever any data is 0. */
+            /* Therefore, useful left-shifts are skipped, accuracy is lost. */
+
             /* calculate the power of the decorrelated signal */
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
             {
@@ -1295,6 +1306,37 @@ void ivas_dirac_dec_decorr_process_fx(
                     norm = s_min( norm, W_norm( aux_64[add( offset2, i )] ) );
                 }
             }
+#else
+            /* calculate the power of the decorrelated signal */
+            Word64 *m64_aux = aux_64;
+            move32();            
+            Word64 min64 = (Word64) 0;
+            move64();            
+            Word32 *m32_frame_dec_fx = frame_dec_fx;
+            move32();
+            offset1 = shl(num_freq_bands, 1);
+            offset2 = shl( max_band_decorr, 1 );
+
+            
+            FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
+            {
+                FOR( Word16 i = 0; i < offset2; i++ )
+                {
+                    m64_aux[i] = W_mult0_32_32( m32_frame_dec_fx[i], m32_frame_dec_fx[i] );
+                    move64();
+                    if ( GT_64( m64_aux[i], min64 ) )
+                    {
+                        min64 = m64_aux[i];
+                        move64();
+                    }
+                }
+                m64_aux += offset2;
+                m32_frame_dec_fx += offset1;
+                move64();
+                move32();
+            }
+            norm = W_norm(min64);
+#endif
 
             FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ )
             {
@@ -1310,32 +1352,63 @@ void ivas_dirac_dec_decorr_process_fx(
             }
 
             /* smooth energies */
-            v_multc_fixed( aux_buffer_fx, ONE_M_DIRAC_DUCK_ALPHA, aux_buffer_fx, imult1616( num_channels, max_band_decorr ) ); // q_aux_buffer
 
-            v_multc_fixed( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); // same-q
+            Word16 len = imult1616( num_channels, max_band_decorr );
+            Word16 aux_e = sub( 31, q_aux_buffer );
+            Word16 max_e = s_max( aux_e, e_reverb_energy_smooth );
+            Word16 shr_aux = sub( max_e, aux_e );                  /* Note: headroom is zero */
+            Word16 shr_res = sub( max_e, e_reverb_energy_smooth ); /* Note: headroom is zero */
 
-            v_add_fixed_me( aux_buffer_fx, sub( 31, q_aux_buffer ), h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, e_reverb_energy_smooth, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, &e_reverb_energy_smooth, imult1616( num_channels, max_band_decorr ), 0 );
-            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth );
+            /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */
+            /*       => a multiplication with this values does not change the q/e value.  */
 
-            v_multc_fixed( direct_energy_fx, ONE_M_DIRAC_DUCK_ALPHA, direct_energy_fx, imult1616( num_protos_dir, max_band_decorr ) ); // same q
+            FOR(Word16 i = 0; i < len; i++)
+            {
+                h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add(
+                      L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA), shr_aux ), 
+                      L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) );
+                move32();
+            }
+            e_reverb_energy_smooth = max_e;
+            move16();
+            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth );
+            move16();
 
-            v_multc_fixed( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); // same q
+            len = imult1616( num_protos_dir, max_band_decorr );
+            Word16 den_e = sub( 31, q_direct_energy );
+            Word16 max_x = s_max( den_e, e_direct_energy_smooth );
+            Word16 shr_den = sub( max_x, den_e );                  /* Note: headroom is zero */
+            Word16 shr_des = sub( max_x, e_direct_energy_smooth ); /* Note: headroom is zero */
 
-            v_add_fixed_me( direct_energy_fx, sub( 31, q_direct_energy ), h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, e_direct_energy_smooth, h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, &e_direct_energy_smooth, imult1616( num_protos_dir, max_band_decorr ), 0 );
+            FOR( Word16 i = 0; i < len; i++ )
+            {
+                h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( 
+                      L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ),
+                      L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) );
+                move32();
+            }
+            e_direct_energy_smooth = max_x;
+            move16();
             h_freq_domain_decorr_ap_state->q_direct_energy_smooth = sub( 31, e_direct_energy_smooth );
             move16();
 
             // scaling energy buffers for better precision for higher values//
             q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) );
-            Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift );
-            h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift );
-            move16();
-
-
+            IF(q_shift != 0)
+            {
+                Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift );
+                h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift );
+                move16();
+            }
             q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) );
-            Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift );
-            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift );
-            move16();
+            IF( q_shift != 0 )
+            {
+                Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift );
+                h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift );
+                move16();
+            }
+            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth);
+            h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth);
 
             e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth );
             e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth );
@@ -1392,7 +1465,7 @@ void ivas_dirac_dec_decorr_process_fx(
                         duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14
 
                         frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 );                                 // q_frame_f
-                        frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 1 ); // q_frame_f
+                        frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f
                         move32();
                         move32();
                     }
@@ -1414,7 +1487,7 @@ void ivas_dirac_dec_decorr_process_fx(
                             duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13
                         }
                         frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 );                                 // q_frame_dec
-                        frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 2 ); // q_frame_dec
+                        frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec
                         move32();
                         move32();
                     }
-- 
GitLab


From c703268d8f13f1c03785459079fc0a7e10cd80d7 Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Fri, 6 Dec 2024 14:32:12 +0100
Subject: [PATCH 05/14] apply patch for clang format

---
 lib_com/basop_util.c             |  2 +-
 lib_com/ivas_tools.c             |  4 +--
 lib_dec/ivas_mc_param_dec.c      |  2 +-
 lib_dec/ivas_mct_dec_mct_fx.c    |  6 ++--
 lib_rend/ivas_dirac_decorr_dec.c | 58 ++++++++++++++++----------------
 5 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 17ef53245..d7fc7ec72 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1626,7 +1626,7 @@ Word16 findIndexOfMinWord32( Word32 *x, const Word16 len )
 Word16 imult1616( Word16 x, Word16 y )
 {
     assert( (int) x * (int) y < 32768 && (int) x * (int) y >= -32768 );
-    return i_mult(x, y);
+    return i_mult( x, y );
 }
 
 Word32 imult3216( Word32 x, Word16 y )
diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c
index 7d6dc9376..335684f37 100644
--- a/lib_com/ivas_tools.c
+++ b/lib_com/ivas_tools.c
@@ -468,12 +468,12 @@ void v_add_inc_fx(
     test();
     test();
     test();
-    IF ((sub(x_inc, 2) == 0) && (sub(x2_inc, 2) == 0) && (sub(y_inc, 1) == 0) && (&x1[1] == &x2[0]) )
+    IF( ( sub( x_inc, 2 ) == 0 ) && ( sub( x2_inc, 2 ) == 0 ) && ( sub( y_inc, 1 ) == 0 ) && ( &x1[1] == &x2[0] ) )
     {
         /* Interleaved input case, linear output */
         FOR( i = 0; i < N; i++ )
         {
-            y[i] = L_add( x1[2*i+0], x1[2*i+1] ); /*Qx*/
+            y[i] = L_add( x1[2 * i + 0], x1[2 * i + 1] ); /*Qx*/
             move32();
         }
         return;
diff --git a/lib_dec/ivas_mc_param_dec.c b/lib_dec/ivas_mc_param_dec.c
index 93cf30c05..b6ad8d146 100644
--- a/lib_dec/ivas_mc_param_dec.c
+++ b/lib_dec/ivas_mc_param_dec.c
@@ -3799,7 +3799,7 @@ void ivas_param_mc_dec_render_fx(
         IF( LT_16( st_ivas->hParamMC->band_grouping[j], st_ivas->hParamMC->h_output_synthesis_params.max_band_decorr ) )
         {
             is_zero = is_zero_arr( hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_fx[j], hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_len );
-            if( is_zero != 0)
+            if ( is_zero != 0 )
             {
                 hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0;
                 move16();
diff --git a/lib_dec/ivas_mct_dec_mct_fx.c b/lib_dec/ivas_mct_dec_mct_fx.c
index a21dbb1fc..de7fc3a37 100644
--- a/lib_dec/ivas_mct_dec_mct_fx.c
+++ b/lib_dec/ivas_mct_dec_mct_fx.c
@@ -330,10 +330,10 @@ void mctStereoIGF_dec_fx(
                 assert( nSubframes == 1 || nSubframes == 2 );
                 /* Note: nSubframes is in limited range [1, 2] for this function */
                 Word16 shr_div = sub( nSubframes, 1 ); /* 2 -> 1, 1 -> 0 */
-                L_spec[0] = shr(sts[0]->hTcxCfg->tcx_coded_lines, shr_div);
+                L_spec[0] = shr( sts[0]->hTcxCfg->tcx_coded_lines, shr_div );
                 move16();
-                L_frame_nSubframe = shr(sts[0]->L_frame, shr_div);
-                L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX , shr_div);
+                L_frame_nSubframe = shr( sts[0]->L_frame, shr_div );
+                L_frameTCX_nSubframe = shr( sts[0]->hTcxDec->L_frameTCX, shr_div );
 #endif
 
                 init_tcx_info_fx( sts[0], L_frame_nSubframe, L_frameTCX_nSubframe, k, bfi, &tcx_offset[0], &tcx_offsetFB[0], &L_frame[0], &L_frameTCX[0], &left_rect[0], &L_spec[0] );
diff --git a/lib_rend/ivas_dirac_decorr_dec.c b/lib_rend/ivas_dirac_decorr_dec.c
index a5d780a11..90b00880e 100644
--- a/lib_rend/ivas_dirac_decorr_dec.c
+++ b/lib_rend/ivas_dirac_decorr_dec.c
@@ -59,7 +59,7 @@
 #define ONE_M_DIRAC_DUCK_ALPHA 429496736  /* Q31 */
 
 /* Maximal useful q-format, represents range of 2^-126 (float min) */
-#define MAX_Q_FX        157
+#define MAX_Q_FX 157
 #endif
 
 /*-------------------------------------------------------------------------
@@ -1123,13 +1123,13 @@ void ivas_dirac_dec_decorr_process_fx(
         guarded_bits = 0;
 
         Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len );
-        if (is_zero == 0)
-          guarded_bits = 3;
+        if ( is_zero == 0 )
+            guarded_bits = 3;
 
-        IF(is_zero == 0)
+        IF( is_zero == 0 )
         {
             q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits );
-            IF (q_shift != 0)
+            IF( q_shift != 0 )
             {
                 Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift );
                 q_decorr_buf = add( q_decorr_buf, q_shift );
@@ -1197,7 +1197,7 @@ void ivas_dirac_dec_decorr_process_fx(
                     /* MA part of filter impulse response */
                     FOR( l = 0; l < filter_length; l++ )
                     {
-                        frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr
+                        frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] );     // Q_qux -3 = q_deorr
                         frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr
                         move32();
                         move32();
@@ -1214,7 +1214,7 @@ void ivas_dirac_dec_decorr_process_fx(
                     filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr
                     filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr
 
-                    Word16 decorr_buffer_step2x = shl(decorr_buffer_step, 1);
+                    Word16 decorr_buffer_step2x = shl( decorr_buffer_step, 1 );
 
                     decorr_buffer_ptr_fx += decorr_buffer_step2x;
                     move16();
@@ -1222,14 +1222,14 @@ void ivas_dirac_dec_decorr_process_fx(
                     FOR( l = 1; l < filter_length; l++ )
                     {
                         // q adjustment needed//
-                        Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] );               // q_decorr - 3
-                        temp_1 = L_shl( temp_1, 3 );                                                                    // q_decorr
-                        decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 );// q_deocor
+                        Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] );                // q_decorr - 3
+                        temp_1 = L_shl( temp_1, 3 );                                                                     // q_decorr
+                        decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 ); // q_decorr
                         move32();
 
-                        Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] );               // q_decorr - 3
-                        temp_2 = L_shl( temp_2, 3 );                                                                    // q_decorr
-                        decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 );// q_decorr
+                        Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] );                    // q_decorr - 3
+                        temp_2 = L_shl( temp_2, 3 );                                                                         // q_decorr
+                        decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 ); // q_decorr
                         move32();
 
                         decorr_buffer_ptr_fx += decorr_buffer_step2x;
@@ -1309,15 +1309,15 @@ void ivas_dirac_dec_decorr_process_fx(
 #else
             /* calculate the power of the decorrelated signal */
             Word64 *m64_aux = aux_64;
-            move32();            
+            move32();
             Word64 min64 = (Word64) 0;
-            move64();            
+            move64();
             Word32 *m32_frame_dec_fx = frame_dec_fx;
             move32();
-            offset1 = shl(num_freq_bands, 1);
+            offset1 = shl( num_freq_bands, 1 );
             offset2 = shl( max_band_decorr, 1 );
 
-            
+
             FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
             {
                 FOR( Word16 i = 0; i < offset2; i++ )
@@ -1335,7 +1335,7 @@ void ivas_dirac_dec_decorr_process_fx(
                 move64();
                 move32();
             }
-            norm = W_norm(min64);
+            norm = W_norm( min64 );
 #endif
 
             FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ )
@@ -1362,11 +1362,11 @@ void ivas_dirac_dec_decorr_process_fx(
             /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */
             /*       => a multiplication with this values does not change the q/e value.  */
 
-            FOR(Word16 i = 0; i < len; i++)
+            FOR( Word16 i = 0; i < len; i++ )
             {
                 h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add(
-                      L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA), shr_aux ), 
-                      L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) );
+                    L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_aux ),
+                    L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) );
                 move32();
             }
             e_reverb_energy_smooth = max_e;
@@ -1382,9 +1382,9 @@ void ivas_dirac_dec_decorr_process_fx(
 
             FOR( Word16 i = 0; i < len; i++ )
             {
-                h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( 
-                      L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ),
-                      L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) );
+                h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add(
+                    L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ),
+                    L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) );
                 move32();
             }
             e_direct_energy_smooth = max_x;
@@ -1394,7 +1394,7 @@ void ivas_dirac_dec_decorr_process_fx(
 
             // scaling energy buffers for better precision for higher values//
             q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) );
-            IF(q_shift != 0)
+            IF( q_shift != 0 )
             {
                 Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift );
                 h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift );
@@ -1407,8 +1407,8 @@ void ivas_dirac_dec_decorr_process_fx(
                 h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift );
                 move16();
             }
-            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth);
-            h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min(MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth);
+            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth );
+            h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth );
 
             e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth );
             e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth );
@@ -1464,7 +1464,7 @@ void ivas_dirac_dec_decorr_process_fx(
 
                         duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14
 
-                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 );                                 // q_frame_f
+                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 );         // q_frame_f
                         frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f
                         move32();
                         move32();
@@ -1486,7 +1486,7 @@ void ivas_dirac_dec_decorr_process_fx(
                         {
                             duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13
                         }
-                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 );                                 // q_frame_dec
+                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 );         // q_frame_dec
                         frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec
                         move32();
                         move32();
-- 
GitLab


From ff62d7455ef525a6f64ba288344b6532a76b2d56 Mon Sep 17 00:00:00 2001
From: Arthur <Arthur.tritthart@iis.fraunhofer.de>
Date: Tue, 10 Dec 2024 17:48:40 +0100
Subject: [PATCH 06/14] Fix pipeline issues due to inverse square root. Fix
 other exponent settings.

---
 lib_rend/ivas_dirac_dec_binaural_functions.c | 26 +++++++++-----------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 3bf7060fe..14b55a94f 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -83,7 +83,6 @@ Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
 #define EPSILON_EXP                ( -39 )
 #define ONE_DIV_EPSILON_MANT       1953125000 /* 1e+12 = 0,9094947*(2^40) */
 #define ONE_DIV_EPSILON_EXP        ( 40 )
-
 #endif
 #define ADAPT_HTPROTO_ROT_LIM_1 0.8f
 
@@ -5138,14 +5137,16 @@ static void eig2x2_fx(
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
-#ifndef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
+#if !defined(FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) || 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
             q_tmp2 = sub( 31, exp );
 #else
-            normVal_fx = ISqrt32( tmp3, &exp_tmp3 );
-            q_tmp2 = sub( 31, exp_tmp3 );
+            /* Note: this code path does not work yet (see pipeline issue for BASOP #1009), */
+            /* although the same code works in other places; mantissa and q_format are fine */
+            normVal_fx = ISqrt32( tmp3, &exp );
+            q_tmp2 = sub( 31, exp );
 #endif
             IF( LT_16( q_tmp1, q_c ) )
             {
@@ -5710,14 +5711,11 @@ static void chol2x2_fx(
             temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
             q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
 #else
-            Word32 my_temp;
-            Word16 my_q_tmp;
-            my_temp = temp;
-            my_q_tmp = q_tmp;
-            temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
-            q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
-            my_temp = Mpy_32_32( my_temp, ONE_DIV_EPSILON_MANT );
-            my_q_tmp = add( my_q_tmp, ONE_DIV_EPSILON_EXP );
+            Word16 norm = norm_l(temp);
+            temp = L_shl(temp, norm);
+            q_tmp = add(q_tmp, norm);
+            temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT );
+            q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP );
 #endif
         }
         ELSE
@@ -6120,11 +6118,9 @@ static void formulate2x2MixingMatrix_fx(
     // 4611686 = Q62
     IF( maxEne_fx == 0 )
     {
-        // maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, 4611686, &exp ); // 4611686 = 1e-12f in Q62
-        // q_maxEneDiv = add( sub( 31, exp ), sub( Q30, 62 ) );
         maxEneDiv_fx = ONE_DIV_EPSILON_MANT;
         move32();
-        q_maxEneDiv = ONE_DIV_EPSILON_EXP;
+        q_maxEneDiv = 31 - ONE_DIV_EPSILON_EXP;
         move16();
     }
     ELSE
-- 
GitLab


From 3ccde0ad7308ab1d704b117fcdd3af0e74feafce Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Wed, 11 Dec 2024 14:14:41 +0100
Subject: [PATCH 07/14] disable WMOPS in options.h

---
 lib_com/options.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index b62ba1b83..a0e901e21 100755
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -54,7 +54,7 @@
 
 #define SUPPORT_JBM_TRACEFILE                   /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */
 
-#define WMOPS                                   /* Activate complexity and memory counters */
+/*#define WMOPS*/                                   /* Activate complexity and memory counters */
 #ifdef WMOPS
 /*#define WMOPS_PER_FRAME*/                     /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */
 /*#define MEM_COUNT_DETAILS*/                   /* Output detailed memory analysis for the worst-case frame (writes to the file "mem_analysis.csv") */
-- 
GitLab


From 9597707aae3d5d35df73e707b174e5a63f1fe04c Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Wed, 11 Dec 2024 14:18:15 +0100
Subject: [PATCH 08/14] formatting

---
 lib_rend/ivas_dirac_dec_binaural_functions.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 14b55a94f..10a9ba029 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -5137,7 +5137,7 @@ static void eig2x2_fx(
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
-#if !defined(FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) || 1
+#if !defined( FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC ) || 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
             normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
@@ -5711,9 +5711,9 @@ static void chol2x2_fx(
             temp = BASOP_Util_Divide3232_Scale_cadence( temp, 4611686, &exp );
             q_tmp = add( sub( 31, exp ), sub( q_tmp, 62 ) );
 #else
-            Word16 norm = norm_l(temp);
-            temp = L_shl(temp, norm);
-            q_tmp = add(q_tmp, norm);
+            Word16 norm = norm_l( temp );
+            temp = L_shl( temp, norm );
+            q_tmp = add( q_tmp, norm );
             temp = Mpy_32_32( temp, ONE_DIV_EPSILON_MANT );
             q_tmp = sub( q_tmp, ONE_DIV_EPSILON_EXP );
 #endif
-- 
GitLab


From 327c77c902322bf064ff23da0d9c48da9ef2868f Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Wed, 11 Dec 2024 15:22:02 +0100
Subject: [PATCH 09/14] revert various whitespace changes

---
 lib_rend/ivas_dirac_dec_binaural_functions.c | 28 ++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/lib_rend/ivas_dirac_dec_binaural_functions.c b/lib_rend/ivas_dirac_dec_binaural_functions.c
index 10a9ba029..cb8d772d7 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions.c
@@ -1639,6 +1639,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
         st_ivas->cldfbSynDec[ch]->Q_cldfb_state = Q11;
         move16();
     }
+
     return;
 }
 #endif
@@ -2388,6 +2389,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
         }
     }
+
     /* Apply EQ at low bit rates */
     IF( applyLowBitRateEQ != 0 )
     {
@@ -2404,6 +2406,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move32();
         }
     }
+
     test();
     test();
     IF( ( EQ_32( ivas_format, SBA_FORMAT ) || EQ_32( ivas_format, SBA_ISM_FORMAT ) ) && EQ_16( nchan_transport, 2 ) )
@@ -2439,6 +2442,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
         }
     }
+
     /* Determine target covariance matrix containing target binaural properties */
     FOR( bin = 0; bin < nBins; bin++ )
     {
@@ -2534,6 +2538,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 move16();
                 gainCacheBaseIndex = add( 6, ismDirIndex );
             }
+
             diffuseness_fx = L_sub( diffuseness_fx, ratio_fx ); /* diffuseness = 1 - ratio1 - ratio2 */
 
             if ( diffuseness_fx < 0 )
@@ -2578,7 +2583,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 altSpreadCoh_fx = sub( 32767, shl_sat( div_s( numr, denr ), sub( den_e, num_e ) ) ); // 4289 = pi/6 in Q13
                 spreadCoh_fx = s_max( spreadCoh_fx, altSpreadCoh_fx );
             }
+
             getDirectPartGains_fx( bin, aziDeg, eleDeg, &lRealp_fx, &lImagp_fx, &rRealp_fx, &rImagp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex], isHeadtracked );
+
             Word16 q_lr = Q28;
             move16();
             if ( hDiracDecBin->renderStereoOutputInsteadOfBinaural )
@@ -2588,6 +2595,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 spreadCoh_fx = 0;
                 move32();
             }
+
             IF( spreadCoh_fx > 0 )
             {
                 Word32 centerMul_fx, sidesMul_fx;
@@ -2640,6 +2648,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
 
                 /* Apply the gain for the right source of the three coherent sources.
                  * -30 degrees to 330 wrapping due to internal functions. */
+
                 getDirectPartGains_fx( bin, aziDeg + 330, eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 2], isHeadtracked );
 
                 hrtfEneSides_fx = L_add( hrtfEneSides_fx,
@@ -2661,6 +2670,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 eneCorrectionFactor_fx = BASOP_Util_Divide3232_Scale( L_add( Mpy_32_32( hrtfEneSides_fx, Mpy_32_32( sidesMul_fx, sidesMul_fx ) ),
                                                                              Mpy_32_32( hrtfEneCenter_fx, Mpy_32_32( centerMul_fx, centerMul_fx ) ) ),
                                                                       L_max( 1, hrtfEneRealized_fx ), &eneCorrectionFactor_e );
+
                 /* Weighting factors to determine appropriate target spectrum for spread coherent sound */
                 IF( LT_16( spreadCoh_fx, 16384 ) )
                 {
@@ -2717,6 +2727,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                 q_lr = Q23;
                 move16();
             }
+
             hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 )
             hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
             move32();
@@ -2801,6 +2812,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             }
             move32();
         }
+
         /* Store parameters for formulating average diffuseness over frame */
         Word32 frameMeanDiffuseness = BASOP_Util_Add_Mant32Exp( hDiracDecBin->frameMeanDiffuseness_fx[bin], 2 /*Q29*/, diffEneValForDecorrelationReduction_fx, sub( 31, q_diffEneValForDecorrelationReduction ), &exp1 ); // exp = exp1
         frameMeanDiffusenessEneWeight_fx[bin] = L_add( frameMeanDiffusenessEneWeight_fx[bin], meanEnePerCh_fx );
@@ -2812,6 +2824,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
         hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
         move32();
     }
+
     test();
     /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
     IF( EQ_32( ivas_format, MASA_FORMAT ) && LT_32( ivas_total_brate, MASA_STEREO_MIN_BITRATE ) )
@@ -2914,6 +2927,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
             move16();
         }
     }
+
     return;
 }
 #endif
@@ -3145,6 +3159,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices(
             }
         }
     }
+
     return;
 }
 #else
@@ -3289,6 +3304,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
         }
         move32();
         move16();
+
         formulate2x2MixingMatrix_fx( hDiracDecBin->ChEne_fx[0][bin], hDiracDecBin->ChEne_fx[1][bin],
                                      hDiracDecBin->q_ChEne,
                                      hDiracDecBin->ChCrossRe_fx[bin], hDiracDecBin->ChCrossIm_fx[bin],
@@ -3698,6 +3714,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
     move16();
     minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
     minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );
+
     FOR( bin = 0; bin < nBins; bin++ )
     {
         FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -3737,6 +3754,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
             }
         }
     }
+
     return;
 }
 #endif
@@ -5069,6 +5087,7 @@ static void eig2x2_fx(
         move32();
         *q_U = Q31;
         move16();
+
         return;
     }
 
@@ -5085,6 +5104,7 @@ static void eig2x2_fx(
             move32();
             *q_U = Q30;
             move16();
+
             return;
         }
     }
@@ -5098,9 +5118,11 @@ static void eig2x2_fx(
             move32();
             *q_U = Q30;
             move16();
+
             return;
         }
     }
+
     q_U_1 = 0;
     q_U_2 = 0;
     move16();
@@ -5137,6 +5159,7 @@ static void eig2x2_fx(
             q_tmp2 = sub( 31, q_tmp2 );
 
             tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
+
 #if !defined( FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC ) || 1
             tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
             exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
@@ -5148,6 +5171,7 @@ static void eig2x2_fx(
             normVal_fx = ISqrt32( tmp3, &exp );
             q_tmp2 = sub( 31, exp );
 #endif
+
             IF( LT_16( q_tmp1, q_c ) )
             {
                 c_re = L_shr( c_re, sub( q_c, q_tmp1 ) );
@@ -5271,6 +5295,7 @@ static void eig2x2_fx(
             move16();
         }
     }
+
     IF( q_U_1 != 0 )
     *q_U = q_U_1;
     ELSE
@@ -6150,6 +6175,7 @@ static void formulate2x2MixingMatrix_fx(
 
     /* Cholesky decomposition of target / output covariance matrix */
     chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
+
     /* Eigendecomposition of input covariance matrix */
     eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx );
 
@@ -6403,6 +6429,7 @@ static void formulate2x2MixingMatrix_fx(
     }
 
     matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Ure_fx, Uim_fx, &q_U, Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
+
     /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
 #if ( BINAURAL_CHANNELS != 2 )
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -6540,6 +6567,7 @@ static void formulate2x2MixingMatrix_fx(
     }
 
     matrixMul_fx( KyRe_fx, KyIm_fx, &q_ky, Pre_fx, Pim_fx, &q_P, tmpRe_fx, tmpIm_fx, &q_temp );
+
     matrixTransp2Mul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Uxre_fx, Uxim_fx, &q_Ux, Mre_fx, Mim_fx, q_M );
 
     return;
-- 
GitLab


From aea094454c48147449583e4b2f65643e4daa97e6 Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Thu, 12 Dec 2024 15:56:31 +0100
Subject: [PATCH 10/14] move function declaration of
 BASOP_Util_Divide3232_Scale_FhG to lib_com/basop_util.h, use FOR instead of
 for

---
 lib_com/basop_util.c | 4 ++--
 lib_com/basop_util.h | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index d7fc7ec72..b34cc36d8 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1038,7 +1038,6 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
-Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits );
 Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits )
 {
     Word32 z;
@@ -1086,7 +1085,7 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
 
     z = L_sub( x, x ); // z = 0
 
-    for ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ )
+    FOR ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ )
     {
         if ( L_add( x, y ) >= 0 )
         {
@@ -1100,6 +1099,7 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
     {
         z = L_negate( z );
     }
+
     return L_shl( z, sub( 31, bits ) );
 }
 
diff --git a/lib_com/basop_util.h b/lib_com/basop_util.h
index 92994542e..6b68a5092 100644
--- a/lib_com/basop_util.h
+++ b/lib_com/basop_util.h
@@ -328,6 +328,12 @@ Word16 BASOP_Util_Divide3232_Scale( Word32 x,    /*!< i  : Numerator*/
                                     Word32 y,    /*!< i  : Denominator*/
                                     Word16 *s ); /*!< o  : Additional scalefactor difference*/
 
+
+Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x,      /*!< i  : Numerator*/
+                                        Word32 y,      /*!< i  : Denominator*/
+                                        Word16 *s,     /*!< o  : Additional scalefactor difference*/
+                                        Word16 bits ); /*!< i  : number of mantissa bits of result*/
+
 Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x,    /*!< i  : Numerator*/
                                             Word32 y,    /*!< i  : Denominator*/
                                             Word16 *s ); /*!< o  : Additional scalefactor difference*/
-- 
GitLab


From e7ca3356cf194c6aba5f1c20f55f296b6d0bf033 Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Thu, 12 Dec 2024 17:38:39 +0100
Subject: [PATCH 11/14] formatting

---
 lib_com/basop_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index b34cc36d8..20564cacb 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1085,7 +1085,7 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
 
     z = L_sub( x, x ); // z = 0
 
-    FOR ( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ )
+    FOR( iteration = (Word16) 0; iteration < (Word16) bits; iteration++ )
     {
         if ( L_add( x, y ) >= 0 )
         {
-- 
GitLab


From e5b78387c52c3b3c9ca76f74c4ef8d85de45a06d Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Fri, 13 Dec 2024 14:38:05 +0100
Subject: [PATCH 12/14] address formal issues + issue for x == 0 && y == 0 in
 get_min_scalefactor()

---
 lib_com/fft_fx.c  | 24 +++++++++++++++++++++---
 lib_com/options.h |  1 +
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/lib_com/fft_fx.c b/lib_com/fft_fx.c
index bd4a12415..67dd7b637 100644
--- a/lib_com/fft_fx.c
+++ b/lib_com/fft_fx.c
@@ -7262,7 +7262,7 @@ Word16 L_norm_arr( Word32 *arr, Word16 size )
     Word16 q = 31;
     move16();
     FOR( Word16 i = 0; i < size; i++ )
-#if 0
+#ifndef FIX_1009_OPT_L_NORM_ARR
     IF( arr[i] != 0 )
     {
         q = s_min( q, norm_l( arr[i] ) );
@@ -7270,9 +7270,12 @@ Word16 L_norm_arr( Word32 *arr, Word16 size )
 #else
     {
         Word16 q_tst;
+
         q_tst = norm_l( arr[i] );
         if ( arr[i] != 0 )
+        {
             q = s_min( q, q_tst );
+        }
     }
 
 #endif
@@ -7281,7 +7284,7 @@ Word16 L_norm_arr( Word32 *arr, Word16 size )
 
 Word16 get_min_scalefactor( Word32 x, Word32 y )
 {
-#if 0
+#ifndef FIX_1009_OPT_GETMINSCALEFAC
     Word16 scf = Q31;
     move16();
     test();
@@ -7299,13 +7302,28 @@ Word16 get_min_scalefactor( Word32 x, Word32 y )
     }
     return scf;
 #else
-    Word16 scf = Q31;
     Word16 scf_y;
+    Word16 scf = Q31;
+    move16();
+
+    test();
+    if ( x == 0 && y == 0 )
+    {
+        scf = 0;
+        move16();
+    }
+
     if ( x != 0 )
+    {
         scf = norm_l( x );
+    }
+
     scf_y = norm_l( y );
     if ( y != 0 )
+    {
         scf = s_min( scf_y, scf );
+    }
+
     return scf;
 #endif
 }
diff --git a/lib_com/options.h b/lib_com/options.h
index f5898306e..104e51ce0 100755
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -89,6 +89,7 @@
 
 #define FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC   /* FhG: Reduce workload of binaural rendering: replace 1./tmp & sqrt by Isqrt32 */
 #define FIX_1009_OPT_PARAMMC_RENDER             /* FhG: Optimize ivas_param_mc_dec_render_fx() */
+#define FIX_1009_OPT_GETMINSCALEFAC             /* FhG: Optimize get_min_scalefactor(), avoid IF */
                                                 /*      Replace computations with constants by setting of constants */
                                                 /*      Simplify matrix multiplications and some external helper routines */
 
-- 
GitLab


From c8f737edd3798e14645393d9daa6407f5ba2ed4f Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Fri, 13 Dec 2024 21:58:46 +0100
Subject: [PATCH 13/14] fix typo in variable name

---
 lib_dec/ivas_mc_param_dec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_mc_param_dec.c b/lib_dec/ivas_mc_param_dec.c
index 9768820c9..e3d8bf0c0 100644
--- a/lib_dec/ivas_mc_param_dec.c
+++ b/lib_dec/ivas_mc_param_dec.c
@@ -1997,7 +1997,7 @@ void ivas_param_mc_dec_render_fx(
         {
             if ( is_zero != 0 )
             {
-                hParamMC->h_output_synthesis_cov_state.mixing_matrix_res_exp[j] = 0;
+                hParamMC->h_output_synthesis_cov_state.mixing_matrix_exp[j] = 0;
                 move16();
             }
         }
-- 
GitLab


From 9e0eb11fdc2bdb6d8c3f6d2c38f52b37fc82c02c Mon Sep 17 00:00:00 2001
From: Markus Multrus <markus.multrus@iis.fraunhofer.de>
Date: Sun, 15 Dec 2024 19:10:34 +0100
Subject: [PATCH 14/14] BASOP_Util_Divide3232_Scale_FhG(): replace
 DEPR_L_add_c() by L_add_co(); currently inactive, since no counting in
 L_add_co()

---
 lib_com/basop_util.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/lib_com/basop_util.c b/lib_com/basop_util.c
index 20564cacb..04f0dc770 100644
--- a/lib_com/basop_util.c
+++ b/lib_com/basop_util.c
@@ -1038,6 +1038,8 @@ Word32 div_w( Word32 L_num, Word32 L_den )
     }
 }
 
+// replace deprecated L_add_c() by L_add_co(); currently disabled, because of missing counting in L_add_co();
+//#define REPLACE_DEPR_L_ADD_C
 Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bits )
 {
     Word32 z;
@@ -1046,9 +1048,15 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
     Word32 sign;
     Word16 iteration;
     Flag Carry;
+#ifdef REPLACE_DEPR_L_ADD_C
+    Flag Overflow;
+#endif
     Word16 s_val;
 
     unset_carry( &Carry );
+#ifdef REPLACE_DEPR_L_ADD_C
+    unset_overflow( &Overflow );
+#endif
 
     /* assert (x >= (Word32)0); */
     assert( y != (Word32) 0 );
@@ -1089,9 +1097,17 @@ Word32 BASOP_Util_Divide3232_Scale_FhG( Word32 x, Word32 y, Word16 *s, Word16 bi
     {
         if ( L_add( x, y ) >= 0 )
         {
+#ifdef REPLACE_DEPR_L_ADD_C
+            x = L_add_co( x, y, &Carry, &Overflow ); // sets always carry=1
+#else
             x = DEPR_L_add_c( x, y, &Carry ); // sets always carry=1
+#endif
         }
+#ifdef REPLACE_DEPR_L_ADD_C
+        z = L_add_co( z, z, &Carry, &Overflow ); // sets always carry=0
+#else
         z = DEPR_L_add_c( z, z, &Carry ); // sets always carry=0
+#endif
         x = L_add( x, x );
     }
 
-- 
GitLab