From 6ea437edcce69accf83794a92f0791bfe4b25438 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Fri, 18 Jul 2025 12:08:52 +0200
Subject: [PATCH 01/33] initial revision: Added two new functions
 biDiagonalReductionLeft_64() and biDiagonalReductionRight_64() to replace
 biDiagonalReductionLeft_fx() and biDiagonalReductionRight_fx().

---
 lib_dec/ivas_svd_dec_fx.c | 325 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 324 insertions(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index ba65c4b7b..d6d2480ee 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -29,7 +29,7 @@
    the United Nations Convention on Contracts on the International Sales of Goods.
 
 *******************************************************************************************************/
-
+#define	MYCHANGES
 #include <stdint.h>
 #include "options.h"
 #include "prot_fx.h"
@@ -65,7 +65,27 @@ static void HouseholderReduction_fx(
     const Word16 nChannelsC, /* Q0 */
     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     Word16 *eps_x_fx_e );
+#ifdef MYCHANGES
+static void biDiagonalReductionLeft_64(
+    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
+    Word16 bitwindow,
+    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
+    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
+    const Word16 nChannelsL,  /* Q0 */
+    const Word16 nChannelsC,  /* Q0 */
+    const Word16 currChannel /* Q0 */
+);
 
+static void biDiagonalReductionRight_64(
+    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
+    Word16 bitwindow,
+    const Word16 nChannelsL,  /* Q0 */
+    const Word16 nChannelsC,  /* Q0 */
+    const Word16 currChannel, /* Q0 */
+    Word32 *g, /* Q31 */
+    Word16 *g_e
+);
+#else
 static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
     Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
@@ -93,6 +113,7 @@ static void biDiagonalReductionRight_fx(
     Word16 *sig_x_e,
     Word32 *g /* Q31 */
 );            // Q31
+#endif
 
 static void singularVectorsAccumulationLeft_fx(
     Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) as Input, Q31 as output */
@@ -853,15 +874,88 @@ static void HouseholderReduction_fx(
     Word16 *eps_x_fx_e )
 {
     Word16 nCh;
+#ifdef	MYCHANGES
+
+	Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    Word32 g_fx = 0;
+    Word16 g_e = 0;
+    move32();
+    move16();
+
+#else
+
     // float g = 0.0f, sig_x = 0.0f;// to be removed
     Word32 g_fx = 0, sig_x_fx = 0;
     move32();
     move32();
     Word16 sig_x_fx_e = 0;
     move16();
+#endif
 
     Word16 iCh, jCh;
     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+#ifdef	 MYCHANGES
+	push_wmops("HouseholderReduction_fx 64");
+    FOR( jCh = 0; jCh < nChannelsL; jCh++ )
+    {
+        FOR( iCh = 0; iCh < nChannelsC; iCh++ )
+        {
+            singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32);
+        }
+    }
+    for (nCh=0;nCh<nChannelsC;nCh++)
+    {
+	    Word16 bitwindow;
+	    bitwindow=1;
+	    biDiagonalReductionLeft_64(
+			    singularVectors_Left_64,bitwindow,
+			    singularValues_fx,singularValues_fx_e,
+			    nChannelsL,
+			    nChannelsC,
+			    nCh
+			    );
+	    singularValues_fx_e[nCh]=add(singularVectors_Left_e,singularValues_fx_e[nCh]);
+	    secDiag_fx[nCh]=g_fx;
+	    move32();
+	    secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_e);
+	    bitwindow=2;
+	    biDiagonalReductionRight_64(
+			    singularVectors_Left_64,bitwindow,
+			    nChannelsL,
+			    nChannelsC,
+			    nCh,
+			    &g_fx,
+			    &g_e	
+			    );
+		{
+        		Word16 L_temp_e;
+		        Word32 L_temp;
+			L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
+			IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
+			{
+				*eps_x_fx = L_temp; /* exp(L_temp_e) */
+				move32();
+				*eps_x_fx_e = L_temp_e;
+				move32();
+			}
+		}
+    }	
+    {
+        int i,j;
+        for (j=0;j<nChannelsL;j++)
+        {
+            for (i=0;i<nChannelsC;i++)
+            {
+                Word16 n;
+                n=W_norm(singularVectors_Left_64[j][i]);
+                singularVectors_Left_fx[j][i]=W_extract_h(W_shl(singularVectors_Left_64[j][i],n));
+                singularVectors_Left_fx_e[j][i]=sub(add(32,singularVectors_Left_e),n);
+	    }
+        }
+    }
+	pop_wmops();
+#else
+
     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     {
         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -887,6 +981,7 @@ static void HouseholderReduction_fx(
             move32();
         }
     }
+#endif
 
     /* SingularVecotr Accumulation */
     singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_fx_e, secDiag_fx_e, nChannelsC );
@@ -897,6 +992,233 @@ static void HouseholderReduction_fx(
     return;
 }
 
+#ifdef	MYCHANGES
+/*-------------------------------------------------------------------------
+ * biDiagonalReductionLeft()
+ *
+ *
+ *-------------------------------------------------------------------------*/
+
+static void biDiagonalReductionLeft_64(
+    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
+    Word16 bitwindow,
+    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
+    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
+    const Word16 nChannelsL,  /* Q0 */
+    const Word16 nChannelsC,  /* Q0 */
+    const Word16 currChannel /* Q0 */
+)
+{
+
+
+
+    Word16 iCh, jCh;
+    Word32 norm_x, g;
+    Word16 norm_x_e, g_e;
+    Word64 norm_64;
+    g=0;
+    g_e=0;
+    move32();
+    move16();
+    norm_x=0;
+    move32();
+    IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
+    {
+
+        Word32 tmp;
+        norm_64=0;
+        move64();
+        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
+        {
+            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow));
+            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
+        }
+        norm_x_e=W_norm(norm_64);
+        norm_x=W_extract_h(W_shl(norm_64, norm_x_e ));
+        norm_x_e = add(sub(shl(bitwindow, 1), norm_x_e), 1 );
+    }
+    IF ( norm_x )
+    {
+        Word32 factor2;
+        Word16 tmp_e;
+        Word64 tmpmul;
+
+        Word64 r_64;      //  = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm
+        Word32 r, invVal;
+        Word16 r_e, invVal_e;
+
+        g_e = norm_x_e;
+        move16();
+        g = Sqrt32( norm_x, &g_e);
+        IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
+        {
+           g = L_negate( g );
+        }
+        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], bitwindow) );
+        tmp_e=sub( g_e, bitwindow) ;
+        tmpmul=W_mult0_32_32( g, factor2);
+        tmpmul=W_shl(tmpmul, tmp_e);
+        r_64=W_sub(tmpmul, norm_64);
+        r_e=W_norm(r_64 );
+        r=W_extract_h( W_shl( r_64, r_e ) );
+        r_e = sub( add( 1, add(bitwindow, bitwindow )), r_e );
+
+
+        invVal_e = r_e;
+        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);
+
+
+        tmp_e = add(31, sub(bitwindow, g_e ) );
+        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up.
+
+        bitwindow=add(bitwindow, 1); // so does the bit window
+        FOR ( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++)
+        {
+	    Word32 factor1;
+            Word32 factor2;
+            Word32 f;        // = norm / r
+//            Word16 f_e;      // not really needed
+
+            norm_64 = 0;
+            for ( jCh = currChannel; jCh<nChannelsL; jCh++ )
+            {
+                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow));
+                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow));
+                norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2));
+            }
+            norm_x_e = W_norm( norm_64);
+            norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
+            f = Mpy_32_32( norm_x, invVal );
+            FOR ( jCh = currChannel; jCh < nChannelsL; jCh++ )
+            {
+                 Word16 magic_shift;
+                 magic_shift = add( add( norm_x_e, 23),  r_e);
+                 factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) );
+                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) );
+            }
+        }
+    }
+    singularValues[currChannel] = g;
+    singularValues_e[currChannel] = g_e;
+    move32();
+    move16();
+}
+
+/*-------------------------------------------------------------------------
+ * biDiagonalReductionRight()
+ *
+ *
+ *-------------------------------------------------------------------------*/
+
+static void biDiagonalReductionRight_64(
+    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
+    Word16 bitwindow,
+    const Word16 nChannelsL,  /* Q0 */
+    const Word16 nChannelsC,  /* Q0 */
+    const Word16 currChannel, /* Q0 */
+    Word32 *g, /* Q31 */
+    Word16 *g_e
+)
+{
+    Word16 iCh, jCh;
+    Word32 norm_x;
+    Word16 norm_x_e;
+    Word64 norm_64;
+    Word16 idx;
+
+
+
+
+    ( *g ) =0;
+    ( *g_e ) = 0;
+    move32();
+    move16();
+    IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
+    {
+        norm_64=0;
+        move64();
+        idx = add( currChannel, 1);
+        FOR ( jCh = idx; jCh < nChannelsC; jCh++ )
+        {
+            Word32 tmp;
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
+            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) );
+        }
+        norm_x_e = W_norm( norm_64);
+        norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
+        norm_x_e = add( sub( shl( bitwindow, 1), norm_x_e), 1);
+        move16();
+
+        IF ( norm_x )
+        {
+            Word32 factor1;
+            Word32 factor2;
+            Word16 tmp_e;
+            Word64 tmpmul;
+            Word32 tmp_g;
+            Word16 tmp_g_e;
+            Word16 magic_shift;
+            Word64 r_64;
+            Word32 r;
+            Word16 r_e;
+            Word32 f;
+            Word32 invVal;
+            Word16 invVal_e;
+
+            tmp_g_e = norm_x_e;
+            move16();
+            tmp_g = Sqrt32( norm_x, &tmp_g_e);
+            IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) )
+            {
+                tmp_g = L_negate( tmp_g);
+            }
+            *g = tmp_g;
+            *g_e = tmp_g_e;
+            move32();
+            move16();
+            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
+            tmp_e = sub( tmp_g_e, bitwindow);
+            tmpmul = W_mult0_32_32( tmp_g, factor2);
+            tmpmul = W_shl(tmpmul, tmp_e);
+            r_64 = W_sub( tmpmul, norm_64 );
+            r_e = W_norm( r_64);
+            r = W_extract_h( W_shl( r_64, r_e) );
+
+            invVal_e = 0;
+            move16();
+            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e);
+
+            magic_shift=32-tmp_g_e;
+            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );
+            bitwindow=add(bitwindow, 1);
+	
+            FOR( iCh = idx; iCh < nChannelsL; iCh++ )
+            {
+
+                norm_64 = 0;
+                move64();
+                FOR ( jCh = idx; jCh<nChannelsC; jCh++ )
+                {
+                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
+                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
+                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) );
+                }
+
+                norm_x_e = W_norm( norm_64);
+                norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
+                f = Mpy_32_32( norm_x, invVal);
+		magic_shift = 25+norm_x_e-  r_e ;	// FIXME: Why does this work?
+		
+                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
+                {
+                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
+                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
+                }
+            }
+        }
+    }
+}
+#else
 /*-------------------------------------------------------------------------
  * biDiagonalReductionLeft()
  *
@@ -1203,6 +1525,7 @@ static void biDiagonalReductionRight_fx(
 
     return;
 }
+#endif
 
 /*-------------------------------------------------------------------------
  * singularVectorsAccumulationLeft()
-- 
GitLab


From 93c38c5d0e2def289b8cbbe2a9caec699d6b7813 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Fri, 18 Jul 2025 12:20:17 +0200
Subject: [PATCH 02/33] Brought the code to the same level as the
 svd_optimization_branch.

---
 lib_dec/ivas_svd_dec_fx.c | 67 ++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index d6d2480ee..bbb4e83df 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -69,11 +69,11 @@ static void HouseholderReduction_fx(
 static void biDiagonalReductionLeft_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
     Word16 bitwindow,
-    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
-    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
-    const Word16 currChannel /* Q0 */
+    const Word16 currChannel, /* Q0 */
+    Word32 *g, /* Q31 */
+    Word16 *g_e
 );
 
 static void biDiagonalReductionRight_64(
@@ -876,9 +876,13 @@ static void HouseholderReduction_fx(
     Word16 nCh;
 #ifdef	MYCHANGES
 
-	Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-    Word32 g_fx = 0;
-    Word16 g_e = 0;
+    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+    Word32 g_left_fx = 0;
+    Word16 g_left_e = 0;
+    move32();
+    move16();
+    Word32 g_right_fx = 0;
+    Word16 g_right_e = 0;
     move32();
     move16();
 
@@ -895,7 +899,6 @@ static void HouseholderReduction_fx(
     Word16 iCh, jCh;
     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 #ifdef	 MYCHANGES
-	push_wmops("HouseholderReduction_fx 64");
     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     {
         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
@@ -909,23 +912,26 @@ static void HouseholderReduction_fx(
 	    bitwindow=1;
 	    biDiagonalReductionLeft_64(
 			    singularVectors_Left_64,bitwindow,
-			    singularValues_fx,singularValues_fx_e,
 			    nChannelsL,
 			    nChannelsC,
-			    nCh
+			    nCh,
+                            &g_left_fx,
+                            &g_left_e
 			    );
-	    singularValues_fx_e[nCh]=add(singularVectors_Left_e,singularValues_fx_e[nCh]);
-	    secDiag_fx[nCh]=g_fx;
+	    singularValues_fx[nCh]=g_left_fx;
+            move32();
+	    singularValues_fx_e[nCh]=add(singularVectors_Left_e,g_left_e);
+	    secDiag_fx[nCh]=g_right_fx; /* from the previous channel */
 	    move32();
-	    secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_e);
+	    secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_right_e);
 	    bitwindow=2;
 	    biDiagonalReductionRight_64(
 			    singularVectors_Left_64,bitwindow,
 			    nChannelsL,
 			    nChannelsC,
 			    nCh,
-			    &g_fx,
-			    &g_e	
+			    &g_right_fx,
+			    &g_right_e	
 			    );
 		{
         		Word16 L_temp_e;
@@ -953,7 +959,6 @@ static void HouseholderReduction_fx(
 	    }
         }
     }
-	pop_wmops();
 #else
 
     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
@@ -1002,22 +1007,22 @@ static void HouseholderReduction_fx(
 static void biDiagonalReductionLeft_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
     Word16 bitwindow,
-    Word32 singularValues[MAX_OUTPUT_CHANNELS],    /* exp(singularValues_e) */
-    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
-    const Word16 currChannel /* Q0 */
+    const Word16 currChannel, /* Q0 */
+    Word32 *g,
+    Word16 *g_e
 )
 {
 
 
     Word16 iCh, jCh;
-    Word32 norm_x, g;
-    Word16 norm_x_e, g_e;
+    Word32 norm_x;
+    Word16 norm_x_e;
     Word64 norm_64;
-    g=0;
-    g_e=0;
+    ( *g )=0;
+    ( *g_e) =0;
     move32();
     move16();
     norm_x=0;
@@ -1047,16 +1052,16 @@ static void biDiagonalReductionLeft_64(
         Word32 r, invVal;
         Word16 r_e, invVal_e;
 
-        g_e = norm_x_e;
+        ( *g_e ) = norm_x_e;
         move16();
-        g = Sqrt32( norm_x, &g_e);
+        ( *g ) = Sqrt32( norm_x, g_e);
         IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
         {
-           g = L_negate( g );
+           ( *g ) = L_negate( *g );
         }
         factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], bitwindow) );
-        tmp_e=sub( g_e, bitwindow) ;
-        tmpmul=W_mult0_32_32( g, factor2);
+        tmp_e=sub( ( *g_e ), bitwindow) ;
+        tmpmul=W_mult0_32_32( ( *g ), factor2);
         tmpmul=W_shl(tmpmul, tmp_e);
         r_64=W_sub(tmpmul, norm_64);
         r_e=W_norm(r_64 );
@@ -1068,8 +1073,8 @@ static void biDiagonalReductionLeft_64(
         invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);
 
 
-        tmp_e = add(31, sub(bitwindow, g_e ) );
-        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( g), tmp_e) ); // here, the exponent goes up.
+        tmp_e = add(31, sub(bitwindow, *g_e ) );
+        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e) ); // here, the exponent goes up.
 
         bitwindow=add(bitwindow, 1); // so does the bit window
         FOR ( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++)
@@ -1098,10 +1103,6 @@ static void biDiagonalReductionLeft_64(
             }
         }
     }
-    singularValues[currChannel] = g;
-    singularValues_e[currChannel] = g_e;
-    move32();
-    move16();
 }
 
 /*-------------------------------------------------------------------------
-- 
GitLab


From e3a5ad688dccbc68eedaa65a5fdd244ae7f0dfd1 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Fri, 18 Jul 2025 12:30:18 +0200
Subject: [PATCH 03/33] removed the (no longer needed) "bitwindow" parameter
 and replaced it with HEADROOM_* macros.

---
 lib_dec/ivas_svd_dec_fx.c | 53 +++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index bbb4e83df..603b38732 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -68,7 +68,6 @@ static void HouseholderReduction_fx(
 #ifdef MYCHANGES
 static void biDiagonalReductionLeft_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
-    Word16 bitwindow,
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
@@ -78,7 +77,6 @@ static void biDiagonalReductionLeft_64(
 
 static void biDiagonalReductionRight_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
-    Word16 bitwindow,
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
@@ -908,10 +906,8 @@ static void HouseholderReduction_fx(
     }
     for (nCh=0;nCh<nChannelsC;nCh++)
     {
-	    Word16 bitwindow;
-	    bitwindow=1;
 	    biDiagonalReductionLeft_64(
-			    singularVectors_Left_64,bitwindow,
+			    singularVectors_Left_64,
 			    nChannelsL,
 			    nChannelsC,
 			    nCh,
@@ -924,9 +920,8 @@ static void HouseholderReduction_fx(
 	    secDiag_fx[nCh]=g_right_fx; /* from the previous channel */
 	    move32();
 	    secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_right_e);
-	    bitwindow=2;
 	    biDiagonalReductionRight_64(
-			    singularVectors_Left_64,bitwindow,
+			    singularVectors_Left_64,
 			    nChannelsL,
 			    nChannelsC,
 			    nCh,
@@ -1006,7 +1001,6 @@ static void HouseholderReduction_fx(
 
 static void biDiagonalReductionLeft_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
-    Word16 bitwindow,
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
@@ -1015,7 +1009,8 @@ static void biDiagonalReductionLeft_64(
 )
 {
 
-
+#define	HEADROOM_LEFT_1		1
+#define	HEADROOM_LEFT_2		(HEADROOM_LEFT_1+1)
 
     Word16 iCh, jCh;
     Word32 norm_x;
@@ -1035,12 +1030,12 @@ static void biDiagonalReductionLeft_64(
         move64();
         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],bitwindow));
+            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],HEADROOM_LEFT_1));
             norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
         }
         norm_x_e=W_norm(norm_64);
         norm_x=W_extract_h(W_shl(norm_64, norm_x_e ));
-        norm_x_e = add(sub(shl(bitwindow, 1), norm_x_e), 1 );
+        norm_x_e = add(sub((HEADROOM_LEFT_1+HEADROOM_LEFT_1), norm_x_e), 1 );
     }
     IF ( norm_x )
     {
@@ -1059,24 +1054,23 @@ static void biDiagonalReductionLeft_64(
         {
            ( *g ) = L_negate( *g );
         }
-        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], bitwindow) );
-        tmp_e=sub( ( *g_e ), bitwindow) ;
+        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1) );
+        tmp_e=sub( ( *g_e ), HEADROOM_LEFT_1);
         tmpmul=W_mult0_32_32( ( *g ), factor2);
         tmpmul=W_shl(tmpmul, tmp_e);
         r_64=W_sub(tmpmul, norm_64);
         r_e=W_norm(r_64 );
         r=W_extract_h( W_shl( r_64, r_e ) );
-        r_e = sub( add( 1, add(bitwindow, bitwindow )), r_e );
+        r_e = sub( add( 1, (HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );
 
 
         invVal_e = r_e;
         invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);
 
 
-        tmp_e = add(31, sub(bitwindow, *g_e ) );
+        tmp_e = add(31, sub(HEADROOM_LEFT_1, *g_e ) );
         singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e) ); // here, the exponent goes up.
 
-        bitwindow=add(bitwindow, 1); // so does the bit window
         FOR ( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++)
         {
 	    Word32 factor1;
@@ -1087,8 +1081,8 @@ static void biDiagonalReductionLeft_64(
             norm_64 = 0;
             for ( jCh = currChannel; jCh<nChannelsL; jCh++ )
             {
-                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], bitwindow));
-                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], bitwindow));
+                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2));
+                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_2));
                 norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2));
             }
             norm_x_e = W_norm( norm_64);
@@ -1098,7 +1092,7 @@ static void biDiagonalReductionLeft_64(
             {
                  Word16 magic_shift;
                  magic_shift = add( add( norm_x_e, 23),  r_e);
-                 factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], bitwindow ) );
+                 factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
                  singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1113,7 +1107,6 @@ static void biDiagonalReductionLeft_64(
 
 static void biDiagonalReductionRight_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
-    Word16 bitwindow,
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
@@ -1127,7 +1120,8 @@ static void biDiagonalReductionRight_64(
     Word64 norm_64;
     Word16 idx;
 
-
+#define	HEADROOM_RIGHT_1	2
+#define	HEADROOM_RIGHT_2	(HEADROOM_RIGHT_1+1)
 
 
     ( *g ) =0;
@@ -1142,12 +1136,12 @@ static void biDiagonalReductionRight_64(
         FOR ( jCh = idx; jCh < nChannelsC; jCh++ )
         {
             Word32 tmp;
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1) );
             norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) );
         }
         norm_x_e = W_norm( norm_64);
         norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
-        norm_x_e = add( sub( shl( bitwindow, 1), norm_x_e), 1);
+        norm_x_e = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e), 1);
         move16();
 
         IF ( norm_x )
@@ -1177,8 +1171,8 @@ static void biDiagonalReductionRight_64(
             *g_e = tmp_g_e;
             move32();
             move16();
-            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], bitwindow) );
-            tmp_e = sub( tmp_g_e, bitwindow);
+            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1) );
+            tmp_e = sub( tmp_g_e, HEADROOM_RIGHT_1);
             tmpmul = W_mult0_32_32( tmp_g, factor2);
             tmpmul = W_shl(tmpmul, tmp_e);
             r_64 = W_sub( tmpmul, norm_64 );
@@ -1190,8 +1184,7 @@ static void biDiagonalReductionRight_64(
             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e);
 
             magic_shift=32-tmp_g_e;
-            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );
-            bitwindow=add(bitwindow, 1);
+            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );// here, the exponent goes up
 	
             FOR( iCh = idx; iCh < nChannelsL; iCh++ )
             {
@@ -1200,8 +1193,8 @@ static void biDiagonalReductionRight_64(
                 move64();
                 FOR ( jCh = idx; jCh<nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], bitwindow) );
-                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
+                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_2) );
+                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2) );
                     norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) );
                 }
 
@@ -1212,7 +1205,7 @@ static void biDiagonalReductionRight_64(
 		
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], bitwindow) );
+                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2) );
                     singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
                 }
             }
-- 
GitLab


From 240563be60b1dfae117685556caef1bdadd816ac Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Fri, 18 Jul 2025 13:06:26 +0200
Subject: [PATCH 04/33] applied the clang-format patch and renamed the
 MYCHANGES define to MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE

---
 lib_com/options.h         |   1 +
 lib_dec/ivas_svd_dec_fx.c | 271 +++++++++++++++++++-------------------
 2 files changed, 133 insertions(+), 139 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index 14b1c5eb8..fe2791bb3 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -148,4 +148,5 @@
 #define FIX_1824
 #define FIX_1822
 
+#define	MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE /* FhG: reduce WMOPS of HouseHolderReduction() in ivas_svd_dec.c() by removing redundant mathematics and using 64 bit additions.*/
 #endif
diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 603b38732..d9e619ed8 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -29,7 +29,6 @@
    the United Nations Convention on Contracts on the International Sales of Goods.
 
 *******************************************************************************************************/
-#define	MYCHANGES
 #include <stdint.h>
 #include "options.h"
 #include "prot_fx.h"
@@ -65,24 +64,22 @@ static void HouseholderReduction_fx(
     const Word16 nChannelsC, /* Q0 */
     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     Word16 *eps_x_fx_e );
-#ifdef MYCHANGES
+#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
 static void biDiagonalReductionLeft_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
-    Word32 *g, /* Q31 */
-    Word16 *g_e
-);
+    Word32 *g,                /* Q31 */
+    Word16 *g_e );
 
 static void biDiagonalReductionRight_64(
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
-    Word32 *g, /* Q31 */
-    Word16 *g_e
-);
+    Word32 *g,                /* Q31 */
+    Word16 *g_e );
 #else
 static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
@@ -872,7 +869,7 @@ static void HouseholderReduction_fx(
     Word16 *eps_x_fx_e )
 {
     Word16 nCh;
-#ifdef	MYCHANGES
+#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
 
     Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     Word32 g_left_fx = 0;
@@ -896,62 +893,60 @@ static void HouseholderReduction_fx(
 
     Word16 iCh, jCh;
     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
-#ifdef	 MYCHANGES
+#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     {
         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
         {
-            singularVectors_Left_64[jCh][iCh] = W_shr(W_deposit32_h(singularVectors_Left_fx[jCh][iCh]),32);
+            singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 );
         }
     }
-    for (nCh=0;nCh<nChannelsC;nCh++)
+    for ( nCh = 0; nCh < nChannelsC; nCh++ )
     {
-	    biDiagonalReductionLeft_64(
-			    singularVectors_Left_64,
-			    nChannelsL,
-			    nChannelsC,
-			    nCh,
-                            &g_left_fx,
-                            &g_left_e
-			    );
-	    singularValues_fx[nCh]=g_left_fx;
-            move32();
-	    singularValues_fx_e[nCh]=add(singularVectors_Left_e,g_left_e);
-	    secDiag_fx[nCh]=g_right_fx; /* from the previous channel */
-	    move32();
-	    secDiag_fx_e[nCh]=add(singularVectors_Left_e,g_right_e);
-	    biDiagonalReductionRight_64(
-			    singularVectors_Left_64,
-			    nChannelsL,
-			    nChannelsC,
-			    nCh,
-			    &g_right_fx,
-			    &g_right_e	
-			    );
-		{
-        		Word16 L_temp_e;
-		        Word32 L_temp;
-			L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
-			IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
-			{
-				*eps_x_fx = L_temp; /* exp(L_temp_e) */
-				move32();
-				*eps_x_fx_e = L_temp_e;
-				move32();
-			}
-		}
-    }	
+        biDiagonalReductionLeft_64(
+            singularVectors_Left_64,
+            nChannelsL,
+            nChannelsC,
+            nCh,
+            &g_left_fx,
+            &g_left_e );
+        singularValues_fx[nCh] = g_left_fx;
+        move32();
+        singularValues_fx_e[nCh] = add( singularVectors_Left_e, g_left_e );
+        secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
+        move32();
+        secDiag_fx_e[nCh] = add( singularVectors_Left_e, g_right_e );
+        biDiagonalReductionRight_64(
+            singularVectors_Left_64,
+            nChannelsL,
+            nChannelsC,
+            nCh,
+            &g_right_fx,
+            &g_right_e );
+        {
+            Word16 L_temp_e;
+            Word32 L_temp;
+            L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
+            IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
+            {
+                *eps_x_fx = L_temp; /* exp(L_temp_e) */
+                move32();
+                *eps_x_fx_e = L_temp_e;
+                move32();
+            }
+        }
+    }
     {
-        int i,j;
-        for (j=0;j<nChannelsL;j++)
+        int i, j;
+        for ( j = 0; j < nChannelsL; j++ )
         {
-            for (i=0;i<nChannelsC;i++)
+            for ( i = 0; i < nChannelsC; i++ )
             {
                 Word16 n;
-                n=W_norm(singularVectors_Left_64[j][i]);
-                singularVectors_Left_fx[j][i]=W_extract_h(W_shl(singularVectors_Left_64[j][i],n));
-                singularVectors_Left_fx_e[j][i]=sub(add(32,singularVectors_Left_e),n);
-	    }
+                n = W_norm( singularVectors_Left_64[j][i] );
+                singularVectors_Left_fx[j][i] = W_extract_h( W_shl( singularVectors_Left_64[j][i], n ) );
+                singularVectors_Left_fx_e[j][i] = sub( add( 32, singularVectors_Left_e ), n );
+            }
         }
     }
 #else
@@ -992,7 +987,7 @@ static void HouseholderReduction_fx(
     return;
 }
 
-#ifdef	MYCHANGES
+#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
 /*-------------------------------------------------------------------------
  * biDiagonalReductionLeft()
  *
@@ -1005,95 +1000,94 @@ static void biDiagonalReductionLeft_64(
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
     Word32 *g,
-    Word16 *g_e
-)
+    Word16 *g_e )
 {
 
-#define	HEADROOM_LEFT_1		1
-#define	HEADROOM_LEFT_2		(HEADROOM_LEFT_1+1)
+#define HEADROOM_LEFT_1 1
+#define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 )
 
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
     Word64 norm_64;
-    ( *g )=0;
-    ( *g_e) =0;
+    ( *g ) = 0;
+    ( *g_e ) = 0;
     move32();
     move16();
-    norm_x=0;
+    norm_x = 0;
     move32();
     IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     {
 
         Word32 tmp;
-        norm_64=0;
+        norm_64 = 0;
         move64();
         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-            tmp=W_extract_l(W_shr(singularVectors_Left_64[jCh][currChannel],HEADROOM_LEFT_1));
-            norm_64=W_add(norm_64,W_mult0_32_32(tmp,tmp));
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );
+            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
         }
-        norm_x_e=W_norm(norm_64);
-        norm_x=W_extract_h(W_shl(norm_64, norm_x_e ));
-        norm_x_e = add(sub((HEADROOM_LEFT_1+HEADROOM_LEFT_1), norm_x_e), 1 );
+        norm_x_e = W_norm( norm_64 );
+        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
+        norm_x_e = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 );
     }
-    IF ( norm_x )
+    IF( norm_x )
     {
         Word32 factor2;
         Word16 tmp_e;
         Word64 tmpmul;
 
-        Word64 r_64;      //  = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm
+        Word64 r_64; //  = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm
         Word32 r, invVal;
         Word16 r_e, invVal_e;
 
         ( *g_e ) = norm_x_e;
         move16();
-        ( *g ) = Sqrt32( norm_x, g_e);
-        IF ( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
+        ( *g ) = Sqrt32( norm_x, g_e );
+        IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
         {
-           ( *g ) = L_negate( *g );
+            ( *g ) = L_negate( *g );
         }
-        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1) );
-        tmp_e=sub( ( *g_e ), HEADROOM_LEFT_1);
-        tmpmul=W_mult0_32_32( ( *g ), factor2);
-        tmpmul=W_shl(tmpmul, tmp_e);
-        r_64=W_sub(tmpmul, norm_64);
-        r_e=W_norm(r_64 );
-        r=W_extract_h( W_shl( r_64, r_e ) );
-        r_e = sub( add( 1, (HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );
+        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) );
+        tmp_e = sub( ( *g_e ), HEADROOM_LEFT_1 );
+        tmpmul = W_mult0_32_32( ( *g ), factor2 );
+        tmpmul = W_shl( tmpmul, tmp_e );
+        r_64 = W_sub( tmpmul, norm_64 );
+        r_e = W_norm( r_64 );
+        r = W_extract_h( W_shl( r_64, r_e ) );
+        r_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );
 
 
         invVal_e = r_e;
-        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e);
+        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e );
 
 
-        tmp_e = add(31, sub(HEADROOM_LEFT_1, *g_e ) );
-        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e) ); // here, the exponent goes up.
+        tmp_e = add( 31, sub( HEADROOM_LEFT_1, *g_e ) );
+        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up.
 
-        FOR ( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++)
+        FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
         {
-	    Word32 factor1;
+            Word32 factor1;
             Word32 factor2;
-            Word32 f;        // = norm / r
-//            Word16 f_e;      // not really needed
+            Word32 f; // = norm / r
+                      //            Word16 f_e;      // not really needed
 
             norm_64 = 0;
-            for ( jCh = currChannel; jCh<nChannelsL; jCh++ )
+            for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2));
-                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_2));
-                norm_64 = W_add( norm_64, W_mult0_32_32(factor1, factor2));
+                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
+                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_2 ) );
+                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
             }
-            norm_x_e = W_norm( norm_64);
-            norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
+            norm_x_e = W_norm( norm_64 );
+            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            FOR ( jCh = currChannel; jCh < nChannelsL; jCh++ )
+            FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                 Word16 magic_shift;
-                 magic_shift = add( add( norm_x_e, 23),  r_e);
-                 factor1 = W_extract_l( W_shr(singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
-                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr(W_mult0_32_32( f, factor1 ), magic_shift ) );
+                Word16 magic_shift;
+                magic_shift = add( add( norm_x_e, 23 ), r_e );
+                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
+                singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
     }
@@ -1110,9 +1104,8 @@ static void biDiagonalReductionRight_64(
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
-    Word32 *g, /* Q31 */
-    Word16 *g_e
-)
+    Word32 *g,                /* Q31 */
+    Word16 *g_e )
 {
     Word16 iCh, jCh;
     Word32 norm_x;
@@ -1120,31 +1113,31 @@ static void biDiagonalReductionRight_64(
     Word64 norm_64;
     Word16 idx;
 
-#define	HEADROOM_RIGHT_1	2
-#define	HEADROOM_RIGHT_2	(HEADROOM_RIGHT_1+1)
+#define HEADROOM_RIGHT_1 2
+#define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 )
 
 
-    ( *g ) =0;
+    ( *g ) = 0;
     ( *g_e ) = 0;
     move32();
     move16();
-    IF ( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
+    IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
     {
-        norm_64=0;
+        norm_64 = 0;
         move64();
-        idx = add( currChannel, 1);
-        FOR ( jCh = idx; jCh < nChannelsC; jCh++ )
+        idx = add( currChannel, 1 );
+        FOR( jCh = idx; jCh < nChannelsC; jCh++ )
         {
             Word32 tmp;
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1) );
-            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp) );
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );
+            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
         }
-        norm_x_e = W_norm( norm_64);
-        norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
-        norm_x_e = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e), 1);
+        norm_x_e = W_norm( norm_64 );
+        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
+        norm_x_e = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );
         move16();
 
-        IF ( norm_x )
+        IF( norm_x )
         {
             Word32 factor1;
             Word32 factor2;
@@ -1162,51 +1155,51 @@ static void biDiagonalReductionRight_64(
 
             tmp_g_e = norm_x_e;
             move16();
-            tmp_g = Sqrt32( norm_x, &tmp_g_e);
-            IF ( GE_64( singularVectors_Left_64[currChannel][idx],0 ) )
+            tmp_g = Sqrt32( norm_x, &tmp_g_e );
+            IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) )
             {
-                tmp_g = L_negate( tmp_g);
+                tmp_g = L_negate( tmp_g );
             }
             *g = tmp_g;
             *g_e = tmp_g_e;
             move32();
             move16();
-            factor2=W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1) );
-            tmp_e = sub( tmp_g_e, HEADROOM_RIGHT_1);
-            tmpmul = W_mult0_32_32( tmp_g, factor2);
-            tmpmul = W_shl(tmpmul, tmp_e);
+            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) );
+            tmp_e = sub( tmp_g_e, HEADROOM_RIGHT_1 );
+            tmpmul = W_mult0_32_32( tmp_g, factor2 );
+            tmpmul = W_shl( tmpmul, tmp_e );
             r_64 = W_sub( tmpmul, norm_64 );
-            r_e = W_norm( r_64);
-            r = W_extract_h( W_shl( r_64, r_e) );
+            r_e = W_norm( r_64 );
+            r = W_extract_h( W_shl( r_64, r_e ) );
 
             invVal_e = 0;
             move16();
-            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e);
+            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
+
+            magic_shift = 32 - tmp_g_e;
+            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g ), magic_shift ) ); // here, the exponent goes up
 
-            magic_shift=32-tmp_g_e;
-            singularVectors_Left_64[currChannel][idx]=W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g), magic_shift) );// here, the exponent goes up
-	
             FOR( iCh = idx; iCh < nChannelsL; iCh++ )
             {
 
                 norm_64 = 0;
                 move64();
-                FOR ( jCh = idx; jCh<nChannelsC; jCh++ )
+                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_l(W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_2) );
-                    factor2 = W_extract_l(W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2) );
-                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2) );
+                    factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_2 ) );
+                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
+                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
                 }
 
-                norm_x_e = W_norm( norm_64);
-                norm_x = W_extract_h( W_shl( norm_64, norm_x_e) );
-                f = Mpy_32_32( norm_x, invVal);
-		magic_shift = 25+norm_x_e-  r_e ;	// FIXME: Why does this work?
-		
+                norm_x_e = W_norm( norm_64 );
+                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
+                f = Mpy_32_32( norm_x, invVal );
+                magic_shift = 25 + norm_x_e - r_e; // FIXME: Why does this work?
+
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2) );
-                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2), magic_shift) );
+                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
+                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                 }
             }
         }
-- 
GitLab


From 60ceb05fc22d4f18dc14645e809fb3403b95abf2 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Mon, 21 Jul 2025 17:16:35 +0200
Subject: [PATCH 05/33] Code cleanup in biDiagonalReductionLeft_64() and
 biDiagonalReductionRight_64() revealed some accuracy issues.

---
 lib_dec/ivas_svd_dec_fx.c | 43 ++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index d9e619ed8..f98020eb8 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1005,7 +1005,6 @@ static void biDiagonalReductionLeft_64(
 
 #define HEADROOM_LEFT_1 1
 #define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 )
-
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
@@ -1029,7 +1028,6 @@ static void biDiagonalReductionLeft_64(
         }
         norm_x_e = W_norm( norm_64 );
         norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
-        norm_x_e = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 );
     }
     IF( norm_x )
     {
@@ -1041,7 +1039,7 @@ static void biDiagonalReductionLeft_64(
         Word32 r, invVal;
         Word16 r_e, invVal_e;
 
-        ( *g_e ) = norm_x_e;
+        ( *g_e ) = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 );
         move16();
         ( *g ) = Sqrt32( norm_x, g_e );
         IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
@@ -1049,16 +1047,15 @@ static void biDiagonalReductionLeft_64(
             ( *g ) = L_negate( *g );
         }
         factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) );
-        tmp_e = sub( ( *g_e ), HEADROOM_LEFT_1 );
+	tmp_e = shr(sub(norm_x_e,1),1);
         tmpmul = W_mult0_32_32( ( *g ), factor2 );
-        tmpmul = W_shl( tmpmul, tmp_e );
+        tmpmul = W_shr( tmpmul, tmp_e );
         r_64 = W_sub( tmpmul, norm_64 );
         r_e = W_norm( r_64 );
         r = W_extract_h( W_shl( r_64, r_e ) );
-        r_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );
 
 
-        invVal_e = r_e;
+        invVal_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );
         invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e );
 
 
@@ -1067,6 +1064,7 @@ static void biDiagonalReductionLeft_64(
 
         FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
         {
+            Word16 magic_shift;
             Word32 factor1;
             Word32 factor2;
             Word32 f; // = norm / r
@@ -1082,11 +1080,10 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
+	    magic_shift=31+norm_x_e-r_e;
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                Word16 magic_shift;
-                magic_shift = add( add( norm_x_e, 23 ), r_e );
-                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
+                factor1 = W_extract_l( singularVectors_Left_64[jCh][currChannel] );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1113,8 +1110,9 @@ static void biDiagonalReductionRight_64(
     Word64 norm_64;
     Word16 idx;
 
-#define HEADROOM_RIGHT_1 2
+#define HEADROOM_RIGHT_1 1
 #define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 )
+#define HEADROOM_RIGHT_3 3
 
 
     ( *g ) = 0;
@@ -1134,7 +1132,6 @@ static void biDiagonalReductionRight_64(
         }
         norm_x_e = W_norm( norm_64 );
         norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
-        norm_x_e = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );
         move16();
 
         IF( norm_x )
@@ -1143,8 +1140,6 @@ static void biDiagonalReductionRight_64(
             Word32 factor2;
             Word16 tmp_e;
             Word64 tmpmul;
-            Word32 tmp_g;
-            Word16 tmp_g_e;
             Word16 magic_shift;
             Word64 r_64;
             Word32 r;
@@ -1153,20 +1148,18 @@ static void biDiagonalReductionRight_64(
             Word32 invVal;
             Word16 invVal_e;
 
-            tmp_g_e = norm_x_e;
+            ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );
             move16();
-            tmp_g = Sqrt32( norm_x, &tmp_g_e );
+            ( *g ) = Sqrt32( norm_x, g_e );
             IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) )
             {
-                tmp_g = L_negate( tmp_g );
+                ( *g ) = L_negate( *g );
             }
-            *g = tmp_g;
-            *g_e = tmp_g_e;
             move32();
             move16();
             factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) );
-            tmp_e = sub( tmp_g_e, HEADROOM_RIGHT_1 );
-            tmpmul = W_mult0_32_32( tmp_g, factor2 );
+            tmp_e = sub( *g_e, HEADROOM_RIGHT_1 );
+            tmpmul = W_mult0_32_32( *g, factor2 );
             tmpmul = W_shl( tmpmul, tmp_e );
             r_64 = W_sub( tmpmul, norm_64 );
             r_e = W_norm( r_64 );
@@ -1176,8 +1169,8 @@ static void biDiagonalReductionRight_64(
             move16();
             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
 
-            magic_shift = 32 - tmp_g_e;
-            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( tmp_g ), magic_shift ) ); // here, the exponent goes up
+            magic_shift = 32 - *g_e;
+            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), magic_shift ) ); // here, the exponent goes up
 
             FOR( iCh = idx; iCh < nChannelsL; iCh++ )
             {
@@ -1194,11 +1187,11 @@ static void biDiagonalReductionRight_64(
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = 25 + norm_x_e - r_e; // FIXME: Why does this work?
+                magic_shift = 25 + norm_x_e - r_e; // headroom 3 FIXME: Why does this work?
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
+                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) );
                     singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                 }
             }
-- 
GitLab


From 9adf3a9c5e18f7ac6d865f6e6ad8374cd0b0f144 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Mon, 21 Jul 2025 17:20:21 +0200
Subject: [PATCH 06/33] Applied the clang-format patch.

---
 lib_dec/ivas_svd_dec_fx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index f98020eb8..e8c36b541 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1047,7 +1047,7 @@ static void biDiagonalReductionLeft_64(
             ( *g ) = L_negate( *g );
         }
         factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) );
-	tmp_e = shr(sub(norm_x_e,1),1);
+        tmp_e = shr( sub( norm_x_e, 1 ), 1 );
         tmpmul = W_mult0_32_32( ( *g ), factor2 );
         tmpmul = W_shr( tmpmul, tmp_e );
         r_64 = W_sub( tmpmul, norm_64 );
@@ -1080,7 +1080,7 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-	    magic_shift=31+norm_x_e-r_e;
+            magic_shift = 31 + norm_x_e - r_e;
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
                 factor1 = W_extract_l( singularVectors_Left_64[jCh][currChannel] );
-- 
GitLab


From f5737f6c947119bd84e7ca02da06e4d115e6fba6 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Tue, 22 Jul 2025 08:56:05 +0200
Subject: [PATCH 07/33] Rolled back changes that caused a bit-exactness
 regression.

---
 lib_dec/ivas_svd_dec_fx.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index e8c36b541..4eb7477e4 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1005,6 +1005,7 @@ static void biDiagonalReductionLeft_64(
 
 #define HEADROOM_LEFT_1 1
 #define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 )
+#define HEADROOM_LEFT_3 2
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
@@ -1080,10 +1081,10 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = 31 + norm_x_e - r_e;
+            magic_shift = 31 - HEADROOM_LEFT_3 + norm_x_e - r_e;
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_l( singularVectors_Left_64[jCh][currChannel] );
+                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1110,7 +1111,7 @@ static void biDiagonalReductionRight_64(
     Word64 norm_64;
     Word16 idx;
 
-#define HEADROOM_RIGHT_1 1
+#define HEADROOM_RIGHT_1 2
 #define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 )
 #define HEADROOM_RIGHT_3 3
 
-- 
GitLab


From f47fd3d8af32895b4e9c5fc6354c87c2f10d0b63 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Tue, 22 Jul 2025 13:14:26 +0200
Subject: [PATCH 08/33] More deterministic "magic_shifts" in
 biDiagonalReductionLeft_64() and biDiagonalReductionRight_64().

---
 lib_dec/ivas_svd_dec_fx.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 4eb7477e4..4588b4e19 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1003,9 +1003,10 @@ static void biDiagonalReductionLeft_64(
     Word16 *g_e )
 {
 
-#define HEADROOM_LEFT_1 1
-#define HEADROOM_LEFT_2 ( HEADROOM_LEFT_1 + 1 )
+#define HEADROOM_LEFT_1 1 
+#define HEADROOM_LEFT_2 2
 #define HEADROOM_LEFT_3 2
+
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
@@ -1048,28 +1049,28 @@ static void biDiagonalReductionLeft_64(
             ( *g ) = L_negate( *g );
         }
         factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) );
-        tmp_e = shr( sub( norm_x_e, 1 ), 1 );
+        tmp_e = sub( HEADROOM_LEFT_1, ( *g_e ) );
         tmpmul = W_mult0_32_32( ( *g ), factor2 );
         tmpmul = W_shr( tmpmul, tmp_e );
         r_64 = W_sub( tmpmul, norm_64 );
         r_e = W_norm( r_64 );
         r = W_extract_h( W_shl( r_64, r_e ) );
 
-
-        invVal_e = sub( add( 1, ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ) ), r_e );
-        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, r, &invVal_e );
+        invVal_e = 0;
+        move16();
+        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
 
 
-        tmp_e = add( 31, sub( HEADROOM_LEFT_1, *g_e ) );
+        tmp_e = sub( 32, *g_e );
         singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up.
 
         FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
         {
-            Word16 magic_shift;
             Word32 factor1;
             Word32 factor2;
             Word32 f; // = norm / r
                       //            Word16 f_e;      // not really needed
+            Word16 magic_shift;
 
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
@@ -1081,10 +1082,10 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = 31 - HEADROOM_LEFT_3 + norm_x_e - r_e;
+            magic_shift = norm_x_e-r_e+(28-HEADROOM_LEFT_3);	// works with headroom_left: 1,2,2
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) );
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32-HEADROOM_LEFT_3 ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1112,8 +1113,8 @@ static void biDiagonalReductionRight_64(
     Word16 idx;
 
 #define HEADROOM_RIGHT_1 2
-#define HEADROOM_RIGHT_2 ( HEADROOM_RIGHT_1 + 1 )
-#define HEADROOM_RIGHT_3 3
+#define HEADROOM_RIGHT_2 2
+#define HEADROOM_RIGHT_3 2
 
 
     ( *g ) = 0;
@@ -1159,9 +1160,9 @@ static void biDiagonalReductionRight_64(
             move32();
             move16();
             factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) );
-            tmp_e = sub( *g_e, HEADROOM_RIGHT_1 );
+            tmp_e = sub( HEADROOM_RIGHT_1, *g_e );
             tmpmul = W_mult0_32_32( *g, factor2 );
-            tmpmul = W_shl( tmpmul, tmp_e );
+            tmpmul = W_shr( tmpmul, tmp_e );
             r_64 = W_sub( tmpmul, norm_64 );
             r_e = W_norm( r_64 );
             r = W_extract_h( W_shl( r_64, r_e ) );
@@ -1170,8 +1171,8 @@ static void biDiagonalReductionRight_64(
             move16();
             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
 
-            magic_shift = 32 - *g_e;
-            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), magic_shift ) ); // here, the exponent goes up
+            tmp_e = sub( 32, *g_e );
+            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up
 
             FOR( iCh = idx; iCh < nChannelsL; iCh++ )
             {
@@ -1188,11 +1189,11 @@ static void biDiagonalReductionRight_64(
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = 25 + norm_x_e - r_e; // headroom 3 FIXME: Why does this work?
+                magic_shift = norm_x_e-r_e   +29-(HEADROOM_RIGHT_3); // works with headroom_right: 2,2,2
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) );
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32-HEADROOM_RIGHT_3 ) );
                     singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                 }
             }
-- 
GitLab


From bab21b8c63d5cfce30b5f76f04302d0d7cfed123 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Tue, 22 Jul 2025 13:26:55 +0200
Subject: [PATCH 09/33] applied the clang patch.

---
 lib_dec/ivas_svd_dec_fx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 4588b4e19..14f449caf 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1003,7 +1003,7 @@ static void biDiagonalReductionLeft_64(
     Word16 *g_e )
 {
 
-#define HEADROOM_LEFT_1 1 
+#define HEADROOM_LEFT_1 1
 #define HEADROOM_LEFT_2 2
 #define HEADROOM_LEFT_3 2
 
@@ -1082,10 +1082,10 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = norm_x_e-r_e+(28-HEADROOM_LEFT_3);	// works with headroom_left: 1,2,2
+            magic_shift = norm_x_e - r_e + ( 28 - HEADROOM_LEFT_3 ); // works with headroom_left: 1,2,2
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32-HEADROOM_LEFT_3 ) );
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1189,11 +1189,11 @@ static void biDiagonalReductionRight_64(
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = norm_x_e-r_e   +29-(HEADROOM_RIGHT_3); // works with headroom_right: 2,2,2
+                magic_shift = norm_x_e - r_e + 29 - ( HEADROOM_RIGHT_3 ); // works with headroom_right: 2,2,2
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32-HEADROOM_RIGHT_3 ) );
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );
                     singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                 }
             }
-- 
GitLab


From 9f38e9b1fe0b8f906f0e0da676c3893bf4fbd325 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 23 Jul 2025 11:26:38 +0200
Subject: [PATCH 10/33] broken down the headroom shifts a little bit more.

---
 lib_dec/ivas_svd_dec_fx.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 14f449caf..9cca6aa33 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1003,9 +1003,10 @@ static void biDiagonalReductionLeft_64(
     Word16 *g_e )
 {
 
-#define HEADROOM_LEFT_1 1
+#define HEADROOM_LEFT_1 2
 #define HEADROOM_LEFT_2 2
-#define HEADROOM_LEFT_3 2
+#define HEADROOM_LEFT_3 15
+#define HEADROOM_LEFT_4 15
 
     Word16 iCh, jCh;
     Word32 norm_x;
@@ -1075,17 +1076,17 @@ static void biDiagonalReductionLeft_64(
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_2 ) );
-                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_2 ) );
+                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) );
+                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_3 ) );
                 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
             }
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = norm_x_e - r_e + ( 28 - HEADROOM_LEFT_3 ); // works with headroom_left: 1,2,2
+            magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_LEFT_1 - HEADROOM_LEFT_2 -2*HEADROOM_LEFT_3 - HEADROOM_LEFT_4 );
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1114,7 +1115,8 @@ static void biDiagonalReductionRight_64(
 
 #define HEADROOM_RIGHT_1 2
 #define HEADROOM_RIGHT_2 2
-#define HEADROOM_RIGHT_3 2
+#define HEADROOM_RIGHT_3 15
+#define HEADROOM_RIGHT_4 15
 
 
     ( *g ) = 0;
@@ -1159,8 +1161,8 @@ static void biDiagonalReductionRight_64(
             }
             move32();
             move16();
-            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_1 ) );
-            tmp_e = sub( HEADROOM_RIGHT_1, *g_e );
+            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );
+            tmp_e = sub( HEADROOM_RIGHT_2, *g_e );
             tmpmul = W_mult0_32_32( *g, factor2 );
             tmpmul = W_shr( tmpmul, tmp_e );
             r_64 = W_sub( tmpmul, norm_64 );
@@ -1181,19 +1183,19 @@ static void biDiagonalReductionRight_64(
                 move64();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_2 ) );
-                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_2 ) );
+                    factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_3 ) );
+                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) );
                     norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
                 }
 
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = norm_x_e - r_e + 29 - ( HEADROOM_RIGHT_3 ); // works with headroom_right: 2,2,2
+                magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 -2*HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 );
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_4 ) );
                     singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                 }
             }
-- 
GitLab


From f62a0613bd90d4a1be71f0e423159d493d66cada Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 23 Jul 2025 11:31:43 +0200
Subject: [PATCH 11/33] applied the clang patch.

---
 lib_dec/ivas_svd_dec_fx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 9cca6aa33..16ccada30 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1083,7 +1083,7 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_LEFT_1 - HEADROOM_LEFT_2 -2*HEADROOM_LEFT_3 - HEADROOM_LEFT_4 );
+            magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 );
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
                 factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) );
@@ -1191,7 +1191,7 @@ static void biDiagonalReductionRight_64(
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = norm_x_e - r_e + ( 32 + 2*HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 -2*HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 );
+                magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 );
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-- 
GitLab


From 1732636a8edb599ed519c131ba30aadc55d192ed Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 23 Jul 2025 19:24:12 +0200
Subject: [PATCH 12/33] more fine tuning in left_64() and right_64() (to run
 the regression test)

---
 lib_dec/ivas_svd_dec_fx.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 16ccada30..54ab787bc 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1004,8 +1004,8 @@ static void biDiagonalReductionLeft_64(
 {
 
 #define HEADROOM_LEFT_1 2
-#define HEADROOM_LEFT_2 2
-#define HEADROOM_LEFT_3 15
+#define HEADROOM_LEFT_2 12
+#define HEADROOM_LEFT_3 14
 #define HEADROOM_LEFT_4 15
 
     Word16 iCh, jCh;
@@ -1049,8 +1049,8 @@ static void biDiagonalReductionLeft_64(
         {
             ( *g ) = L_negate( *g );
         }
-        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_1 ) );
-        tmp_e = sub( HEADROOM_LEFT_1, ( *g_e ) );
+        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) );
+        tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) );
         tmpmul = W_mult0_32_32( ( *g ), factor2 );
         tmpmul = W_shr( tmpmul, tmp_e );
         r_64 = W_sub( tmpmul, norm_64 );
@@ -1083,7 +1083,7 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 );
+            magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_LEFT_1 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 );
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
                 factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) );
@@ -1114,8 +1114,8 @@ static void biDiagonalReductionRight_64(
     Word16 idx;
 
 #define HEADROOM_RIGHT_1 2
-#define HEADROOM_RIGHT_2 2
-#define HEADROOM_RIGHT_3 15
+#define HEADROOM_RIGHT_2 12
+#define HEADROOM_RIGHT_3 14
 #define HEADROOM_RIGHT_4 15
 
 
@@ -1162,7 +1162,7 @@ static void biDiagonalReductionRight_64(
             move32();
             move16();
             factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );
-            tmp_e = sub( HEADROOM_RIGHT_2, *g_e );
+            tmp_e = sub( 2 * HEADROOM_RIGHT_1-HEADROOM_RIGHT_2, *g_e );
             tmpmul = W_mult0_32_32( *g, factor2 );
             tmpmul = W_shr( tmpmul, tmp_e );
             r_64 = W_sub( tmpmul, norm_64 );
@@ -1191,7 +1191,7 @@ static void biDiagonalReductionRight_64(
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = norm_x_e - r_e + ( 32 + 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 );
+                magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_RIGHT_1 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 );
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-- 
GitLab


From 80c7142d82c87bf16f37de4dcc89bacc994c085a Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 23 Jul 2025 19:29:31 +0200
Subject: [PATCH 13/33] applied the clang patch.

---
 lib_dec/ivas_svd_dec_fx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 54ab787bc..9304ee03c 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1162,7 +1162,7 @@ static void biDiagonalReductionRight_64(
             move32();
             move16();
             factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );
-            tmp_e = sub( 2 * HEADROOM_RIGHT_1-HEADROOM_RIGHT_2, *g_e );
+            tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e );
             tmpmul = W_mult0_32_32( *g, factor2 );
             tmpmul = W_shr( tmpmul, tmp_e );
             r_64 = W_sub( tmpmul, norm_64 );
-- 
GitLab


From eb7410daeefd9c226ab7b419ddd8cdcd1aba8e4f Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Thu, 24 Jul 2025 12:37:18 +0200
Subject: [PATCH 14/33] another experiment: Making the headroom shifts
 dependent on the input values.

---
 lib_dec/ivas_svd_dec_fx.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 9304ee03c..1cae38228 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1004,13 +1004,14 @@ static void biDiagonalReductionLeft_64(
 {
 
 #define HEADROOM_LEFT_1 2
-#define HEADROOM_LEFT_2 12
-#define HEADROOM_LEFT_3 14
-#define HEADROOM_LEFT_4 15
+#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 )	// 12
+#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 )	// 14
+#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 )	// 15
 
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
+    Word16 norm_x_e0;
     Word64 norm_64;
     ( *g ) = 0;
     ( *g_e ) = 0;
@@ -1030,6 +1031,7 @@ static void biDiagonalReductionLeft_64(
             norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
         }
         norm_x_e = W_norm( norm_64 );
+        norm_x_e0 = W_norm( norm_64 );
         norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
     }
     IF( norm_x )
@@ -1076,8 +1078,8 @@ static void biDiagonalReductionLeft_64(
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_3 ) );
-                factor2 = W_extract_l( W_shr( singularVectors_Left_64[jCh][iCh], HEADROOM_LEFT_3 ) );
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );
+                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) );
                 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
             }
             norm_x_e = W_norm( norm_64 );
@@ -1110,13 +1112,14 @@ static void biDiagonalReductionRight_64(
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
+    Word16 norm_x_e0;
     Word64 norm_64;
     Word16 idx;
 
 #define HEADROOM_RIGHT_1 2
-#define HEADROOM_RIGHT_2 12
-#define HEADROOM_RIGHT_3 14
-#define HEADROOM_RIGHT_4 15
+#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 )	//12
+#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 )	//14
+#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 )	//15
 
 
     ( *g ) = 0;
@@ -1135,6 +1138,7 @@ static void biDiagonalReductionRight_64(
             norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
         }
         norm_x_e = W_norm( norm_64 );
+        norm_x_e0 = W_norm( norm_64 );
         norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
         move16();
 
@@ -1183,8 +1187,8 @@ static void biDiagonalReductionRight_64(
                 move64();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_l( W_shr( singularVectors_Left_64[iCh][jCh], HEADROOM_RIGHT_3 ) );
-                    factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_3 ) );
+                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) );
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );
                     norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
                 }
 
-- 
GitLab


From 31a19c209d4853f6839629a9166cd2d3fba5fae0 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Thu, 24 Jul 2025 12:45:04 +0200
Subject: [PATCH 15/33] applied the clang patch.

---
 lib_dec/ivas_svd_dec_fx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 1cae38228..78d0b90a3 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1004,9 +1004,9 @@ static void biDiagonalReductionLeft_64(
 {
 
 #define HEADROOM_LEFT_1 2
-#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 )	// 12
-#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 )	// 14
-#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 )	// 15
+#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 12
+#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 14
+#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 15
 
     Word16 iCh, jCh;
     Word32 norm_x;
@@ -1117,9 +1117,9 @@ static void biDiagonalReductionRight_64(
     Word16 idx;
 
 #define HEADROOM_RIGHT_1 2
-#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 )	//12
-#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 )	//14
-#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 )	//15
+#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 12
+#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 14
+#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 15
 
 
     ( *g ) = 0;
-- 
GitLab


From d0617a4faf48227ad21e7f75477efe45b9671479 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Mon, 28 Jul 2025 09:10:01 +0200
Subject: [PATCH 16/33] clean up of the "magic shift" definition.

---
 lib_dec/ivas_svd_dec_fx.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 78d0b90a3..42edc1c11 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1004,9 +1004,9 @@ static void biDiagonalReductionLeft_64(
 {
 
 #define HEADROOM_LEFT_1 2
-#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) // 12
-#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) // 14
-#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) // 15
+#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) //  10
+#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) //  4
+#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) //  14
 
     Word16 iCh, jCh;
     Word32 norm_x;
@@ -1085,7 +1085,7 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_LEFT_1 - 2 * HEADROOM_LEFT_3 - HEADROOM_LEFT_4 );
+            magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 );
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
                 factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) );
@@ -1117,9 +1117,9 @@ static void biDiagonalReductionRight_64(
     Word16 idx;
 
 #define HEADROOM_RIGHT_1 2
-#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 12
-#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 14
-#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 15
+#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 )	// 10
+#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 )	// 4
+#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 )	// 14
 
 
     ( *g ) = 0;
@@ -1195,7 +1195,7 @@ static void biDiagonalReductionRight_64(
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = norm_x_e - r_e + ( 30 + 2 * HEADROOM_RIGHT_1 - 2 * HEADROOM_RIGHT_3 - HEADROOM_RIGHT_4 );
+                magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 );
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-- 
GitLab


From e8cef013595da88370196c3fd74e44b3f3e1a93e Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Tue, 29 Jul 2025 17:29:06 +0200
Subject: [PATCH 17/33] added invVal_e to the magic_shift.

---
 lib_dec/ivas_svd_dec_fx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 42edc1c11..d40ec0f33 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -901,7 +901,7 @@ static void HouseholderReduction_fx(
             singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 );
         }
     }
-    for ( nCh = 0; nCh < nChannelsC; nCh++ )
+    FOR ( nCh = 0; nCh < nChannelsC; nCh++ )
     {
         biDiagonalReductionLeft_64(
             singularVectors_Left_64,
@@ -1085,7 +1085,7 @@ static void biDiagonalReductionLeft_64(
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
             f = Mpy_32_32( norm_x, invVal );
-            magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 );
+            magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e;
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
                 factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) );
@@ -1195,7 +1195,7 @@ static void biDiagonalReductionRight_64(
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
                 f = Mpy_32_32( norm_x, invVal );
-                magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 );
+                magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e;
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-- 
GitLab


From 7d9a7a487a2fd2144ed931e804a59377367df221 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Tue, 29 Jul 2025 17:35:02 +0200
Subject: [PATCH 18/33] applied the clang patch

---
 lib_dec/ivas_svd_dec_fx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index d40ec0f33..302bdb32a 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -901,7 +901,7 @@ static void HouseholderReduction_fx(
             singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 );
         }
     }
-    FOR ( nCh = 0; nCh < nChannelsC; nCh++ )
+    FOR( nCh = 0; nCh < nChannelsC; nCh++ )
     {
         biDiagonalReductionLeft_64(
             singularVectors_Left_64,
@@ -1117,9 +1117,9 @@ static void biDiagonalReductionRight_64(
     Word16 idx;
 
 #define HEADROOM_RIGHT_1 2
-#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 )	// 10
-#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 )	// 4
-#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 )	// 14
+#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10
+#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4
+#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14
 
 
     ( *g ) = 0;
-- 
GitLab


From 617c0edfd995e2b16d6df28d5a3d767a5b179d01 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 30 Jul 2025 13:37:20 +0200
Subject: [PATCH 19/33] added comments to track the Q and exponents.

---
 lib_dec/ivas_svd_dec_fx.c | 74 +++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 38 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 302bdb32a..9b92d6a57 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -995,7 +995,7 @@ static void HouseholderReduction_fx(
  *-------------------------------------------------------------------------*/
 
 static void biDiagonalReductionLeft_64(
-    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
+    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],		// q(sing)	exp(sing)
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
@@ -1027,12 +1027,12 @@ static void biDiagonalReductionLeft_64(
         move64();
         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );
-            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );	// q(sing)-H1			// exp(sing)+H1
+            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
-        norm_x_e = W_norm( norm_64 );
+        norm_x_e = W_norm( norm_64 );			
         norm_x_e0 = W_norm( norm_64 );
-        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
+        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
     }
     IF( norm_x )
     {
@@ -1040,51 +1040,50 @@ static void biDiagonalReductionLeft_64(
         Word16 tmp_e;
         Word64 tmpmul;
 
-        Word64 r_64; //  = sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm OR -sqrt(norm)*singularVectors_Left_64[currChannel][currChannel]-norm
+        Word64 r_64; 
         Word32 r, invVal;
         Word16 r_e, invVal_e;
 
-        ( *g_e ) = add( sub( ( HEADROOM_LEFT_1 + HEADROOM_LEFT_1 ), norm_x_e ), 1 );
+        ( *g_e ) = add( sub( add( HEADROOM_LEFT_1, HEADROOM_LEFT_1 ), norm_x_e ), 1 );		// exp(g)=(2*H1-exp(norm_x)+1)
         move16();
-        ( *g ) = Sqrt32( norm_x, g_e );
+        ( *g ) = Sqrt32( norm_x, g_e );								// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
         IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
         {
             ( *g ) = L_negate( *g );
         }
-        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) );
-        tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) );
-        tmpmul = W_mult0_32_32( ( *g ), factor2 );
-        tmpmul = W_shr( tmpmul, tmp_e );
-        r_64 = W_sub( tmpmul, norm_64 );
+        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
+        tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) );						
+        tmpmul = W_mult0_32_32( ( *g ), factor2 );	// q(tmpmul)=q(g)+q(factor2)
+        tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
+        r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
         r_e = W_norm( r_64 );
         r = W_extract_h( W_shl( r_64, r_e ) );
 
         invVal_e = 0;
         move16();
-        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
+        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );	// invVal=1/r --> q(invVal)=-q(r)
 
 
         tmp_e = sub( 32, *g_e );
-        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up.
+        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))
 
         FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
         {
             Word32 factor1;
             Word32 factor2;
             Word32 f; // = norm / r
-                      //            Word16 f_e;      // not really needed
             Word16 magic_shift;
 
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );
-                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) );
-                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );	// q(factor1) = q(sing)-H3
+                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) );		// q(factor2) = q(sing)-H3
+                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );						// q(norm)=2*q(sing)-2*H3
             }
             norm_x_e = W_norm( norm_64 );
-            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
-            f = Mpy_32_32( norm_x, invVal );
+            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
+            f = Mpy_32_32( norm_x, invVal );		// q(f)=q(norm_x)-q(invVal)
             magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e;
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
@@ -1134,12 +1133,12 @@ static void biDiagonalReductionRight_64(
         FOR( jCh = idx; jCh < nChannelsC; jCh++ )
         {
             Word32 tmp;
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );
-            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );	// q(sing)-H1			// exp(sing)+H1	
+            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
         norm_x_e = W_norm( norm_64 );
         norm_x_e0 = W_norm( norm_64 );
-        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
+        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
         move16();
 
         IF( norm_x )
@@ -1156,29 +1155,28 @@ static void biDiagonalReductionRight_64(
             Word32 invVal;
             Word16 invVal_e;
 
-            ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );
+            ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );	// exp(g)=(2*H1-exp(norm_x)+1)
             move16();
-            ( *g ) = Sqrt32( norm_x, g_e );
+            ( *g ) = Sqrt32( norm_x, g_e );							// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
             IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) )
             {
                 ( *g ) = L_negate( *g );
             }
             move32();
             move16();
-            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );
+            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
             tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e );
-            tmpmul = W_mult0_32_32( *g, factor2 );
-            tmpmul = W_shr( tmpmul, tmp_e );
-            r_64 = W_sub( tmpmul, norm_64 );
+            tmpmul = W_mult0_32_32( *g, factor2 );	// q(tmpmul)=q(g)+q(factor2)
+            tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
+            r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
             r_e = W_norm( r_64 );
             r = W_extract_h( W_shl( r_64, r_e ) );
 
             invVal_e = 0;
             move16();
-            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
-
+            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );	// invVal=1/r --> q(invVal)=-q(r)
             tmp_e = sub( 32, *g_e );
-            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // here, the exponent goes up
+            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) );	// q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)))
 
             FOR( iCh = idx; iCh < nChannelsL; iCh++ )
             {
@@ -1187,14 +1185,14 @@ static void biDiagonalReductionRight_64(
                 move64();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) );
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );
-                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );
+                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) );		// q(factor1) = q(sing)-H3
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );	// q(factor2) = q(sing)-H3
+                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );					// q(norm)=2*q(sing)-2*H3
                 }
 
                 norm_x_e = W_norm( norm_64 );
-                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );
-                f = Mpy_32_32( norm_x, invVal );
+                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
+                f = Mpy_32_32( norm_x, invVal );	// q(f)=q(norm_x)-q(invVal)
                 magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e;
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
-- 
GitLab


From 8cd5a1cec0f1ea81ab45f6ab534541b1b630f44a Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 30 Jul 2025 17:23:27 +0200
Subject: [PATCH 20/33] renamed the macros for the headroom shifts.

---
 lib_dec/ivas_svd_dec_fx.c | 48 ++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 9b92d6a57..384f05f09 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1002,11 +1002,11 @@ static void biDiagonalReductionLeft_64(
     Word32 *g,
     Word16 *g_e )
 {
-
-#define HEADROOM_LEFT_1 2
-#define HEADROOM_LEFT_2 ( 16 - norm_x_e0 / 4 ) //  10
-#define HEADROOM_LEFT_3 ( 16 - norm_x_e0 / 4 ) //  4
-#define HEADROOM_LEFT_4 ( 16 - norm_x_e0 / 4 ) //  14
+/* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */
+#define MAGIC_HEADROOM_1 2
+#define MAGIC_HEADROOM_2 ( 16 - norm_x_e0 / 4 )
+#define MAGIC_HEADROOM_3 ( 16 - norm_x_e0 / 4 )
+#define MAGIC_HEADROOM_4 ( 16 - norm_x_e0 / 4 )
 
     Word16 iCh, jCh;
     Word32 norm_x;
@@ -1027,7 +1027,7 @@ static void biDiagonalReductionLeft_64(
         move64();
         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], HEADROOM_LEFT_1 ) );	// q(sing)-H1			// exp(sing)+H1
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) );	// q(sing)-H1			// exp(sing)+H1
             norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
         norm_x_e = W_norm( norm_64 );			
@@ -1044,16 +1044,16 @@ static void biDiagonalReductionLeft_64(
         Word32 r, invVal;
         Word16 r_e, invVal_e;
 
-        ( *g_e ) = add( sub( add( HEADROOM_LEFT_1, HEADROOM_LEFT_1 ), norm_x_e ), 1 );		// exp(g)=(2*H1-exp(norm_x)+1)
+        ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 );		// exp(g)=(2*H1-exp(norm_x)+1)
         move16();
         ( *g ) = Sqrt32( norm_x, g_e );								// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
         IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
         {
             ( *g ) = L_negate( *g );
         }
-        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], HEADROOM_LEFT_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
-        tmp_e = sub( 2 * HEADROOM_LEFT_1 - HEADROOM_LEFT_2, ( *g_e ) );						
-        tmpmul = W_mult0_32_32( ( *g ), factor2 );	// q(tmpmul)=q(g)+q(factor2)
+        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
+        tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) );						
+        tmpmul = W_mult0_32_32( ( *g ), factor2 );	// q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm)
         tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
         r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
         r_e = W_norm( r_64 );
@@ -1077,17 +1077,17 @@ static void biDiagonalReductionLeft_64(
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_3 ) );	// q(factor1) = q(sing)-H3
-                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - HEADROOM_LEFT_3 ) );		// q(factor2) = q(sing)-H3
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) );	// q(factor1) = q(sing)-H3
+                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) );		// q(factor2) = q(sing)-H3
                 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );						// q(norm)=2*q(sing)-2*H3
             }
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
             f = Mpy_32_32( norm_x, invVal );		// q(f)=q(norm_x)-q(invVal)
-            magic_shift = ( norm_x_e - 2 * HEADROOM_LEFT_3 ) - ( r_e - 2 * HEADROOM_LEFT_1 ) + ( 32 - HEADROOM_LEFT_4 ) - 2 * invVal_e;
+            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - HEADROOM_LEFT_4 ) );
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_4 ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1115,10 +1115,6 @@ static void biDiagonalReductionRight_64(
     Word64 norm_64;
     Word16 idx;
 
-#define HEADROOM_RIGHT_1 2
-#define HEADROOM_RIGHT_2 ( 16 - norm_x_e0 / 4 ) // 10
-#define HEADROOM_RIGHT_3 ( 16 - norm_x_e0 / 4 ) // 4
-#define HEADROOM_RIGHT_4 ( 16 - norm_x_e0 / 4 ) // 14
 
 
     ( *g ) = 0;
@@ -1133,7 +1129,7 @@ static void biDiagonalReductionRight_64(
         FOR( jCh = idx; jCh < nChannelsC; jCh++ )
         {
             Word32 tmp;
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], HEADROOM_RIGHT_1 ) );	// q(sing)-H1			// exp(sing)+H1	
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) );	// q(sing)-H1			// exp(sing)+H1	
             norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
         norm_x_e = W_norm( norm_64 );
@@ -1155,7 +1151,7 @@ static void biDiagonalReductionRight_64(
             Word32 invVal;
             Word16 invVal_e;
 
-            ( *g_e ) = add( sub( ( HEADROOM_RIGHT_1 + HEADROOM_RIGHT_1 ), norm_x_e ), 1 );	// exp(g)=(2*H1-exp(norm_x)+1)
+            ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 );	// exp(g)=(2*H1-exp(norm_x)+1)
             move16();
             ( *g ) = Sqrt32( norm_x, g_e );							// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
             IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) )
@@ -1164,8 +1160,8 @@ static void biDiagonalReductionRight_64(
             }
             move32();
             move16();
-            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], HEADROOM_RIGHT_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
-            tmp_e = sub( 2 * HEADROOM_RIGHT_1 - HEADROOM_RIGHT_2, *g_e );
+            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
+            tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e );
             tmpmul = W_mult0_32_32( *g, factor2 );	// q(tmpmul)=q(g)+q(factor2)
             tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
             r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
@@ -1185,19 +1181,19 @@ static void biDiagonalReductionRight_64(
                 move64();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - HEADROOM_RIGHT_3 ) );		// q(factor1) = q(sing)-H3
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_3 ) );	// q(factor2) = q(sing)-H3
+                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) );		// q(factor1) = q(sing)-H3
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) );	// q(factor2) = q(sing)-H3
                     norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );					// q(norm)=2*q(sing)-2*H3
                 }
 
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
                 f = Mpy_32_32( norm_x, invVal );	// q(f)=q(norm_x)-q(invVal)
-                magic_shift = ( norm_x_e - 2 * HEADROOM_RIGHT_3 ) - ( r_e - 2 * HEADROOM_RIGHT_1 ) + ( 32 - HEADROOM_RIGHT_4 ) - 2 * invVal_e;
+                magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - HEADROOM_RIGHT_4 ) );
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_4 ) );
                     singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                 }
             }
-- 
GitLab


From 31bfe505634537016b63f249c21500b8c5ba7906 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Wed, 30 Jul 2025 17:31:29 +0200
Subject: [PATCH 21/33] applied the clang patch

---
 lib_dec/ivas_svd_dec_fx.c | 77 +++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 39 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 384f05f09..1c5eec520 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -995,10 +995,10 @@ static void HouseholderReduction_fx(
  *-------------------------------------------------------------------------*/
 
 static void biDiagonalReductionLeft_64(
-    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],		// q(sing)	exp(sing)
-    const Word16 nChannelsL,  /* Q0 */
-    const Word16 nChannelsC,  /* Q0 */
-    const Word16 currChannel, /* Q0 */
+    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing)	exp(sing)
+    const Word16 nChannelsL,                                                  /* Q0 */
+    const Word16 nChannelsC,                                                  /* Q0 */
+    const Word16 currChannel,                                                 /* Q0 */
     Word32 *g,
     Word16 *g_e )
 {
@@ -1027,12 +1027,12 @@ static void biDiagonalReductionLeft_64(
         move64();
         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) );	// q(sing)-H1			// exp(sing)+H1
-            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1			// exp(sing)+H1
+            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );                                     // q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
-        norm_x_e = W_norm( norm_64 );			
+        norm_x_e = W_norm( norm_64 );
         norm_x_e0 = W_norm( norm_64 );
-        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
+        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
     }
     IF( norm_x )
     {
@@ -1040,28 +1040,28 @@ static void biDiagonalReductionLeft_64(
         Word16 tmp_e;
         Word64 tmpmul;
 
-        Word64 r_64; 
+        Word64 r_64;
         Word32 r, invVal;
         Word16 r_e, invVal_e;
 
-        ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 );		// exp(g)=(2*H1-exp(norm_x)+1)
+        ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1)
         move16();
-        ( *g ) = Sqrt32( norm_x, g_e );								// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
+        ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2)
         IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
         {
             ( *g ) = L_negate( *g );
         }
-        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
-        tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) );						
-        tmpmul = W_mult0_32_32( ( *g ), factor2 );	// q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm)
-        tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
-        r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
+        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
+        tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) );
+        tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm)
+        tmpmul = W_shr( tmpmul, tmp_e );           // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
+        r_64 = W_sub( tmpmul, norm_64 );           // q(r_64)=max(q(tmpmul),q(norm))
         r_e = W_norm( r_64 );
         r = W_extract_h( W_shl( r_64, r_e ) );
 
         invVal_e = 0;
         move16();
-        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );	// invVal=1/r --> q(invVal)=-q(r)
+        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r)
 
 
         tmp_e = sub( 32, *g_e );
@@ -1077,13 +1077,13 @@ static void biDiagonalReductionLeft_64(
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) );	// q(factor1) = q(sing)-H3
-                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) );		// q(factor2) = q(sing)-H3
-                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );						// q(norm)=2*q(sing)-2*H3
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3
+                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) );         // q(factor2) = q(sing)-H3
+                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
             }
             norm_x_e = W_norm( norm_64 );
-            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
-            f = Mpy_32_32( norm_x, invVal );		// q(f)=q(norm_x)-q(invVal)
+            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
+            f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
             magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
@@ -1116,7 +1116,6 @@ static void biDiagonalReductionRight_64(
     Word16 idx;
 
 
-
     ( *g ) = 0;
     ( *g_e ) = 0;
     move32();
@@ -1129,12 +1128,12 @@ static void biDiagonalReductionRight_64(
         FOR( jCh = idx; jCh < nChannelsC; jCh++ )
         {
             Word32 tmp;
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) );	// q(sing)-H1			// exp(sing)+H1	
-            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );					// q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
+            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1			// exp(sing)+H1
+            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );                                     // q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
         norm_x_e = W_norm( norm_64 );
         norm_x_e0 = W_norm( norm_64 );
-        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
+        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
         move16();
 
         IF( norm_x )
@@ -1151,28 +1150,28 @@ static void biDiagonalReductionRight_64(
             Word32 invVal;
             Word16 invVal_e;
 
-            ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 );	// exp(g)=(2*H1-exp(norm_x)+1)
+            ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1)
             move16();
-            ( *g ) = Sqrt32( norm_x, g_e );							// --> exp(g)=((2*H1-exp(norm_x)+1)/2)
+            ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2)
             IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) )
             {
                 ( *g ) = L_negate( *g );
             }
             move32();
             move16();
-            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) );	// q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
+            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
             tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e );
-            tmpmul = W_mult0_32_32( *g, factor2 );	// q(tmpmul)=q(g)+q(factor2)
-            tmpmul = W_shr( tmpmul, tmp_e );		// --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
-            r_64 = W_sub( tmpmul, norm_64 );		// q(r_64)=max(q(tmpmul),q(norm))
+            tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2)
+            tmpmul = W_shr( tmpmul, tmp_e );       // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
+            r_64 = W_sub( tmpmul, norm_64 );       // q(r_64)=max(q(tmpmul),q(norm))
             r_e = W_norm( r_64 );
             r = W_extract_h( W_shl( r_64, r_e ) );
 
             invVal_e = 0;
             move16();
-            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );	// invVal=1/r --> q(invVal)=-q(r)
+            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r)
             tmp_e = sub( 32, *g_e );
-            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) );	// q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)))
+            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)))
 
             FOR( iCh = idx; iCh < nChannelsL; iCh++ )
             {
@@ -1181,14 +1180,14 @@ static void biDiagonalReductionRight_64(
                 move64();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) );		// q(factor1) = q(sing)-H3
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) );	// q(factor2) = q(sing)-H3
-                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );					// q(norm)=2*q(sing)-2*H3
+                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) );         // q(factor1) = q(sing)-H3
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3
+                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
                 }
 
                 norm_x_e = W_norm( norm_64 );
-                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) );	// Note: different norm
-                f = Mpy_32_32( norm_x, invVal );	// q(f)=q(norm_x)-q(invVal)
+                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
+                f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
                 magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
-- 
GitLab


From 8e5931aa10aff5ee562e97c1eba9604698f590e1 Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Fri, 1 Aug 2025 09:49:10 +0200
Subject: [PATCH 22/33] biDiagonalReductionLeft_64() and
 biDiagonalReductionRight_64() are ready for a non-draft merge request.

---
 lib_dec/ivas_svd_dec_fx.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 1c5eec520..7a408b396 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1004,9 +1004,9 @@ static void biDiagonalReductionLeft_64(
 {
 /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */
 #define MAGIC_HEADROOM_1 2
-#define MAGIC_HEADROOM_2 ( 16 - norm_x_e0 / 4 )
-#define MAGIC_HEADROOM_3 ( 16 - norm_x_e0 / 4 )
-#define MAGIC_HEADROOM_4 ( 16 - norm_x_e0 / 4 )
+#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) )
+#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) )
+#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) )
 
     Word16 iCh, jCh;
     Word32 norm_x;
@@ -1077,17 +1077,21 @@ static void biDiagonalReductionLeft_64(
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_3 ) ); // q(factor1) = q(sing)-H3
-                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], 32 - MAGIC_HEADROOM_3 ) );         // q(factor2) = q(sing)-H3
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32 , MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3
+                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor2) = q(sing)-H3
                 norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
             }
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
             f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
-            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
+//            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
+            magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) );
+            magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
+            magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
+            magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );
             FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], 32 - MAGIC_HEADROOM_4 ) );
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_4 ) ) );
                 singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
             }
         }
@@ -1180,19 +1184,23 @@ static void biDiagonalReductionRight_64(
                 move64();
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], 32 - MAGIC_HEADROOM_3 ) );         // q(factor1) = q(sing)-H3
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_3 ) ); // q(factor2) = q(sing)-H3
+                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor1) = q(sing)-H3
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3
                     norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
                 }
 
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
                 f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
-                magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
+                // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
+                magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) );
+                magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
+                magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
+                magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );
 
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ )
                 {
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], 32 - MAGIC_HEADROOM_4 ) );
+                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_4 ) ) );
                     singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
                 }
             }
-- 
GitLab


From 6256011f886f0d0f8be8ef4fb330e3a72cd0b8ed Mon Sep 17 00:00:00 2001
From: Thomas Dettbarn <thomas.dettbarn@iis.fraunhofer.de>
Date: Fri, 1 Aug 2025 10:20:07 +0200
Subject: [PATCH 23/33] applied the clang patch.

---
 lib_dec/ivas_svd_dec_fx.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 7a408b396..9b5f905e1 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1004,15 +1004,18 @@ static void biDiagonalReductionLeft_64(
 {
 /* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */
 #define MAGIC_HEADROOM_1 2
-#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) )
-#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) )
-#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) )
+//#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) )
+//#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) )
+//#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) )
+#define	MAGIC_HEADROOM_2 magic_headroom
+#define	MAGIC_HEADROOM_3 magic_headroom
+#define	MAGIC_HEADROOM_4 magic_headroom
 
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
-    Word16 norm_x_e0;
     Word64 norm_64;
+    Word16 magic_headroom;
     ( *g ) = 0;
     ( *g_e ) = 0;
     move32();
@@ -1031,7 +1034,7 @@ static void biDiagonalReductionLeft_64(
             norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );                                     // q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
         norm_x_e = W_norm( norm_64 );
-        norm_x_e0 = W_norm( norm_64 );
+        magic_headroom = sub( 16, shr( norm_x_e, 2 ) );
         norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
     }
     IF( norm_x )
@@ -1077,15 +1080,15 @@ static void biDiagonalReductionLeft_64(
             norm_64 = 0;
             for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32 , MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3
+                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3
                 factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor2) = q(sing)-H3
-                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
+                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                            // q(norm)=2*q(sing)-2*H3
             }
             norm_x_e = W_norm( norm_64 );
             norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
             f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
-//            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
-            magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) );
+                                                                //            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
+            magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) );
             magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
             magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
             magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );
@@ -1115,9 +1118,9 @@ static void biDiagonalReductionRight_64(
     Word16 iCh, jCh;
     Word32 norm_x;
     Word16 norm_x_e;
-    Word16 norm_x_e0;
     Word64 norm_64;
     Word16 idx;
+    Word16 magic_headroom;
 
 
     ( *g ) = 0;
@@ -1136,7 +1139,7 @@ static void biDiagonalReductionRight_64(
             norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );                                     // q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
         }
         norm_x_e = W_norm( norm_64 );
-        norm_x_e0 = W_norm( norm_64 );
+        magic_headroom = sub( 16, shr( norm_x_e, 2 ) );
         norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
         move16();
 
@@ -1186,14 +1189,14 @@ static void biDiagonalReductionRight_64(
                 {
                     factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor1) = q(sing)-H3
                     factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3
-                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                      // q(norm)=2*q(sing)-2*H3
+                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                            // q(norm)=2*q(sing)-2*H3
                 }
 
                 norm_x_e = W_norm( norm_64 );
                 norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
                 f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
                 // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
-                magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1) );
+                magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) );
                 magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
                 magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
                 magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );
-- 
GitLab


From 5aaf2643b90c6692c1524d2c583b1edb042fd0f1 Mon Sep 17 00:00:00 2001
From: naghibza <mohammadreza.naghibzadeh.tahamizarandi@iis.fraunhofer.de>
Date: Wed, 6 Aug 2025 18:47:09 +0200
Subject: [PATCH 24/33] Rewrite biDiagonalReductionLeft_fx() and
 biDiagonalReductionRight_fx() according to optimized float version.

---
 lib_dec/ivas_svd_dec_fx.c | 350 ++++++++++++++++++--------------------
 1 file changed, 161 insertions(+), 189 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 1f50870e0..7f341d76a 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -65,16 +65,19 @@ static void HouseholderReduction_fx(
     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     Word16 *eps_x_fx_e );
 #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
-static void biDiagonalReductionLeft_64(
-    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
-    const Word16 nChannelsL,  /* Q0 */
+
+static void biDiagonalReductionLeft_fx(
+    Word32 singularVectors[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_e) */
+    Word16 singularValues_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
+    const Word16 nChannelsL,
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
-    Word32 *g,                /* Q31 */
+    Word32 *g,
     Word16 *g_e );
 
-static void biDiagonalReductionRight_64(
-    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
+static void biDiagonalReductionRight_fx(
+    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
+    Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
@@ -840,7 +843,6 @@ static void HouseholderReduction_fx(
     Word16 nCh;
 #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
 
-    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     Word32 g_left_fx = 0;
     Word16 g_left_e = 0;
     move32();
@@ -862,62 +864,57 @@ static void HouseholderReduction_fx(
 
     Word16 iCh, jCh;
     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
+
 #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
+
     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     {
         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
         {
-            singularVectors_Left_64[jCh][iCh] = W_shr( W_deposit32_h( singularVectors_Left_fx[jCh][iCh] ), 32 );
+            singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
+            move16();
         }
     }
+    
     FOR( nCh = 0; nCh < nChannelsC; nCh++ )
     {
-        biDiagonalReductionLeft_64(
-            singularVectors_Left_64,
+        biDiagonalReductionLeft_fx(
+            singularVectors_Left_fx,
+            singularVectors_Left_fx_e,
             nChannelsL,
             nChannelsC,
             nCh,
             &g_left_fx,
             &g_left_e );
+
         singularValues_fx[nCh] = g_left_fx;
         move32();
-        singularValues_fx_e[nCh] = add( singularVectors_Left_e, g_left_e );
+        singularValues_fx_e[nCh] = g_left_e;
+
         secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
         move32();
-        secDiag_fx_e[nCh] = add( singularVectors_Left_e, g_right_e );
-        biDiagonalReductionRight_64(
-            singularVectors_Left_64,
+        secDiag_fx_e[nCh] = g_right_e;
+
+        biDiagonalReductionRight_fx(
+            singularVectors_Left_fx,
+            singularVectors_Left_fx_e,
             nChannelsL,
             nChannelsC,
             nCh,
             &g_right_fx,
             &g_right_e );
+
+        Word16 L_temp_e;
+        Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
+        IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
         {
-            Word16 L_temp_e;
-            Word32 L_temp;
-            L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), singularValues_fx_e[nCh], L_abs( secDiag_fx[nCh] ), secDiag_fx_e[nCh], &L_temp_e ); /* exp(L_temp_e) */
-            IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) )
-            {
-                *eps_x_fx = L_temp; /* exp(L_temp_e) */
-                move32();
-                *eps_x_fx_e = L_temp_e;
-                move32();
-            }
-        }
-    }
-    {
-        int i, j;
-        for ( j = 0; j < nChannelsL; j++ )
-        {
-            for ( i = 0; i < nChannelsC; i++ )
-            {
-                Word16 n;
-                n = W_norm( singularVectors_Left_64[j][i] );
-                singularVectors_Left_fx[j][i] = W_extract_h( W_shl( singularVectors_Left_64[j][i], n ) );
-                singularVectors_Left_fx_e[j][i] = sub( add( 32, singularVectors_Left_e ), n );
-            }
+            *eps_x_fx = L_temp; /* exp(L_temp_e) */
+            move32();
+            *eps_x_fx_e = L_temp_e;
+            move32();
         }
     }
+
 #else
 
     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
@@ -962,222 +959,197 @@ static void HouseholderReduction_fx(
  *
  *
  *-------------------------------------------------------------------------*/
-
-static void biDiagonalReductionLeft_64(
-    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], // q(sing)	exp(sing)
-    const Word16 nChannelsL,                                                  /* Q0 */
+static void biDiagonalReductionLeft_fx(
+    Word32 singularVectors[][MAX_OUTPUT_CHANNELS],                            /* exp(singularVectors_e) */
+    Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
+    const Word16 nChannelsL,
     const Word16 nChannelsC,                                                  /* Q0 */
     const Word16 currChannel,                                                 /* Q0 */
     Word32 *g,
     Word16 *g_e )
 {
-/* TODO: For some reason, this is optimal. But why? why not ( 32 - 2 * MAGIC_HEADROOM_1 - norm_x_e0 + 1 ) , for example? */
-#define MAGIC_HEADROOM_1 2
-//#define MAGIC_HEADROOM_2 ( sub( 16, shr( norm_x_e0, 2 ) ) )
-//#define MAGIC_HEADROOM_3 ( sub( 16, shr( norm_x_e0, 2 ) ) )
-//#define MAGIC_HEADROOM_4 ( sub( 16, shr( norm_x_e0, 2 ) ) )
-#define MAGIC_HEADROOM_2 magic_headroom
-#define MAGIC_HEADROOM_3 magic_headroom
-#define MAGIC_HEADROOM_4 magic_headroom
-
     Word16 iCh, jCh;
-    Word32 norm_x;
-    Word16 norm_x_e;
-    Word64 norm_64;
-    Word16 magic_headroom;
+    Word32 norm_x, f, r;
+    Word16 norm_x_e, f_e, r_e;
+    Word32 L_temp;
+    Word16 L_temp_e;
+
+    /* Setting values to 0 */
     ( *g ) = 0;
     ( *g_e ) = 0;
     move32();
     move16();
-    norm_x = 0;
-    move32();
+
     IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */
     {
-
-        Word32 tmp;
-        norm_64 = 0;
+        Word64 temp = 0;
         move64();
+        norm_x = 0;
+        norm_x_e = 0;
+        Word16 max_e = MIN_16;
+        move16();
         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[jCh][currChannel], MAGIC_HEADROOM_1 ) ); // q(sing)-H1			// exp(sing)+H1
-            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );                                     // q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
+            max_e = s_max( max_e, singularVectors_e[jCh][currChannel] );
         }
-        norm_x_e = W_norm( norm_64 );
-        magic_headroom = sub( 16, shr( norm_x_e, 2 ) );
-        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
-    }
-    IF( norm_x )
-    {
-        Word32 factor2;
-        Word16 tmp_e;
-        Word64 tmpmul;
-
-        Word64 r_64;
-        Word32 r, invVal;
-        Word16 r_e, invVal_e;
 
-        ( *g_e ) = add( sub( add( MAGIC_HEADROOM_1, MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1)
-        move16();
-        ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2)
-        IF( GE_64( singularVectors_Left_64[currChannel][currChannel], 0 ) )
+        FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
         {
-            ( *g ) = L_negate( *g );
+            temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors_e[jCh][currChannel] ), 1 ) ) );
         }
-        factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][currChannel], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
-        tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, ( *g_e ) );
-        tmpmul = W_mult0_32_32( ( *g ), factor2 ); // q(tmpmul)=q(g)+q(factor2) --> q(tmpmul) ~= q(norm)
-        tmpmul = W_shr( tmpmul, tmp_e );           // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
-        r_64 = W_sub( tmpmul, norm_64 );           // q(r_64)=max(q(tmpmul),q(norm))
-        r_e = W_norm( r_64 );
-        r = W_extract_h( W_shl( r_64, r_e ) );
-
-        invVal_e = 0;
-        move16();
-        invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r)
-
 
-        tmp_e = sub( 32, *g_e );
-        singularVectors_Left_64[currChannel][currChannel] = W_sub( singularVectors_Left_64[currChannel][currChannel], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r))
+        Word16 nrm = W_norm( temp );
+        nrm = sub( nrm, 32 );
+        norm_x = W_shl_sat_l( temp, nrm );
+        norm_x_e = sub( add( max_e, max_e ), nrm );
 
-        FOR( iCh = add( currChannel, 1 ); iCh < nChannelsC; iCh++ )
+        IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
         {
-            Word32 factor1;
-            Word32 factor2;
-            Word32 f; // = norm / r
-            Word16 magic_shift;
+            Word16 invVal_e;
+            Word32 invVal;
 
-            norm_64 = 0;
-            for ( jCh = currChannel; jCh < nChannelsL; jCh++ )
+            L_temp_e = norm_x_e;
+            move16();
+            if (0)
+                L_temp = ISqrt32( norm_x, &L_temp_e );
+            else
+                L_temp = Sqrt32( norm_x, &L_temp_e );
+            //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
+            if ( singularVectors[currChannel][currChannel] >= 0 )
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor1) = q(sing)-H3
-                factor2 = W_extract_h( W_shl( singularVectors_Left_64[jCh][iCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor2) = q(sing)-H3
-                norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                            // q(norm)=2*q(sing)-2*H3
+                L_temp = L_negate( L_temp );
             }
-            norm_x_e = W_norm( norm_64 );
-            norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
-            f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
-                                                                //            magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
-            magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) );
-            magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
-            magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
-            magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );
-            FOR( jCh = currChannel; jCh < nChannelsL; jCh++ )
+            ( *g ) = L_temp;
+            move32();
+            *g_e = L_temp_e;
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e );                                          /* exp(r_e) */
+            singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
+            move32();
+            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
+
+            FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
             {
-                factor1 = W_extract_h( W_shl( singularVectors_Left_64[jCh][currChannel], sub( 32, MAGIC_HEADROOM_4 ) ) );
-                singularVectors_Left_64[jCh][iCh] = W_add( singularVectors_Left_64[jCh][iCh], W_shr( W_mult0_32_32( f, factor1 ), magic_shift ) );
+                Word16 max2_e = MIN_16;
+                max_e = MIN_16;
+                move16();
+                move16();
+                temp = 0;
+                move64();
+
+                FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
+                {
+                    max_e = s_max( max_e, singularVectors_e[jCh][currChannel] ); /* exp(norm_x_e) */
+                    max2_e = s_max( max2_e, singularVectors_e[jCh][iCh] );       /* exp(norm_x_e) */
+                }
+                max_e = add( max_e, max2_e );
+
+                FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
+                {
+                    temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors_e[jCh][currChannel], singularVectors_e[jCh][iCh] ) ) ) );
+                }
+                Word16 nrm = W_norm( temp );
+                nrm = sub( nrm, 32 );
+                norm_x = W_shl_sat_l( temp, nrm );
+                norm_x_e = sub( max_e, nrm );
+
+                f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+                f_e = add( invVal_e, sub( norm_x_e, r_e ) );
+
+                FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
+                {
+                    singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] );
+                    move32();
+                }
             }
         }
     }
+    return;
 }
 
-/*-------------------------------------------------------------------------
- * biDiagonalReductionRight()
- *
- *
- *-------------------------------------------------------------------------*/
-
-static void biDiagonalReductionRight_64(
-    Word64 singularVectors_Left_64[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS],
+static void biDiagonalReductionRight_fx(
+    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* exp(singularVectors_e) */
+    Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS],
     const Word16 nChannelsL,  /* Q0 */
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
     Word32 *g,                /* Q31 */
-    Word16 *g_e )
+    Word16 *g_e
+)
 {
-    Word16 iCh, jCh;
-    Word32 norm_x;
-    Word16 norm_x_e;
-    Word64 norm_64;
-    Word16 idx;
-    Word16 magic_headroom;
-
+    Word16 iCh, jCh, idx;
+    Word32 norm_x, r;
+    Word16 norm_x_e, r_e;
+    Word32 L_temp;
+    Word16 L_temp_e;
 
+    /* Setting values to 0 */
     ( *g ) = 0;
     ( *g_e ) = 0;
     move32();
     move16();
     IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
     {
-        norm_64 = 0;
-        move64();
-        idx = add( currChannel, 1 );
-        FOR( jCh = idx; jCh < nChannelsC; jCh++ )
+        idx = add( currChannel, 1 ); /* Q0 */
+
+        norm_x = 0;
+        move32();
+        norm_x_e = 0;
+        move16();
+        FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
         {
-            Word32 tmp;
-            tmp = W_extract_l( W_shr( singularVectors_Left_64[currChannel][jCh], MAGIC_HEADROOM_1 ) ); // q(sing)-H1			// exp(sing)+H1
-            norm_64 = W_add( norm_64, W_mult0_32_32( tmp, tmp ) );                                     // q(norm)=2*q(sing)-2*H1	// exp(norm)=2*exp(sing)+2*H1
+            norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
         }
-        norm_x_e = W_norm( norm_64 );
-        magic_headroom = sub( 16, shr( norm_x_e, 2 ) );
-        norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // q(norm_x)=32-exp(norm_x)	exp(norm_x)=exp(norm)-32
-        move16();
 
-        IF( norm_x )
+        IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
         {
-            Word32 factor1;
-            Word32 factor2;
-            Word16 tmp_e;
-            Word64 tmpmul;
-            Word16 magic_shift;
-            Word64 r_64;
-            Word32 r;
-            Word16 r_e;
-            Word32 f;
+            Word16 invVal_e, temp_e;
             Word32 invVal;
-            Word16 invVal_e;
 
-            ( *g_e ) = add( sub( ( MAGIC_HEADROOM_1 + MAGIC_HEADROOM_1 ), norm_x_e ), 1 ); // exp(g)=(2*H1-exp(norm_x)+1)
+            L_temp_e = norm_x_e;
             move16();
-            ( *g ) = Sqrt32( norm_x, g_e ); // --> exp(g)=((2*H1-exp(norm_x)+1)/2)
-            IF( GE_64( singularVectors_Left_64[currChannel][idx], 0 ) )
+            L_temp = Sqrt32( norm_x, &L_temp_e );
+            //L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
+            IF( singularVectors[currChannel][idx] >= 0 )
             {
-                ( *g ) = L_negate( *g );
+                ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
+                move32();
             }
+            ELSE
+            {
+                ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
+                move32();
+            }
+            *g_e = L_temp_e;
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + (*g_e), -norm_x, norm_x_e, &r_e );                                  /* exp(r_e) */
+            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */
             move32();
-            move16();
-            factor2 = W_extract_l( W_shr( singularVectors_Left_64[currChannel][idx], MAGIC_HEADROOM_2 ) ); // q(factor2)=q(sing)-H2	exp(factor2)=exp(qsing)+H2
-            tmp_e = sub( 2 * MAGIC_HEADROOM_1 - MAGIC_HEADROOM_2, *g_e );
-            tmpmul = W_mult0_32_32( *g, factor2 ); // q(tmpmul)=q(g)+q(factor2)
-            tmpmul = W_shr( tmpmul, tmp_e );       // --> q(tmpmul)=q(g)+q(factor2)-(2*H1-H2-q(g))
-            r_64 = W_sub( tmpmul, norm_64 );       // q(r_64)=max(q(tmpmul),q(norm))
-            r_e = W_norm( r_64 );
-            r = W_extract_h( W_shl( r_64, r_e ) );
-
-            invVal_e = 0;
-            move16();
-            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e ); // invVal=1/r --> q(invVal)=-q(r)
-            tmp_e = sub( 32, *g_e );
-            singularVectors_Left_64[currChannel][idx] = W_sub( singularVectors_Left_64[currChannel][idx], W_shr( W_deposit32_h( *g ), tmp_e ) ); // q(sing)=max(q(sing),q(r)-(2*H1-H2-exp(r)))
 
-            FOR( iCh = idx; iCh < nChannelsL; iCh++ )
-            {
+            invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
 
-                norm_64 = 0;
-                move64();
-                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
+            FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /*  nChannelsL */
+            {
+                norm_x = 0;
+                move32();
+                norm_x_e = 0;
+                move16();
+                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
                 {
-                    factor1 = W_extract_h( W_shl( singularVectors_Left_64[iCh][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) );         // q(factor1) = q(sing)-H3
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_3 ) ) ); // q(factor2) = q(sing)-H3
-                    norm_64 = W_add( norm_64, W_mult0_32_32( factor1, factor2 ) );                                            // q(norm)=2*q(sing)-2*H3
+                    norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[iCh][jCh], singularVectors[currChannel][jCh] ), add( singularVectors_e[iCh][jCh], singularVectors_e[currChannel][jCh] ), &norm_x_e ); /* exp(norm_x_e) */
                 }
 
-                norm_x_e = W_norm( norm_64 );
-                norm_x = W_extract_h( W_shl( norm_64, norm_x_e ) ); // Note: different norm
-                f = Mpy_32_32( norm_x, invVal );                    // q(f)=q(norm_x)-q(invVal)
-                // magic_shift = ( norm_x_e - 2 * MAGIC_HEADROOM_3 ) - ( r_e - 2 * MAGIC_HEADROOM_1 ) + ( 32 - MAGIC_HEADROOM_4 ) - 2 * invVal_e;
-                magic_shift = sub( norm_x_e, shl( MAGIC_HEADROOM_3, 1 ) );
-                magic_shift = sub( magic_shift, sub( r_e, ( shl( MAGIC_HEADROOM_1, 1 ) ) ) );
-                magic_shift = add( magic_shift, sub( 32, MAGIC_HEADROOM_4 ) );
-                magic_shift = sub( magic_shift, shl( invVal_e, 1 ) );
+                norm_x = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
+                norm_x_e = add( invVal_e, sub( norm_x_e, r_e ) );
 
-                FOR( jCh = idx; jCh < nChannelsC; jCh++ )
+                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
                 {
-                    factor2 = W_extract_h( W_shl( singularVectors_Left_64[currChannel][jCh], sub( 32, MAGIC_HEADROOM_4 ) ) );
-                    singularVectors_Left_64[iCh][jCh] = W_add( singularVectors_Left_64[iCh][jCh], W_shr( W_mult0_32_32( f, factor2 ), magic_shift ) );
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */
+                    move32();
                 }
             }
         }
     }
+
+    return;
 }
 #else
 /*-------------------------------------------------------------------------
-- 
GitLab


From 21121559e76f8edc9ad3f579d6b09908165fb76b Mon Sep 17 00:00:00 2001
From: naghibza <mohammadreza.naghibzadeh.tahamizarandi@iis.fraunhofer.de>
Date: Wed, 6 Aug 2025 18:52:33 +0200
Subject: [PATCH 25/33] Applied clang formatting patch.

---
 lib_dec/ivas_svd_dec_fx.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 7f341d76a..2ceedc4e3 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -875,7 +875,7 @@ static void HouseholderReduction_fx(
             move16();
         }
     }
-    
+
     FOR( nCh = 0; nCh < nChannelsC; nCh++ )
     {
         biDiagonalReductionLeft_fx(
@@ -960,11 +960,11 @@ static void HouseholderReduction_fx(
  *
  *-------------------------------------------------------------------------*/
 static void biDiagonalReductionLeft_fx(
-    Word32 singularVectors[][MAX_OUTPUT_CHANNELS],                            /* exp(singularVectors_e) */
+    Word32 singularVectors[][MAX_OUTPUT_CHANNELS],   /* exp(singularVectors_e) */
     Word16 singularVectors_e[][MAX_OUTPUT_CHANNELS], /* Q0 */
     const Word16 nChannelsL,
-    const Word16 nChannelsC,                                                  /* Q0 */
-    const Word16 currChannel,                                                 /* Q0 */
+    const Word16 nChannelsC,  /* Q0 */
+    const Word16 currChannel, /* Q0 */
     Word32 *g,
     Word16 *g_e )
 {
@@ -1010,10 +1010,7 @@ static void biDiagonalReductionLeft_fx(
 
             L_temp_e = norm_x_e;
             move16();
-            if (0)
-                L_temp = ISqrt32( norm_x, &L_temp_e );
-            else
-                L_temp = Sqrt32( norm_x, &L_temp_e );
+            L_temp = Sqrt32( norm_x, &L_temp_e );
             //( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
             if ( singularVectors[currChannel][currChannel] >= 0 )
             {
@@ -1022,7 +1019,7 @@ static void biDiagonalReductionLeft_fx(
             ( *g ) = L_temp;
             move32();
             *g_e = L_temp_e;
-            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e );                                          /* exp(r_e) */
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e );                                             /* exp(r_e) */
             singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
             move32();
             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
@@ -1073,8 +1070,7 @@ static void biDiagonalReductionRight_fx(
     const Word16 nChannelsC,  /* Q0 */
     const Word16 currChannel, /* Q0 */
     Word32 *g,                /* Q31 */
-    Word16 *g_e
-)
+    Word16 *g_e )
 {
     Word16 iCh, jCh, idx;
     Word32 norm_x, r;
@@ -1108,7 +1104,7 @@ static void biDiagonalReductionRight_fx(
             L_temp_e = norm_x_e;
             move16();
             L_temp = Sqrt32( norm_x, &L_temp_e );
-            //L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
+            // L_temp = L_shl_r( L_temp, L_temp_e ); // Q31
             IF( singularVectors[currChannel][idx] >= 0 )
             {
                 ( *g ) = L_negate( L_temp ); /* exp(L_temp_e) */
@@ -1120,7 +1116,7 @@ static void biDiagonalReductionRight_fx(
                 move32();
             }
             *g_e = L_temp_e;
-            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + (*g_e), -norm_x, norm_x_e, &r_e );                                  /* exp(r_e) */
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e );                             /* exp(r_e) */
             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */
             move32();
 
-- 
GitLab


From 24193cc17d4127c66b2e0b209cb6b84f303a1827 Mon Sep 17 00:00:00 2001
From: naghibza <mohammadreza.naghibzadeh.tahamizarandi@iis.fraunhofer.de>
Date: Wed, 6 Aug 2025 19:26:29 +0200
Subject: [PATCH 26/33] Removed unused variable.

---
 lib_dec/ivas_svd_dec_fx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 2ceedc4e3..135c76061 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1098,7 +1098,7 @@ static void biDiagonalReductionRight_fx(
 
         IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
         {
-            Word16 invVal_e, temp_e;
+            Word16 invVal_e;
             Word32 invVal;
 
             L_temp_e = norm_x_e;
-- 
GitLab


From 050eb9b5a18d2e12dce278b8d511ea839eb4d3b4 Mon Sep 17 00:00:00 2001
From: naghibza <mohammadreza.naghibzadeh.tahamizarandi@iis.fraunhofer.de>
Date: Thu, 7 Aug 2025 12:34:14 +0200
Subject: [PATCH 27/33] Scale singularVectors_Left_fx to preserve precision in
 SVD calculation.

---
 lib_dec/ivas_svd_dec_fx.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 135c76061..31dbd7dfa 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -866,12 +866,18 @@ static void HouseholderReduction_fx(
     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 
 #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
-
+    Word16 sc = 0;
+    sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC );
+    FOR( jCh = 1; jCh < nChannelsL; jCh++ )
+    {
+        sc = s_min( sc, getScaleFactor32( singularVectors_Left_fx[jCh], nChannelsC ) );
+    }
     FOR( jCh = 0; jCh < nChannelsL; jCh++ )
     {
+        Scale_sig32( singularVectors_Left_fx[jCh], nChannelsC, sc );
         FOR( iCh = 0; iCh < nChannelsC; iCh++ )
         {
-            singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e;
+            singularVectors_Left_fx_e[jCh][iCh] = singularVectors_Left_e - sc;
             move16();
         }
     }
@@ -1003,7 +1009,7 @@ static void biDiagonalReductionLeft_fx(
         norm_x = W_shl_sat_l( temp, nrm );
         norm_x_e = sub( add( max_e, max_e ), nrm );
 
-        IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
+        IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
         {
             Word16 invVal_e;
             Word32 invVal;
@@ -1096,7 +1102,7 @@ static void biDiagonalReductionRight_fx(
             norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[currChannel][jCh], singularVectors[currChannel][jCh] ), shl( singularVectors_e[currChannel][jCh], 1 ), &norm_x_e ); /* exp(norm_x_e) */
         }
 
-        IF( ( norm_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
+        IF( norm_x ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
         {
             Word16 invVal_e;
             Word32 invVal;
-- 
GitLab


From 131ba565fcb0a2687e98f49ecec996dabcb24e25 Mon Sep 17 00:00:00 2001
From: naghibza <mohammadreza.naghibzadeh.tahamizarandi@iis.fraunhofer.de>
Date: Fri, 8 Aug 2025 16:08:19 +0200
Subject: [PATCH 28/33] Add/remove some move32/16 functions.

---
 lib_dec/ivas_svd_dec_fx.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 31dbd7dfa..98ff48342 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -867,6 +867,7 @@ static void HouseholderReduction_fx(
 
 #ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
     Word16 sc = 0;
+    move16();
     sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC );
     FOR( jCh = 1; jCh < nChannelsL; jCh++ )
     {
@@ -981,8 +982,8 @@ static void biDiagonalReductionLeft_fx(
     Word16 L_temp_e;
 
     /* Setting values to 0 */
-    ( *g ) = 0;
-    ( *g_e ) = 0;
+    *g = 0;
+    *g_e = 0;
     move32();
     move16();
 
@@ -991,7 +992,9 @@ static void biDiagonalReductionLeft_fx(
         Word64 temp = 0;
         move64();
         norm_x = 0;
+        move32();
         norm_x_e = 0;
+        move16();
         Word16 max_e = MIN_16;
         move16();
         FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
@@ -1021,13 +1024,15 @@ static void biDiagonalReductionLeft_fx(
             if ( singularVectors[currChannel][currChannel] >= 0 )
             {
                 L_temp = L_negate( L_temp );
+                move32();
             }
-            ( *g ) = L_temp;
+            *g = L_temp;
             move32();
             *g_e = L_temp_e;
-            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + L_temp_e, -norm_x, norm_x_e, &r_e );                                             /* exp(r_e) */
+            move16();
+
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e );                                                 /* exp(r_e) */
             singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
-            move32();
             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
 
             FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
@@ -1061,7 +1066,6 @@ static void biDiagonalReductionLeft_fx(
                 FOR( jCh = currChannel; jCh < nChannelsL; jCh++ ) /* nChannelsL */
                 {
                     singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], singularVectors_e[jCh][iCh], Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, singularVectors_e[jCh][currChannel] ), &singularVectors_e[jCh][iCh] );
-                    move32();
                 }
             }
         }
@@ -1085,8 +1089,8 @@ static void biDiagonalReductionRight_fx(
     Word16 L_temp_e;
 
     /* Setting values to 0 */
-    ( *g ) = 0;
-    ( *g_e ) = 0;
+    *g = 0;
+    *g_e = 0;
     move32();
     move16();
     IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
@@ -1118,13 +1122,14 @@ static void biDiagonalReductionRight_fx(
             }
             ELSE
             {
-                ( *g ) = L_negate( L_negate( L_temp ) ); /* exp(L_temp_e) */
+                ( *g ) = L_temp; /* exp(L_temp_e) */
                 move32();
             }
             *g_e = L_temp_e;
+            move16();
+
             r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), singularVectors_e[currChannel][idx] + ( *g_e ), -norm_x, norm_x_e, &r_e );                             /* exp(r_e) */
             singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], singularVectors_e[currChannel][idx], -( *g ), *g_e, &singularVectors_e[currChannel][idx] ); /* exp(sing_exp) */
-            move32();
 
             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
 
@@ -1145,7 +1150,6 @@ static void biDiagonalReductionRight_fx(
                 FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*  nChannelsC */
                 {
                     singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp( singularVectors[iCh][jCh], singularVectors_e[iCh][jCh], Mpy_32_32( norm_x, singularVectors[currChannel][jCh] ), add( norm_x_e, singularVectors_e[currChannel][jCh] ), &singularVectors_e[iCh][jCh] ); /* exp(sing_exp2) */
-                    move32();
                 }
             }
         }
-- 
GitLab


From c38548bc1345eebad7066bc55324e6b16b847781 Mon Sep 17 00:00:00 2001
From: naghibza <mohammadreza.naghibzadeh.tahamizarandi@iis.fraunhofer.de>
Date: Fri, 8 Aug 2025 16:12:41 +0200
Subject: [PATCH 29/33] Applied clang formatting patch.

---
 lib_dec/ivas_svd_dec_fx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 98ff48342..281e8c3ea 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -1031,7 +1031,7 @@ static void biDiagonalReductionLeft_fx(
             *g_e = L_temp_e;
             move16();
 
-            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e );                                                 /* exp(r_e) */
+            r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][currChannel] ), singularVectors_e[currChannel][currChannel] + ( *g_e ), -norm_x, norm_x_e, &r_e );                                             /* exp(r_e) */
             singularVectors[currChannel][currChannel] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][currChannel], singularVectors_e[currChannel][currChannel], -( *g ), *g_e, &singularVectors_e[currChannel][currChannel] ); /* sing_exp */
             invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( r ), &invVal_e );
 
-- 
GitLab


From 83a9031f5d7dbbec474a7095380d5bc758c27f8a Mon Sep 17 00:00:00 2001
From: naghibza <mohammadreza.naghibzadeh.tahamizarandi@iis.fraunhofer.de>
Date: Fri, 8 Aug 2025 17:41:36 +0200
Subject: [PATCH 30/33] Added one bit headroom to cx_fx calculation.

---
 lib_dec/ivas_mc_param_dec_fx.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib_dec/ivas_mc_param_dec_fx.c b/lib_dec/ivas_mc_param_dec_fx.c
index 230c210b7..6d832ba45 100644
--- a/lib_dec/ivas_mc_param_dec_fx.c
+++ b/lib_dec/ivas_mc_param_dec_fx.c
@@ -1799,8 +1799,22 @@ void ivas_param_mc_dec_digest_tc_fx(
         test();
         IF( hParamMC->hMetadataPMC->bAttackPresent && ( EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_LS_CONV_COV ) || EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_MONO_STEREO ) ) )
         {
+#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
+            Word16 len = imult1616( nchan_transport, nchan_transport );
+            Word16 sc = s_min( getScaleFactor32( cx_fx, len ), getScaleFactor32( cx_next_band_fx, len ) );
+            IF( EQ_16( sc, 0 ) )
+            {
+                Scale_sig32( cx_fx, len, -Q1 );           // add one bit head room
+                Scale_sig32( cx_next_band_fx, len, -Q1 ); // add one bit head room
+                cx_e = add( cx_e, Q1 );
+                cx_next_band_e = add( cx_next_band_e, Q1 ); // own exponent + 1; cx_e was already incremented above
+            }
+            v_add_fx( cx_fx, cx_next_band_fx, cx_fx, len );
+            Copy32( cx_fx, cx_next_band_fx, len );
+#else
             v_add_fx( cx_fx, cx_next_band_fx, cx_fx, imult1616( nchan_transport, nchan_transport ) );
             Copy32( cx_fx, cx_next_band_fx, imult1616( nchan_transport, nchan_transport ) );
+#endif
         }
 
         FOR( is_next_band = 0; is_next_band < 2; is_next_band++ )
-- 
GitLab


From 95ba2f481a14440634ca6f6e28136e104f9c1742 Mon Sep 17 00:00:00 2001
From: Dominik Weckbecker <dominik.weckbecker@iis.fraunhofer.de>
Date: Fri, 15 Aug 2025 11:10:18 +0200
Subject: [PATCH 31/33] name the svd-optimizations define consistently

---
 lib_com/options.h              | 4 ++--
 lib_dec/ivas_mc_param_dec_fx.c | 2 +-
 lib_dec/ivas_svd_dec_fx.c      | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index 1f2893b1b..0b037291c 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -97,7 +97,7 @@
 #define FIX_ISSUE_1817_REPLACE_CARRY_OVERFLOW  /* FhG: bit-exact, replace carry and overflow operations by 64-bit operations, MR 1931 */
 #define FIX_1844_MISSING_FREE                                /* FhG: add missing free in ivas_binRenderer_convModuleClose_fx() */
 
-#define	MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE /* FhG: reduce WMOPS of HouseHolderReduction() in ivas_svd_dec.c() by removing redundant mathematics and using 64 bit additions.*/
+#define	NONBE_SVD_OPTIMIZATION /* FhG: reduce WMOPS of HouseholderReduction_fx() in ivas_svd_dec_fx.c by removing redundant computations and using 64-bit additions. */
 /* #################### Start BASOP porting switches ############################ */
 
 #define FIX_1372_ISAR_POST_REND
@@ -142,7 +142,7 @@
 #define NONBE_1360_LFE_DELAY                           /* Dlb: LFE delay alignment when rendering in CLDFB domain*/
 
 #define NONBE_1229_FIX_ISM1_DPID                        /* Eri: issue 1229: fix bug causing ISM 1 to use default -dpid instead of the specified one */
-#define MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE 
+#define NONBE_SVD_OPTIMIZATION 
 
 /* #################### End BASOP porting switches ############################ */
 
diff --git a/lib_dec/ivas_mc_param_dec_fx.c b/lib_dec/ivas_mc_param_dec_fx.c
index 6d832ba45..73ba585d1 100644
--- a/lib_dec/ivas_mc_param_dec_fx.c
+++ b/lib_dec/ivas_mc_param_dec_fx.c
@@ -1799,7 +1799,7 @@ void ivas_param_mc_dec_digest_tc_fx(
         test();
         IF( hParamMC->hMetadataPMC->bAttackPresent && ( EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_LS_CONV_COV ) || EQ_32( hParamMC->synthesis_conf, PARAM_MC_SYNTH_MONO_STEREO ) ) )
         {
-#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
+#ifdef NONBE_SVD_OPTIMIZATION
             Word16 len = imult1616( nchan_transport, nchan_transport );
             Word16 sc = s_min( getScaleFactor32( cx_fx, len ), getScaleFactor32( cx_next_band_fx, len ) );
             IF( EQ_16( sc, 0 ) )
diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 281e8c3ea..4bd0ae06e 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -64,7 +64,7 @@ static void HouseholderReduction_fx(
     const Word16 nChannelsC, /* Q0 */
     Word32 *eps_x_fx,        /* exp(eps_x_fx_e) */
     Word16 *eps_x_fx_e );
-#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
+#ifdef NONBE_SVD_OPTIMIZATION
 
 static void biDiagonalReductionLeft_fx(
     Word32 singularVectors[][MAX_OUTPUT_CHANNELS],  /* exp(singularVectors_e) */
@@ -841,7 +841,7 @@ static void HouseholderReduction_fx(
     Word16 *eps_x_fx_e )
 {
     Word16 nCh;
-#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
+#ifdef NONBE_SVD_OPTIMIZATION
 
     Word32 g_left_fx = 0;
     Word16 g_left_e = 0;
@@ -865,7 +865,7 @@ static void HouseholderReduction_fx(
     Word16 iCh, jCh;
     Word16 singularVectors_Left_fx_e[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
 
-#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
+#ifdef NONBE_SVD_OPTIMIZATION
     Word16 sc = 0;
     move16();
     sc = getScaleFactor32( singularVectors_Left_fx[0], nChannelsC );
@@ -960,7 +960,7 @@ static void HouseholderReduction_fx(
     return;
 }
 
-#ifdef MERGE_REQUEST_1926_SPEEDUP_ivas_svd_dec_fx_NONBE
+#ifdef NONBE_SVD_OPTIMIZATION
 /*-------------------------------------------------------------------------
  * biDiagonalReductionLeft()
  *
-- 
GitLab


From e3a7fe8d4e9234690caed3bd382b4ccdf308f05e Mon Sep 17 00:00:00 2001
From: Dominik Weckbecker <dominik.weckbecker@iis.fraunhofer.de>
Date: Fri, 15 Aug 2025 12:25:40 +0200
Subject: [PATCH 32/33] reorder code in HouseholderReduction_fx to match the
 float code

---
 lib_dec/ivas_svd_dec_fx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index 4bd0ae06e..ded17e005 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -885,6 +885,10 @@ static void HouseholderReduction_fx(
 
     FOR( nCh = 0; nCh < nChannelsC; nCh++ )
     {
+        secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
+        move32();
+        secDiag_fx_e[nCh] = g_right_e;
+        
         biDiagonalReductionLeft_fx(
             singularVectors_Left_fx,
             singularVectors_Left_fx_e,
@@ -898,10 +902,6 @@ static void HouseholderReduction_fx(
         move32();
         singularValues_fx_e[nCh] = g_left_e;
 
-        secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
-        move32();
-        secDiag_fx_e[nCh] = g_right_e;
-
         biDiagonalReductionRight_fx(
             singularVectors_Left_fx,
             singularVectors_Left_fx_e,
-- 
GitLab


From 6e01c1fc420d0f71f958adf2518c28a54beeedc4 Mon Sep 17 00:00:00 2001
From: Dominik Weckbecker <dominik.weckbecker@iis.fraunhofer.de>
Date: Fri, 15 Aug 2025 13:13:54 +0200
Subject: [PATCH 33/33] fix formatting

---
 lib_dec/ivas_svd_dec_fx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c
index ded17e005..6471becde 100644
--- a/lib_dec/ivas_svd_dec_fx.c
+++ b/lib_dec/ivas_svd_dec_fx.c
@@ -888,7 +888,7 @@ static void HouseholderReduction_fx(
         secDiag_fx[nCh] = g_right_fx; /* from the previous channel */
         move32();
         secDiag_fx_e[nCh] = g_right_e;
-        
+
         biDiagonalReductionLeft_fx(
             singularVectors_Left_fx,
             singularVectors_Left_fx_e,
-- 
GitLab